From 152cc125da118e5abbddc6242a67d152b2e4353d Mon Sep 17 00:00:00 2001 From: John Ferlito Date: Wed, 3 Jul 2024 00:21:49 +1000 Subject: [PATCH] Switch from XML to rocrate and switch from proxyist to direct s3 access --- Dockerfile | 2 +- Gemfile | 1 + Gemfile.lock | 8 ++ README.md | 1 - app/controllers/essences_controller.rb | 6 +- app/controllers/items_controller.rb | 29 ++-- app/controllers/repository_controller.rb | 38 +---- app/models/item_content_language.rb | 2 +- .../catalog_db_sync_validator_service.rb | 2 - app/services/catalog_metadata_service.rb | 9 +- .../collection_destruction_service.rb | 17 +-- app/services/essence_destruction_service.rb | 11 +- app/services/item_destruction_service.rb | 6 +- .../admin/file_processing/_paths.html.haml | 2 +- app/views/api/v1/oni/object_meta.json.jb | 3 - app/views/items/show.html.haml | 16 ++- cdk/lib/app-stack.ts | 42 +----- config/application.rb | 9 +- config/routes.rb | 4 +- docker-compose.yml | 9 +- docker/nginx.conf-dev | 10 -- docker/proxyist.Dockerfile | 5 - docker/proxyist.config.js | 35 ----- docker/proxyist.config.prod.js | 25 ---- lib/nabu/catalog.rb | 130 ++++++++++++++++++ lib/proxyist.rb | 83 ----------- .../essence_destruction_service_spec.rb | 10 +- 27 files changed, 207 insertions(+), 308 deletions(-) delete mode 100644 docker/proxyist.Dockerfile delete mode 100644 docker/proxyist.config.js delete mode 100644 docker/proxyist.config.prod.js create mode 100644 lib/nabu/catalog.rb delete mode 100644 lib/proxyist.rb diff --git a/Dockerfile b/Dockerfile index f88da3b7..c54c00e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,7 @@ RUN echo $GIT_SHA > REVISION RUN bundle exec bootsnap precompile app/ lib/ # Precompiling assets for production without requiring secret RAILS_MASTER_KEY -RUN ASSET_PRECOMPILE=1 SECRET_KEY_BASE_DUMMY=1 PROXYIST_URL=dummy ./bin/rails assets:precompile +RUN ASSET_PRECOMPILE=1 SECRET_KEY_BASE_DUMMY=1 NABU_CATALOG_BUCKET=dummy ./bin/rails assets:precompile # Final stage for app image FROM base diff 
--git a/Gemfile b/Gemfile index cb47a55a..d90d97a8 100644 --- a/Gemfile +++ b/Gemfile @@ -100,6 +100,7 @@ gem 'paper_trail' # Keep an audit trail of all the changes # Background processing gem 'aws-sdk-rails' # Send emails via SES +gem 'aws-sdk-s3' # Talk to the catalog gem 'daemons' # Needed by delayed_job gem 'delayed_job_active_record' # Delay jobs and queue them in the database diff --git a/Gemfile.lock b/Gemfile.lock index b24a699a..38a61e1c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -104,6 +104,9 @@ GEM aws-sdk-dynamodb (1.108.0) aws-sdk-core (~> 3, >= 3.193.0) aws-sigv4 (~> 1.1) + aws-sdk-kms (1.82.0) + aws-sdk-core (~> 3, >= 3.193.0) + aws-sigv4 (~> 1.1) aws-sdk-rails (3.12.0) aws-record (~> 2) aws-sdk-ses (~> 1, >= 1.50.0) @@ -112,6 +115,10 @@ GEM aws-sessionstore-dynamodb (~> 2) concurrent-ruby (~> 1) railties (>= 5.2.0) + aws-sdk-s3 (1.151.0) + aws-sdk-core (~> 3, >= 3.194.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.8) aws-sdk-ses (1.61.0) aws-sdk-core (~> 3, >= 3.193.0) aws-sigv4 (~> 1.1) @@ -635,6 +642,7 @@ DEPENDENCIES activeadmin annotaterb aws-sdk-rails + aws-sdk-s3 bootsnap cancancan capybara diff --git a/README.md b/README.md index 24b150f1..95bc1693 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ docker compose up This brings up the following containers * app - the rails app * search - Solr instance for search (dev + test) -* proxyist - S3 proxy * db - mysql data base (dev + test) * s3 - s3 mock diff --git a/app/controllers/essences_controller.rb b/app/controllers/essences_controller.rb index fbb2fce6..bfab1928 100644 --- a/app/controllers/essences_controller.rb +++ b/app/controllers/essences_controller.rb @@ -1,5 +1,3 @@ -require 'proxyist' - class EssencesController < ApplicationController load_and_authorize_resource :collection, find_by: :identifier, except: [:list_mimetypes] load_and_authorize_resource :item, find_by: :identifier, through: :collection, except: [:list_mimetypes] @@ -26,14 +24,14 @@ def show def download 
Download.create! user: current_user, essence: @essence - location = Proxyist.get_object(@essence.item.full_identifier, @essence.filename, download: true) + location = Nabu::Catalog.instance.essence_url(@essence, as_attachment: true) raise ActionController::RoutingError, 'Essence file not found' unless location redirect_to location, allow_other_host: true end def display - location = Proxyist.get_object(@essence.item.full_identifier, @essence.filename) + location = Nabu::Catalog.instance.essence_url(@essence) raise ActionController::RoutingError, 'Essence file not found' unless location redirect_to location, allow_other_host: true diff --git a/app/controllers/items_controller.rb b/app/controllers/items_controller.rb index a09c5404..7ae167d1 100644 --- a/app/controllers/items_controller.rb +++ b/app/controllers/items_controller.rb @@ -181,13 +181,29 @@ def bulk_update redirect_to bulk_update_items_path + "?#{params[:original_search_params]}" end - def display - location = Proxyist.get_object(@item.full_identifier, "#{@item.full_identifier}-CAT-PDSC_ADMIN.xml") + def s3_rocrate + location = Nabu::Catalog.instance.item_admin_url(@item, 'ro-crate-metadata.json') raise ActionController::RoutingError, 'PDSC file not found' unless location redirect_to location, allow_other_host: true end + def private_rocrate + @data = @item + @is_item = true + @admin_rocrate = true + + render template: 'api/v1/oni/object_meta', formats: [:json], handlers: [:jb] + end + + def public_rocrate + @data = @item + @is_item = true + @admin_rocrate = false + + render template: 'api/v1/oni/object_meta', formats: [:json], handlers: [:jb] + end + def new_report @page_title = 'Nabu - Depositor Item Report Request' end @@ -252,17 +268,10 @@ def data video_values[essence_basename] ||= [] video_values[essence_basename] << repository_essence_url when 'jpg', 'jpeg', 'png' - thumbnail_url = repository_essence_url.gsub(".#{essence_extension}", '-thumb-PDSC_ADMIN.jpg') - - # Copied from Essence#path and 
Essence#full_identifier. - thumbnail_exists = Proxyist.exists?(essence.item.identifier, File.basename(thumbnail_url)) - thumbnail_url = nil unless thumbnail_exists - # REQUIREMENTS: There are scenarios where multiple originals have the same essence basename. Is that ok as far as the player is concerned? unless images_values.key?(essence_basename) images_values[essence_basename] = { - 'originals' => [], - 'thumbnail' => thumbnail_url + 'originals' => [] } end images_values[essence_basename]['originals'] << repository_essence_url diff --git a/app/controllers/repository_controller.rb b/app/controllers/repository_controller.rb index c5554add..c274ce2f 100644 --- a/app/controllers/repository_controller.rb +++ b/app/controllers/repository_controller.rb @@ -31,43 +31,11 @@ def essence essence = item.essences.find_by(filename: params[:essence_filename]) raise ActionController::RoutingError, "Essence not found: #{params[:essence_filename]}" if essence.nil? - # if a standard essence file was found, return that as usual - if essence.present? - authorize! :read, essence - location = Proxyist.get_object(essence.item.full_identifier, essence.filename, download: true) - raise ActionController::RoutingError, 'Essence file not found' unless location - - redirect_to location, allow_other_host: true - - return - elsif params[:essence_filename].include?('PDSC_ADMIN') # otherwise look up to see if there is a hidden admin file (thumbnails, soundimage file, etc.) - location = admin_essence_location(collection, item, params[:essence_filename]) - raise ActionController::RoutingError, 'Essence file not found' unless location - - redirect_to location, allow_other_host: true if location - - return - end - - raise ActionController::RoutingError, "Repository file not found: #{params[:essence_filename]}" - end - - private - - # this expects any admin-style files to have a name of the form "--PDSC_ADMIN." - # e.g. 
AA1-001-essence-file-goes-here-thumb-PDSC_ADMIN.jpg where collection AA1 has item 001 with essence "essence-file-goes-here" - def admin_essence_location(collection, item, essence_filename) - item_prefix = "#{collection.identifier}-#{item.identifier}-" - essence_part = essence_filename.sub(item_prefix, '').sub(/^(.+?)-[^-]+?-PDSC_ADMIN\..+/, '\1') - essence = item.essences.where('filename LIKE :prefix', prefix: "#{item_prefix}#{essence_part}%").first - - # don't allow the user to randomly access data, must relate directly to an essence file - return if essence.nil? - authorize! :read, essence - return unless Proxyist.exists? item.full_identifier, essence_filename + location = Nabu::Catalog.instance.essence_url(essence, as_attachment: true) + raise ActionController::RoutingError, 'Essence file not found' unless location - Proxyist.get_object(item.full_identifier, essence_filename, download: true) + redirect_to location, allow_other_host: true end end diff --git a/app/models/item_content_language.rb b/app/models/item_content_language.rb index 8004bbdc..abcdf1df 100644 --- a/app/models/item_content_language.rb +++ b/app/models/item_content_language.rb @@ -24,7 +24,7 @@ class ItemContentLanguage < ApplicationRecord belongs_to :item validates :language_id, presence: true - #validates :item_id, presence: true + # validates :item_id, presence: true def self.ransackable_attributes(_ = nil) %w[id item_id language_id] diff --git a/app/services/catalog_db_sync_validator_service.rb b/app/services/catalog_db_sync_validator_service.rb index e29a00b0..81c3a070 100644 --- a/app/services/catalog_db_sync_validator_service.rb +++ b/app/services/catalog_db_sync_validator_service.rb @@ -1,6 +1,4 @@ class CatalogDbSyncValidatorService - # TODO: Make this support proxyist - attr_reader :catalog_dir, :verbose def initialize(verbose: false) diff --git a/app/services/catalog_metadata_service.rb b/app/services/catalog_metadata_service.rb index 9a36d126..d8b4ec28 100644 --- 
a/app/services/catalog_metadata_service.rb +++ b/app/services/catalog_metadata_service.rb @@ -12,9 +12,12 @@ def save_file } data = Api::V1::OniController.render :object_meta, assigns: local_data - identifier = @data.full_identifier - filename = 'pdsc_admin/ro-crate-metadata.json' + filename = 'ro-crate-metadata.json' - Proxyist.upload_object identifier, filename, data, 'Content-Type' => 'application/json' + if @is_item + Nabu::Catalog.instance.upload_item_admin(@data, filename, data, 'application/json') + else + Nabu::Catalog.instance.upload_collection_admin(@data, filename, data, 'application/json') + end end end diff --git a/app/services/collection_destruction_service.rb b/app/services/collection_destruction_service.rb index 68a9f035..a19f1785 100644 --- a/app/services/collection_destruction_service.rb +++ b/app/services/collection_destruction_service.rb @@ -1,7 +1,5 @@ class CollectionDestructionService def self.destroy(collection) - item_identifiers = collection.items.map(&:full_identifier) - essences = collection.items.map(&:essences).flatten essence_ids = essences.map(&:id) @@ -15,17 +13,14 @@ def self.destroy(collection) collection.destroy # Remove The items just in case - item_identifiers.each do |item_identifier| - files = Proxyist.list(item_identifier) - files.each { |file| Proxyist.delete_object(item_identifier, file) } - - Rails.logger.info "[DELETE] Removed entire item directory at [#{item_identifier}] #{files.size} files" + collection.items.each do |item| + count = Nabu::Catalog.instance.delete_item(item) + Rails.logger.info "[DELETE] Removed entire item directory at [#{item.identifier}] #{count} files" end - files = Proxyist.list(collection.identifier) - files.each { |file| Proxyist.delete_object(collection.identifier, file) } - Rails.logger.info "[DELETE] Removed entire collection directory at [#{collection.identifier}] #{files.size} files" - rescue => e + count = Nabu::Catalog.instance.delete_collection(collection) + Rails.logger.info "[DELETE] 
Removed entire collection directory at [#{collection.identifier}] #{count} files" + rescue StandardError => e return { success: false, messages: { diff --git a/app/services/essence_destruction_service.rb b/app/services/essence_destruction_service.rb index c7ae40b1..fef7ea28 100644 --- a/app/services/essence_destruction_service.rb +++ b/app/services/essence_destruction_service.rb @@ -3,20 +3,11 @@ class EssenceDestructionService def self.destroy(essence) result = true - response = Proxyist.delete_object(essence.item.full_identifier, essence.filename) + response = Nabu::Catalog.instance.delete_essence(essence) result = false if response.code != '204' Rails.logger.info "[DELETE] Removed essence file at [#{essence.item.full_identifier}:#{essence.filename}" - files = Proxyist.list(essence.item.full_identifier) - - # NOTE: This logic might be broken as it deletes checksum files which cover more than a single essence - admin_files_regex = essence.filename.sub(/\..+?$/, '.*PDSC_ADMIN.*') - admin_files = files.grep(Regexp.new(admin_files_regex)) - admin_files.each { |file| Proxyist.delete_object(essence.item.full_identifier, file) } - - Rails.logger.info "[DELETE] Removed any admin files for essence at [#{admin_files_regex}]" - essence.destroy if result diff --git a/app/services/item_destruction_service.rb b/app/services/item_destruction_service.rb index bbfbb913..5913428d 100644 --- a/app/services/item_destruction_service.rb +++ b/app/services/item_destruction_service.rb @@ -8,11 +8,9 @@ def self.destroy(item) item.destroy - # remove directory and PDSC_ADMIN files on disk - files = Proxyist.list(item.full_identifier) - files.each { |file| Proxyist.delete_object(item.full_identifier, file) } + count = Nabu::Catalog.instance.delete_item(item) - Rails.logger.info "[DELETE] Removed entire item directory at [#{item.full_identifier}]: #{files.size} files" + Rails.logger.info "[DELETE] Removed entire item directory at [#{item.full_identifier}]: #{count} files" if 
deleted_essence_count.positive? response[:messages][:notice] = 'Item and all its contents removed permanently (no undo possible)' diff --git a/app/views/admin/file_processing/_paths.html.haml b/app/views/admin/file_processing/_paths.html.haml index c0293096..5eb04336 100644 --- a/app/views/admin/file_processing/_paths.html.haml +++ b/app/views/admin/file_processing/_paths.html.haml @@ -6,7 +6,7 @@ %th Directory %tr %td Archive - %td= "S3 via #{Rails.configuration.proxyist_url}" + %td= "S3 via #{Rails.configuration.catalog_bucket}" %tr %td Upload Location %td diff --git a/app/views/api/v1/oni/object_meta.json.jb b/app/views/api/v1/oni/object_meta.json.jb index 101ecac8..678c7f38 100644 --- a/app/views/api/v1/oni/object_meta.json.jb +++ b/app/views/api/v1/oni/object_meta.json.jb @@ -1,9 +1,6 @@ # frozen_string_literal: truemeta def id - p 'STIX' - p @is_item - p @data @is_item ? repository_item_url(@data.collection, @data) : repository_collection_url(@data) end diff --git a/app/views/items/show.html.haml b/app/views/items/show.html.haml index 4d8ea22a..af591975 100644 --- a/app/views/items/show.html.haml +++ b/app/views/items/show.html.haml @@ -280,11 +280,19 @@ %th Comments %td= h(@item.admin_comment).gsub(/\n/, '
').html_safe - - if admin_user_signed_in? - %tr - %th CAT-PDSC file - %td= link_to 'View item XML file', display_collection_item_path(@collection, @item) + %fieldset + %legend Metadata + %table.form.show + %tr + %th RO-Crate Metadata + %td + %ul + - if admin_user_signed_in? + %li= link_to 'S3', s3_rocrate_collection_item_path(@collection, @item) + %li= link_to 'Live (Public)', public_rocrate_collection_item_path(@collection, @item) + - if admin_user_signed_in? + %li= link_to 'Live (Private)', private_rocrate_collection_item_path(@collection, @item) %fieldset %legend Comments diff --git a/cdk/lib/app-stack.ts b/cdk/lib/app-stack.ts index 1c0d6c17..6ef61a06 100644 --- a/cdk/lib/app-stack.ts +++ b/cdk/lib/app-stack.ts @@ -197,46 +197,6 @@ export class AppStack extends cdk.Stack { }); cluster.addAsgCapacityProvider(capacityProvider); - // //////////////////////// - // Proxyist - // //////////////////////// - - const proxyistTaskDefinition = new ecs.Ec2TaskDefinition(this, 'ProxyistTaskDefinition'); - NagSuppressions.addResourceSuppressions(proxyistTaskDefinition, [ - { id: 'AwsSolutions-ECS2', reason: 'We are fine with env variables' }, - ]); - proxyistTaskDefinition.addContainer('ProxyistContainer', { - memoryLimitMiB: 256, - image: ecs.ContainerImage.fromAsset('..', { - file: 'docker/proxyist.Dockerfile', - }), - stopTimeout: cdk.Duration.seconds(5), - portMappings: [{ name: 'proxyist', containerPort: 3000 }], - logging: ecs.LogDrivers.awsLogs({ streamPrefix: 'ProxyistService' }), - environment: { - AWS_REGION: region, - BUCKET_NAME: catalogBucket.bucketName, - }, - }); - catalogBucket.grantReadWrite(proxyistTaskDefinition.taskRole); - - new ecs.Ec2Service(this, 'ProxyistService', { - serviceName: 'proxyist', - cluster, - taskDefinition: proxyistTaskDefinition, - enableExecuteCommand: true, - serviceConnectConfiguration: { - logDriver: ecs.LogDrivers.awsLogs({ - streamPrefix: 'sc-traffic', - }), - services: [ - { - portMappingName: 'proxyist', - }, - ], - }, - }); - 
// //////////////////////// // Viewer // //////////////////////// @@ -303,7 +263,7 @@ export class AppStack extends cdk.Stack { RAILS_SERVE_STATIC_FILES: 'true', // TODO: do we need nginx in production?? RAILS_ENV: railsEnv, OPENSEARCH_URL: `https://${searchDomain.domainEndpoint}`, - PROXYIST_URL: 'http://proxyist.nabu:3000', + NABU_CATALOG_BUCKET: catalogBucket.bucketName, SENTRY_DSN: 'https://aa8f28b06df84f358949b927e85a924e@o4504801902985216.ingest.sentry.io/4504801910980608', DOI_PREFIX: '10.26278', DATACITE_BASE_URL: env === 'prod' ? 'https://api.datacite.org' : 'https://api.test.datacite.org', diff --git a/config/application.rb b/config/application.rb index 94dfbf07..6e6df7b8 100644 --- a/config/application.rb +++ b/config/application.rb @@ -28,16 +28,15 @@ class Application < Rails::Application # Out Stuff ################### - ActiveSupport::Dependencies.autoload_paths << File::join( Rails.root, 'app', 'services') - ActiveSupport::Dependencies.autoload_paths << File::join( Rails.root, 'lib') + ActiveSupport::Dependencies.autoload_paths << Rails.root.join('app/services') + ActiveSupport::Dependencies.autoload_paths << Rails.root.join('lib') config.viewer_url = '/viewer' config.assets.precompile << 'delayed/web/application.css' - ## Proxyist - config.proxyist_url = ENV.fetch('PROXYIST_URL') - throw 'Must set PROXYIST_URL' unless config.proxyist_url + config.catalog_bucket = ENV.fetch('NABU_CATALOG_BUCKET', nil) + raise 'Must set NABU_CATALOG_BUCKET' unless config.catalog_bucket end end diff --git a/config/routes.rb b/config/routes.rb index b2700545..a12e48c9 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -47,7 +47,9 @@ end resources :items, except: %i[index] do member do - get :display + get :s3_rocrate + get :private_rocrate + get :public_rocrate get :data patch :inherit_details end diff --git a/docker-compose.yml b/docker-compose.yml index 3219df65..22ae6574 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,7 +15,7 @@ services: 
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} AWS_SESSION_TOKEN: ${AWS_SESSION_TOKEN} - PROXYIST_URL: http://proxyist:3000 + NABU_CATALOG_BUCKET: nabu OPENSEARCH_URL: http://admin:fqo6bzr27*6Rdsshgsa7@search:9200 volumes: - .:/rails @@ -65,13 +65,6 @@ services: environment: OPENSEARCH_HOSTS: '["http://search:9200"]' - proxyist: - image: ghcr.io/paradisec-archive/proxyist - volumes: - - ./docker/proxyist.config.js:/usr/src/app/proxyist.config.js - environment: - PROXYIST_ADAPTER_NAME: '@paradisec/proxyist-adapter-s3' - s3: image: adobe/s3mock ports: diff --git a/docker/nginx.conf-dev b/docker/nginx.conf-dev index 17604357..2f29f868 100644 --- a/docker/nginx.conf-dev +++ b/docker/nginx.conf-dev @@ -1,6 +1,3 @@ -upstream proxyist { - server proxyist:3000; -} server { server_name catalog.paradisec.org.au; @@ -23,13 +20,6 @@ server { proxy_set_header Connection "Upgrade"; proxy_pass http://app:3000; proxy_set_header X-Sendfile-Type X-Accel-Redirect; - proxy_set_header X-Accel-Mapping /proxyist/=/proxyist/; - } - - location /proxyist/ { - internal; - set $stored_real_location $upstream_http_x_real_location; - proxy_pass http://proxyist$stored_real_location; } listen 443 ssl; diff --git a/docker/proxyist.Dockerfile b/docker/proxyist.Dockerfile deleted file mode 100644 index 8d21a493..00000000 --- a/docker/proxyist.Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM ghcr.io/paradisec-archive/proxyist:v0.3.0 - -COPY docker/proxyist.config.prod.js /usr/src/app/proxyist.config.js - -ENV PROXYIST_ADAPTER_NAME="@paradisec/proxyist-adapter-s3" diff --git a/docker/proxyist.config.js b/docker/proxyist.config.js deleted file mode 100644 index 3b3a9e0c..00000000 --- a/docker/proxyist.config.js +++ /dev/null @@ -1,35 +0,0 @@ -import { S3 } from '@aws-sdk/client-s3'; - -const s3Config = { - forcePathStyle: true, - credentials: { - accessKeyId: 'S3RVER', - secretAccessKey: 'S3RVER', - }, - endpoint: 'http://s3:9090', - region: 'ap-southeast-2', -}; 
- -const s3 = new S3(s3Config); - -export default { - bucket: 'nabu', - returnRedirects: true, - s3, - transform: (identifier) => { - if (identifier.includes('/')) { - throw new Error('Identifer cannot contain "/"'); - } - - if (identifier.includes('-')) { - const [, item] = identifier.split('-', 2); - if (item === 'root') { - throw new Error('Item cannot be named "root"'); - } - - return identifier.replace(/-/, '/'); - } - - return `${identifier}/root`; - }, -}; diff --git a/docker/proxyist.config.prod.js b/docker/proxyist.config.prod.js deleted file mode 100644 index 37b57bf7..00000000 --- a/docker/proxyist.config.prod.js +++ /dev/null @@ -1,25 +0,0 @@ -const bucketName = process.env.BUCKET_NAME; -if (!bucketName) { - throw new Error('BUCKET_NAME environment variable must be set'); -} - -export default { - bucket: bucketName, - returnRedirects: true, - transform: (identifier) => { - if (identifier.includes('/')) { - throw new Error('Identifer cannot contain "/"'); - } - - if (identifier.includes('-')) { - const [, item] = identifier.split('-', 2); - if (item === '__object__') { - throw new Error('Item cannot be named "root"'); - } - - return identifier.replace(/-/, '/'); - } - - return `${identifier}/__object__`; - }, -}; diff --git a/lib/nabu/catalog.rb b/lib/nabu/catalog.rb new file mode 100644 index 00000000..fe1e24de --- /dev/null +++ b/lib/nabu/catalog.rb @@ -0,0 +1,130 @@ +require 'singleton' + +require 'aws-sdk-s3' + +module Nabu + class Catalog + include Singleton + + def initialize + params = { + region: 'ap-southeast-2' + } + + if Rails.env.development? 
+ # s3 mock + params.merge!( + region: 'us-east-1', + access_key_id: 'S3RVER', + secret_access_key: 'S3RVER', + endpoint: 'http://s3:9090', + force_path_style: true + ) + end + + @s3 = Aws::S3::Client.new(params) + @presigner = Aws::S3::Presigner.new(client: @s3) + end + + def delete_collection(collection) + Rails.logger.debug { "Nabu::Catalog: Deleting collection #{collection.identifier}" } + delete_by_prefix(collection.identifier) + end + + def delete_item(item) + Rails.logger.debug { "Nabu::Catalog: Deleting item #{item.full_identifier}" } + parts = [item.collection.identifier, item.identifier] + + delete_by_prefix(parts.join('/')) + end + + def delete_essence(essence) + Rails.logger.debug { "Nabu::Catalog: Deleting essence #{essence.item.full_identifier}:#{essence.filename}" } + parts = [essence.item.collection.identifier, essence.item.identifier, essence.filename] + + delete_by_prefix(parts.join('/')) + end + + def upload_item_admin(item, filename, data, content_type) + Rails.logger.debug { "Nabu::Catalog: Uploading item admin file #{item.full_identifier}:#{filename}" } + parts = [item.collection.identifier, item.identifier, 'pdsc_admin', filename] + + upload(parts.join('/'), data, content_type) + end + + def upload_collection_admin(collection, filename, data, content_type) + Rails.logger.debug { "Nabu::Catalog: Uploading collection admin file #{collection.identifier}:#{filename}" } + parts = [collection.identifier, 'pdsc_admin', filename] + + upload(parts.join('/'), data, content_type) + end + + def item_admin_url(item, filename) + Rails.logger.debug { "Nabu::Catalog: Downloading item admin file #{item.full_identifier}:#{filename}" } + parts = [item.collection.identifier, item.identifier, 'pdsc_admin', filename] + + download(parts.join('/')) + end + + def collection_admin_url(collection, filename) + Rails.logger.debug { "Nabu::Catalog: Downloading collection admin file #{collection.identifier}:#{filename}" } + parts = [collection.identifier, 'pdsc_admin', 
filename] + + download(parts.join('/')) + end + + def essence_url(essence, as_attachment: false) + Rails.logger.debug { "Nabu::Catalog: Get essence URL #{essence.item.full_identifier}:#{essence.filename}" } + parts = [essence.item.collection.identifier, essence.item.identifier, essence.filename] + + download(parts.join('/'), as_attachment:) + end + + private + + def bucket_name + @bucket_name ||= Rails.configuration.catalog_bucket + end + + def upload(key, data, content_type) + @s3.put_object( + bucket: bucket_name, + key:, + body: data, + content_type: + ) + end + + def download(key, as_attachment: false) + @presigner.presigned_url( + :get_object, + bucket: bucket_name, + key:, + response_content_disposition: as_attachment ? 'attachment' : nil + ) + end + + def delete_by_prefix(prefix) + response = @s3.list_objects_v2( + bucket: bucket_name, + prefix: + ) + + keys = response.contents.map(&:key) + return 0 if keys.empty? # nothing matched; S3 rejects a delete request with an empty object list + + Rails.logger.debug { "Deleting #{keys.join(',')} files" } + del_response = @s3.delete_objects( + bucket: bucket_name, + delete: { + objects: keys.map { |key| { key: } }, + quiet: true + } + ) + + return keys.size unless del_response.errors.any? 
+ + raise "Error deleting files: #{del_response.errors}" + end + end +end diff --git a/lib/proxyist.rb b/lib/proxyist.rb deleted file mode 100644 index fb28c784..00000000 --- a/lib/proxyist.rb +++ /dev/null @@ -1,83 +0,0 @@ -require 'net/http' - -BASE_URL = Rails.configuration.proxyist_url - -# NOTE: This implementation of proxyist assumes S3 with redirect turned on for performance reasons - -module Net - class HTTP < Protocol - def self.put(url, data, header = nil) - start(url.hostname, url.port, use_ssl: url.scheme == 'https') do |http| - http.put(url.path, data, header) - end - end - - def self.head(url) - start(url.hostname, url.port, use_ssl: url.scheme == 'https') do |http| - http.head(url.path) - end - end - - def self.delete(url) - start(url.hostname, url.port, use_ssl: url.scheme == 'https') do |http| - http.delete(url.path) - end - end - end -end - -module Proxyist - def self.list(identifier) - url = generate_url(identifier) - - response = Net::HTTP.get_response(url) - - raise 'Proxyist request failed' unless response.is_a?(Net::HTTPOK) - - JSON.parse(response.body) - end - - def self.get_object(identifier, filename, params = {}) - is_downloadable = params[:download] - url = generate_url(identifier, filename, is_downloadable:) - - response = Net::HTTP.get_response(url) - - return if response.is_a?(Net::HTTPNotFound) - - raise 'Proxyist is misonfigured, we only support redirects' unless response.is_a?(Net::HTTPRedirection) - - response['Location'] - end - - def self.upload_object(identifier, filename, data, headers = nil) - url = generate_url(identifier, filename) - - Net::HTTP.put(url, data, headers) - end - - def self.delete_object(identifier, filename) - url = generate_url(identifier, filename) - - Net::HTTP.delete(url) - end - - def self.exists?(identifier, filename) - url = generate_url(identifier, filename) - - Net::HTTP.head(url) - end - - def self.generate_url(identifier, filename = nil, is_downloadable: false) - query = {} - query[:disposition] = 
'attachment' if is_downloadable - - path = "/object/#{URI.encode_uri_component(identifier)}" - path += "/#{URI.encode_uri_component(filename)}" if filename - - uri = URI.join(BASE_URL, path) - uri.query = URI.encode_www_form(query) unless query.empty? - - uri - end -end diff --git a/spec/services/essence_destruction_service_spec.rb b/spec/services/essence_destruction_service_spec.rb index 9d5aa3ec..7bb99526 100644 --- a/spec/services/essence_destruction_service_spec.rb +++ b/spec/services/essence_destruction_service_spec.rb @@ -5,21 +5,21 @@ context 'when essence file is present on the server' do before do - allow(Proxyist).to receive(:delete_object).and_return(OpenStruct.new(code: '204')) + allow(Nabu::Catalog.instance).to receive(:delete_essence).and_return(OpenStruct.new(code: '204')) end - it 'should proceed without errors' do + it 'proceeds without errors' do response = EssenceDestructionService.destroy(essence) expect(response).to have_key(:notice) - expect(response).to_not have_key(:error) + expect(response).not_to have_key(:error) expect(response[:notice]).to eq('Essence removed successfully, and file deleted from archive (undo not possible).') end end context 'when essence file is not present on the server' do - it 'should proceed with errors' do + it 'proceeds with errors' do response = EssenceDestructionService.destroy(essence) - expect(response).to_not have_key(:notice) + expect(response).not_to have_key(:notice) expect(response).to have_key(:error) expect(response[:error]).to eq('Essence removed, but deleting file failed: Not Found') end