Skip to content

Commit

Permalink
Switch from XML to rocrate and switch from proxyist to direct s3 access
Browse files Browse the repository at this point in the history
  • Loading branch information
johnf committed Jul 2, 2024
1 parent 893739a commit 152cc12
Show file tree
Hide file tree
Showing 27 changed files with 207 additions and 308 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN echo $GIT_SHA > REVISION
RUN bundle exec bootsnap precompile app/ lib/

# Precompiling assets for production without requiring secret RAILS_MASTER_KEY
RUN ASSET_PRECOMPILE=1 SECRET_KEY_BASE_DUMMY=1 PROXYIST_URL=dummy ./bin/rails assets:precompile
RUN ASSET_PRECOMPILE=1 SECRET_KEY_BASE_DUMMY=1 ./bin/rails assets:precompile

# Final stage for app image
FROM base
Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ gem 'paper_trail' # Keep an audit trail of all the changes

# Background processing
gem 'aws-sdk-rails' # Send emails via SES
gem 'aws-sdk-s3' # Talk to the catalog
gem 'daemons' # Needed by delayed_job
gem 'delayed_job_active_record' # Delay jobs and queue them in the database

Expand Down
8 changes: 8 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ GEM
aws-sdk-dynamodb (1.108.0)
aws-sdk-core (~> 3, >= 3.193.0)
aws-sigv4 (~> 1.1)
aws-sdk-kms (1.82.0)
aws-sdk-core (~> 3, >= 3.193.0)
aws-sigv4 (~> 1.1)
aws-sdk-rails (3.12.0)
aws-record (~> 2)
aws-sdk-ses (~> 1, >= 1.50.0)
Expand All @@ -112,6 +115,10 @@ GEM
aws-sessionstore-dynamodb (~> 2)
concurrent-ruby (~> 1)
railties (>= 5.2.0)
aws-sdk-s3 (1.151.0)
aws-sdk-core (~> 3, >= 3.194.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.8)
aws-sdk-ses (1.61.0)
aws-sdk-core (~> 3, >= 3.193.0)
aws-sigv4 (~> 1.1)
Expand Down Expand Up @@ -635,6 +642,7 @@ DEPENDENCIES
activeadmin
annotaterb
aws-sdk-rails
aws-sdk-s3
bootsnap
cancancan
capybara
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ docker compose up
This brings up the following containers
* app - the rails app
* search - Solr instance for search (dev + test)
* proxyist - S3 proxy
* db - MySQL database (dev + test)
* s3 - s3 mock

Expand Down
6 changes: 2 additions & 4 deletions app/controllers/essences_controller.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
require 'proxyist'

class EssencesController < ApplicationController
load_and_authorize_resource :collection, find_by: :identifier, except: [:list_mimetypes]
load_and_authorize_resource :item, find_by: :identifier, through: :collection, except: [:list_mimetypes]
Expand All @@ -26,14 +24,14 @@ def show
def download
Download.create! user: current_user, essence: @essence

location = Proxyist.get_object(@essence.item.full_identifier, @essence.filename, download: true)
location = Nabu::Catalog.instance.essence_url(@essence, as_attachment: true)
raise ActionController::RoutingError, 'Essence file not found' unless location

redirect_to location, allow_other_host: true
end

def display
location = Proxyist.get_object(@essence.item.full_identifier, @essence.filename)
location = Nabu::Catalog.instance.essence_url(@essence)
raise ActionController::RoutingError, 'Essence file not found' unless location

redirect_to location, allow_other_host: true
Expand Down
29 changes: 19 additions & 10 deletions app/controllers/items_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,29 @@ def bulk_update
redirect_to bulk_update_items_path + "?#{params[:original_search_params]}"
end

def display
location = Proxyist.get_object(@item.full_identifier, "#{@item.full_identifier}-CAT-PDSC_ADMIN.xml")
def s3_rocrate
location = Nabu::Catalog.instance.item_admin_url(@item, 'ro-crate-metadata.json')
raise ActionController::RoutingError, 'PDSC file not found' unless location

redirect_to location, allow_other_host: true
end

# Renders the 'api/v1/oni/object_meta' jb template as JSON for the current
# item with @admin_rocrate switched on.
# NOTE(review): presumably @admin_rocrate makes the template include
# admin-only metadata - confirm against the template.
def private_rocrate
  # Instance variables consumed by the shared object_meta template.
  @admin_rocrate = true
  @is_item = true
  @data = @item

  render(
    template: 'api/v1/oni/object_meta',
    formats: %i[json],
    handlers: %i[jb]
  )
end

# Renders the 'api/v1/oni/object_meta' jb template as JSON for the current
# item with @admin_rocrate switched off.
# NOTE(review): presumably this yields the publicly visible form of the
# metadata - confirm against the template.
def public_rocrate
  # Instance variables consumed by the shared object_meta template.
  @admin_rocrate = false
  @is_item = true
  @data = @item

  render(
    template: 'api/v1/oni/object_meta',
    formats: %i[json],
    handlers: %i[jb]
  )
end

def new_report
@page_title = 'Nabu - Depositor Item Report Request'
end
Expand Down Expand Up @@ -252,17 +268,10 @@ def data
video_values[essence_basename] ||= []
video_values[essence_basename] << repository_essence_url
when 'jpg', 'jpeg', 'png'
thumbnail_url = repository_essence_url.gsub(".#{essence_extension}", '-thumb-PDSC_ADMIN.jpg')

# Copied from Essence#path and Essence#full_identifier.
thumbnail_exists = Proxyist.exists?(essence.item.identifier, File.basename(thumbnail_url))
thumbnail_url = nil unless thumbnail_exists

# REQUIREMENTS: There are scenarios where multiple originals have the same essence basename. Is that ok as far as the player is concerned?
unless images_values.key?(essence_basename)
images_values[essence_basename] = {
'originals' => [],
'thumbnail' => thumbnail_url
'originals' => []
}
end
images_values[essence_basename]['originals'] << repository_essence_url
Expand Down
38 changes: 3 additions & 35 deletions app/controllers/repository_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,43 +31,11 @@ def essence
essence = item.essences.find_by(filename: params[:essence_filename])
raise ActionController::RoutingError, "Essence not found: #{params[:essence_filename]}" if essence.nil?

# if a standard essence file was found, return that as usual
if essence.present?
authorize! :read, essence
location = Proxyist.get_object(essence.item.full_identifier, essence.filename, download: true)
raise ActionController::RoutingError, 'Essence file not found' unless location

redirect_to location, allow_other_host: true

return
elsif params[:essence_filename].include?('PDSC_ADMIN') # otherwise look up to see if there is a hidden admin file (thumbnails, soundimage file, etc.)
location = admin_essence_location(collection, item, params[:essence_filename])
raise ActionController::RoutingError, 'Essence file not found' unless location

redirect_to location, allow_other_host: true if location

return
end

raise ActionController::RoutingError, "Repository file not found: #{params[:essence_filename]}"
end

private

# this expects any admin-style files to have a name of the form "<essence identifier part>-<type>-PDSC_ADMIN.<extension>"
# e.g. AA1-001-essence-file-goes-here-thumb-PDSC_ADMIN.jpg where collection AA1 has item 001 with essence "essence-file-goes-here"
def admin_essence_location(collection, item, essence_filename)
item_prefix = "#{collection.identifier}-#{item.identifier}-"
essence_part = essence_filename.sub(item_prefix, '').sub(/^(.+?)-[^-]+?-PDSC_ADMIN\..+/, '\1')
essence = item.essences.where('filename LIKE :prefix', prefix: "#{item_prefix}#{essence_part}%").first

# don't allow the user to randomly access data, must relate directly to an essence file
return if essence.nil?

authorize! :read, essence

return unless Proxyist.exists? item.full_identifier, essence_filename
location = Nabu::Catalog.instance.essence_url(essence, as_attachment: true)
raise ActionController::RoutingError, 'Essence file not found' unless location

Proxyist.get_object(item.full_identifier, essence_filename, download: true)
redirect_to location, allow_other_host: true
end
end
2 changes: 1 addition & 1 deletion app/models/item_content_language.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ItemContentLanguage < ApplicationRecord
belongs_to :item

validates :language_id, presence: true
#validates :item_id, presence: true
# validates :item_id, presence: true

def self.ransackable_attributes(_ = nil)
%w[id item_id language_id]
Expand Down
2 changes: 0 additions & 2 deletions app/services/catalog_db_sync_validator_service.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
class CatalogDbSyncValidatorService
# TODO: Make this support proxyist

attr_reader :catalog_dir, :verbose

def initialize(verbose: false)
Expand Down
9 changes: 6 additions & 3 deletions app/services/catalog_metadata_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ def save_file
}
data = Api::V1::OniController.render :object_meta, assigns: local_data

identifier = @data.full_identifier
filename = 'pdsc_admin/ro-crate-metadata.json'
filename = 'ro-crate-metadata.json'

Proxyist.upload_object identifier, filename, data, 'Content-Type' => 'application/json'
if @is_item
Nabu::Catalog.instance.upload_item_admin(@data, filename, data, 'application/json')
else
Nabu::Catalog.instance.upload_collection_admin(@data, filename, data, 'application/json')
end
end
end
17 changes: 6 additions & 11 deletions app/services/collection_destruction_service.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
class CollectionDestructionService
def self.destroy(collection)
item_identifiers = collection.items.map(&:full_identifier)

essences = collection.items.map(&:essences).flatten
essence_ids = essences.map(&:id)

Expand All @@ -15,17 +13,14 @@ def self.destroy(collection)
collection.destroy

# Remove The items just in case
item_identifiers.each do |item_identifier|
files = Proxyist.list(item_identifier)
files.each { |file| Proxyist.delete_object(item_identifier, file) }

Rails.logger.info "[DELETE] Removed entire item directory at [#{item_identifier}] #{files.size} files"
collection.items.each do |item|
count = Nabu::Catalog.instance.delete_item(item)
Rails.logger.info "[DELETE] Removed entire item directory at [#{item.identifier}] #{count} files"
end

files = Proxyist.list(collection.identifier)
files.each { |file| Proxyist.delete_object(collection.identifier, file) }
Rails.logger.info "[DELETE] Removed entire collection directory at [#{collection.identifier}] #{files.size} files"
rescue => e
count = Nabu::Catalog.instance.delete_collection(collection)
Rails.logger.info "[DELETE] Removed entire collection directory at [#{collection.identifier}] #{count} files"
rescue StandardError => e
return {
success: false,
messages: {
Expand Down
11 changes: 1 addition & 10 deletions app/services/essence_destruction_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,11 @@ class EssenceDestructionService
def self.destroy(essence)
result = true

response = Proxyist.delete_object(essence.item.full_identifier, essence.filename)
response = Nabu::Catalog.instance.delete_essence(essence)
result = false if response.code != '204'

Rails.logger.info "[DELETE] Removed essence file at [#{essence.item.full_identifier}:#{essence.filename}"

files = Proxyist.list(essence.item.full_identifier)

# NOTE: This logic might be broken as it deletes checksum files which cover more than a single essence
admin_files_regex = essence.filename.sub(/\..+?$/, '.*PDSC_ADMIN.*')
admin_files = files.grep(Regexp.new(admin_files_regex))
admin_files.each { |file| Proxyist.delete_object(essence.item.full_identifier, file) }

Rails.logger.info "[DELETE] Removed any admin files for essence at [#{admin_files_regex}]"

essence.destroy

if result
Expand Down
6 changes: 2 additions & 4 deletions app/services/item_destruction_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ def self.destroy(item)

item.destroy

# remove directory and PDSC_ADMIN files on disk
files = Proxyist.list(item.full_identifier)
files.each { |file| Proxyist.delete_object(item.full_identifier, file) }
count = Nabu::Catalog.instance.delete_item(item)

Rails.logger.info "[DELETE] Removed entire item directory at [#{item.full_identifier}]: #{files.size} files"
Rails.logger.info "[DELETE] Removed entire item directory at [#{item.full_identifier}]: #{count} files"

if deleted_essence_count.positive?
response[:messages][:notice] = 'Item and all its contents removed permanently (no undo possible)'
Expand Down
2 changes: 1 addition & 1 deletion app/views/admin/file_processing/_paths.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
%th Directory
%tr
%td Archive
%td= "S3 via #{Rails.configuration.proxyist_url}"
%td= "S3 via #{Rails.configuration.catalog_bucket}"
%tr
%td Upload Location
%td
Expand Down
3 changes: 0 additions & 3 deletions app/views/api/v1/oni/object_meta.json.jb
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# frozen_string_literal: truemeta

def id
p 'STIX'
p @is_item
p @data
@is_item ? repository_item_url(@data.collection, @data) : repository_collection_url(@data)
end

Expand Down
16 changes: 12 additions & 4 deletions app/views/items/show.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -280,11 +280,19 @@
%th Comments
%td= h(@item.admin_comment).gsub(/\n/, '<br/>').html_safe

- if admin_user_signed_in?
%tr
%th CAT-PDSC file
%td= link_to 'View item XML file', display_collection_item_path(@collection, @item)
%fieldset
%legend Metadata

%table.form.show
%tr
%th RO-Crate Metadata
%td
%ul
- if admin_user_signed_in?
%li= link_to 'S3', s3_rocrate_collection_item_path(@collection, @item)
%li= link_to 'Live (Public)', public_rocrate_collection_item_path(@collection, @item)
- if admin_user_signed_in?
%li= link_to 'Live (Private)', private_rocrate_collection_item_path(@collection, @item)

%fieldset
%legend Comments
Expand Down
42 changes: 1 addition & 41 deletions cdk/lib/app-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,46 +197,6 @@ export class AppStack extends cdk.Stack {
});
cluster.addAsgCapacityProvider(capacityProvider);

// ////////////////////////
// Proxyist
// ////////////////////////

const proxyistTaskDefinition = new ecs.Ec2TaskDefinition(this, 'ProxyistTaskDefinition');
NagSuppressions.addResourceSuppressions(proxyistTaskDefinition, [
{ id: 'AwsSolutions-ECS2', reason: 'We are fine with env variables' },
]);
proxyistTaskDefinition.addContainer('ProxyistContainer', {
memoryLimitMiB: 256,
image: ecs.ContainerImage.fromAsset('..', {
file: 'docker/proxyist.Dockerfile',
}),
stopTimeout: cdk.Duration.seconds(5),
portMappings: [{ name: 'proxyist', containerPort: 3000 }],
logging: ecs.LogDrivers.awsLogs({ streamPrefix: 'ProxyistService' }),
environment: {
AWS_REGION: region,
BUCKET_NAME: catalogBucket.bucketName,
},
});
catalogBucket.grantReadWrite(proxyistTaskDefinition.taskRole);

new ecs.Ec2Service(this, 'ProxyistService', {
serviceName: 'proxyist',
cluster,
taskDefinition: proxyistTaskDefinition,
enableExecuteCommand: true,
serviceConnectConfiguration: {
logDriver: ecs.LogDrivers.awsLogs({
streamPrefix: 'sc-traffic',
}),
services: [
{
portMappingName: 'proxyist',
},
],
},
});

// ////////////////////////
// Viewer
// ////////////////////////
Expand Down Expand Up @@ -303,7 +263,7 @@ export class AppStack extends cdk.Stack {
RAILS_SERVE_STATIC_FILES: 'true', // TODO: do we need nginx in production??
RAILS_ENV: railsEnv,
OPENSEARCH_URL: `https://${searchDomain.domainEndpoint}`,
PROXYIST_URL: 'http://proxyist.nabu:3000',
NABU_CATALOG_BUCKET: catalogBucket.bucketName,
SENTRY_DSN: 'https://aa8f28b06df84f358949b927e85a924e@o4504801902985216.ingest.sentry.io/4504801910980608',
DOI_PREFIX: '10.26278',
DATACITE_BASE_URL: env === 'prod' ? 'https://api.datacite.org' : 'https://api.test.datacite.org',
Expand Down
9 changes: 4 additions & 5 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,15 @@ class Application < Rails::Application
# Our Stuff
###################

ActiveSupport::Dependencies.autoload_paths << File::join( Rails.root, 'app', 'services')
ActiveSupport::Dependencies.autoload_paths << File::join( Rails.root, 'lib')
ActiveSupport::Dependencies.autoload_paths << Rails.root.join('app/services')
ActiveSupport::Dependencies.autoload_paths << Rails.root.join('lib')

config.viewer_url = '/viewer'

config.assets.precompile << 'delayed/web/application.css'

## Proxyist
config.proxyist_url = ENV.fetch('PROXYIST_URL')
throw 'Must set PROXYIST_URL' unless config.proxyist_url
# Name of the S3 bucket that holds the catalog; required at boot.
# ENV.fetch without a default raises KeyError before any follow-up guard can
# run, so the failure message is supplied through the fetch block instead.
config.catalog_bucket = ENV.fetch('NABU_CATALOG_BUCKET') { raise 'Must set NABU_CATALOG_BUCKET' }
end
end

Expand Down
Loading

0 comments on commit 152cc12

Please sign in to comment.