From c925e4f163c5ee86e94647cc8b7f2b7bff66b75b Mon Sep 17 00:00:00 2001
From: John Ferlito
Date: Sun, 26 May 2024 21:41:00 +1000
Subject: [PATCH] Switch to v4 DOI schema and REST/JSON API

---
Review notes (this section is ignored by git am):
 * Line breaks and indentation of this patch were restored after having been
   collapsed; the indentation of context and removed lines is reconstructed,
   so apply with `git apply --ignore-whitespace` if it does not match.
 * essence_resource_type now reads the receiver's own mimetype instead of
   Essence.first.mimetype (which described an unrelated record).
 * The university contributor is only added when university_name is present,
   as the removed XML builder did.
 * relatedIdentifiers is stored under a symbol key like the rest of the
   attributes hash (serialised JSON is unchanged).

 Gemfile                             |   4 +-
 app/models/collection.rb            |   5 --
 app/models/essence.rb               |   5 --
 app/models/identifiable_by_doi.rb   | 112 ++++++++++++++++------------
 app/models/item.rb                  |   5 --
 app/services/doi_minting_service.rb |  48 ++++++------
 cdk/lib/app-stack.ts                |   2 +-
 7 files changed, 88 insertions(+), 93 deletions(-)

diff --git a/Gemfile b/Gemfile
index 9cbcbd27..1b260044 100644
--- a/Gemfile
+++ b/Gemfile
@@ -78,7 +78,7 @@ gem 'dotenv-rails', require: 'dotenv/load' # , groups: [:development, :test] # L
 
 # Views
 gem 'haml-rails', '~> 2.0' # We use HAML for templates instead of erb
-gem 'jb' # for json templates, simploer and faster than jbuilder
+gem 'jb' # for json templates, simpler and faster than jbuilder
 gem 'kaminari' # Pagination
 gem 'oai' # OAI-PMH
 gem 'rexml' # OAI needs it https://github.com/code4lib/ruby-oai/issues/68
@@ -113,9 +113,9 @@ gem 'graphql'
 # Search
 gem 'faraday_middleware-aws-sigv4'
 gem 'opensearch-ruby'
+gem 'rails-reverse-proxy' # so we can get to opensearch dashboard
 gem 'searchjoy'
 gem 'searchkick'
-gem 'rails-reverse-proxy' # so we can get to opensearch dashboard
 
 # Other
 gem 'curb' # Download CSVs for import
diff --git a/app/models/collection.rb b/app/models/collection.rb
index 3472f95d..6e741422 100644
--- a/app/models/collection.rb
+++ b/app/models/collection.rb
@@ -297,11 +297,6 @@ def xml_key
     "paradisec.org.au/collection/#{identifier}"
   end
 
-  # for DOI relationship linking: nil <- Collection <- Item <- Essence
-  def parent
-    nil
-  end
-
   def citation
     cite = ''
     cite += "#{collector.name} (collector)" if collector
diff --git a/app/models/essence.rb b/app/models/essence.rb
index 09d2ff5e..06f90613 100644
--- a/app/models/essence.rb
+++ b/app/models/essence.rb
@@ -114,11 +114,6 @@ def full_path
     "#{item.full_path}/essences/#{id}"
   end
 
-  # for DOI relationship linking: nil <- Collection <- Item <- Essence
-  def parent
-    item
-  end
-
   def self.ransackable_attributes(_ = nil)
     %w[bitrate channels created_at derived_files_generated doi duration filename fps id item_id mimetype samplerate size updated_at]
   end
diff --git a/app/models/identifiable_by_doi.rb b/app/models/identifiable_by_doi.rb
index 72e2c4c3..60550583 100644
--- a/app/models/identifiable_by_doi.rb
+++ b/app/models/identifiable_by_doi.rb
@@ -1,57 +1,71 @@
 module IdentifiableByDoi
-  def to_doi_xml
-    xml = ::Builder::XmlMarkup.new
-    xml.tag! 'resource', schema_definitions do
-      xml.tag! 'creators' do
-        xml.tag! 'creator' do
-          xml.tag! 'creatorName', collector_name
-        end
-      end
-      xml.tag! 'identifier', '10.4225/72/bcndhj78437hjk', identifierType: 'DOI'
-      xml.tag! 'titles' do
-        xml.tag! 'title', title
-      end
-      xml.tag! 'publisher', 'PARADISEC'
-      # Items are the only type which contain the true publication date, so prefer that, but fall back to the date it was added to Nabu
-      publicationYear = case
-        when is_a?(Item)
-          originated_on || created_at
-        when is_a?(Essence)
-          item.originated_on || created_at
-        else
-          created_at
-        end.year
-
-      xml.tag! 'publicationYear', publicationYear
-
-      xml.tag! 'contributors' do
-        xml.tag! 'contributor', contributorType: 'DataCollector' do
-          xml.tag! 'contributorName', collector_name
-        end
-
-        if respond_to?(:university_name) && university_name.present?
-          xml.tag! 'contributor', contributorType: 'DataCollector' do
-            xml.tag! 'contributorName', university_name
-          end
-        end
-      end
-
-      # parent should exist for everything except Collection
-      if parent.present?
-        xml.tag! 'relatedIdentifiers' do
-          xml.tag! 'relatedIdentifier', parent.doi, relatedIdentifierType: 'DOI', relationType: is_a?(Item) ? 'IsPartOf' : 'IsSourceOf'
-        end
-      end
+  def to_doi_json(prefix)
+    # NOTE: Items are the only type which contain the true publication date, so prefer that, but fall back to the date it was added to Nabu
+    publication_date = if is_a?(Item)
+                         originated_on || created_at
+                       elsif is_a?(Essence)
+                         item.originated_on || created_at
+                       else
+                         created_at
+                       end
+
+    parent = if is_a?(Item)
+               collection
+             elsif is_a?(Essence)
+               item
+             end
+
+    resource_type_general = if is_a?(Item)
+                              'Dataset'
+                            elsif is_a?(Essence)
+                              essence_resource_type
+                            else
+                              'Collection'
+                            end
+
+    contributors = [{ name: collector_name, contributorType: 'DataCollector' }]
+    contributors.push({ name: university_name, contributorType: 'DataCollector' }) if respond_to?(:university_name) && university_name.present?
+
+    attributes = {
+      event: 'publish',
+      prefix:,
+      creators: [{ name: collector_name }],
+      titles: [{ title: }],
+      publisher: 'PARADISEC',
+      publicationYear: publication_date.year,
+      contributors:,
+      url: full_path,
+      types: {
+        resourceTypeGeneral: resource_type_general
+      }
+    }
+
+    # NOTE: parent should exist for everything except Collection
+    if parent
+      attributes[:relatedIdentifiers] = [{
+        relatedIdentifier: parent.doi,
+        relatedIdentifierType: 'DOI',
+        relationType: is_a?(Item) ? 'IsPartOf' : 'IsSourceOf'
+      }]
     end
+
+    {
+      data: {
+        type: 'dois',
+        attributes:
+      }
+    }.to_json
   end
 
   private
 
-  def schema_definitions
-    {
-      'xmlns' => 'http://datacite.org/schema/kernel-3',
-      'xsi:schemaLocation' => 'http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd',
-      'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
-    }
+  def essence_resource_type
+    case mimetype.to_s.split('/').first
+    when 'audio' then 'Sound'
+    when 'video' then 'Audiovisual'
+    when 'image' then 'Image'
+    when 'text' then 'Text'
+    else 'Other'
+    end
   end
 end
diff --git a/app/models/item.rb b/app/models/item.rb
index 9ef71e27..249e1481 100644
--- a/app/models/item.rb
+++ b/app/models/item.rb
@@ -186,11 +186,6 @@ def full_path
     "http://catalog.paradisec.org.au/collections/#{collection.identifier}/items/#{identifier}"
   end
 
-  # for DOI relationship linking: nil <- Collection <- Item <- Essence
-  def parent
-    collection
-  end
-
   def xml_key
     "paradisec.org.au/item/#{full_identifier}"
   end
diff --git a/app/services/doi_minting_service.rb b/app/services/doi_minting_service.rb
index e091cad6..41d717a4 100644
--- a/app/services/doi_minting_service.rb
+++ b/app/services/doi_minting_service.rb
@@ -4,9 +4,9 @@ class DoiMintingService
 
   def initialize(dry_run)
     @base_url = ENV.fetch('DATACITE_BASE_URL')
-    @prefix = ENV.fetch('DOI_PREFIX')
     @user = ENV.fetch('DATACITE_USER')
     @pass = ENV.fetch('DATACITE_PASS')
+    @prefix = ENV.fetch('DOI_PREFIX')
     @dry_run = dry_run
   end
 
@@ -16,22 +16,16 @@ def mint_doi(doiable)
       return
     end
 
-    response = post_to_mds :metadata, doiable.to_doi_xml
-
-    if response.code != '201'
+    response = post '/dois', doiable.to_doi_json(@prefix)
+    unless response
       Rails.logger.error "DOI minting failed for #{doiable.full_path}"
       return
     end
 
-    doi = response.body[/\AOK \(([^\)]+)\)\z/, 1]
-
-    response = post_to_mds :doi, "doi=#{doi}\nurl=#{doiable.full_path}"
-    if response.code != '201'
-      Rails.logger.error "DOI minting failed for #{doiable.full_path}"
-      return false
-    end
+    doi = response['data']['id']
 
     Rails.logger.info "DOI #{doi} minted for #{doiable.full_path}"
+    doiable.doi = doi
     doiable.save
   end
 
@@ -39,27 +33,29 @@ def mint_doi(doiable)
   private
 
   def url_for(action)
-    "#{@base_url}/#{action}/#{@prefix}" if %i[metadata doi].include? action
+    "#{@base_url}#{action}"
   end
 
-  def content_type_for(action)
-    case action
-    when :metadata
-      'application/xml;charset=UTF-8'
-    when :doi
-      'text/plain;charset=UTF-8'
-    end
-  end
-
-  def post_to_mds(action, body)
+  def post(action, body)
     uri = URI.parse url_for(action)
     connection = Net::HTTP.new uri.host, uri.port
-    connection.use_ssl = true
+    connection.use_ssl = (uri.scheme == 'https')
 
-    request = Net::HTTP::Put.new uri.request_uri
-    request['Content-Type'] = content_type_for(action)
+    request = Net::HTTP::Post.new uri.request_uri
+    request['Content-Type'] = 'application/json'
     request.basic_auth @user, @pass
     request.body = body
-    connection.request(request)
+
+    response = connection.request(request)
+
+    if response.code != '201'
+      Sentry.capture_message 'DOI creation failed', extra: { code: response.code, body: response.body }
+      Rails.logger.error "DOI code: #{response.code}"
+      Rails.logger.error "DOI response: #{response.body}"
+
+      return
+    end
+
+    JSON.parse(response.body)
   end
 end
diff --git a/cdk/lib/app-stack.ts b/cdk/lib/app-stack.ts
index 06c88e99..86eb145f 100644
--- a/cdk/lib/app-stack.ts
+++ b/cdk/lib/app-stack.ts
@@ -294,7 +294,7 @@ export class AppStack extends cdk.Stack {
         PROXYIST_URL: 'http://proxyist.nabu:3000',
         SENTRY_DSN: 'https://aa8f28b06df84f358949b927e85a924e@o4504801902985216.ingest.sentry.io/4504801910980608',
         DOI_PREFIX: '10.26278',
-        DATACITE_BASE_URL: 'https://mds.datacite.org',
+        DATACITE_BASE_URL: env === 'prod' ? 'https://api.datacite.org' : 'https://api.test.datacite.org',
         AWS_REGION: region,
       },
       secrets: {