diff --git a/app/models/genome/external_resources.rb b/app/models/genome/external_resources.rb index 908d03e8..33f63d0b 100644 --- a/app/models/genome/external_resources.rb +++ b/app/models/genome/external_resources.rb @@ -55,10 +55,27 @@ def external_sra_to_biosamples(acc) 'XREF_LINK[DB[text() = "ENA-SAMPLE"]]/ID' ).map(&:text) elsif ng.xpath('//EXPERIMENT_SET').present? - ng.xpath( - '//EXPERIMENT_SET/EXPERIMENT/DESIGN/SAMPLE_DESCRIPTOR/' \ - 'IDENTIFIERS/PRIMARY_ID' - ).map(&:text) + # Unfortunately, we should prefer external IDs over primary IDs because + # NCBI E-Utils has a strange tendency to return the wrong biosample when + # using SRS... accessions. For example, see: + # + # - https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=biosample + # &id=SRS22988103&rettype=xml&retmode=text + # - https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=biosample + # &id=SAMN13193749&rettype=xml&retmode=text + # + # The first is using the accession SRS22988103 but it (wrongly) retrieves + # data for SAMN22988103 (= SRS11001113). Apparently the backend code + # simply strips off the alphabetic prefix and uses the numeric part + # without checking + sample_id = + ng.xpath( + '//EXPERIMENT_SET/EXPERIMENT/DESIGN/SAMPLE_DESCRIPTOR/IDENTIFIERS' + ) + biosample_id = + sample_id.xpath('EXTERNAL_ID[@namespace="BioSample"]').map(&:text) + biosample_id.present? ? biosample_id : + sample_id.xpath('PRIMARY_ID').map(&:text) else [] # Unknown XML specification end @@ -82,7 +99,7 @@ def external_biosample_hash_ebi(acc) ng = Nokogiri::XML(body) {}.tap do |hash| - h = {} + h = { api: 'EBI' } h[:title] = ng.xpath('//SAMPLE_SET/SAMPLE/TITLE').text h[:description] = ng.xpath('//SAMPLE_SET/SAMPLE/DESCRIPTION').text h[:attributes] = Hash[ @@ -103,7 +120,7 @@ def external_biosample_hash_ncbi(acc) ng = Nokogiri::XML(body) {}.tap do |hash| - h = {} + h = { api: 'NCBI' } h[:title] = ng.xpath('//BioSampleSet/BioSample/Description/Title').text h[:description] = ng.xpath('//BioSampleSet/BioSample/Description/Comment/Paragraph').text diff --git a/app/views/genomes/_samples.html.erb b/app/views/genomes/_samples.html.erb index b7ae05bc..f8d4d4f6 100644 --- a/app/views/genomes/_samples.html.erb +++ b/app/views/genomes/_samples.html.erb @@ -108,9 +108,16 @@
diff --git a/config/environments/development.rb b/config/environments/development.rb index 4fed91c5..0aff5ef9 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -62,7 +62,7 @@ config.file_watcher = ActiveSupport::EventedFileUpdateChecker # Turn off API checks for external services - config.bypass_external_apis = true + config.bypass_external_apis = !ENV['ALLOW_EXTERNAL_APIS'].present? # DataCite access config.datacite = {