Skip to content

Commit

Permalink
Strain parsing
Browse files Browse the repository at this point in the history
- Improve strain parsing
- See also #28
- Implement check for Recommendation 19
  • Loading branch information
lmrodriguezr committed Jan 4, 2024
1 parent 066efcf commit 734d912
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 40 deletions.
46 changes: 13 additions & 33 deletions app/helpers/names_helper.rb
Original file line number Diff line number Diff line change
@@ -1,37 +1,17 @@
module NamesHelper
def strain_html(str, opts = {})
def strain_html(name, opts = {})
opts[:ext] = true if opts[:ext].nil?

ext = opts[:ext] ? '<sup class="fas fa-external-link-alt "> </sup>' : ''
collections = {
DSM: 'https://www.dsmz.de/collection/catalogue/details/culture/DSM-',
JCM: 'https://www.jcm.riken.jp/cgi-bin/jcm/jcm_number?JCM=',
KCTC: 'https://kctc.kribb.re.kr/collection/view?sn=',
ATCC: 'https://www.atcc.org/products/',
BCRC: 'https://catalog.bcrc.firdi.org.tw/BcrcContent?bid=',
LMG: 'https://bccm.belspo.be/catalogues/lmg-strain-details?NUM=',
NBRC: 'https://www.nite.go.jp/nbrc/catalogue/' \
'NBRCCatalogueDetailServlet?ID=IFO&CAT=',
IFO: 'https://www.nite.go.jp/nbrc/catalogue/' \
'NBRCCatalogueDetailServlet?ID=IFO&CAT=',
NCTC: 'https://www.culturecollections.org.uk/products/bacteria/' \
'detail.jsp?collection=nctc&refId=NCTC+',
CIP: 'https://catalogue-crbip.pasteur.fr/' \
'fiche_catalogue.xhtml?crbip=CIP%20',
PCC: 'https://catalogue-crbip.pasteur.fr/' \
'fiche_catalogue.xhtml?crbip=PCC%20',
CCUG: 'https://www.ccug.se/strain?id=',
NRRL: 'https://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/' \
'report.html?nrrlcodes='
}
o = sanitize(str).gsub(/\s*=\s*/, ' = ')
collections.each do |k, v|
o = o.gsub(
/(?<=Strain: | = |^)(#{k})[ -]([\d\-A-Za-z]+)(?= = |$)/,
"<a href='#{v}\\2' target='_blank'>\\1 \\2 #{ext}</a>"
)
end
o.html_safe
ext = opts[:ext] ? '<sup class="fas fa-external-link-alt "> </sup>' : ''
parsed = name.type_is_strain? ? :type_strain_parsed : :genome_strain_parsed
name.send(parsed).map do |str|
if str.is_a? Hash
'<a href="%s" target="_blank">%s %s</a>' % [
str[:url], sanitize(str[:accession]), ext
]
else
str
end
end.join(' = ').html_safe
end

def link_to_name_type(name)
Expand All @@ -51,7 +31,7 @@ def link_to_name_type(name)
fa_icon('external-link-alt', class: 'ml-1')
end
elsif name.type_is_strain?
strain_html(name.type_text)
content_tag(:span, 'Strain: ') + strain_html(name)
else
name.type_text
end
Expand Down
59 changes: 59 additions & 0 deletions app/models/name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,30 @@ def type_material_hash
# Display name registered for a kind of type material.
#
# +type+ is converted with +to_sym+ and looked up in +type_material_hash+.
# Returns the +:name+ value of the matching entry, or +nil+ when the lookup
# yields +nil+.
def type_material_name(type)
  material = type_material_hash[type.to_sym]
  return nil if material.nil?

  material[:name]
end

# Culture collections whose catalogue entries can be linked directly.
#
# Returns a new Hash on every call, mapping the collection acronym (Symbol)
# to a format string with a single `%s` placeholder for the strain number.
# Literal percent signs in the URLs are written as `%%` so that the templates
# can be expanded with `String#%`.
def culture_collections
  # Hosts shared by more than one entry, or too long to fit in a single line
  nite    = 'https://www.nite.go.jp/nbrc/catalogue/NBRCCatalogueDetailServlet'
  pasteur = 'https://catalogue-crbip.pasteur.fr/fiche_catalogue.xhtml'
  nctc    = 'https://www.culturecollections.org.uk/products/bacteria/detail.jsp'
  nrrl    = 'https://nrrl.ncaur.usda.gov/cgi-bin/usda/prokaryote/report.html'

  [
    [:DSM,  'https://www.dsmz.de/collection/catalogue/details/culture/DSM-%s'],
    [:JCM,  'https://www.jcm.riken.jp/cgi-bin/jcm/jcm_number?JCM=%s'],
    [:KCTC, 'https://kctc.kribb.re.kr/collection/view?sn=%s'],
    [:ATCC, 'https://www.atcc.org/products/%s'],
    [:BCRC, 'https://catalog.bcrc.firdi.org.tw/BcrcContent?bid=%s'],
    [:LMG,  'https://bccm.belspo.be/catalogues/lmg-strain-details?NUM=%s'],
    [:NBRC, "#{nite}?ID=IFO&CAT=%s"],
    [:IFO,  "#{nite}?ID=IFO&CAT=%s"],
    [:NCTC, "#{nctc}?collection=nctc&refId=NCTC+%s"],
    [:CIP,  "#{pasteur}?crbip=CIP%%20%s"],
    [:PCC,  "#{pasteur}?crbip=PCC%%20%s"],
    [:CCUG, 'https://www.ccug.se/strain?id=%s'],
    [:NRRL, "#{nrrl}?nrrlcodes=%s"]
  ].to_h
end
end

# ============ --- STATUS --- ============
Expand Down Expand Up @@ -663,6 +687,41 @@ def type_genome
end
end

# Parsed accessions of the type strain.
#
# Returns the result of +strain_parsed+ applied to +type_accession+ when the
# type of this name is a strain. Otherwise returns an empty Array (rather
# than an empty Hash), so that callers always receive the same kind of
# collection and can safely use +map+, +join+ or +count+ on it.
def type_strain_parsed
  return [] unless type_is_strain?

  strain_parsed(type_accession)
end

# Parsed accessions of the strain associated with the type genome.
#
# Returns the result of +strain_parsed+ applied to +genome_strain+ when that
# attribute is set. Otherwise returns an empty Array (rather than an empty
# Hash), so that callers always receive the same kind of collection and can
# safely use +map+, +join+ or +count+ on it.
def genome_strain_parsed
  return [] unless genome_strain?

  strain_parsed(genome_strain)
end

def strain_parsed(strain)
strain.split(/ *= */).map do |str|
parts = str.split(/[ -]+/, 2)
coll = parts.count == 2 ? parts[0].upcase.to_sym : nil

if url_base = self.class.culture_collections[coll]
{
collection: coll,
accession: parts.join(' '),
url: url_base % parts[1]
}
else
str
end
end
end

# Number of distinct culture collections recognized among the accessions of
# the genome strain.
#
# Returns +nil+ when +genome_strain?+ is false. Otherwise returns an Integer
# counting the unique +:collection+ values of the Hash entries in
# +genome_strain_parsed+; entries that are not Hashes are ignored.
def genome_strain_collections
  return unless genome_strain?

  collections = genome_strain_parsed.map do |entry|
    entry[:collection] if entry.is_a?(Hash)
  end
  collections.compact.uniq.count
end

def expected_type_rank
return nil if !rank? || %w[species subspecies domain].include?(rank)
return 'species' if rank == 'genus'
Expand Down
10 changes: 9 additions & 1 deletion app/models/name/quality_checks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,12 @@ class QcWarning
# - Rule 18c requires the implementation of neotype designations
# [TODO: issue #12] no checks are to be implemented
# - Rule 19 is implied by the SeqCode Registry structure
# - Recommendation 19 [TODO: Checklist, TODO: issue #28]:
# - Recommendation 19:
unavailable_reference_strain: {
message: 'If isolated, reference strains should be submitted to two ' \
'culture collections',
recommendations: %w[19]
}.merge(@@link_to_edit_type),
# When a strain belonging to a taxon named under the SeqCode is
# isolated, a reference strain should be designated and submitted to two
# culture collections in different countries. Reference strains have no
Expand Down Expand Up @@ -772,6 +777,9 @@ def qc_warnings

if type_is_genome?
@qc_warnings.add(:ambiguous_type_genome) # check
if genome_strain? && genome_strain_collections < 2
@qc_warnings.add(:unavailable_reference_strain)
end
@qc_warnings.add(:missing_genome_kind) unless type_genome.kind.present?
@qc_warnings.add(:sequence_not_found) if type_genome.auto_failed.present?

Expand Down
2 changes: 1 addition & 1 deletion app/views/names/_name_pdf.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<div class="avoid-page-break">
<dt>Reference Strain</dt>
<dd>
<%= strain_html(name.genome_strain, ext: false) %>
<%= strain_html(name, ext: false) %>
</dd>
</div>
<% end %>
Expand Down
8 changes: 4 additions & 4 deletions app/views/names/_nomenclature.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@
<dt>
<%= fa_icon('flask') %> Reference strain
<%= help_message('Reference Strain') do %>
This strain indicates the source of the genome used as type
material. However, the strain itself is not considered
type material and does not have standing in nomenclature.
This cultured strain indicates the source of the genome used as type
material. However, the strain itself is not considered type material and
does not have standing in nomenclature.
<% end %>
</dt>
<dd>
<%= strain_html(@name.genome_strain) %>
<%= strain_html(@name) %>
</dd>
<% end %>
Expand Down
2 changes: 1 addition & 1 deletion app/views/names/edit_type.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<div id="name_genome_strain_container">
<%=
f.input(
:genome_strain, label: 'Genome strain (an isolate)',
:genome_strain, label: 'Genome strain (an isolate or other culture)',
hint: 'Strain associated to the type genome. ' \
'Use "=" to separate multiple equivalent strain accessions.'
)
Expand Down

0 comments on commit 734d912

Please sign in to comment.