Skip to content

Commit

Permalink
Added ChEBI mappings, updated ui output to show original names of ent…
Browse files Browse the repository at this point in the history
…ries matched via normalized shorthand name in other databases.
  • Loading branch information
nilshoffmann committed Feb 9, 2024
1 parent e73f11f commit 60e8b58
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.lifs-tools</groupId>
<artifactId>lifs-goslin-webapp</artifactId>
<version>2.2.0</version>
<version>2.2.1</version>
<packaging>jar</packaging>
<description>Grammar of Succinct Lipid Nomenclature - Webapplication and REST service for parsing, validation and normalization of lipid names.</description>
<name>LIFS Goslin webapp.</name>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,12 @@ public RedirectView validate(@ModelAttribute("validationFileRequest") Validation
RedirectAttributes redirectAttributes, HttpServletRequest request,
HttpSession session, Principal principal) throws IOException {
ValidationRequest validationRequest = new ValidationRequest();
validationRequest.setLipidNames(new String(validationFileRequest.getFile().getBytes(), StandardCharsets.UTF_8).lines().filter((t) -> {
return !t.isEmpty();
}).map((t) -> {
return t.trim().replaceAll("\\s+", " ");
}).collect(Collectors.toList()));
validationRequest.setLipidNames(new String(validationFileRequest.getFile().getBytes(), StandardCharsets.UTF_8).
lines().
filter((t) -> !t.isEmpty()).
map((t) -> t.trim().replaceAll("\\s+", " ")).
collect(Collectors.toList())
);
redirectAttributes.addFlashAttribute("validationRequest", validationRequest);
return new RedirectView("/validate", true);
}
Expand Down Expand Up @@ -310,6 +311,9 @@ private String toTable(ValidationResults vr) {
m.put("Swiss Lipids References", t.getSwissLipidsReferences().stream().flatMap(Collection::stream).map((r) -> {
return r.getDatabaseUrl() + r.getDatabaseElementId();
}).collect(Collectors.joining(" | ")));
m.put("ChEBI References", t.getChebiReferences().stream().flatMap(Collection::stream).map((r) -> {
return r.getDatabaseUrl();
}).collect(Collectors.joining(" | ")));
m.put("Functional Class Abbr", "[" + lclass.getClassName() + "]");
m.put("Functional Class Synonyms", "[" + lclass.getSynonyms().stream().collect(Collectors.joining(", ")) + "]");
LipidSpeciesInfo info = t.getLipidAdduct().getLipid().getInfo();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright 2020 nils.hoffmann.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lifstools.jgoslin.webapp.domain;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * One row of the ChEBI names table, exposed as a mutable bean.
 * <p>
 * The bean properties are bound to the upper-case column names {@code ID},
 * {@code COMPOUND_ID}, {@code TYPE}, {@code SOURCE}, {@code NAME},
 * {@code ADAPTED} and {@code LANGUAGE} through {@link JsonProperty}
 * annotations on the getters. In addition, {@link #getDatabaseUrl()} derives
 * a link to the ChEBI web page of the referenced compound from the compound
 * id.
 *
 * @author nils.hoffmann
 */
public class ExternalChebiDatabaseReference {

    /**
     * URL prefix of a ChEBI compound page; the compound id is appended to it.
     * Declared static because the value is the same for every instance.
     */
    private static final String BASE_URL = "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:";

    /*
     * Never assigned within this class: getDatabaseUrl() computes the URL
     * from compoundId instead of reading this field. It is retained so that
     * the set of declared bean fields stays exactly as before.
     * NOTE(review): confirm no mapper relies on this field before removing it.
     */
    private String databaseUrl;

    /* Column layout of the source table: ID COMPOUND_ID TYPE SOURCE NAME ADAPTED LANGUAGE */
    private String id;
    private String compoundId;
    private String type;
    private String source;
    private String name;
    private String adapted;
    private String language;

    /**
     * Creates an empty reference; all properties are {@code null} until set.
     */
    public ExternalChebiDatabaseReference() {
    }

    /**
     * Creates a fully populated reference.
     *
     * @param id value of the {@code ID} column
     * @param compoundId value of the {@code COMPOUND_ID} column, also used to
     * build the database URL
     * @param type value of the {@code TYPE} column
     * @param source value of the {@code SOURCE} column
     * @param name value of the {@code NAME} column
     * @param adapted value of the {@code ADAPTED} column
     * @param language value of the {@code LANGUAGE} column
     */
    public ExternalChebiDatabaseReference(String id, String compoundId, String type, String source, String name, String adapted, String language) {
        this.id = id;
        this.compoundId = compoundId;
        this.type = type;
        this.source = source;
        this.name = name;
        this.adapted = adapted;
        this.language = language;
    }

    /**
     * Builds the URL of the ChEBI page for this entry by appending the
     * compound id to the ChEBI base URL.
     * <p>
     * If the compound id has not been set, plain string concatenation applies
     * and the result ends in the literal text {@code null}.
     *
     * @return the base URL followed by the compound id
     */
    public String getDatabaseUrl() {
        return BASE_URL + this.compoundId;
    }

    /**
     * @return the value of the {@code ID} column
     */
    @JsonProperty("ID")
    public String getId() {
        return id;
    }

    /**
     * @param id the value of the {@code ID} column
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return the value of the {@code COMPOUND_ID} column
     */
    @JsonProperty("COMPOUND_ID")
    public String getCompoundId() {
        return compoundId;
    }

    /**
     * @param compoundId the value of the {@code COMPOUND_ID} column
     */
    public void setCompoundId(String compoundId) {
        this.compoundId = compoundId;
    }

    /**
     * @return the value of the {@code TYPE} column
     */
    @JsonProperty("TYPE")
    public String getType() {
        return type;
    }

    /**
     * @param type the value of the {@code TYPE} column
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * @return the value of the {@code SOURCE} column
     */
    @JsonProperty("SOURCE")
    public String getSource() {
        return source;
    }

    /**
     * @param source the value of the {@code SOURCE} column
     */
    public void setSource(String source) {
        this.source = source;
    }

    /**
     * @return the value of the {@code NAME} column
     */
    @JsonProperty("NAME")
    public String getName() {
        return name;
    }

    /**
     * @param name the value of the {@code NAME} column
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the value of the {@code ADAPTED} column
     */
    @JsonProperty("ADAPTED")
    public String getAdapted() {
        return adapted;
    }

    /**
     * @param adapted the value of the {@code ADAPTED} column
     */
    public void setAdapted(String adapted) {
        this.adapted = adapted;
    }

    /**
     * @return the value of the {@code LANGUAGE} column
     */
    @JsonProperty("LANGUAGE")
    public String getLanguage() {
        return language;
    }

    /**
     * @param language the value of the {@code LANGUAGE} column
     */
    public void setLanguage(String language) {
        this.language = language;
    }

    /**
     * @return all properties, including the derived database URL, in a single
     * line of text
     */
    @Override
    public String toString() {
        return "ExternalChebiDatabaseReference{"
                + "databaseUrl=" + getDatabaseUrl()
                + ", id=" + id
                + ", compoundId=" + compoundId
                + ", type=" + type
                + ", source=" + source
                + ", name=" + name
                + ", adapted=" + adapted
                + ", language=" + language
                + '}';
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ public static Grammar forName(String name) {

private Optional<Collection<ExternalDatabaseReference>> swissLipidsReferences;

private Optional<Collection<ExternalDatabaseReference>> chebiReferences;

public static String toFunctionalGroupString(LipidAdduct la, FattyAcid fa) {
return fa.getFunctionalGroups().entrySet().stream().map((entry) -> {
return entry.getValue().stream().map((functionalGroup) -> {
Expand All @@ -122,7 +124,7 @@ public ValidationResult() {
public ValidationResult(String lipidName, Grammar grammar, LipidAdduct lipidAdduct, Map<String, Integer> functionalGroupCounts, String normalizedName, String lipidMapsCategory, String lipidMapsClass,
String lipidCategoryName, String lipidClassName, String lipidExtendedSpeciesName,
String lipidSpeciesName, String lipidMolecularSpeciesName, String lipidSnPositionName, String lipidStructureDefinedName, String lipidFullStructureName, String lipidCompleteStructureName,
Double mass, String sumFormula, Optional<Collection<ExternalDatabaseReference>> lipidMapsReferences, Optional<Collection<ExternalDatabaseReference>> swissLipidsReferences) {
Double mass, String sumFormula, Optional<Collection<ExternalDatabaseReference>> lipidMapsReferences, Optional<Collection<ExternalDatabaseReference>> swissLipidsReferences, Optional<Collection<ExternalDatabaseReference>> chebiReferences) {
this.lipidName = lipidName;
this.grammar = grammar;
this.lipidAdduct = lipidAdduct;
Expand All @@ -144,6 +146,7 @@ public ValidationResult(String lipidName, Grammar grammar, LipidAdduct lipidAddu
this.sumFormula = sumFormula;
this.lipidMapsReferences = lipidMapsReferences;
this.swissLipidsReferences = swissLipidsReferences;
this.chebiReferences = chebiReferences;
}

public String getLipidName() {
Expand Down Expand Up @@ -313,6 +316,14 @@ public Optional<Collection<ExternalDatabaseReference>> getSwissLipidsReferences(
public void setSwissLipidsReferences(Optional<Collection<ExternalDatabaseReference>> swissLipidsReferences) {
this.swissLipidsReferences = swissLipidsReferences;
}

/**
 * Returns the ChEBI database references attached to this validation result.
 * NOTE(review): the backing field has no visible initializer, so this may
 * return {@code null} rather than an empty Optional if it was never set -
 * confirm against the constructors.
 *
 * @return the stored ChEBI references
 */
public Optional<Collection<ExternalDatabaseReference>> getChebiReferences() {
return chebiReferences;
}

/**
 * Replaces the ChEBI database references attached to this validation result.
 * The given value is stored as-is, without copying or null checks.
 *
 * @param chebiReferences the ChEBI references to store
 */
public void setChebiReferences(Optional<Collection<ExternalDatabaseReference>> chebiReferences) {
this.chebiReferences = chebiReferences;
}

@Override
public String toString() {
Expand All @@ -333,7 +344,9 @@ public String toString() {
+ ", mass=" + mass
+ ", sumFormula=" + sumFormula
+ ", lipidMapsReferences=" + lipidMapsReferences
+ ", swissLipidsReferences=" + swissLipidsReferences + '}';
+ ", swissLipidsReferences=" + swissLipidsReferences
+ ", chebiReferences=" + chebiReferences
+ '}';
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
import java.util.stream.Collectors;
import org.apache.commons.collections4.MultiValuedMap;
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
import org.lifstools.jgoslin.domain.LipidAdduct;
import org.lifstools.jgoslin.parser.GoslinParser;
import org.lifstools.jgoslin.webapp.domain.ExternalChebiDatabaseReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.ClassPathResource;
Expand All @@ -44,32 +47,68 @@ public class ExternalDatabaseMappingLoader {

private final MultiValuedMap<String, ExternalDatabaseReference> lipidMapsReferences;
private final MultiValuedMap<String, ExternalDatabaseReference> swissLipidsReferences;
private final MultiValuedMap<String, ExternalDatabaseReference> chebiReferences;

public ExternalDatabaseMappingLoader() {
this.lipidMapsReferences = new ArrayListValuedHashMap<>();
this.swissLipidsReferences = new ArrayListValuedHashMap<>();
this.chebiReferences = new ArrayListValuedHashMap<>();
int lipidMapsEntries = 0;
for (ExternalDatabaseReference edr : loadObjectList(ExternalDatabaseReference.class, "lipidmaps-normalized.tsv", '\t')) {
this.lipidMapsReferences.put(edr.getNormalizedName(), edr);
this.lipidMapsReferences.put(edr.getNativeAbbreviation(), edr);
this.lipidMapsReferences.put(edr.getNativeName(), edr);
// this.lipidMapsReferences.put(edr.getNativeAbbreviation(), edr);
// this.lipidMapsReferences.put(edr.getNativeName(), edr);
lipidMapsEntries++;
}
log.info("Loaded {} records for Lipid MAPS!", lipidMapsEntries);
int swissLipidsEntries = 0;
for (ExternalDatabaseReference edr : loadObjectList(ExternalDatabaseReference.class, "swiss-lipids-normalized.tsv", '\t')) {
this.swissLipidsReferences.put(edr.getNormalizedName(), edr);
this.swissLipidsReferences.put(edr.getNativeAbbreviation(), edr);
this.swissLipidsReferences.put(edr.getNativeName(), edr);
// this.swissLipidsReferences.put(edr.getNativeAbbreviation(), edr);
// this.swissLipidsReferences.put(edr.getNativeName(), edr);
swissLipidsEntries++;
}

log.info("Loaded {} records for Swiss Lipids!", swissLipidsEntries);

int chebiEntries = 0;
int totalChebiEntries = 0;
GoslinParser parser = new GoslinParser();
for (ExternalChebiDatabaseReference ecdr : loadObjectList(ExternalChebiDatabaseReference.class, "names-chebi-Jan-18-2024.tsv", '\t')) {
totalChebiEntries++;
if ("SYNONYM".equals(ecdr.getType())) {
switch(ecdr.getSource()) {
case "ChEBI":
case "LIPID MAPS":
case "SUBMITTER":
case "MetaCyc":
log.debug("Processing entry {}", ecdr.getName());
LipidAdduct lipidAdduct = parser.parse(ecdr.getName(), parser.newEventHandler(), false);
if (lipidAdduct != null) {
String lipidName = lipidAdduct.getLipidString();
log.debug("Detected valid lipid name: {} for reference: {}", lipidName, ecdr);
ExternalDatabaseReference edr = new ExternalDatabaseReference(
ecdr.getDatabaseUrl(),
ecdr.getCompoundId(),
lipidAdduct.getLipidLevel().name(),
ecdr.getName(),
ecdr.getName(),
lipidName
);
log.debug("Adding external database reference: {}", edr);
this.chebiReferences.put(edr.getNormalizedName(), edr);
chebiEntries++;
}
break;
}
}
}
log.info("Loaded {}/{} records for ChEBI!", chebiEntries, totalChebiEntries);
}

public Optional<Collection<ExternalDatabaseReference>> findSwissLipidsEntry(String... names) {
List<String> namesList = Arrays.asList(names);
return Optional.of(namesList.stream().filter((t) -> t!=null).map((t) -> {
return Optional.of(namesList.stream().filter((t) -> t != null).map((t) -> {
return this.swissLipidsReferences.get(t);
}).flatMap(Collection::stream).filter((t) -> {
return t != null;
Expand All @@ -78,13 +117,22 @@ public Optional<Collection<ExternalDatabaseReference>> findSwissLipidsEntry(Stri

public Optional<Collection<ExternalDatabaseReference>> findLipidMapsEntry(String... lipidMapsNames) {
List<String> namesList = Arrays.asList(lipidMapsNames);
return Optional.of(namesList.stream().filter((t) -> t!=null).map((t) -> {
return Optional.of(namesList.stream().filter((t) -> t != null).map((t) -> {
return this.lipidMapsReferences.get(t);
}).flatMap(Collection::stream).filter((t) -> {
return t != null;
}).distinct().collect(Collectors.toList()));
}

/**
 * Looks up ChEBI references for each of the given names.
 * <p>
 * {@code null} names are skipped. The references found for the remaining
 * names are concatenated in argument order and duplicates are dropped.
 *
 * @param names the candidate lookup keys; individual elements may be
 * {@code null}
 * @return an Optional that always holds a collection, which is empty when
 * none of the names has a ChEBI reference
 */
public Optional<Collection<ExternalDatabaseReference>> findChebiEntry(String... names) {
    Collection<ExternalDatabaseReference> matches = Arrays.stream(names)
            .filter((name) -> name != null)
            .flatMap((name) -> this.chebiReferences.get(name).stream())
            .filter((reference) -> reference != null)
            .distinct()
            .collect(Collectors.toList());
    return Optional.of(matches);
}

protected final <T> List<T> loadObjectList(Class<T> type, String fileName, char columnSeparator) {
try {
CsvSchema bootstrapSchema = CsvSchema.emptySchema().withHeader().withColumnSeparator(columnSeparator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ private ValidationResult createValidationResult(LipidAdduct la, String lipidName
result.setNormalizedName(normalizedName);
result.setLipidMapsReferences(dbLoader.findLipidMapsEntry(normalizedName, lipidName, lipidSpeciesName));
result.setSwissLipidsReferences(dbLoader.findSwissLipidsEntry(normalizedName, lipidName, lipidSpeciesName));
result.setChebiReferences(dbLoader.findChebiEntry(normalizedName, lipidName, lipidSpeciesName));
} catch (RuntimeException re) {
log.debug("Error while trying to resolve database hits for {}!", lipidName);
}
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/application-dev.properties
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ logging.level.org.springframework.web=TRACE
logging.level.org.springframework.ldap=DEBUG
logging.level.org.lifstools.jgoslin.webapp.services.LipidNameValidationService=WARN
logging.level.org.lifstools=WARN
logging.level.org.lifstools.jgoslin.webapp.services.ExternalDatabaseMappingLoader=INFO

spring.thymeleaf.cache=false

Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/names-chebi-Jan-18-2024.tsv
Git LFS file not shown
7 changes: 6 additions & 1 deletion src/main/resources/news.properties
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,9 @@ news.news[10].content = <p>This release fixes the REST API description. To track
news.news[11].link = https://github.com/lifs-tools/goslin-webapp/releases/tag/v2.2.0
news.news[11].date = v2.2.0 - October 24th, 2023
news.news[11].title = Update to Goslin 2.2.0
news.news[11].content = <p>This release updates the internal implementation to use Goslin 2.2.0. This release improves support for mediators, gangliosides, HMDB name support and parsing of adducts with labeled isotopes. It also fixes wrong lyso classifications for SPB, SPBP, LHexCer and LSM. The web application now outputs the names of each lipid on all supported levels of the shorthand name hierarchy, both for tabular file export and REST API. To track progress of Goslin updates, please check our jgoslin <a href="https://github.com/lifs-tools/jgoslin/releases" target="_blank">release page</a> and <a href="https://github.com/lifs-tools/goslin/issues" target="_blank">GitHub issues page</a>.</p>
news.news[11].content = <p>This release updates the internal implementation to use Goslin 2.2.0. This release improves support for mediators, gangliosides, HMDB name support and parsing of adducts with labeled isotopes. It also fixes wrong lyso classifications for SPB, SPBP, LHexCer and LSM. The web application now outputs the names of each lipid on all supported levels of the shorthand name hierarchy, both for tabular file export and REST API. To track progress of Goslin updates, please check our jgoslin <a href="https://github.com/lifs-tools/jgoslin/releases" target="_blank">release page</a> and <a href="https://github.com/lifs-tools/goslin/issues" target="_blank">GitHub issues page</a>.</p>

news.news[12].link = https://github.com/lifs-tools/goslin-webapp/releases/tag/v2.2.1
news.news[12].date = v2.2.1 - February 9th, 2024
news.news[12].title = Integration of ChEBI
news.news[12].content = <p>This release adds mappings of lipid names to ChEBI entries (version as of Jan 18th, 2024). Please note that the user interface will now return original names of matched lipid shorthand names in other databases, not the shorthand name. The REST response and tabular output will contain direct URLs to the corresponding entries in the external databases. To track progress of Goslin updates, please check our jgoslin <a href="https://github.com/lifs-tools/jgoslin/releases" target="_blank">release page</a> and <a href="https://github.com/lifs-tools/goslin/issues" target="_blank">GitHub issues page</a>.</p>
Loading

0 comments on commit 60e8b58

Please sign in to comment.