diff --git a/pom.xml b/pom.xml index d2d59c4b..f6ee496d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,15 +5,15 @@ baseCode baseCode baseCode - 1.1.21 + 1.1.22 2003 - https://github.com/pavlidisLab/basecode + https://github.com/PavlidisLab/baseCode - UBC Michael Smith Laboratories - http://www.msl.ubc.ca/ + Pavlidis Lab + https://pavlab.msl.ubc.ca/ jar @@ -27,7 +27,7 @@ The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt + https://www.apache.org/licenses/LICENSE-2.0.txt repo @@ -81,7 +81,7 @@ commons-io commons-io - 2.15.1 + 2.16.1 org.apache.commons @@ -144,45 +144,7 @@ org.apache.jena jena-core - 2.7.4 - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - - - - - - org.apache.jena - jena-larq - 1.0.0-incubating - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - icu4j - com.ibm.icu - - - - - org.apache.jena - jena-arq - 2.9.4 + 2.13.0 log4j @@ -264,16 +226,28 @@ 4.11.0 test + + org.assertj + assertj-core + 3.25.3 + test + + + org.apache.logging.log4j + log4j-api + 2.23.1 + test + org.apache.logging.log4j log4j-core - 2.21.1 + 2.23.1 test org.apache.logging.log4j log4j-slf4j-impl - 2.21.1 + 2.23.1 test @@ -341,7 +315,7 @@ com.amashchenko.maven.plugin gitflow-maven-plugin - 1.16.0 + 1.21.0 false @@ -444,7 +418,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.1.2 + 3.2.2 diff --git a/src/ontology.properties b/src/basecode.properties similarity index 95% rename from src/ontology.properties rename to src/basecode.properties index 2610a9b1..61406ce7 100644 --- a/src/ontology.properties +++ b/src/basecode.properties @@ -37,4 +37,7 @@ url.fmaOntology=http://purl.obolibrary.org/obo/fma.owl ontology.index.dir= ontology.cache.dir= -ncbo.api.key= \ No newline at end of file +ncbo.api.key= + +rserve.start.command= +rlibpath= \ No newline at end of file diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java 
deleted file mode 100644 index 32b96f3b..00000000 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * The baseCode project - * - * Copyright (c) 2013 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. - */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.ontology.ProfileRegistry; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.reasoner.ReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLFBRuleReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLMicroReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLMiniReasonerFactory; -import com.hp.hpl.jena.reasoner.transitiveReasoner.TransitiveReasonerFactory; -import ubic.basecode.ontology.model.OntologyModel; -import ubic.basecode.util.Configuration; - -import java.io.IOException; -import java.io.InputStream; - -/** - * This class has some stuff that's specific to in-memory ontologies. Unlike database backed ontologies we don't use a - * pool keeping only one instance of model in memory. - * - * @author paul - */ -public abstract class AbstractOntologyMemoryBackedService extends AbstractOntologyService { - - @Override - protected String getOntologyUrl() { - return Configuration.getString( "url." 
+ getOntologyName() ); - } - - @Override - protected OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { - return new OntologyModelImpl( OntologyLoader.loadMemoryModel( this.getOntologyUrl(), this.getCacheName(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); - } - - @Override - protected OntologyModel loadModelFromStream( InputStream is, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { - return new OntologyModelImpl( OntologyLoader.loadMemoryModel( is, this.getOntologyUrl(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); - } - - private OntModelSpec getSpec( LanguageLevel languageLevel, InferenceMode inferenceMode ) { - String profile; - switch ( languageLevel ) { - case FULL: - profile = ProfileRegistry.OWL_LANG; - break; - case DL: - profile = ProfileRegistry.OWL_DL_LANG; - break; - case LITE: - profile = ProfileRegistry.OWL_LITE_LANG; - break; - default: - throw new UnsupportedOperationException( String.format( "Unsupported OWL language level %s.", languageLevel ) ); - } - ReasonerFactory reasonerFactory; - switch ( inferenceMode ) { - case FULL: - reasonerFactory = OWLFBRuleReasonerFactory.theInstance(); - break; - case MINI: - reasonerFactory = OWLMiniReasonerFactory.theInstance(); - break; - case MICRO: - reasonerFactory = OWLMicroReasonerFactory.theInstance(); - break; - case TRANSITIVE: - reasonerFactory = TransitiveReasonerFactory.theInstance(); - break; - case NONE: - reasonerFactory = null; - break; - default: - throw new UnsupportedOperationException( String.format( "Unsupported inference level %s.", inferenceMode ) ); - } - return new OntModelSpec( ModelFactory.createMemModelMaker(), null, reasonerFactory, profile ); - } -} diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java index bd06b312..e821a06a 
100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java @@ -25,11 +25,8 @@ import ubic.basecode.ontology.model.OntologyResource; import javax.annotation.Nullable; -import java.util.Comparator; import java.util.Objects; -import static java.util.Comparator.*; - /** * @author pavlidis */ @@ -37,25 +34,16 @@ abstract class AbstractOntologyResource implements OntologyResource { protected static final Logger log = LoggerFactory.getLogger( AbstractOntologyResource.class ); - private static final Comparator comparator = Comparator - .comparing( OntologyResource::getScore, nullsLast( reverseOrder() ) ) - .thenComparing( OntologyResource::getUri, nullsLast( naturalOrder() ) ); - private final OntResource res; - @Nullable - private final Double score; private String _label; private boolean _isLabelNull = false; - protected AbstractOntologyResource( OntResource resource ) { - this.res = resource; - this.score = null; - } + private String _label; + private boolean _isLabelNull = false; - public AbstractOntologyResource( OntResource resource, double score ) { + protected AbstractOntologyResource( OntResource resource ) { this.res = resource; - this.score = score; } @Override @@ -97,17 +85,6 @@ public boolean isObsolete() { return res.hasLiteral( OWL2.deprecated, true ); } - @Override - @Nullable - public Double getScore() { - return score; - } - - @Override - public int compareTo( OntologyResource other ) { - return Objects.compare( this, other, comparator ); - } - @Override public boolean equals( Object obj ) { if ( this == obj ) return true; diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java index 4caa2851..ef8e1d84 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java @@ -20,12 +20,17 @@ package 
ubic.basecode.ontology.jena; import com.hp.hpl.jena.ontology.*; -import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers; -import com.hp.hpl.jena.rdf.arp.ParseException; +import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.NodeIterator; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdfxml.xmlinput.ARPErrorNumbers; +import com.hp.hpl.jena.rdfxml.xmlinput.ParseException; +import com.hp.hpl.jena.reasoner.ReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLFBRuleReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLMicroReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLMiniReasonerFactory; +import com.hp.hpl.jena.reasoner.transitiveReasoner.TransitiveReasonerFactory; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.vocabulary.DC_11; import org.apache.commons.lang3.RandomStringUtils; @@ -39,7 +44,7 @@ import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.OntologyService; import ubic.basecode.ontology.search.OntologySearchException; -import ubic.basecode.util.Configuration; +import ubic.basecode.ontology.search.OntologySearchResult; import javax.annotation.Nullable; import java.io.IOException; @@ -47,9 +52,6 @@ import java.io.InterruptedIOException; import java.nio.channels.ClosedByInterruptException; import java.util.*; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -68,31 +70,46 @@ public abstract class AbstractOntologyService implements OntologyService { /** * Properties through which propagation is allowed for {@link #getParents(Collection, boolean, boolean)}} */ - private static final Set DEFAULT_ADDITIONAL_PROPERTIES; + private static final Set 
DEFAULT_ADDITIONAL_PROPERTIES; static { DEFAULT_ADDITIONAL_PROPERTIES = new HashSet<>(); - DEFAULT_ADDITIONAL_PROPERTIES.add( BFO.partOf.getURI() ); - DEFAULT_ADDITIONAL_PROPERTIES.add( RO.properPartOf.getURI() ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.partOf ); + // all those are sub-properties of partOf, but some ontologies might not have them + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.activeIngredientIn ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.boundingLayerOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.branchingPartOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.determinedBy ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.ends ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isSubsequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isEndSequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isStartSequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.lumenOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.luminalSpaceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.mainStemOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.memberOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.occurrentPartOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.skeletonOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.starts ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.subclusterOf ); + // used by some older ontologies + //noinspection deprecation + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.properPartOf ); } /** - * Lock used to prevent reads while the ontology is being initialized. - */ - private final ReadWriteLock rwLock = new ReentrantReadWriteLock(); - - /** - * Internal state protected by {@link #rwLock}. + * Internal state. 
*/ @Nullable - private State state = null; + private volatile State state = null; /* settings (applicable for next initialization) */ private LanguageLevel languageLevel = LanguageLevel.FULL; private InferenceMode inferenceMode = InferenceMode.TRANSITIVE; private boolean processImports = true; private boolean searchEnabled = true; - private Set additionalPropertyUris = DEFAULT_ADDITIONAL_PROPERTIES; + private Set excludedWordsFromStemming = Collections.emptySet(); + private Set additionalPropertyUris = DEFAULT_ADDITIONAL_PROPERTIES.stream().map( Property::getURI ).collect( Collectors.toSet() ); @Override public String getName() { @@ -150,6 +167,16 @@ public void setSearchEnabled( boolean searchEnabled ) { this.searchEnabled = searchEnabled; } + @Override + public Set getExcludedWordsFromStemming() { + return getState().map( state -> state.excludedWordsFromStemming ).orElse( excludedWordsFromStemming ); + } + + @Override + public void setExcludedWordsFromStemming( Set excludedWordsFromStemming ) { + this.excludedWordsFromStemming = excludedWordsFromStemming; + } + @Override public Set getAdditionalPropertyUris() { return getState().map( state -> state.additionalPropertyUris ).orElse( additionalPropertyUris ); @@ -168,7 +195,7 @@ public void initialize( InputStream stream, boolean forceIndexing ) { initialize( stream, true, forceIndexing ); } - private void initialize( @Nullable InputStream stream, boolean forceLoad, boolean forceIndexing ) { + private synchronized void initialize( @Nullable InputStream stream, boolean forceLoad, boolean forceIndexing ) { if ( !forceLoad && state != null ) { log.warn( "{} is already loaded, and force=false, not restarting", this ); return; @@ -178,12 +205,11 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea String ontologyUrl = getOntologyUrl(); String ontologyName = getOntologyName(); String cacheName = getCacheName(); - Set additionalProperties = this.additionalPropertyUris.stream() - .map( 
ResourceFactory::createProperty ).collect( Collectors.toSet() ); LanguageLevel languageLevel = this.languageLevel; InferenceMode inferenceMode = this.inferenceMode; boolean processImports = this.processImports; boolean searchEnabled = this.searchEnabled; + Set excludedWordsFromStemming = this.excludedWordsFromStemming; // Detect configuration problems. if ( StringUtils.isBlank( ontologyUrl ) ) { @@ -211,7 +237,7 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea SearchIndex index; // loading the model from disk or URL is lengthy - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; try { @@ -223,6 +249,8 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } } catch ( Exception e ) { if ( isCausedByInterrupt( e ) ) { + // make sure that the thread is interrupted + Thread.currentThread().interrupt(); return; } else { throw new RuntimeException( String.format( "Failed to load ontology model for %s.", this ), e ); @@ -230,27 +258,26 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } // retrieving restrictions is lengthy - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; // compute additional restrictions - Set additionalRestrictions = model.listRestrictions() - .filterKeep( new RestrictionWithOnPropertyFilter( additionalProperties ) ) - .toSet(); + Set additionalProperties = additionalPropertyUris.stream().map( model::getProperty ).collect( Collectors.toSet() ); + Set additionalRestrictions = JenaUtils.listRestrictionsOnProperties( model, additionalProperties, true ).toSet(); + // indexing is lengthy, don't bother if we're interrupted - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; if ( searchEnabled && cacheName != null ) { //Checks if the current ontology has changed since it was last loaded. 
boolean changed = OntologyLoader.hasChanged( cacheName ); - boolean indexExists = OntologyIndexer.getSubjectIndex( cacheName ) != null; + boolean indexExists = OntologyIndexer.getSubjectIndex( cacheName, excludedWordsFromStemming ) != null; boolean forceReindexing = forceLoad && forceIndexing; // indexing is slow, don't do it if we don't have to. try { - index = OntologyIndexer.indexOntology( cacheName, model, - forceReindexing || changed || !indexExists ); + index = OntologyIndexer.indexOntology( cacheName, model, excludedWordsFromStemming, forceReindexing || changed || !indexExists ); } catch ( Exception e ) { if ( isCausedByInterrupt( e ) ) { return; @@ -263,23 +290,17 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } // if interrupted, we don't need to replace the model and clear the *old* cache - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; - Lock lock = rwLock.writeLock(); - try { - lock.lock(); - this.state = new State( model, index, additionalRestrictions, languageLevel, inferenceMode, processImports, additionalProperties.stream().map( Property::getURI ).collect( Collectors.toSet() ) ); - if ( cacheName != null ) { - // now that the terms have been replaced, we can clear old caches - try { - OntologyLoader.deleteOldCache( cacheName ); - } catch ( IOException e ) { - log.error( String.format( String.format( "Failed to delete old cache directory for %s.", this ), e ) ); - } + this.state = new State( model, index, excludedWordsFromStemming, additionalRestrictions, languageLevel, inferenceMode, processImports, additionalProperties.stream().map( Property::getURI ).collect( Collectors.toSet() ), null ); + if ( cacheName != null ) { + // now that the terms have been replaced, we can clear old caches + try { + OntologyLoader.deleteOldCache( cacheName ); + } catch ( IOException e ) { + log.error( String.format( String.format( "Failed to delete old cache directory for %s.", this ), e ) ); } - 
} finally { - lock.unlock(); } loadTime.stop(); @@ -287,14 +308,6 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea log.info( "Finished loading {} in {}s", this, String.format( "%.2f", loadTime.getTime() / 1000.0 ) ); } - private boolean checkIfInterrupted() { - if ( Thread.interrupted() ) { - log.warn( "The current thread is interrupted, initialization of {} will be stop.", this ); - return true; - } - return false; - } - private static boolean isCausedByInterrupt( Exception e ) { return hasCauseMatching( e, cause -> ( ( cause instanceof ParseException ) && ( ( ParseException ) cause ).getErrorNumber() == ARPErrorNumbers.ERR_INTERRUPTED ) ) || hasCause( e, InterruptedException.class ) || @@ -311,107 +324,87 @@ private static boolean hasCauseMatching( Throwable t, Predicate predi } @Override - public Collection findIndividuals( String search, boolean keepObsoletes ) throws + public Set> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException { - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no individuals will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchIndividuals( state.model, state.index, search ) - .mapWith( i -> ( OntologyIndividual ) new OntologyIndividualImpl( i.result, state.additionalRestrictions, i.score ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no individuals will be returned.", this ); + return Collections.emptySet(); + } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will 
be returned.", this ); + return Collections.emptySet(); } + return state.index.searchIndividuals( state.model, search, maxResults ) + .mapWith( i -> new OntologySearchResult<>( ( OntologyIndividual ) new OntologyIndividualImpl( i.result.as( Individual.class ), state.additionalRestrictions ), i.score ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override - public Collection findResources( String searchString, boolean keepObsoletes ) throws + public Collection> findResources( String searchString, int maxResults, boolean keepObsoletes ) throws OntologySearchException { - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no resources will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchResources( state.model, state.index, searchString ) - .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) ) - .mapWith( r -> { - try { - if ( r.result.canAs( OntClass.class ) ) { - return new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions, r.score ); - } else if ( r.result.canAs( Individual.class ) ) { - return new OntologyIndividualImpl( r.result.as( Individual.class ), state.additionalRestrictions, r.score ); - } else { - return ( OntologyResource ) null; - } - } catch ( ConversionException e ) { - log.warn( "Conversion failed for " + r, e ); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no resources will be returned.", this ); + return Collections.emptySet(); + } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); + return Collections.emptySet(); + } + return 
state.index.search( state.model, searchString, maxResults ) + .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) ) + .mapWith( r -> { + try { + if ( r.result.canAs( OntClass.class ) ) { + return new OntologySearchResult<>( ( OntologyResource ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ); + } else if ( r.result.canAs( Individual.class ) ) { + return new OntologySearchResult<>( ( OntologyResource ) new OntologyIndividualImpl( r.result.as( Individual.class ), state.additionalRestrictions ), r.score ); + } else { return null; } - } ) - .filterKeep( where( Objects::nonNull ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); - } + } catch ( ConversionException e ) { + log.warn( "Conversion failed for {}", r, e ); + return null; + } + } ) + .filterKeep( where( Objects::nonNull ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override - public Collection findTerm( String search, boolean keepObsoletes ) throws OntologySearchException { - if ( log.isDebugEnabled() ) log.debug( "Searching " + this + " for '" + search + "'" ); - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no terms will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchClasses( state.model, state.index, search ) - .mapWith( r -> ( OntologyTerm ) new OntologyTermImpl( r.result, state.additionalRestrictions, r.score ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); + public Collection> findTerm( String search, int 
maxResults, boolean keepObsoletes ) throws OntologySearchException { + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no terms will be returned.", this ); + return Collections.emptySet(); } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); + return Collections.emptySet(); + } + return state.index.searchClasses( state.model, search, maxResults ) + .mapWith( r -> new OntologySearchResult<>( ( OntologyTerm ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override public OntologyTerm findUsingAlternativeId( String alternativeId ) { - Lock lock = state != null && state.alternativeIDs != null ? rwLock.readLock() : rwLock.writeLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, null will be returned for alternative ID match.", this ); - return null; - } - if ( state.alternativeIDs == null ) { - log.info( "init search by alternativeID" ); - initSearchByAlternativeId( state ); - } - String termUri = state.alternativeIDs.get( alternativeId ); - return termUri != null ? getTerm( termUri ) : null; - } finally { - lock.unlock(); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, null will be returned for alternative ID match.", this ); + return null; + } + if ( state.alternativeIDs == null ) { + log.info( "init search by alternativeID" ); + this.state = initSearchByAlternativeId( state ); } + assert state.alternativeIDs != null; + String termUri = state.alternativeIDs.get( alternativeId ); + return termUri != null ? 
getTerm( termUri ) : null; } @Override @@ -500,13 +493,8 @@ public Set getChildren( Collection terms, boolean di @Override public boolean isEnabled() { - // quick path: just lookup the configuration - String configParameter = "load." + getOntologyName(); - if ( Configuration.getBoolean( configParameter ) ) { - return true; - } // could have forced, without setting config - return getState().isPresent(); + return isOntologyEnabled() || isOntologyLoaded() || isInitializationThreadAlive(); } @Override @@ -515,7 +503,7 @@ public boolean isOntologyLoaded() { return state != null; } - private Thread initializationThread = null; + private volatile Thread initializationThread = null; @Override public synchronized void startInitializationThread( boolean forceLoad, boolean forceIndexing ) { @@ -577,16 +565,9 @@ public void waitForInitializationThread() throws InterruptedException { protected abstract String getOntologyUrl(); /** - * Delegates the call as to load the model into memory or leave it on disk. Simply delegates to either - * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec ); + * Indicate if this ontology is enabled. */ - protected abstract OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException; - - - /** - * Load a model from a given input stream. - */ - protected abstract OntologyModel loadModelFromStream( InputStream stream, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException; + protected abstract boolean isOntologyEnabled(); /** * A name for caching this ontology, or null to disable caching. @@ -594,12 +575,63 @@ public void waitForInitializationThread() throws InterruptedException { * Note that if null is returned, the ontology will not have full-text search capabilities. 
*/ @Nullable - protected String getCacheName() { - return getOntologyName(); + protected abstract String getCacheName(); + + /** + * Delegates the call as to load the model into memory or leave it on disk. Simply delegates to either + * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec ); + */ + protected OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { + return new OntologyModelImpl( OntologyLoader.loadMemoryModel( this.getOntologyUrl(), this.getCacheName(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); + } + + /** + * Load a model from a given input stream. + */ + protected OntologyModel loadModelFromStream( InputStream is, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { + return new OntologyModelImpl( OntologyLoader.loadMemoryModel( is, this.getOntologyUrl(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); + } + + private OntModelSpec getSpec( LanguageLevel languageLevel, InferenceMode inferenceMode ) { + String profile; + switch ( languageLevel ) { + case FULL: + profile = ProfileRegistry.OWL_LANG; + break; + case DL: + profile = ProfileRegistry.OWL_DL_LANG; + break; + case LITE: + profile = ProfileRegistry.OWL_LITE_LANG; + break; + default: + throw new UnsupportedOperationException( String.format( "Unsupported OWL language level %s.", languageLevel ) ); + } + ReasonerFactory reasonerFactory; + switch ( inferenceMode ) { + case FULL: + reasonerFactory = OWLFBRuleReasonerFactory.theInstance(); + break; + case MINI: + reasonerFactory = OWLMiniReasonerFactory.theInstance(); + break; + case MICRO: + reasonerFactory = OWLMicroReasonerFactory.theInstance(); + break; + case TRANSITIVE: + reasonerFactory = TransitiveReasonerFactory.theInstance(); + break; + case NONE: + reasonerFactory = null; + break; + default: + throw new UnsupportedOperationException( 
String.format( "Unsupported inference level %s.", inferenceMode ) ); + } + return new OntModelSpec( ModelFactory.createMemModelMaker(), null, reasonerFactory, profile ); } @Override - public void index( boolean force ) { + public synchronized void index( boolean force ) { String cacheName = getCacheName(); if ( cacheName == null ) { log.warn( "This ontology does not support indexing; assign a cache name to be used." ); @@ -609,29 +641,20 @@ public void index( boolean force ) { log.warn( "Search is not enabled for this ontology." ); return; } + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not initialized, cannot index it.", this ); + return; + } SearchIndex index; - Lock lock = rwLock.readLock(); try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not initialized, cannot index it.", this ); - return; - } - index = OntologyIndexer.indexOntology( getCacheName(), state.model, force ); + index = OntologyIndexer.indexOntology( cacheName, state.model, state.excludedWordsFromStemming, force ); } catch ( IOException e ) { log.error( "Failed to generate index for {}.", this, e ); return; - } finally { - lock.unlock(); } // now we replace the index - lock = rwLock.writeLock(); - try { - lock.lock(); - this.state.index = index; - } finally { - lock.unlock(); - } + this.state = new State( state.model, index, state.excludedWordsFromStemming, state.additionalRestrictions, state.languageLevel, state.inferenceMode, state.processImports, state.additionalPropertyUris, state.alternativeIDs ); } /** @@ -648,8 +671,8 @@ public void index( boolean force ) { * trying HP_0001453ibrary.org/obo/HP_0001453 -----> * HP_0000005 */ - private void initSearchByAlternativeId( State state ) { - state.alternativeIDs = new HashMap<>(); + private State initSearchByAlternativeId( State state ) { + Map alternativeIDs = new HashMap<>(); // for all Ontology terms that exist in the tree ExtendedIterator iterator = state.model.listClasses(); while ( 
iterator.hasNext() ) { @@ -661,12 +684,13 @@ private void initSearchByAlternativeId( State state ) { String baseOntologyUri = ontologyTerm.getUri().substring( 0, ontologyTerm.getUri().lastIndexOf( "/" ) + 1 ); for ( String alternativeId : ontologyTerm.getAlternativeIds() ) { // first way - state.alternativeIDs.put( alternativeId, ontologyTerm.getUri() ); + alternativeIDs.put( alternativeId, ontologyTerm.getUri() ); // second way String alternativeIdModified = alternativeId.replace( ':', '_' ); - state.alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm.getUri() ); + alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm.getUri() ); } } + return new State( state.model, state.index, state.excludedWordsFromStemming, state.additionalRestrictions, state.languageLevel, state.inferenceMode, state.processImports, state.additionalPropertyUris, alternativeIDs ); } @Override @@ -681,12 +705,11 @@ public void loadTermsInNameSpace( InputStream is, boolean forceIndex ) { while ( initializationThread.isAlive() ) { try { initializationThread.join( 5000 ); - log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." ); } catch ( InterruptedException e ) { Thread.currentThread().interrupt(); - log.warn( "Got interrupted while waiting for the initialization thread of {} to finish.", this ); return; } + log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." 
); ++wait; if ( wait >= maxWait && !initializationThread.isAlive() ) { throw new RuntimeException( String.format( "Got tired of waiting for %s's initialization thread.", this ) ); @@ -703,13 +726,7 @@ public String toString() { } private Optional getState() { - Lock lock = this.rwLock.readLock(); - try { - lock.lock(); - return Optional.ofNullable( state ); - } finally { - lock.unlock(); - } + return Optional.ofNullable( state ); } private Set getOntClassesFromTerms( OntModel model, Collection terms ) { @@ -728,23 +745,26 @@ private Set getOntClassesFromTerms( OntModel model, Collection excludedWordsFromStemming; private final Set additionalRestrictions; private final LanguageLevel languageLevel; private final InferenceMode inferenceMode; private final boolean processImports; private final Set additionalPropertyUris; @Nullable - private Map alternativeIDs; + private final Map alternativeIDs; - private State( OntModel model, @Nullable SearchIndex index, Set additionalRestrictions, @Nullable LanguageLevel languageLevel, InferenceMode inferenceMode, boolean processImports, Set additionalPropertyUris ) { + private State( OntModel model, @Nullable SearchIndex index, Set excludedWordsFromStemming, Set additionalRestrictions, @Nullable LanguageLevel languageLevel, InferenceMode inferenceMode, boolean processImports, Set additionalPropertyUris, @Nullable Map alternativeIDs ) { this.model = model; this.index = index; + this.excludedWordsFromStemming = excludedWordsFromStemming; this.additionalRestrictions = additionalRestrictions; this.languageLevel = languageLevel; this.inferenceMode = inferenceMode; this.processImports = processImports; this.additionalPropertyUris = additionalPropertyUris; + this.alternativeIDs = alternativeIDs; } } } \ No newline at end of file diff --git a/src/ubic/basecode/ontology/jena/BFO.java b/src/ubic/basecode/ontology/jena/BFO.java deleted file mode 100644 index 274636df..00000000 --- a/src/ubic/basecode/ontology/jena/BFO.java +++ /dev/null @@ 
-1,8 +0,0 @@ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.ResourceFactory; - -class BFO { - public static final Property partOf = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/BFO_0000050" ); -} diff --git a/src/ubic/basecode/ontology/jena/IAO.java b/src/ubic/basecode/ontology/jena/IAO.java new file mode 100644 index 00000000..8053c3b7 --- /dev/null +++ b/src/ubic/basecode/ontology/jena/IAO.java @@ -0,0 +1,9 @@ +package ubic.basecode.ontology.jena; + +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.ResourceFactory; + +public class IAO { + + public static final Property alternativeLabel = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/IAO_0000118" ); +} diff --git a/src/ubic/basecode/ontology/jena/IndexerSelector.java b/src/ubic/basecode/ontology/jena/IndexerSelector.java deleted file mode 100644 index cf52a0ba..00000000 --- a/src/ubic/basecode/ontology/jena/IndexerSelector.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * The baseCode project - * - * Copyright (c) 2008-2019 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.rdf.model.*; -import com.hp.hpl.jena.vocabulary.OWL2; -import com.hp.hpl.jena.vocabulary.RDFS; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collection; -import java.util.HashSet; - -/** - * Used to limit which parts of ontologies get indexed for searching. This avoids indexing some parts of ontologies such - * as "examples" and "definitions" but this is set up in a partly ontology-specific way (that is, hard-coded). - * - * @author paul - */ -class IndexerSelector implements Selector { - - private static final Logger log = LoggerFactory.getLogger( IndexerSelector.class ); - - private static final Collection wantedForIndexing; - - static { - wantedForIndexing = new HashSet<>(); - wantedForIndexing.add( RDFS.label ); - wantedForIndexing.add( RDFS.comment ); - - wantedForIndexing.add( OBO.id ); - wantedForIndexing.add( OBO.hasDbXref ); - wantedForIndexing.add( OBO.hasSynonym ); - wantedForIndexing.add( OBO.hasExactSynonym ); - wantedForIndexing.add( OBO.hasBroadSynonym ); - wantedForIndexing.add( OBO.hasNarrowSynonym ); - wantedForIndexing.add( OBO.hasRelatedSynonym ); - wantedForIndexing.add( OBO.alternativeLabel ); - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getObject() - */ - @Override - public RDFNode getObject() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getPredicate() - */ - @Override - public Property getPredicate() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getSubject() - */ - @Override - public Resource getSubject() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#isSimple() - */ - @Override - public boolean isSimple() { - return false; - } - - /* - * (non-Javadoc) - * - * @see 
com.hp.hpl.jena.rdf.model.Selector#test(com.hp.hpl.jena.rdf.model.Statement) - */ - @Override - public boolean test( Statement s ) { - if ( s.getSubject().getURI() == null ) { - return false; - } - - boolean retain = wantedForIndexing.contains( s.getPredicate() ); - - // bit of a special case ... - if ( s.getPredicate().equals( OWL2.annotatedProperty ) && s.getObject().canAs( Property.class ) ) { - try { - retain = wantedForIndexing.contains( s.getObject().as( Property.class ) ); - } catch ( ConversionException e ) { - log.warn( "Conversion of " + s.getObject() + " to " + Property.class.getName() + " failed.", e ); - } - } - - return retain; - } -} diff --git a/src/ubic/basecode/ontology/jena/JenaUtils.java b/src/ubic/basecode/ontology/jena/JenaUtils.java index 11edc116..ba639f3b 100644 --- a/src/ubic/basecode/ontology/jena/JenaUtils.java +++ b/src/ubic/basecode/ontology/jena/JenaUtils.java @@ -1,9 +1,6 @@ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.ontology.OntClass; -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.ontology.Restriction; +import com.hp.hpl.jena.ontology.*; import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.util.iterator.Filter; @@ -193,7 +190,6 @@ public static Resource getRestrictionValue( Restriction r ) { } } - /** * Use to pretty-print a RDFNode */ @@ -228,8 +224,31 @@ public static Optional as( RDFNode resource, Class cla try { return Optional.of( resource.as( clazz ) ); } catch ( ConversionException e ) { - log.warn( "Conversion of " + resource + " to " + clazz.getName() + " failed." ); + log.warn( "Conversion of {} to {} failed.", resource, clazz.getName() ); return Optional.empty(); } } + + /** + * List all restrictions in the given model on any of the given properties. 
+ */ + public static ExtendedIterator listRestrictionsOnProperties( OntModel model, Set props, boolean includeSubProperties ) { + if ( includeSubProperties ) { + Set allProps = new HashSet<>( props ); + for ( Property p : props ) { + Property property = p.inModel( model ); + // include sub-properties for inference + if ( property.canAs( OntProperty.class ) ) { + OntProperty op = property.as( OntProperty.class ); + ExtendedIterator it = op.listSubProperties( false ); + while ( it.hasNext() ) { + OntProperty sp = it.next(); + allProps.add( sp ); + } + } + } + props = allProps; + } + return model.listRestrictions().filterKeep( new RestrictionWithOnPropertyFilter( props ) ); + } } diff --git a/src/ubic/basecode/ontology/jena/OBO.java b/src/ubic/basecode/ontology/jena/OBO.java index 1be6bb71..0b596727 100644 --- a/src/ubic/basecode/ontology/jena/OBO.java +++ b/src/ubic/basecode/ontology/jena/OBO.java @@ -19,7 +19,6 @@ private static Property property( String name ) { public static final Property hasBroadSynonym = property( "hasBroadSynonm" ); public static final Property hasNarrowSynonym = property( "hasNarrowSynonym" ); public static final Property hasRelatedSynonym = property( "hasRelatedSynonym" ); - public static final Property alternativeLabel = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/IAO_0000118" ); public static final Resource ObsoleteClass = ResourceFactory.createResource( "http://www.geneontology.org/formats/oboInOwl#ObsoleteClass" ); public static final Property ObsoleteProperty = property( "ObsoleteProperty" ); } diff --git a/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java b/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java index eb9dcf4f..4230727a 100644 --- a/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java +++ b/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java @@ -53,7 +53,7 @@ public Collection getRange() { OntClass class1 = r.asClass(); result.add( new OntologyTermImpl( class1, additionalRestrictions ) 
); } else { - log.warn( "Don't know how to deal with " + r ); + log.warn( "Don't know how to deal with {}", r ); } } return result; diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index 281e30ae..51f56472 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -18,31 +18,50 @@ */ package ubic.basecode.ontology.jena; +import com.hp.hpl.jena.datatypes.DatatypeFormatException; +import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.ontology.OntResource; +import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.shared.JenaException; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import com.hp.hpl.jena.util.iterator.WrappedIterator; +import com.hp.hpl.jena.vocabulary.RDFS; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.jena.larq.IndexBuilderSubject; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import 
ubic.basecode.ontology.search.OntologySearchException; import ubic.basecode.util.Configuration; -import java.io.File; +import javax.annotation.Nullable; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** + * A Lucene-based ontology indexer. + * * @author pavlidis */ class OntologyIndexer { @@ -50,139 +69,293 @@ class OntologyIndexer { private static final Logger log = LoggerFactory.getLogger( OntologyIndexer.class ); /** - * @return indexlarq with default analyzer (English), or null if no index is available. DOES not create the - * index if it doesn't exist. + * Those are build-in fields that are always indexed. */ - public static SearchIndex getSubjectIndex( String name ) { - Analyzer analyzer = new EnglishAnalyzer( Version.LUCENE_36 ); - return getSubjectIndex( name, analyzer ); - } + private static final String + ID_FIELD = "_ID", + LOCAL_NAME_FIELD = "_LOCAL_NAME", + IS_CLASS_FIELD = "_IS_CLASS", + IS_INDIVIDUAL_FIELD = "_IS_INDIVIDUAL"; - /** - * Loads or creates an index from an existing OntModel. Any existing index will loaded unless force=true. It will be - * created if there isn't one already, or if force=true. 
- */ - public static SearchIndex indexOntology( String name, OntModel model, boolean force ) throws JenaException, IOException { + public static class IndexableProperty { + private final Property property; + private final boolean analyzed; - if ( force ) { - return index( name, model ); + public IndexableProperty( Property property, boolean analyzed ) { + this.property = property; + this.analyzed = analyzed; } - SearchIndex index = getSubjectIndex( name ); - if ( index == null ) { - log.warn( "Index not found, or there was an error, re-indexing " + name ); - return index( name, model ); + public Property getProperty() { + return property; } - log.info( "A valid index for " + name + " already exists, using" ); - return index; + public boolean isAnalyzed() { + return analyzed; + } } - private static File getIndexPath( String name ) { - if ( StringUtils.isBlank( name ) ) { - throw new IllegalArgumentException( "The ontology must have a suitable name for being indexed." ); - } - String ontologyDir = Configuration.getString( "ontology.index.dir" ); // e.g., /something/gemmaData/compass - if ( StringUtils.isBlank( ontologyDir ) ) { - return Paths.get( System.getProperty( "java.io.tmpdir" ), "searchIndices", "ontology", name ).toFile(); - } - return Paths.get( ontologyDir, "ontology", name ).toFile(); + public static final Collection DEFAULT_INDEXABLE_PROPERTIES; + + static { + DEFAULT_INDEXABLE_PROPERTIES = new HashSet<>(); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( RDFS.label, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.id, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasDbXref, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasExactSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasBroadSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new 
IndexableProperty( OBO.hasNarrowSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasRelatedSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( IAO.alternativeLabel, true ) ); } /** - * Find the search index (will not create it) - * - * @return Index, or null if there is no index. + * Obtain an ontology index with the default indexable properties. */ - private static SearchIndex getSubjectIndex( String name, Analyzer analyzer ) { - log.debug( "Loading index: " + name ); - File indexdir = getIndexPath( name ); - File indexdirstd = getIndexPath( name + ".std" ); + @Nullable + public static SearchIndex getSubjectIndex( String name, Set excludedFromStemming ) { + return getSubjectIndex( name, DEFAULT_INDEXABLE_PROPERTIES, excludedFromStemming ); + } + + /** + * Obtain an index with default analyzer (English), or null if no index is available. + *

+ * DOES not create the index if it doesn't exist. + */ + @Nullable + public static SearchIndex getSubjectIndex( String name, Collection indexableProperties, Set excludedFromStemming ) { + log.debug( "Loading index for {}...", name ); try { // we do not put this in the try-with-open because we want these to *stay* open - FSDirectory directory = FSDirectory.open( indexdir ); - FSDirectory directorystd = FSDirectory.open( indexdirstd ); - + FSDirectory directory = FSDirectory.open( getIndexPath( name ).toFile() ); + FSDirectory directoryStd = FSDirectory.open( getIndexPath( name + ".std" ).toFile() ); if ( !IndexReader.indexExists( directory ) ) { return null; } - if ( !IndexReader.indexExists( directorystd ) ) { + if ( !IndexReader.indexExists( directoryStd ) ) { return null; } - - IndexReader reader = IndexReader.open( directory ); - IndexReader readerstd = IndexReader.open( directorystd ); - MultiReader r = new MultiReader( reader, readerstd ); - return new SearchIndex( r, analyzer ); - + return openIndex( directory, directoryStd, indexableProperties, excludedFromStemming ); } catch ( IOException e ) { - log.warn( "Index for " + name + " could not be read: " + e.getMessage() ); + log.warn( "Index for {} could not be opened.", name, e ); return null; } } /** - * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten. + * Index an ontology with the default indexable properties. */ - private static SearchIndex index( String name, OntModel model ) throws JenaException, IOException { + public static SearchIndex indexOntology( String name, OntModel model, Set excludedFromStemming, boolean force ) throws JenaException, IOException { + return indexOntology( name, model, DEFAULT_INDEXABLE_PROPERTIES, excludedFromStemming, force ); + } - File indexdir = getIndexPath( name ); + /** + * Loads or creates an index from an existing OntModel. Any existing index will loaded unless force=true. 
It will be + * created if there isn't one already, or if force=true. + */ + public static SearchIndex indexOntology( String name, OntModel model, Collection indexableProperties, Set excludedFromStemming, boolean force ) throws JenaException, IOException { + if ( force ) { + return index( name, model, indexableProperties, excludedFromStemming ); + } + SearchIndex index = getSubjectIndex( name, excludedFromStemming ); + if ( index == null ) { + log.warn( "Index not found, or there was an error, re-indexing {}...", name ); + return index( name, model, indexableProperties, excludedFromStemming ); + } + log.debug( "A valid index for {} already exists, using", name ); + return index; + } - StopWatch timer = new StopWatch(); - timer.start(); - FSDirectory dir = FSDirectory.open( indexdir ); - log.info( "Indexing " + name + " to: " + indexdir ); + private static Path getIndexPath( String name ) { + if ( StringUtils.isBlank( name ) ) { + throw new IllegalArgumentException( "The ontology must have a suitable name for being indexed." ); + } + String ontologyDir = Configuration.getString( "ontology.index.dir" ); // e.g., /something/gemmaData/compass + if ( StringUtils.isBlank( ontologyDir ) ) { + return Paths.get( System.getProperty( "java.io.tmpdir" ), "searchIndices", "ontology", name ); + } + return Paths.get( ontologyDir, "ontology", name ); + } - /* - * adjust the analyzer ... - */ - Analyzer analyzer = new EnglishAnalyzer( Version.LUCENE_36 ); + /** + * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten. 
+ */ + private static SearchIndex index( String name, OntModel model, Collection indexableProperties, Set excludedFromStemming ) throws JenaException, IOException { + Directory dir = index( name, model, new EnglishAnalyzer( Version.LUCENE_36, EnglishAnalyzer.getDefaultStopSet(), excludedFromStemming ), getIndexPath( name ), indexableProperties ); + // we need to also analyze using the Standard analyzer, which doesn't do stemming and allows wildcard. + Directory dirStd = index( name, model, new StandardAnalyzer( Version.LUCENE_36 ), getIndexPath( name + ".std" ), indexableProperties ); + return openIndex( dir, dirStd, indexableProperties, excludedFromStemming ); + } + + private static Directory index( String name, OntModel model, Analyzer analyzer, Path indexDir, Collection indexableProperties ) throws IOException { + StopWatch timer = StopWatch.createStarted(); + FSDirectory dir = FSDirectory.open( indexDir.toFile() ); + log.debug( "Indexing {} to: {}...", name, indexDir ); IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); - IndexWriter indexWriter = new IndexWriter( dir, config ); - indexWriter.deleteAll(); // start with clean slate. - assert 0 == indexWriter.numDocs(); + try ( IndexWriter indexWriter = new IndexWriter( dir, config ) ) { + indexWriter.deleteAll(); // start with clean slate. 
+ assert 0 == indexWriter.numDocs(); + Map indexablePropertiesByField = indexableProperties.stream() + .collect( Collectors.toMap( p -> p.getProperty().getURI(), p -> p ) ); + ExtendedIterator subjects = model.listSubjects() + .filterDrop( new BnodeFilter<>() ); + while ( subjects.hasNext() ) { + Resource subject = subjects.next(); + String id = subject.getURI(); + Document doc = new Document(); + doc.add( new Field( ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); + doc.add( new Field( LOCAL_NAME_FIELD, subject.getLocalName(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); + boolean isClass, isIndividual; + if ( subject.canAs( OntResource.class ) ) { + isClass = subject.as( OntResource.class ).isClass(); + isIndividual = subject.as( OntResource.class ).isIndividual(); + } else { + isClass = false; + isIndividual = false; + } + doc.add( new NumericField( IS_CLASS_FIELD ).setIntValue( isClass ? 1 : 0 ) ); + doc.add( new NumericField( IS_INDIVIDUAL_FIELD ).setIntValue( isIndividual ? 1 : 0 ) ); + for ( IndexableProperty prop : indexableProperties ) { + StmtIterator listStatements = subject.listProperties( prop.property ); + while ( listStatements.hasNext() ) { + Statement s = listStatements.next(); + String field = s.getPredicate().getURI(); + Fieldable f; + if ( s.getObject().isLiteral() ) { + Literal l = s.getObject().asLiteral(); + Object v; + try { + v = l.getValue(); + } catch ( DatatypeFormatException e ) { + log.warn( "Invalid datatype for literal: {}", l, e ); + continue; + } + if ( v instanceof String ) { + f = new Field( field, ( String ) v, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? 
Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ); + } else if ( v instanceof Number ) { + NumericField nf = new NumericField( field ); + if ( v instanceof Integer ) { + nf.setIntValue( ( Integer ) v ); + } else if ( v instanceof Long ) { + nf.setLongValue( ( Long ) v ); + } else if ( v instanceof Float ) { + nf.setFloatValue( ( Float ) v ); + } else if ( v instanceof Double ) { + nf.setDoubleValue( ( Double ) v ); + } else { + log.warn( "Skipping numeric literal of unsupported type: {}", l ); + continue; + } + f = nf; + } else if ( v instanceof XSDDateTime ) { + f = new NumericField( field ) + .setLongValue( ( ( XSDDateTime ) v ).asCalendar().getTime().getTime() ); + } else if ( v instanceof Boolean ) { + f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( v ) ? 1 : 0 ); + } else { + log.warn( "Skipping literal of unsupported type: {}", l ); + continue; + } + } else if ( s.getObject().isURIResource() ) { + // index the URI + f = new Field( field, s.getObject().asResource().getURI(), Field.Store.NO, Field.Index.NOT_ANALYZED ); + } else { + // could be a blank node + continue; + } + if ( isIndividual ) { + System.out.println( doc ); + } + doc.add( f ); + } + } + indexWriter.addDocument( doc ); + } + indexWriter.commit(); + log.debug( "Done indexing {} subjects of {} in {} s.", indexWriter.numDocs(), name, String.format( "%.2f", timer.getTime() / 1000.0 ) ); + } + return dir; + } - IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject( indexWriter ); - StmtIterator listStatements = model.listStatements( new IndexerSelector() ); - larqSubjectBuilder.indexStatements( listStatements ); - indexWriter.commit(); - log.info( indexWriter.numDocs() + " Statements indexed..." 
); - indexWriter.close(); + private static SearchIndex openIndex( Directory dir, Directory dirStd, Collection indexableProperties, Set excludedFromStemming ) throws IOException { + String[] searchableFields = Stream.concat( Stream.of( ID_FIELD, LOCAL_NAME_FIELD ), indexableProperties.stream().map( p -> p.property ).map( Resource::getURI ) ) + .distinct() + .toArray( String[]::new ); + return new LuceneSearchIndex( searchableFields, new MultiReader( IndexReader.open( dir ), IndexReader.open( dirStd ) ), new EnglishAnalyzer( Version.LUCENE_36, EnglishAnalyzer.getDefaultStopSet(), excludedFromStemming ) ); + } - Directory dirstd = indexStd( name, model ); + private static class LuceneSearchIndex implements SearchIndex { - MultiReader r = new MultiReader( IndexReader.open( dir ), IndexReader.open( dirstd ) ); + private static final Logger log = LoggerFactory.getLogger( LuceneSearchIndex.class ); - // workaround to get the EnglishAnalyzer. - SearchIndex index = new SearchIndex( r, new EnglishAnalyzer( Version.LUCENE_36 ) ); - // larqSubjectBuilder.getIndex(); // always returns a StandardAnalyazer - assert index.getLuceneQueryParser().getAnalyzer() instanceof EnglishAnalyzer; + private final String[] searchableFields; + private final IndexReader index; + private final Analyzer analyzer; - log.info( "Done indexing of " + name + " in " + String.format( "%.2f", timer.getTime() / 1000.0 ) + "s" ); + public LuceneSearchIndex( String[] searchableFields, IndexReader index, Analyzer analyzer ) { + this.searchableFields = searchableFields; + this.index = index; + this.analyzer = analyzer; + } - return index; - } + @Override + public ExtendedIterator search( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, null, maxResults ); + } - /** - * We need to also analyze using the Standard analyzer, which doesn't do stemming and allows wildcard. 
- */ - private static Directory indexStd( String name, OntModel model ) throws JenaException, IOException { + @Override + public ExtendedIterator searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, NumericRangeFilter.newIntRange( IS_CLASS_FIELD, 1, 1, true, true ), maxResults ); + } - File file = getIndexPath( name + ".std" ); + @Override + public ExtendedIterator searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, NumericRangeFilter.newIntRange( IS_INDIVIDUAL_FIELD, 1, 1, true, true ), maxResults ); + } - FSDirectory dir = FSDirectory.open( file ); - dir.getLockFactory().clearLock( dir.getLockID() ); - log.info( "Index to: " + file ); - Analyzer analyzer = new StandardAnalyzer( Version.LUCENE_36 ); - IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); - IndexWriter indexWriter = new IndexWriter( dir, config ); - indexWriter.deleteAll(); - IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject( indexWriter ); - StmtIterator listStatements = model.listStatements( new IndexerSelector() ); - larqSubjectBuilder.indexStatements( listStatements ); - indexWriter.commit(); - log.info( indexWriter.numDocs() + " Statements indexed..." 
); - indexWriter.close(); - return dir; + private ExtendedIterator search( OntModel model, String queryString, @Nullable Filter filter, int maxResults ) throws OntologySearchException { + if ( StringUtils.isBlank( queryString ) ) { + throw new IllegalArgumentException( "Query cannot be blank" ); + } + StopWatch timer = StopWatch.createStarted(); + try { + Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); + // in general, results are found in both regular and std index, so we divide by 2 the initial capacity + // we also have to double the number of hits to account for duplicates + TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 2 ); + Set seenIds = new HashSet<>( hits.totalHits / 2 ); + List resources = new ArrayList<>( hits.totalHits / 2 ); + for ( int i = 0; i < hits.scoreDocs.length; i++ ) { + Document doc = index.document( hits.scoreDocs[i].doc ); + String id = doc.get( ID_FIELD ); + if ( seenIds.contains( id ) ) { + continue; + } + Resource res = model.getResource( id ); + resources.add( new JenaSearchResult( res, hits.scoreDocs[i].score ) ); + seenIds.add( id ); + if ( seenIds.size() >= maxResults ) { + break; + } + } + return WrappedIterator.create( resources.iterator() ); + } catch ( ParseException e ) { + throw new OntologySearchException( "Failed to parse search query.", queryString, e ); + } catch ( IOException e ) { + throw new OntologySearchException( "An I/O error occured while searching.", queryString, e ); + } finally { + timer.stop(); + if ( timer.getTime() > 100 ) { + log.warn( "Ontology resource search for: {} took {} ms.", queryString, timer.getTime() ); + } + } + } + + @Override + public void close() throws IOException { + index.close(); + } } + } diff --git a/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java b/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java index 61731377..e888875f 100644 --- 
a/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java @@ -44,12 +44,6 @@ public OntologyIndividualImpl( Individual ind, Set additionalRestri this.additionalRestrictions = additionalRestrictions; } - public OntologyIndividualImpl( Individual ind, Set additionalRestrictions, double score ) { - super( ind, score ); - this.ind = ind; - this.additionalRestrictions = additionalRestrictions; - } - @Override public OntologyTerm getInstanceOf() { Resource type = ind.getRDFType(); diff --git a/src/ubic/basecode/ontology/jena/OntologyLoader.java b/src/ubic/basecode/ontology/jena/OntologyLoader.java index 38c27a7c..f31278b4 100644 --- a/src/ubic/basecode/ontology/jena/OntologyLoader.java +++ b/src/ubic/basecode/ontology/jena/OntologyLoader.java @@ -52,18 +52,18 @@ class OntologyLoader { private static final String OLD_CACHE_SUFFIX = ".old"; private static final String TMP_CACHE_SUFFIX = ".tmp"; - public static OntModel loadMemoryModel( InputStream is, String url ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url ) throws JenaException, IOException { return loadMemoryModel( is, url, true ); } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. 
*/ - public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports ) throws JenaException, IOException { return loadMemoryModel( is, url, processImports, OntModelSpec.OWL_MEM_TRANS_INF ); } - public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports, OntModelSpec spec ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports, OntModelSpec spec ) throws JenaException, IOException { OntModel model = getMemoryModel( url, processImports, spec ); model.read( is, null ); return model; @@ -107,7 +107,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } catch ( ClosedByInterruptException e ) { throw e; } catch ( IOException e ) { - log.error( "Failed to load ontology model for " + url + ", will attempt to load from disk.", e ); + log.error( "Failed to load ontology model for {}, will attempt to load from disk.", url, e ); attemptToLoadFromDisk = true; } finally { if ( urlc instanceof HttpURLConnection ) { @@ -129,7 +129,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, // the ontology. 
FileUtils.createParentDirectories( oldFile ); Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING ); - log.info( "Load model from disk: " + timer.getTime() + "ms" ); + log.debug( "Load model from disk took {} ms", timer.getTime() ); } } else { throw new RuntimeException( @@ -138,7 +138,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } else if ( tempFile.exists() ) { // Model was successfully loaded into memory from URL with given cacheName // Save cache to disk (rename temp file) - log.info( "Caching ontology to disk: " + cacheName + " under " + f.getAbsolutePath() ); + log.debug( "Caching ontology to disk: {} under {}", cacheName, f.getAbsolutePath() ); try { // Need to compare previous to current so instead of overwriting we'll move the old file if ( f.exists() ) { @@ -149,12 +149,12 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING ); } catch ( IOException e ) { - log.error( "Failed to cache ontology " + url + " to disk.", e ); + log.error( "Failed to cache ontology {} to disk.", url, e ); } } } - log.info( "Loading ontology model for " + url + " took " + timer.getTime() + "ms" ); + log.debug( "Loading ontology model for {} took {} ms", url, timer.getTime() ); return model; } @@ -236,7 +236,7 @@ public static URLConnection openConnection( String url ) throws IOException { if ( StringUtils.isBlank( newUrl ) ) { throw new RuntimeException( String.format( "Redirect response for %s is lacking a 'Location' header.", url ) ); } - log.debug( "Redirect to " + newUrl + " from " + url ); + log.debug( "Redirect to {} from {}", newUrl, url ); urlc = openConnectionInternal( newUrl ); } } @@ -251,7 +251,7 @@ private static URLConnection openConnectionInternal( String url ) throws IOExcep if ( urlc instanceof HttpURLConnection ) { ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( true ); } - 
log.debug( "Connecting to " + url ); + log.debug( "Connecting to {}", url ); urlc.connect(); // Will error here on bad URL return urlc; } diff --git a/src/ubic/basecode/ontology/jena/OntologySearch.java b/src/ubic/basecode/ontology/jena/OntologySearch.java deleted file mode 100644 index a51b9974..00000000 --- a/src/ubic/basecode/ontology/jena/OntologySearch.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * The basecode project - * - * Copyright (c) 2007-2019 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.ontology.Individual; -import com.hp.hpl.jena.ontology.OntClass; -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.shared.JenaException; -import com.hp.hpl.jena.util.iterator.ExtendedIterator; -import com.hp.hpl.jena.util.iterator.Map1Iterator; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.time.StopWatch; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import ubic.basecode.ontology.search.OntologySearchException; - -import java.util.Objects; -import java.util.Optional; - -import static com.hp.hpl.jena.sparql.util.ModelUtils.convertGraphNodeToRDFNode; -import static ubic.basecode.ontology.jena.JenaUtils.where; - -/** - * @author pavlidis - */ -class OntologySearch { - - private static final Logger log = LoggerFactory.getLogger( OntologySearch.class ); - - /** - * Find classes that match the query string. 
- * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyTerm objects - */ - public static ExtendedIterator> matchClasses( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) ) - .mapWith( r -> r.as( OntClass.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - /** - * Find individuals that match the query string - * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyTerm objects - */ - public static ExtendedIterator> matchIndividuals( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) ) - .mapWith( r -> r.as( Individual.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - /** - * Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted - * whenever possible. 
- * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyResource objects - */ - public static ExtendedIterator> matchResources( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) ) - .mapWith( r -> r.as( Resource.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - private static ExtendedIterator> runSearch( Model model, SearchIndex index, String queryString ) throws OntologySearchJenaException { - if ( StringUtils.isBlank( queryString ) ) { - throw new IllegalArgumentException( "Query cannot be blank" ); - } - StopWatch timer = StopWatch.createStarted(); - try { - return new Map1Iterator<>( o -> new SearchResult<>( o.getLuceneDocId(), convertGraphNodeToRDFNode( o.getNode(), model ), o.getScore() ), index.search( queryString ) ); - } catch ( JenaException e ) { - throw new OntologySearchJenaException( "Failed to search with query.", queryString, e ); - } finally { - timer.stop(); - if ( timer.getTime() > 100 ) { - log.warn( "Ontology resource search for: {} took {} ms.", queryString, timer.getTime() ); - } - } - } - - public static class SearchResult { - public final int docId; - public final T result; - public final double score; - - private SearchResult( int docId, T result, double score ) { - this.docId = docId; - this.result = result; - this.score = score; - } - - @Override - public boolean equals( Object obj ) { - if ( obj instanceof SearchResult ) { - return Objects.equals( result, ( ( SearchResult ) obj ).result ); - } - return false; - } - - @Override - public int hashCode() { - return Objects.hash( result ); - } - - @Override - public String toString() { - return String.format( "%s [docId = %d, score = %f]", result, docId, score ); - } - - private SearchResult as( Class clazz ) { - try { - return new SearchResult<>( docId, result.as( 
clazz ), score ); - } catch ( ConversionException e ) { - log.warn( "Conversion of " + result + " to " + clazz.getName() + " failed.", e ); - return null; - } - } - } -} diff --git a/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java b/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java deleted file mode 100644 index bcb4cc71..00000000 --- a/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java +++ /dev/null @@ -1,22 +0,0 @@ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.shared.JenaException; -import ubic.basecode.ontology.search.OntologySearchException; - -/** - * Base class for Jena-related ontology search exceptions. - */ -class OntologySearchJenaException extends OntologySearchException { - - private final JenaException cause; - - public OntologySearchJenaException( String message, String query, JenaException cause ) { - super( message, query, cause ); - this.cause = cause; - } - - @Override - public JenaException getCause() { - return cause; - } -} diff --git a/src/ubic/basecode/ontology/jena/OntologyTermImpl.java b/src/ubic/basecode/ontology/jena/OntologyTermImpl.java index 04e9edb5..fba48075 100644 --- a/src/ubic/basecode/ontology/jena/OntologyTermImpl.java +++ b/src/ubic/basecode/ontology/jena/OntologyTermImpl.java @@ -61,12 +61,6 @@ public OntologyTermImpl( OntClass resource, Set additionalRestricti this.additionalRestrictions = additionalRestrictions; } - public OntologyTermImpl( OntClass resource, Set additionalRestrictions, double score ) { - super( resource, score ); - this.ontResource = resource; - this.additionalRestrictions = additionalRestrictions; - } - @Override public Collection getAlternativeIds() { return getAnnotations( HAS_ALTERNATE_ID ).stream().map( AnnotationProperty::getContents ).collect( Collectors.toSet() ); diff --git a/src/ubic/basecode/ontology/jena/RO.java b/src/ubic/basecode/ontology/jena/RO.java index 86d5ec73..2c555e13 100644 --- a/src/ubic/basecode/ontology/jena/RO.java 
+++ b/src/ubic/basecode/ontology/jena/RO.java @@ -5,5 +5,33 @@ class RO { + private static final String NS = "http://purl.obolibrary.org/obo/"; + + private static Property property( String localName ) { + return ResourceFactory.createProperty( NS + localName ); + } + + public static final Property partOf = property( "BFO_0000050" ); + public static final Property activeIngredientIn = property( "RO_0002249" ); + public static final Property boundingLayerOf = property( "RO_0002007" ); + public static final Property branchingPartOf = property( "RO_0002380" ); + public static final Property determinedBy = property( "RO_0002507" ); + public static final Property ends = property( "RO_0002229" ); + public static final Property isSubsequenceOf = property( "RO_0002525" ); + public static final Property isEndSequenceOf = property( "RO_0002519" ); + public static final Property isStartSequenceOf = property( "RO_0002517" ); + public static final Property lumenOf = property( "RO_0002571" ); + public static final Property luminalSpaceOf = property( "RO_0002572" ); + public static final Property mainStemOf = property( "RO_0002381" ); + public static final Property memberOf = property( "RO_0002350" ); + public static final Property occurrentPartOf = property( "RO_0002012" ); + public static final Property skeletonOf = property( "RO_0002576" ); + public static final Property starts = property( "RO_0002223" ); + public static final Property subclusterOf = property( "RO_0015003" ); + + /** + * This term is still used in older ontologies. 
+ */ + @Deprecated public static final Property properPartOf = ResourceFactory.createProperty( "http://www.obofoundry.org/ro/ro.owl#proper_part_of" ); } diff --git a/src/ubic/basecode/ontology/jena/RestrictionWithOnPropertyFilter.java b/src/ubic/basecode/ontology/jena/RestrictionWithOnPropertyFilter.java index cb5ed9c7..95b97d1b 100644 --- a/src/ubic/basecode/ontology/jena/RestrictionWithOnPropertyFilter.java +++ b/src/ubic/basecode/ontology/jena/RestrictionWithOnPropertyFilter.java @@ -10,9 +10,9 @@ * Filter that retain only the restrictions on any of the given properties. */ class RestrictionWithOnPropertyFilter extends Filter { - private final Set properties; + private final Set properties; - public RestrictionWithOnPropertyFilter( Set properties ) { + public RestrictionWithOnPropertyFilter( Set properties ) { this.properties = properties; } diff --git a/src/ubic/basecode/ontology/jena/SearchIndex.java b/src/ubic/basecode/ontology/jena/SearchIndex.java index 0157263d..7b6a3b73 100644 --- a/src/ubic/basecode/ontology/jena/SearchIndex.java +++ b/src/ubic/basecode/ontology/jena/SearchIndex.java @@ -1,74 +1,46 @@ -/* - * The baseCode project - * - * Copyright (c) 2013 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- */ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.util.iterator.Map1Iterator; -import org.apache.jena.larq.ARQLuceneException; -import org.apache.jena.larq.HitLARQ; -import org.apache.jena.larq.IndexLARQ; -import org.apache.jena.larq.LARQ; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Iterator; - -/** - * Customization to deal with MultiReader and 'open' while indexing is going on ...? Might not be needed. - * - * @author Paul - */ -class SearchIndex extends IndexLARQ { - - public SearchIndex( MultiReader r, Analyzer a ) { - super( r, a ); - } - - @Override - public Iterator search( String queryString ) { - try { - final IndexSearcher s = getIndexSearcher(); - Query query = getLuceneQueryParser().parse( queryString ); - - TopDocs topDocs = s.search( query, null, LARQ.NUM_RESULTS ); - - return new Map1Iterator<>( object -> new HitLARQ( s, object ), Arrays.asList( topDocs.scoreDocs ).iterator() ); - } catch ( Exception e ) { - throw new ARQLuceneException( "Error during search for '" + queryString + ";", e ); +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import ubic.basecode.ontology.search.OntologySearchException; + +interface SearchIndex extends AutoCloseable { + + /** + * Find RDF nodes matching the given query string. + */ + ExtendedIterator search( OntModel model, String queryString, int maxResults ) throws OntologySearchException; + + /** + * Find classes that match the query string. 
+ * + * @param model that goes with the index + * @return Collection of OntologyTerm objects + */ + ExtendedIterator searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException; + + /** + * Find individuals that match the query string + * + * @param model that goes with the index + * @return Collection of OntologyTerm objects + */ + ExtendedIterator searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException; + + class JenaSearchResult { + + public final Resource result; + public final double score; + + JenaSearchResult( Resource result, double score ) { + this.result = result; + this.score = score; } - } - - private synchronized IndexSearcher getIndexSearcher() throws IOException { - if ( !reader.isCurrent() ) { - // this is the problematic line ... multireader cannot be reopened; was IndexReader newReader = - // IndexReader.openIfChanged(reader, true) ; - IndexReader newReader = IndexReader.openIfChanged( reader ); - if ( newReader != null ) { - reader.close(); - reader = newReader; - searcher = new IndexSearcher( reader ); - } + @Override + public String toString() { + return String.format( "%s score=%f", result, score ); } - - return searcher; } - } diff --git a/src/ubic/basecode/ontology/jena/package-info.java b/src/ubic/basecode/ontology/jena/package-info.java index 5d1211e8..a10a5631 100644 --- a/src/ubic/basecode/ontology/jena/package-info.java +++ b/src/ubic/basecode/ontology/jena/package-info.java @@ -1,5 +1,5 @@ /** - * + * Implementation of {@link ubic.basecode.ontology.providers.OntologyService} using Apache Jena. 
*/ @ParametersAreNonnullByDefault package ubic.basecode.ontology.jena; diff --git a/src/ubic/basecode/ontology/model/OntologyResource.java b/src/ubic/basecode/ontology/model/OntologyResource.java index 2284ebc0..f7fecbb1 100644 --- a/src/ubic/basecode/ontology/model/OntologyResource.java +++ b/src/ubic/basecode/ontology/model/OntologyResource.java @@ -23,7 +23,7 @@ /** * @author pavlidis */ -public interface OntologyResource extends Comparable { +public interface OntologyResource { /** * A URI if known, otherwise null. @@ -52,10 +52,4 @@ public interface OntologyResource extends Comparable { * Whether the resource is marked as obsolete. */ boolean isObsolete(); - - /** - * If this is result from a free-text search, a corresponding score, otherwise null. - */ - @Nullable - Double getScore(); } diff --git a/src/ubic/basecode/ontology/model/OntologyTermSimple.java b/src/ubic/basecode/ontology/model/OntologyTermSimple.java index d22bb703..5aadf4da 100644 --- a/src/ubic/basecode/ontology/model/OntologyTermSimple.java +++ b/src/ubic/basecode/ontology/model/OntologyTermSimple.java @@ -17,7 +17,6 @@ import javax.annotation.Nullable; import java.io.Serializable; import java.util.Collection; -import java.util.Comparator; import java.util.Objects; /** @@ -131,17 +130,6 @@ public boolean isObsolete() { return obsolete; } - @Nullable - @Override - public Double getScore() { - return null; - } - - @Override - public int compareTo( OntologyResource other ) { - return Objects.compare( getUri(), other.getUri(), Comparator.nullsLast( Comparator.naturalOrder() ) ); - } - @Override public boolean equals( Object obj ) { if ( this == obj ) return true; diff --git a/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java b/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java new file mode 100644 index 00000000..5c4b0c3b --- /dev/null +++ b/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java @@ -0,0 +1,48 @@ +package 
ubic.basecode.ontology.providers; + +import ubic.basecode.util.Configuration; + +import javax.annotation.Nullable; + +/** + * Base class for all ontologies built-in to the baseCode project. + *

+ * The ontologies that subclass this will honor settings in the {@code basecode.properties} file for loading and + * locating the ontology. + * + * @author poirigui + */ +public abstract class AbstractBaseCodeOntologyService extends AbstractOntologyService { + + private final String name; + private final String cacheName; + + /** + * Intentionally package-private constructor. + */ + AbstractBaseCodeOntologyService( String name, String cacheName ) { + this.name = name; + this.cacheName = cacheName; + } + + @Override + protected String getOntologyName() { + return name; + } + + @Override + protected String getOntologyUrl() { + return Configuration.getString( "url." + cacheName ); + } + + @Override + protected boolean isOntologyEnabled() { + return Boolean.TRUE.equals( Configuration.getBoolean( "load." + cacheName ) ); + } + + @Nullable + @Override + public String getCacheName() { + return cacheName; + } +} diff --git a/src/ubic/basecode/ontology/providers/AbstractOntologyService.java b/src/ubic/basecode/ontology/providers/AbstractOntologyService.java new file mode 100644 index 00000000..7549a348 --- /dev/null +++ b/src/ubic/basecode/ontology/providers/AbstractOntologyService.java @@ -0,0 +1,9 @@ +package ubic.basecode.ontology.providers; + +/** + * Base class for all ontology services. + *

+ * The actual implementation is provided by the {@code ubic.basecode.ontology.jena} package. + */ +public abstract class AbstractOntologyService extends ubic.basecode.ontology.jena.AbstractOntologyService { +} diff --git a/src/ubic/basecode/ontology/providers/CellLineOntologyService.java b/src/ubic/basecode/ontology/providers/CellLineOntologyService.java index 9c60ff24..e77be6a9 100644 --- a/src/ubic/basecode/ontology/providers/CellLineOntologyService.java +++ b/src/ubic/basecode/ontology/providers/CellLineOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,26 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://www.obofoundry.org/cgi-bin/detail.cgi?id=CLO - * + * Cell Line Ontology + * * @author paul - * */ -public class CellLineOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.cellLineOntology"; - - @Override - protected String getOntologyName() { - return "cellLineOntology"; - } +public class CellLineOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public CellLineOntologyService() { + super( "Cell Line Ontology", "cellLineOntology" ); } } diff --git a/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java b/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java index 6ebb09e8..bfbcf55b 100644 --- a/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * 
+ * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,26 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://www.obofoundry.org/cgi-bin/detail.cgi?id=cell - * + * Cell Ontology + * * @author paul - * */ -public class CellTypeOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.cellTypeOntology"; - - @Override - protected String getOntologyName() { - return "cellTypeOntology"; - } +public class CellTypeOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public CellTypeOntologyService() { + super( "Cell Ontology", "cellTypeOntology" ); } -} +} \ No newline at end of file diff --git a/src/ubic/basecode/ontology/providers/ChebiOntologyService.java b/src/ubic/basecode/ontology/providers/ChebiOntologyService.java index d6659c6f..59b87ee2 100644 --- a/src/ubic/basecode/ontology/providers/ChebiOntologyService.java +++ b/src/ubic/basecode/ontology/providers/ChebiOntologyService.java @@ -1,8 +1,8 @@ /* * The basecode project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Loads the CHEBI Ontology at startup in its own thread. 
Controlled in build.properties by load.chebiOntology - * + * Chemical Entities of Biological Interest + * * @author klc - * */ -public class ChebiOntologyService extends AbstractOntologyMemoryBackedService { +public class ChebiOntologyService extends AbstractBaseCodeOntologyService { - private static final String CHEBI_ONTOLOGY_URL = "url.chebiOntology"; - - @Override - protected String getOntologyName() { - return "chebiOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( CHEBI_ONTOLOGY_URL ); + public ChebiOntologyService() { + super( "CHEBI", "chebiOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java b/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java index 7ade3f5b..11ec6808 100644 --- a/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java +++ b/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java @@ -1,8 +1,8 @@ /* * The Gemma21 project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -18,26 +18,16 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** * Holds a copy of the Disease Ontology. 
- * + * * @author klc + * @deprecated use MONDO instead */ -public class DiseaseOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class DiseaseOntologyService extends AbstractBaseCodeOntologyService { - private static final String DISEASE_ONTOLOGY_URL = "url.diseaseOntology"; - - @Override - protected String getOntologyName() { - return "diseaseOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( DISEASE_ONTOLOGY_URL ); + public DiseaseOntologyService() { + super( "Disease Ontology", "diseaseOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java b/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java index cdde9564..3ebc37a0 100644 --- a/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java +++ b/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java @@ -1,38 +1,34 @@ /* * The baseCode project - * + * * Copyright (c) 2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. 
*/ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.util.Configuration; /** + * Experimental Factor Ontology + * * @author Paul - * */ -public class ExperimentalFactorOntologyService extends AbstractOntologyMemoryBackedService { +public class ExperimentalFactorOntologyService extends AbstractBaseCodeOntologyService { - private static final String EF_ONTOLOGY_URL = "url.efOntology"; - - @Override - protected String getOntologyName() { - return "experimentalFactorOntology"; + public ExperimentalFactorOntologyService() { + super( "Experimental Factor Ontology", "experimentalFactorOntology" ); } @Override protected String getOntologyUrl() { - return Configuration.getString( EF_ONTOLOGY_URL ); + return Configuration.getString( "url.efOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/FMAOntologyService.java b/src/ubic/basecode/ontology/providers/FMAOntologyService.java index 1f80f650..b12b43d8 100644 --- a/src/ubic/basecode/ontology/providers/FMAOntologyService.java +++ b/src/ubic/basecode/ontology/providers/FMAOntologyService.java @@ -1,8 +1,8 @@ /* * The Gemma21 project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,16 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Holds a copy of the FMA Ontology on disk. This gets loaded on startup. 
- * + * Foundational Model of Anatomy Ontology (subset) + * * @author klc - * + * @deprecated this ontology is inactive, use UBERON instead */ -public class FMAOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class FMAOntologyService extends AbstractBaseCodeOntologyService { - private static final String FMA_ONTOLOGY_URL = "url.fmaOntology"; - - @Override - protected String getOntologyName() { - return "fmaOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( FMA_ONTOLOGY_URL ); + public FMAOntologyService() { + super( "Foundational Model of Anatomy Ontology (subset)", "fmaOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/GenericOntologyService.java b/src/ubic/basecode/ontology/providers/GenericOntologyService.java index 2747c235..d83762ee 100644 --- a/src/ubic/basecode/ontology/providers/GenericOntologyService.java +++ b/src/ubic/basecode/ontology/providers/GenericOntologyService.java @@ -14,31 +14,47 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; +import org.apache.commons.lang3.StringUtils; + +import javax.annotation.Nullable; /** - * A way to create ad hoc ontology services (in memory) for testing + * A way to create ad-hoc in-memory ontology services. 
* * @author Paul */ -public class GenericOntologyService extends AbstractOntologyMemoryBackedService { +public class GenericOntologyService extends AbstractOntologyService { private final String url; private final String name; - private final boolean cache; + @Nullable + private final String cacheName; + + public GenericOntologyService( String name, String url, @Nullable String cacheName ) { + this.name = name; + this.url = url; + this.cacheName = cacheName; + } public GenericOntologyService( String name, String url ) { - this( name, url, false ); + this( name, url, null ); } + /** + * @deprecated use {@link #GenericOntologyService(String, String, String)} with an explicit cache name instead + */ + @Deprecated public GenericOntologyService( String name, String url, boolean cache ) { - this( name, url, cache, true ); + this( name, url, cache ? StringUtils.deleteWhitespace( name ) : null ); } + /** + * @deprecated use {@link #GenericOntologyService(String, String, String)} with an explicit cache name instead and + * {@link #setProcessImports(boolean)} + */ + @Deprecated public GenericOntologyService( String name, String url, boolean cache, boolean processImports ) { - this.name = name; - this.url = url; - this.cache = cache; + this( name, url, cache ); setProcessImports( processImports ); } @@ -53,7 +69,13 @@ protected String getOntologyUrl() { } @Override + protected boolean isOntologyEnabled() { + return true; + } + + @Override + @Nullable protected String getCacheName() { - return this.cache ? 
this.name : null; + return cacheName; } } diff --git a/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java b/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java index 171a029f..8f69b197 100644 --- a/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java +++ b/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,25 +19,16 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Human developmental anatomy, abstract + * * @author paul - * + * @deprecated this ontology has been unmaintained since 2013 */ -public class HumanDevelopmentOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class HumanDevelopmentOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.humanDevelOntology"; - - @Override - protected String getOntologyName() { - return "humanDevelOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public HumanDevelopmentOntologyService() { + super( "Human Development Ontology", "humanDevelOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java b/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java index 2b7a201d..b8b24e65 100644 --- a/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2011
University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://bioportal.bioontology.org/ontologies/45774?p=terms&conceptid=HP%3A0001264 for example. - * + * Human Phenotype Ontology + * * @author paul - * */ -public class HumanPhenotypeOntologyService extends AbstractOntologyMemoryBackedService { +public class HumanPhenotypeOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.humanPhenotypeOntology"; - - @Override - protected String getOntologyName() { - return "humanPhenotypeOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public HumanPhenotypeOntologyService() { + super( "Human Phenotype Ontology", "humanPhenotypeOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java b/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java index ffa43d00..9acb5d6f 100644 --- a/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java @@ -1,8 +1,8 @@ /* * The basecode project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Mammalian Phenotype Ontology + * * @author klc - * */ -public class MammalianPhenotypeOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.mammalPhenotypeOntology"; +public class MammalianPhenotypeOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyName() { - - return "mammalPhenotypeOntology"; + public MammalianPhenotypeOntologyService() { + super( "Mammalian Phenotype Ontology", "mammalPhenotypeOntology" ); } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); - - } - } diff --git a/src/ubic/basecode/ontology/providers/MedicOntologyService.java b/src/ubic/basecode/ontology/providers/MedicOntologyService.java index 96d56e49..8a4d9820 100644 --- a/src/ubic/basecode/ontology/providers/MedicOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MedicOntologyService.java @@ -18,7 +18,6 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.ontology.model.OntologyModel; import java.io.IOException; @@ -29,21 +28,21 @@ * MEDIC ONTOLOGY USED BY PHENOCARTA, its represents MESH terms as a tree so with can use the parent structure that a * normal mesh term doesnt have *

- * MEDIC comes from the CTD folks. See http://ctd.mdibl.org/voc.go?type=disease. Unfortunately I do not know where our + * MEDIC comes from the CTD folks. See .... Unfortunately I do not know where our * medic.owl file came from (PP) * * @author Nicolas */ -public class MedicOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class MedicOntologyService extends AbstractBaseCodeOntologyService { /** * FIXME this shouldn't be hard-coded like this, we should load it like any other ontology service. */ private static final String MEDIC_ONTOLOGY_FILE = "/data/loader/ontology/medic.owl.gz"; - @Override - protected String getOntologyName() { - return "medicOntology"; + public MedicOntologyService() { + super( "Medic Ontology", "medicOntology" ); } @Override diff --git a/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java b/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java index 2dcf1826..cbcc54f0 100644 --- a/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,25 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Mouse Developmental Anatomy Ontology + * * @author paul - * */ -public class MouseDevelopmentOntologyService extends AbstractOntologyMemoryBackedService { +public class MouseDevelopmentOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.mouseDevelOntology"; - - @Override - protected String getOntologyName() { - return "mouseDevelOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public MouseDevelopmentOntologyService() { + super( "Mouse Development Anatomy Ontology", "mouseDevelOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java b/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java index ad2ec55a..1b133b10 100644 --- a/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java +++ b/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java @@ -14,7 +14,6 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.ontology.model.OntologyModel; import java.io.IOException; @@ -24,19 +23,17 @@ /** + * NIFSTD ontology service; the ontology is read from a gzipped OWL resource on the classpath with import processing disabled. * @author paul */ -public class NIFSTDOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class NIFSTDOntologyService extends AbstractBaseCodeOntologyService { private static final String NIFSTD_ONTOLOGY_FILE = "/data/loader/ontology/nif-gemma.owl.gz"; public NIFSTDOntologyService() { + super( "NIFSTD", "nifstdOntology" ); setProcessImports( false ); } - @Override - protected String getOntologyName() { - return "nifstdOntology"; - } - @Override protected String getOntologyUrl() { return "classpath:" + NIFSTD_ONTOLOGY_FILE; diff --git
a/src/ubic/basecode/ontology/providers/ObiService.java b/src/ubic/basecode/ontology/providers/ObiService.java index f8e4ce9f..910b9ed4 100644 --- a/src/ubic/basecode/ontology/providers/ObiService.java +++ b/src/ubic/basecode/ontology/providers/ObiService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,25 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Ontology for Biomedical Investigations + * * @author paul - * */ -public class ObiService extends AbstractOntologyMemoryBackedService { +public class ObiService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.obiOntology"; - - @Override - protected String getOntologyName() { - return "obiOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public ObiService() { + super( "Ontology for Biomedical Investigations", "obiOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/OntologyService.java b/src/ubic/basecode/ontology/providers/OntologyService.java index fb8970ce..8cc3ccd1 100644 --- a/src/ubic/basecode/ontology/providers/OntologyService.java +++ b/src/ubic/basecode/ontology/providers/OntologyService.java @@ -4,6 +4,7 @@ import ubic.basecode.ontology.model.OntologyResource; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.search.OntologySearchException; +import ubic.basecode.ontology.search.OntologySearchResult; import javax.annotation.Nullable; import java.io.InputStream; @@ -116,9 +117,9 @@ enum InferenceMode { *

* Search is enabled by default. * - * @see #findTerm(String, boolean) - * @see #findIndividuals(String, boolean) - * @see #findResources(String, boolean) + * @see #findTerm(String, int, boolean) + * @see #findIndividuals(String, int, boolean) + * @see #findResources(String, int, boolean) */ boolean isSearchEnabled(); @@ -129,6 +130,19 @@ enum InferenceMode { */ void setSearchEnabled( boolean searchEnabled ); + /** + * Obtain the words that should be excluded from stemming. + *

+ * By default, all words are subject to stemming. The exact implementation of stemming depends on the actual search + * implementation. + */ + Set getExcludedWordsFromStemming(); + + /** + * Set words that should be excluded from stemming when searching. + */ + void setExcludedWordsFromStemming( Set excludedWordsFromStemming ); + /** * Obtain the URIs used as additional properties when inferring parents and children. *

@@ -169,8 +183,8 @@ enum InferenceMode { *

* Obsolete terms are filtered out. */ - default Collection findIndividuals( String search ) throws OntologySearchException { - return findIndividuals( search, false ); + default Collection> findIndividuals( String search, int maxResults ) throws OntologySearchException { + return findIndividuals( search, maxResults, false ); } /** @@ -179,7 +193,7 @@ default Collection findIndividuals( String search ) throws O * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException; + Set> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any resources (terms or individuals) that match the given search string @@ -189,8 +203,8 @@ default Collection findIndividuals( String search ) throws O * @return results, or an empty collection if the results are empty OR the ontology is not available to be * searched. */ - default Collection findResources( String searchString ) throws OntologySearchException { - return findResources( searchString, false ); + default Collection> findResources( String searchString, int maxResults ) throws OntologySearchException { + return findResources( searchString, maxResults, false ); } /** @@ -199,15 +213,15 @@ default Collection findResources( String searchString ) throws * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findResources( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findResources( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any terms that match the given search string. *

* Obsolete terms are filtered out. */ - default Collection findTerm( String search ) throws OntologySearchException { - return findTerm( search, false ); + default Collection> findTerm( String search, int maxResults ) throws OntologySearchException { + return findTerm( search, maxResults, false ); } @@ -217,7 +231,7 @@ default Collection findTerm( String search ) throws OntologySearch * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findTerm( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findTerm( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Find a term using an alternative ID. @@ -288,7 +302,6 @@ default Set getChildren( Collection terms, boolean d */ Set getChildren( Collection terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ); - /** * Check if this ontology is enabled. */ diff --git a/src/ubic/basecode/ontology/providers/SequenceOntologyService.java b/src/ubic/basecode/ontology/providers/SequenceOntologyService.java index 6aa71d33..55b707b1 100644 --- a/src/ubic/basecode/ontology/providers/SequenceOntologyService.java +++ b/src/ubic/basecode/ontology/providers/SequenceOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2013 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Support for the Sequence Ontology - * + * Sequence types and features ontology + * * @author Paul - * */ -public class SequenceOntologyService extends AbstractOntologyMemoryBackedService { +public class SequenceOntologyService extends AbstractBaseCodeOntologyService { - private static final String SO_ONTOLOGY_URL = "url.seqOntology"; - - @Override - protected String getOntologyName() { - return "seqOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( SO_ONTOLOGY_URL ); + public SequenceOntologyService() { + super( "Sequence types and features ontology", "seqOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/UberonOntologyService.java b/src/ubic/basecode/ontology/providers/UberonOntologyService.java index 51967a98..d9e2c6e1 100644 --- a/src/ubic/basecode/ontology/providers/UberonOntologyService.java +++ b/src/ubic/basecode/ontology/providers/UberonOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2015 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Uberon - * + * Uberon multi-species anatomy ontology + * * @author paul - * */ -public class UberonOntologyService extends AbstractOntologyMemoryBackedService { +public class UberonOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.uberonOntology"; - - @Override - protected String getOntologyName() { - return "uberonOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public UberonOntologyService() { + super( "Uberon multi-species anatomy ontology", "uberonOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/UnitsOntologyService.java b/src/ubic/basecode/ontology/providers/UnitsOntologyService.java index ed1df01c..1670b011 100644 --- a/src/ubic/basecode/ontology/providers/UnitsOntologyService.java +++ b/src/ubic/basecode/ontology/providers/UnitsOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2013 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Support for the units ontology - * + * Units of measurement ontology + * * @author Paul - * */ -public class UnitsOntologyService extends AbstractOntologyMemoryBackedService { +public class UnitsOntologyService extends AbstractBaseCodeOntologyService { - private static final String UNITs_ONTOLOGY_URL = "url.unitsOntology"; - - @Override - protected String getOntologyName() { - return "unitsOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( UNITs_ONTOLOGY_URL ); + public UnitsOntologyService() { + super( "Units of measurement ontology", "unitsOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/package-info.java b/src/ubic/basecode/ontology/providers/package-info.java index 46d274b3..c3735344 100644 --- a/src/ubic/basecode/ontology/providers/package-info.java +++ b/src/ubic/basecode/ontology/providers/package-info.java @@ -1,5 +1,11 @@ /** + * This package contains baseCode built-in ontologies and a {@link ubic.basecode.ontology.providers.GenericOntologyService} + * to implement your own ontologies. * + * @author klc + * @author nicolas + * @author paul + * @author poirigui */ @ParametersAreNonnullByDefault package ubic.basecode.ontology.providers; diff --git a/src/ubic/basecode/ontology/search/OntologySearch.java b/src/ubic/basecode/ontology/search/OntologySearch.java deleted file mode 100644 index bf16d97f..00000000 --- a/src/ubic/basecode/ontology/search/OntologySearch.java +++ /dev/null @@ -1,32 +0,0 @@ -package ubic.basecode.ontology.search; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringEscapeUtils; - -public class OntologySearch { - - // Lucene cannot properly parse these characters... gives a query parse error. 
- // OntologyTerms don't contain them anyway - private final static char[] INVALID_CHARS = {':', '(', ')', '?', '^', '[', ']', '{', '}', '!', '~', '"', '\''}; - - /** - * Will remove characters that jena is unable to parse. Will also escape and remove leading and trailing white space - * (which also causes jena to die) - * - * @param toStrip the string to clean - * @return - */ - public static String stripInvalidCharacters( String toStrip ) { - String result = StringUtils.strip( toStrip ); - for ( char badChar : INVALID_CHARS ) { - result = StringUtils.remove( result, badChar ); - } - /* - * Queries cannot start with '*' or ? - */ - result = result.replaceAll( "^\\**", "" ); - result = result.replaceAll( "^\\?*", "" ); - - return StringEscapeUtils.escapeJava( result ).trim(); - } -} diff --git a/src/ubic/basecode/ontology/search/OntologySearchResult.java b/src/ubic/basecode/ontology/search/OntologySearchResult.java new file mode 100644 index 00000000..f50af3e5 --- /dev/null +++ b/src/ubic/basecode/ontology/search/OntologySearchResult.java @@ -0,0 +1,51 @@ +package ubic.basecode.ontology.search; + +import ubic.basecode.ontology.model.OntologyResource; + +import java.util.Comparator; +import java.util.Objects; + +/** + * Represents a search result from an ontology. 
+ * + * @author poirigui + */ +public class OntologySearchResult implements Comparator> { + + private final T result; + private final double score; + + public OntologySearchResult( T result, double score ) { + this.result = result; + this.score = score; + } + + public T getResult() { + return result; + } + + public double getScore() { + return score; + } + + @Override + public int hashCode() { + return Objects.hash( result ); + } + + @Override + public boolean equals( Object obj ) { + if ( this == obj ) { + return true; + } + if ( !( obj instanceof OntologySearchResult ) ) { + return false; + } + return Objects.equals( result, ( ( OntologySearchResult ) obj ).result ); + } + + @Override + public int compare( OntologySearchResult searchResult, OntologySearchResult t1 ) { + return Double.compare( searchResult.score, t1.score ); + } +} diff --git a/src/ubic/basecode/util/Configuration.java b/src/ubic/basecode/util/Configuration.java index 3f5c030b..19641797 100644 --- a/src/ubic/basecode/util/Configuration.java +++ b/src/ubic/basecode/util/Configuration.java @@ -18,112 +18,95 @@ */ package ubic.basecode.util; -import java.util.Iterator; - -import org.apache.commons.configuration2.CompositeConfiguration; -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.SystemConfiguration; -import org.apache.commons.configuration2.ex.ConfigurationException; -import org.apache.commons.configuration2.io.FileHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + /** * Configuration of ontology services and other things. + *

+ * Configurations are retrieved from three locations: properties set at runtime with {@link #setString(String, String)}, + * system properties and a default properties file named {@code basecode.properties} at the root of the classpath in + * that order. + *

+ * Properties set via system properties must be prefixed with {@code basecode.} to be considered. + *

+ * Properties set at runtime can be reset with {@link #reset()} and {@link #reset(String)}. * * @author paul - * */ public class Configuration { - private static CompositeConfiguration config; - - /** - * Name of the resource containing defaults - */ - private static final String DEFAULT_CONFIGURATION = "ontology.properties"; - - private static Logger log = LoggerFactory.getLogger( Configuration.class ); + private static final Logger log = LoggerFactory.getLogger( Configuration.class ); - /** - * The name of the file users can use to customize. - */ - private static final String USER_CONFIGURATION = "basecode.properties"; + private static final String SYSTEM_PROPERTY_PREFIX = "basecode."; + private static final Properties defaultProps = new Properties(); + private static final Properties props = new Properties(); static { - - config = new CompositeConfiguration(); - config.addConfiguration( new SystemConfiguration() ); - - /* - * the order matters - first come, first serve. Items added later do not overwrite items defined earlier. Thus - * the user configuration has to be listed first. - */ - - try { - // purely for backwards compatibility, if the user hasn't set up ontology.properties. - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( "Gemma.properties" ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { + try ( InputStream is = Configuration.class.getResourceAsStream( "/basecode.properties" ) ) { + if ( is != null ) { + defaultProps.load( is ); + } else { + log.warn( "No basecode.properties was found in the classpath, only system and manually set properties will be considered." 
); + } + } catch ( IOException e ) { + throw new RuntimeException( e ); } + } - try { - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( USER_CONFIGURATION ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { + /** + * Obtain a configuration value by key. + */ + @Nullable + public static String getString( String key ) { + String val = props.getProperty( key ); + if ( val == null ) { + val = System.getProperty( SYSTEM_PROPERTY_PREFIX + key ); } - - try { - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( DEFAULT_CONFIGURATION ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { - log.error( DEFAULT_CONFIGURATION + " is missing, ontology loading may fail" ); + if ( val == null ) { + val = defaultProps.getProperty( key ); } + return val; + } - // step through the result and do a final round of variable substitution - for ( Iterator it = config.getKeys(); it.hasNext(); ) { - String key = it.next(); - String property = config.getString( key ); - if ( property != null && property.startsWith( "${" ) && property.endsWith( "}" ) ) { - String keyToSubstitute = property.substring( 2, property.length() - 1 ); - String valueToSubstitute = config.getString( keyToSubstitute ); - log.debug( key + "=" + property + " -> " + valueToSubstitute ); - config.setProperty( key, valueToSubstitute ); - } + /** + * Obtain a boolean configuration value by key. + * + * @see Boolean#parseBoolean(String) + */ + @Nullable + public static Boolean getBoolean( String key ) { + String val = getString( key ); + if ( val != null ) { + return Boolean.parseBoolean( val ); + } else { + return null; } - } /** - * @param key - * @return + * Set a configuration by key. 
*/ - public static boolean getBoolean( String key ) { - return config.getBoolean( key, false ); + public static void setString( String key, String value ) { + props.setProperty( key, value ); } /** - * @param key - * @return + * Reset all configurations set at runtime. */ - public static String getString( String key ) { - return config.getString( key ); + public static void reset() { + props.clear(); } /** - * @param key - * @return + * Reset a specific configuration by key. */ - public static void setString( String key, Object value ) { - config.setProperty( key, value ); + public static void reset( String key ) { + props.remove( key ); } } diff --git a/src/ubic/basecode/util/r/AbstractRClient.java b/src/ubic/basecode/util/r/AbstractRClient.java index 65efc7b1..645659d9 100644 --- a/src/ubic/basecode/util/r/AbstractRClient.java +++ b/src/ubic/basecode/util/r/AbstractRClient.java @@ -622,7 +622,7 @@ public List listEval( Class listEntryType, String command ) { public boolean loadLibrary( String libraryName ) { try { - String userLibPath = Configuration.getString( "basecode.rlibpath" ); + String userLibPath = Configuration.getString( "rlibpath" ); if ( StringUtils.isNotBlank( userLibPath ) ) { voidEval( ".libPaths(" + userLibPath + ")" ); } diff --git a/src/ubic/basecode/util/r/RServeClient.java b/src/ubic/basecode/util/r/RServeClient.java index 0f823515..7fe5f4d9 100644 --- a/src/ubic/basecode/util/r/RServeClient.java +++ b/src/ubic/basecode/util/r/RServeClient.java @@ -18,15 +18,6 @@ */ package ubic.basecode.util.r; -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.util.Iterator; -import java.util.List; - -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.ex.ConfigurationException; -import org.apache.commons.configuration2.io.FileHandler; import org.apache.commons.lang3.StringUtils; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; @@ -34,10 
+25,14 @@ import org.rosuda.REngine.RList; import org.rosuda.REngine.Rserve.RConnection; import org.rosuda.REngine.Rserve.RserveException; - import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix; import ubic.basecode.dataStructure.matrix.DoubleMatrix; -import ubic.basecode.util.ConfigUtils; +import ubic.basecode.util.Configuration; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; /** * @author pavlidis @@ -56,24 +51,8 @@ public class RServeClient extends AbstractRClient { private final static String os = System.getProperty( "os.name" ).toLowerCase(); - /** - * @return - * @throws ConfigurationException - */ - protected static String findRserveCommand() throws ConfigurationException { - URL userSpecificConfigFileLocation = ConfigUtils.locate( "local.properties" ); - - PropertiesConfiguration userConfig = null; - if ( userSpecificConfigFileLocation != null ) { - userConfig = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( userConfig ); - handler.setFileName( "local.properties" ); - handler.load(); - } - String rserveExecutable = null; - if ( userConfig != null ) { - rserveExecutable = userConfig.getString( "rserve.start.command" ); - } + protected static String findRserveCommand() { + String rserveExecutable = Configuration.getString( "rserve.start.command" ); if ( StringUtils.isBlank( rserveExecutable ) ) { log.info( "Rserve command not configured? 
Trying fallbacks" ); if ( os.startsWith( "windows" ) ) { // lower cased diff --git a/test/ubic/basecode/ontology/AbstractOntologyTest.java b/test/ubic/basecode/ontology/AbstractOntologyTest.java index b9eb42c4..8face625 100644 --- a/test/ubic/basecode/ontology/AbstractOntologyTest.java +++ b/test/ubic/basecode/ontology/AbstractOntologyTest.java @@ -17,12 +17,9 @@ public class AbstractOntologyTest { protected static Path tempDir; - private static String prevCacheDir, prevIndexDir; @BeforeClass public static void setUpOntologyCacheDir() throws IOException { - prevCacheDir = Configuration.getString( "ontology.cache.dir" ); - prevIndexDir = Configuration.getString( "ontology.index.dir" ); tempDir = Files.createTempDirectory( "baseCode" ); Configuration.setString( "ontology.cache.dir", tempDir.resolve( "ontologyCache" ).toAbsolutePath().toString() ); Configuration.setString( "ontology.index.dir", tempDir.resolve( "searchIndices" ).toAbsolutePath().toString() ); @@ -33,8 +30,8 @@ public static void clearOntologyCacheDir() throws IOException { try { PathUtils.deleteDirectory( tempDir ); } finally { - Configuration.setString( "ontology.cache.dir", prevCacheDir ); - Configuration.setString( "ontology.index.dir", prevIndexDir ); + Configuration.reset( "ontology.cache.dir" ); + Configuration.reset( "ontology.index.dir" ); } } } diff --git a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java index 7430ce0c..5260bdbf 100644 --- a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java +++ b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java @@ -19,6 +19,7 @@ import ubic.basecode.ontology.AbstractOntologyTest; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.GenericOntologyService; +import ubic.basecode.ontology.search.OntologySearchResult; import java.io.File; import java.io.InputStream; @@ -62,7 +63,7 @@ public void testCacheOntologyToDisk() throws Exception { 
assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() ); assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() ); - Collection r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); // Recreate OntologyService using this cache file @@ -73,7 +74,7 @@ public void testCacheOntologyToDisk() throws Exception { assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() ); assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() ); - r = s.findTerm( "Mouse" ); + r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); // Recreate OntologyService with bad URL and no cache diff --git a/test/ubic/basecode/ontology/jena/OntologySearchTest.java b/test/ubic/basecode/ontology/jena/OntologySearchTest.java index 20bca5da..ed0d0631 100644 --- a/test/ubic/basecode/ontology/jena/OntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/OntologySearchTest.java @@ -16,20 +16,21 @@ import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.vocabulary.OWL2; +import com.hp.hpl.jena.vocabulary.RDFS; +import org.assertj.core.api.Assertions; import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; import java.io.InputStream; import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Set; import java.util.zip.GZIPInputStream; import static java.util.Objects.requireNonNull; import static org.junit.Assert.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.*; /** * Most of these tests were moved over from Gemma. 
@@ -43,17 +44,45 @@ public final void testIndexing() throws Exception { InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); OntModel model = OntologyLoader.loadMemoryModel( is, "owl-test", false ); - SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection> name = OntologySearch.matchClasses( model, index, "Bedding" ).toSet(); + Collection name = index.searchClasses( model, "Bedding", 500 ).toSet(); - assertEquals( 2, name.size() ); + assertEquals( 1, name.size() ); index.close(); - index = OntologyIndexer.indexOntology( "MGEDTEST", model, true ); - name = OntologySearch.matchClasses( model, index, "Bedding" ).toSet(); + index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); + name = index.searchClasses( model, "Bedding", 500 ).toSet(); + + assertEquals( 1, name.size() ); + index.close(); + } + + @Test + public void testStemming() throws Exception { + InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); + OntModel model = OntologyLoader.loadMemoryModel( is, "owl-test", false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); + + // bedding is stemmed to bed + Set results = index.searchClasses( model, "bed", 500 ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( 
OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); + + // plural query + results = index.searchClasses( model, "beddings", 500 ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); + + // plural query + results = index.searchClasses( model, "beds", 500 ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); - assertEquals( 2, name.size() ); index.close(); } @@ -69,22 +98,24 @@ public final void testOmitBadPredicates() throws Exception { model = OntologyLoader.loadMemoryModel( is, "NIFTEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "NIFTEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection> name = OntologySearch.matchClasses( model, index, "Organ" ).toSet(); + Collection name = index.searchClasses( model, "Organ", 500 ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); // } // should get : Organ, Human Tissue and Organ Resource for Research, United Network for Organ Sharing assertEquals( 3, name.size() ); - name = OntologySearch.matchClasses( model, index, "Anatomical entity" ).toSet(); + name = index.searchClasses( model, "Anatomical entity", 500 ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); // } assertEquals( 1, name.size() ); - name = OntologySearch.matchClasses( model, index, "liver" ).toSet(); // this is an "example" that we want to 
avoid + name = index.searchClasses( model, "liver", 500 ).toSet(); // this is an "example" that we want to avoid // leading to "Organ". // for ( OntClass ontologyTerm : name ) { @@ -106,16 +137,16 @@ public final void testOmitBadPredicates2() throws Exception { model = OntologyLoader.loadMemoryModel( is, "EFTEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "EFTEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "EFTEST", model, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "monocyte" ).toSet(); + Collection searchResults = index.searchClasses( model, "monocyte", 500 ).toSet(); assertFalse( "Should have found something for 'monocyte'", searchResults.isEmpty() ); assertEquals( 1, searchResults.size() ); // this is a "definition" that we want to avoid leading to "Monocyte". - searchResults = OntologySearch.matchClasses( model, index, "liver" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "liver", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -130,15 +161,15 @@ public final void testOmitDefinitions() throws Exception { model = OntologyLoader.loadMemoryModel( is, "DO_TEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "DO_TEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "DO_TEST", model, Collections.emptySet(), true ); // positive control - Set> searchResults = OntologySearch.matchClasses( model, index, "acute leukemia" ).toSet(); + Set searchResults = index.searchClasses( model, "acute leukemia", 500 ).toSet(); assertFalse( "Should have found something for 'acute leukemia'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". 
- searchResults = OntologySearch.matchClasses( model, index, "liver" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "liver", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -153,33 +184,36 @@ public final void testOmitDefinitions2() throws Exception { model = OntologyLoader.loadMemoryModel( is, "NIFORG_TEST", false ); } - SearchIndex index = OntologyIndexer.indexOntology( "NIFORG_TEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFORG_TEST", model, indexableProperties, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "Mammal" ).toSet(); + Collection searchResults = index.searchClasses( model, "Mammal", 500 ).toSet(); assertFalse( "Should have found something for 'Mammal'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". 
- searchResults = OntologySearch.matchClasses( model, index, "skin" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "skin", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } - searchResults = OntologySearch.matchClasses( model, index, "approximate" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "approximate", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'approximate'" ); } - searchResults = OntologySearch.matchClasses( model, index, "Bug" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "Bug", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'Bug'" ); } - searchResults = OntologySearch.matchClasses( model, index, "birnlex_2" ) + searchResults = index.searchClasses( model, "birnlex_2", 500 ) .toSet(); - assertEquals( 1, searchResults.size() ); - assertTrue( searchResults.iterator().next().result.hasLiteral( OWL2.deprecated, true ) ); - + Assertions.assertThat( searchResults ).hasSize( 1 ).extracting( sr -> sr.result ) + .satisfiesOnlyOnce( r -> { + assertTrue( r.as( OntClass.class ).hasLiteral( OWL2.deprecated, true ) ); + } ); index.close(); } @@ -191,15 +225,15 @@ public final void testOmitDefinitions3() throws Exception { model = OntologyLoader.loadMemoryModel( is, "OBI_TEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "OBI_TEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "OBI_TEST", model, Collections.emptySet(), true ); // positive control - Set> searchResults = 
OntologySearch.matchClasses( model, index, "irradiation" ).toSet(); + Set searchResults = index.searchClasses( model, "irradiation", 500 ).toSet(); assertFalse( "Should have found something for 'irradiation'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". - searchResults = OntologySearch.matchClasses( model, index, "skin" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "skin", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } @@ -213,15 +247,15 @@ public final void testOmitDefinitions4() throws Exception { OntModel model = OntologyLoader.loadMemoryModel( is, "NIFAN_TEST2", false ); is.close(); - SearchIndex index = OntologyIndexer.indexOntology( "NIFAN_TEST2", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFAN_TEST2", model, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "eye" ).toSet(); + Collection searchResults = index.searchClasses( model, "eye", 500 ).toSet(); assertFalse( "Should have found something for 'eye'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "brain" - searchResults = OntologySearch.matchClasses( model, index, "muscle" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "muscle", 500 ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'muscle'" ); } @@ -234,60 +268,30 @@ public final void testPersistence() throws Exception { InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); OntModel model = OntologyLoader.loadMemoryModel( is, 
"owl-test", false ); - SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), false ); index.close(); // now load it off disk - index = OntologyIndexer.getSubjectIndex( "MGEDTEST" ); + index = OntologyIndexer.getSubjectIndex( "MGEDTEST", indexableProperties, Collections.emptySet() ); assertNotNull( index ); - Collection> name = OntologySearch.matchClasses( model, index, "bedding" ).toSet(); - assertEquals( 2, name.size() ); + Collection name = index.searchClasses( model, "bedding", 500 ).toSet(); + assertEquals( 1, name.size() ); // test wildcard. Works with stemmed term, wild card doesn't do anything - name = OntologySearch.matchClasses( model, index, "bed*" ).toSet(); + name = index.searchClasses( model, "bed*", 500 ).toSet(); assertEquals( 2, name.size() ); // stemmed term. - name = OntologySearch.matchClasses( model, index, "bed" ).toSet(); - assertEquals( 2, name.size() ); + name = index.searchClasses( model, "bed", 500 ).toSet(); + assertEquals( 1, name.size() ); - name = OntologySearch.matchClasses( model, index, "beddin*" ).toSet(); + name = index.searchClasses( model, "beddin*", 500 ).toSet(); assertEquals( 2, name.size() ); index.close(); } - - @Test - public final void matchClasses_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." 
) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchClasses( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } - - @Test - public final void matchIndividuals_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." ) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchIndividuals( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } - - @Test - public final void matchResources_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." 
) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchIndividuals( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } } diff --git a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java index f0c41fc5..7c325c5d 100644 --- a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java @@ -3,12 +3,14 @@ import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.vocabulary.RDFS; import org.junit.BeforeClass; import org.junit.Test; import ubic.basecode.ontology.search.OntologySearchException; import java.io.IOException; import java.io.InputStream; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -26,7 +28,9 @@ public class UberonOntologySearchTest { public static void setUpUberon() throws IOException { try ( InputStream is = new GZIPInputStream( requireNonNull( OntologySearchTest.class.getResourceAsStream( "/data/uberon.owl.gz" ) ) ) ) { uberon = OntologyLoader.loadMemoryModel( is, "UBERON_TEST2", true, OntModelSpec.OWL_MEM ); - uberonIndex = OntologyIndexer.indexOntology( "UBERON_TEST2", uberon, false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + uberonIndex = OntologyIndexer.indexOntology( "UBERON_TEST2", uberon, indexableProperties, Collections.emptySet(), true ); } } @@ -34,7 +38,7 @@ public static void setUpUberon() throws IOException { public void testOmitDefinition() throws OntologySearchException { OntClass brain = 
uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - Set> searchResults = OntologySearch.matchClasses( uberon, uberonIndex, "brain" ).toSet(); + Set searchResults = uberonIndex.searchClasses( uberon, "brain", 500 ).toSet(); assertEquals( 128, searchResults.size() ); } @@ -42,9 +46,8 @@ public void testOmitDefinition() throws OntologySearchException { public void testScore() throws OntologySearchException { OntClass brain = uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - List> searchResults = OntologySearch.matchClasses( uberon, uberonIndex, "brain" ).toList(); - assertEquals( 446, searchResults.size() ); - assertEquals( 3.33, searchResults.get( 0 ).score, 0.01 ); - assertEquals( 128, new HashSet<>( searchResults ).size() ); + List searchResults = uberonIndex.searchClasses( uberon, "brain", 500 ).toList(); + assertEquals( 128, searchResults.size() ); + assertEquals( 3.85, searchResults.get( 0 ).score, 0.01 ); } } diff --git a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java index 09f25e7d..a74cb90a 100644 --- a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java @@ -19,9 +19,11 @@ package ubic.basecode.ontology.providers; +import org.assertj.core.api.Assertions; import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; import ubic.basecode.ontology.model.OntologyTerm; +import ubic.basecode.ontology.search.OntologySearchResult; import ubic.basecode.util.Configuration; import java.net.URL; @@ -44,7 +46,7 @@ public void testGenericOntologyServiceMem() throws Exception { s1.initialize( true, false ); GenericOntologyService s = s1; - Collection r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); } @@ -79,10 +81,10 @@ public void 
testWithoutOntologyCacheDir() { new GenericOntologyService( "foo", resource.toString(), false, false ) .initialize( true, true ); } ); - assertTrue( e.getMessage().matches( "No cache directory is set for foo \\[file:.+], cannot force indexing." ) ); + Assertions.assertThat( e ) + .hasMessageMatching( "No cache directory is set for foo.+, cannot force indexing\\." ); } finally { - Configuration.setString( "ontology.cache.dir", prevCacheDir ); - Configuration.setString( "ontology.index.dir", prevIndexDir ); + Configuration.reset(); } } } diff --git a/test/ubic/basecode/ontology/providers/ObiServiceTest.java b/test/ubic/basecode/ontology/providers/ObiServiceTest.java index 6dd8752d..b8aeb354 100644 --- a/test/ubic/basecode/ontology/providers/ObiServiceTest.java +++ b/test/ubic/basecode/ontology/providers/ObiServiceTest.java @@ -16,14 +16,10 @@ import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; -import ubic.basecode.ontology.model.OntologyIndividual; import ubic.basecode.ontology.model.OntologyResource; -import ubic.basecode.ontology.model.OntologyTerm; +import ubic.basecode.ontology.search.OntologySearchResult; -import java.util.Collection; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; /** * @author paul @@ -35,16 +31,21 @@ public void testLoadAndSearch() throws Exception { ObiService m = new ObiService(); m.setInferenceMode( OntologyService.InferenceMode.NONE ); m.initialize( true, false ); - - assertTrue( m.isOntologyLoaded() ); - - Collection hits = m.findTerm( "batch" ); - assertFalse( hits.isEmpty() ); - - Collection ihits = m.findIndividuals( "batch" ); - assertFalse( ihits.isEmpty() ); - - Collection rhits = m.findResources( "batch" ); - assertFalse( rhits.isEmpty() ); + assertThat( m.isOntologyLoaded() ).isTrue(); + + assertThat( m.findTerm( "batch", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( 
OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000132" ); + + assertThat( m.findIndividuals( "failed exploratory term", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000103" ); + + assertThat( m.findResources( "batch", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000132" ); } } diff --git a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java index 9ae9754e..ad4d3810 100644 --- a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java @@ -7,11 +7,13 @@ import ubic.basecode.ontology.OntologyTermTest; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.search.OntologySearchException; +import ubic.basecode.ontology.search.OntologySearchResult; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import static java.util.Objects.requireNonNull; @@ -55,7 +57,7 @@ public void testGetParentsFromMultipleTerms() { OntologyTerm brain = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); OntologyTerm liver = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0002107" ); Collection children = uberon.getParents( Arrays.asList( brain, liver ), false, true ); - assertEquals( 30, children.size() ); + assertEquals( 41, children.size() ); assertFalse( children.contains( uberon.getTerm( OWL2.Nothing.getURI() ) ) ); } @@ -64,7 +66,7 @@ public void testGetParentsHasPart() { OntologyTerm t = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( t ); Collection parents = 
t.getParents( true ); - assertEquals( 3, parents.size() ); + assertEquals( 4, parents.size() ); // does not contain itself assertFalse( parents.contains( t ) ); // via subclass @@ -79,9 +81,9 @@ public void testGetParentsHasPart() { public void testGetChildrenHasPart() { OntologyTerm t = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( t ); - assertEquals( 76, t.getChildren( true ).size() ); + assertEquals( 81, t.getChildren( true ).size() ); Collection children = t.getChildren( false ); - assertEquals( 1496, children.size() ); + assertEquals( 1995, children.size() ); // via subclass of, insect adult brain assertTrue( children.contains( uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_6003624" ) ) ); // via part of, nucleus of brain @@ -95,22 +97,22 @@ public void testGetChildrenFromMultipleTerms() { OntologyTerm brain = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); OntologyTerm liver = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0002107" ); Collection children = uberon.getChildren( Arrays.asList( brain, liver ), false, true ); - assertEquals( 1562, children.size() ); + assertEquals( 2077, children.size() ); } @Test public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchException { - Collection terms = uberon.findTerm( "brain" ); - Collection matches = uberon.getChildren( terms, false, true ); - assertEquals( 1870, matches.size() ); + Collection> terms = uberon.findTerm( "brain", 500 ); + Collection matches = uberon.getChildren( terms.stream().map( OntologySearchResult::getResult ).collect( Collectors.toSet() ), false, true ); + assertEquals( 2684, matches.size() ); } @Test public void testFindTerm() throws OntologySearchException { - assertEquals( 123, uberon.findTerm( "brain" ).size() ); - assertEquals( 128, uberon.findTerm( "brain", true ).size() ); - OntologyTerm firstResult = uberon.findTerm( "brain" ).iterator().next(); - assertNotNull( firstResult.getScore() ); - 
assertEquals( 2.8577, firstResult.getScore(), 0.0001 ); + assertEquals( 98, uberon.findTerm( "brain", 500 ).size() ); + assertEquals( 103, uberon.findTerm( "brain", 500, true ).size() ); + OntologySearchResult firstResult = uberon.findTerm( "brain", 500 ).iterator().next(); + assertNotNull( firstResult ); + assertEquals( 1.5367, firstResult.getScore(), 0.0001 ); } }