Skip to content

Commit

Permalink
OPENNLP-1654 Add thread-safe version of NameFinderME
Browse files Browse the repository at this point in the history
- adds ThreadSafeNameFinderME
- adds additional constructor to ThreadSafeTokenizerME & ThreadSafeSentenceDetectorME to be consistent with ThreadSafePOSTaggerME
- improves existing JavaDoc along the path
  • Loading branch information
mawiesne authored and rzo1 committed Nov 25, 2024
1 parent ec09b7e commit 7b38536
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,18 @@
/**
* A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent.
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
* <p>
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
*
* @implNote
* This implementation uses a {@link ThreadLocal}. Although the implementation is
* lightweight because the model is not duplicated, if you have many long-running threads,
* you may run into memory problems.
* </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see Lemmatizer
* @see LemmatizerME
*/
@ThreadSafe
public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.namefind;

import opennlp.tools.commons.ThreadSafe;
import opennlp.tools.util.Span;

/**
 * A thread-safe version of {@link NameFinderME}. Using it is completely transparent.
 * You can use it in a single-threaded context as well, it only incurs a minimal overhead.
 *
 * @implNote
 * This implementation uses a {@link ThreadLocal}: each thread that calls
 * {@link #find(String[])} lazily receives its own {@link NameFinderME}, all of which
 * are created from the same {@link TokenNameFinderModel} instance. Although the
 * implementation is lightweight because the model is not duplicated, if you have many
 * long-running threads, you may run into memory problems.
 * <p>
 * Be careful when using this in a Jakarta EE application, for example.
 * </p>
 * The user is responsible for clearing the {@link ThreadLocal}. Note that
 * {@link #close()} only releases the instance bound to the <em>calling</em> thread;
 * it must be invoked from every thread that used this object.
 *
 * @see NameFinderME
 * @see TokenNameFinder
 */
@ThreadSafe
public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable {

  // Shared by all per-thread name finders; never copied.
  private final TokenNameFinderModel model;

  // Holds at most one NameFinderME per thread, created on first use in find(..).
  private final ThreadLocal<NameFinderME> threadLocal = new ThreadLocal<>();

  /**
   * Initializes a {@link ThreadSafeNameFinderME} with the specified {@code model}.
   *
   * @param model A valid {@link TokenNameFinderModel}.
   */
  public ThreadSafeNameFinderME(TokenNameFinderModel model) {
    this.model = model;
  }

  // If a thread-local version exists, return it. Otherwise, create, then return.
  private NameFinderME getNameFinder() {
    NameFinderME nameFinder = threadLocal.get();
    if (nameFinder == null) {
      nameFinder = new NameFinderME(model);
      threadLocal.set(nameFinder);
    }
    return nameFinder;
  }

  /**
   * Removes the {@link NameFinderME} bound to the calling thread, if any.
   * Other threads' instances are not affected. A later call to
   * {@link #find(String[])} on this thread creates a new instance.
   */
  @Override
  public void close() {
    threadLocal.remove();
  }

  /**
   * Delegates to the calling thread's {@link NameFinderME}, creating it on first use.
   *
   * @param tokens The tokens of the sentence to search for names.
   * @return The result of {@link NameFinderME#find(String[])} for {@code tokens}.
   */
  @Override
  public Span[] find(String[] tokens) {
    return getNameFinder().find(tokens);
  }

  /**
   * Clears the adaptive data of the calling thread's {@link NameFinderME}.
   * <p>
   * If this thread has not created a name finder yet, nothing is done: there is no
   * instance whose adaptive data could have been populated, and creating one only to
   * clear it would needlessly bind an object to this thread's {@link ThreadLocal}.
   */
  @Override
  public void clearAdaptiveData() {
    final NameFinderME nameFinder = threadLocal.get();
    if (nameFinder != null) {
      nameFinder.clearAdaptiveData();
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@
/**
* A thread-safe version of the {@link POSTaggerME}. Using it is completely transparent.
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
* <p>
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
*
* @implNote
* This implementation uses a {@link ThreadLocal}. Although the implementation is
* lightweight because the model is not duplicated, if you have many long-running threads,
* you may run into memory problems.
* </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see POSTagger
* @see POSTaggerME
*/
@ThreadSafe
public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,51 @@

package opennlp.tools.sentdetect;

import java.io.IOException;

import opennlp.tools.commons.ThreadSafe;
import opennlp.tools.util.DownloadUtil;
import opennlp.tools.util.Span;

/**
* A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in
* a single-threaded context as well, it only incurs a minimal overhead.
* <p>
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
* A thread-safe version of {@link SentenceDetectorME}. Using it is completely transparent.
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
*
* @implNote
* This implementation uses a {@link ThreadLocal}. Although the implementation is
* lightweight because the model is not duplicated, if you have many long-running threads,
* you may run into memory problems.
* </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see SentenceDetector
* @see SentenceDetectorME
*/
@ThreadSafe
public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable {

private final SentenceModel model;

private final ThreadLocal<SentenceDetectorME> threadLocal =
new ThreadLocal<>();
private final ThreadLocal<SentenceDetectorME> threadLocal = new ThreadLocal<>();

/**
* Initializes a {@link ThreadSafeSentenceDetectorME} by downloading a default model
* for a given {@code language}.
* <p>
* The model is requested from {@link DownloadUtil} with the model type
* {@code DownloadUtil.ModelType.SENTENCE_DETECTOR} and is then passed on to the
* constructor that accepts a {@link SentenceModel}.
*
* @param language An ISO-conformant language code that selects the model to download.
* @throws IOException Thrown if the model could not be downloaded or saved.
*/
public ThreadSafeSentenceDetectorME(String language) throws IOException {
this(DownloadUtil.downloadModel(language, DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class));
}

/**
* Initializes a {@link ThreadSafeSentenceDetectorME} with the specified {@code model}.
*
* @param model A valid {@link SentenceModel}.
*/
public ThreadSafeSentenceDetectorME(SentenceModel model) {
super();
this.model = model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,27 @@

package opennlp.tools.tokenize;

import java.io.IOException;

import opennlp.tools.commons.ThreadSafe;
import opennlp.tools.util.DownloadUtil;
import opennlp.tools.util.Span;

/**
* A thread-safe version of TokenizerME. Using it is completely transparent. You can use it in
* a single-threaded context as well, it only incurs a minimal overhead.
* <p>
* Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is
* A thread-safe version of {@link TokenizerME}. Using it is completely transparent.
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
*
* @implNote
* This implementation uses a {@link ThreadLocal}. Although the implementation is
* lightweight because the model is not duplicated, if you have many long-running threads,
* you may run into memory problems.
* </p>
* <p>
* Be careful when using this in a Jakarta EE application, for example.
* </p>
* The user is responsible for clearing the {@link ThreadLocal}.
*
* @see Tokenizer
* @see TokenizerME
*/
@ThreadSafe
public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {
Expand All @@ -40,6 +46,22 @@ public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable {

private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>();

/**
* Initializes a {@link ThreadSafeTokenizerME} by downloading a default model
* for a given {@code language}.
* <p>
* The model is requested from {@link DownloadUtil} with the model type
* {@code DownloadUtil.ModelType.TOKENIZER} and is then passed on to the
* constructor that accepts a {@link TokenizerModel}.
*
* @param language An ISO-conformant language code that selects the model to download.
* @throws IOException Thrown if the model could not be downloaded or saved.
*/
public ThreadSafeTokenizerME(String language) throws IOException {
this(DownloadUtil.downloadModel(language, DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class));
}

/**
* Initializes a {@link ThreadSafeTokenizerME} with the specified {@code model}.
*
* @param model A valid {@link TokenizerModel}.
*/
public ThreadSafeTokenizerME(TokenizerModel model) {
super();
this.model = model;
Expand Down

0 comments on commit 7b38536

Please sign in to comment.