Skip to content

Commit

Permalink
OPENNLP-1631 Convert existing ModelLoader tests to integration tests
Browse files Browse the repository at this point in the history
- renames the three existing test classes to use the "IT" suffix so that they are executed during the Maven Failsafe plugin phase
- adds 18 new languages for each model type (sent, pos, tokens)
- fixes missing language checks for "nl" (Dutch) in DownloadParserTest
  • Loading branch information
mawiesne committed Oct 29, 2024
1 parent e2ce958 commit d77fd06
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
import opennlp.tools.util.DownloadUtil;

@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
public class POSModelLoaderTest extends AbstractModelLoaderTest {
public class POSModelLoaderIT extends AbstractModelLoaderTest {

// SUT
private POSModelLoader loader;

@BeforeAll
public static void initResources() {
List<String> resources = List.of("en", "de");
resources.forEach(lang -> {
List<String> langs = List.of("en", "fr", "de", "it", "nl", "bg", "cs", "da",
"es", "et", "fi", "hr", "lv", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "uk");
langs.forEach(lang -> {
try {
DownloadUtil.downloadModel(lang,
DownloadUtil.ModelType.POS, POSModel.class);
Expand All @@ -57,7 +58,10 @@ public void setup() {
}

@ParameterizedTest(name = "Verify \"{0}\" POS model loading")
@ValueSource(strings = {"en-ud-ewt", "de-ud-gsd"})
@ValueSource(strings = {"en-ud-ewt", "fr-ud-gsd", "de-ud-gsd", "it-ud-vit", "nl-ud-alpino",
"bg-ud-btb", "cs-ud-pdt", "da-ud-ddt", "es-ud-gsd", "et-ud-edt", "fi-ud-tdt", "hr-ud-set",
"lv-ud-lvtb", "no-ud-bokmaal", "pl-ud-pdb", "pt-ud-gsd", "ro-ud-rrt", "ru-ud-gsd",
"sr-ud-set", "sk-ud-snk", "sl-ud-ssj", "sv-ud-talbanken", "uk-ud-iu"})
public void testLoadModelByLanguage(String langModel) throws IOException {
String modelName = "opennlp-" + langModel + "-pos-1.1-2.4.0.bin";
POSModel model = loader.loadModel(Files.newInputStream(OPENNLP_DIR.resolve(modelName)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
import opennlp.tools.util.DownloadUtil;

@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
public class SentenceModelLoaderTest extends AbstractModelLoaderTest {
public class SentenceModelLoaderIT extends AbstractModelLoaderTest {

// SUT
private SentenceModelLoader loader;

@BeforeAll
public static void initResources() {
List<String> resources = List.of("en", "de");
resources.forEach(lang -> {
List<String> langs = List.of("en", "fr", "de", "it", "nl", "bg", "cs", "da",
"es", "et", "fi", "hr", "lv", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "uk");
langs.forEach(lang -> {
try {
DownloadUtil.downloadModel(lang,
DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class);
Expand All @@ -57,7 +58,10 @@ public void setup() {
}

@ParameterizedTest(name = "Verify \"{0}\" sentence model loading")
@ValueSource(strings = {"en-ud-ewt", "de-ud-gsd"})
@ValueSource(strings = {"en-ud-ewt", "fr-ud-gsd", "de-ud-gsd", "it-ud-vit", "nl-ud-alpino",
"bg-ud-btb", "cs-ud-pdt", "da-ud-ddt", "es-ud-gsd", "et-ud-edt", "fi-ud-tdt", "hr-ud-set",
"lv-ud-lvtb", "no-ud-bokmaal", "pl-ud-pdb", "pt-ud-gsd", "ro-ud-rrt", "ru-ud-gsd",
"sr-ud-set", "sk-ud-snk", "sl-ud-ssj", "sv-ud-talbanken", "uk-ud-iu"})
public void testLoadModelByLanguage(String langModel) throws IOException {
String modelName = "opennlp-" + langModel + "-sentence-1.1-2.4.0.bin";
SentenceModel model = loader.loadModel(Files.newInputStream(OPENNLP_DIR.resolve(modelName)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
import opennlp.tools.util.DownloadUtil;

@EnabledWhenCDNAvailable(hostname = "dlcdn.apache.org")
public class TokenizerModelLoaderTest extends AbstractModelLoaderTest {
public class TokenizerModelLoaderIT extends AbstractModelLoaderTest {

// SUT
private TokenizerModelLoader loader;

@BeforeAll
public static void initResources() {
List<String> resources = List.of("en", "de");
resources.forEach(lang -> {
List<String> langs = List.of("en", "fr", "de", "it", "nl", "bg", "cs", "da",
"es", "et", "fi", "hr", "lv", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "uk");
langs.forEach(lang -> {
try {
DownloadUtil.downloadModel(lang,
DownloadUtil.ModelType.TOKENIZER, TokenizerModel.class);
Expand All @@ -57,7 +58,10 @@ public void setup() {
}

@ParameterizedTest(name = "Verify \"{0}\" tokenizer model loading")
@ValueSource(strings = {"en-ud-ewt", "de-ud-gsd"})
@ValueSource(strings = {"en-ud-ewt", "fr-ud-gsd", "de-ud-gsd", "it-ud-vit", "nl-ud-alpino",
"bg-ud-btb", "cs-ud-pdt", "da-ud-ddt", "es-ud-gsd", "et-ud-edt", "fi-ud-tdt", "hr-ud-set",
"lv-ud-lvtb", "no-ud-bokmaal", "pl-ud-pdb", "pt-ud-gsd", "ro-ud-rrt", "ru-ud-gsd",
"sr-ud-set", "sk-ud-snk", "sl-ud-ssj", "sv-ud-talbanken", "uk-ud-iu"})
public void testLoadModelByLanguage(String langModel) throws IOException {
String modelName = "opennlp-" + langModel + "-tokens-1.1-2.4.0.bin";
TokenizerModel model = loader.loadModel(Files.newInputStream(OPENNLP_DIR.resolve(modelName)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ public static Stream<Arguments> expectedModels() {
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "it-ud-vit-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "it-ud-vit-" + MODEL_TOK + VER + BIN,
DownloadUtil.ModelType.POS, OPENNLP + "it-ud-vit-" + MODEL_POS + VER + BIN)),
Arguments.of("nl", Map.of(
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "nl-ud-alpino-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "nl-ud-alpino-" + MODEL_TOK + VER + BIN,
DownloadUtil.ModelType.POS, OPENNLP + "nl-ud-alpino-" + MODEL_POS + VER + BIN)),
Arguments.of("bg", Map.of(
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "bg-ud-btb-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "bg-ud-btb-" + MODEL_TOK + VER + BIN,
Expand Down Expand Up @@ -137,10 +141,6 @@ public static Stream<Arguments> expectedModels() {
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "lv-ud-lvtb-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "lv-ud-lvtb-" + MODEL_TOK + VER + BIN,
DownloadUtil.ModelType.POS, OPENNLP + "lv-ud-lvtb-" + MODEL_POS + VER + BIN)),
Arguments.of("lv", Map.of(
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "lv-ud-lvtb-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "lv-ud-lvtb-" + MODEL_TOK + VER + BIN,
DownloadUtil.ModelType.POS, OPENNLP + "lv-ud-lvtb-" + MODEL_POS + VER + BIN)),
Arguments.of("no", Map.of(
DownloadUtil.ModelType.SENTENCE_DETECTOR, OPENNLP + "no-ud-bokmaal-" + MODEL_SENT + VER + BIN,
DownloadUtil.ModelType.TOKENIZER, OPENNLP + "no-ud-bokmaal-" + MODEL_TOK + VER + BIN,
Expand Down

0 comments on commit d77fd06

Please sign in to comment.