Skip to content

Commit

Permalink
When no filter is specified, only include kbts that are associated with the texts of the corpus
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 27, 2024
1 parent 069dc40 commit 788beb9
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ public async Task RunAsync_EnableKeyTerms()
(int src1Count, int src2Count, int trgCount, int termCount) = await env.GetTrainCountAsync();
Assert.Multiple(() =>

Check failure on line 119 in src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs

View workflow job for this annotation

GitHub Actions / NUnit Tests

Serval.Machine.Shared.Services.PreprocessBuildJobTests ► RunAsync_EnableKeyTerms

Failed test found in: src/Machine/test/Serval.Machine.Shared.Tests/TestResults/test-results.trx Error: Expected: 166 But was: 1
Raw output
  Expected: 166
  But was:  1

   at Serval.Machine.Shared.Services.PreprocessBuildJobTests.RunAsync_EnableKeyTerms() in /home/runner/work/serval/serval/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs:line 119
   at NUnit.Framework.Internal.TaskAwaitAdapter.GenericAdapter`1.BlockUntilCompleted()
   at NUnit.Framework.Internal.MessagePumpStrategy.NoMessagePumpStrategy.WaitForCompletion(AwaitAdapter awaiter)
   at NUnit.Framework.Internal.AsyncToSyncAdapter.Await(Func`1 invoke)
   at NUnit.Framework.Internal.Commands.TestMethodCommand.RunTestMethod(TestExecutionContext context)
   at NUnit.Framework.Internal.Commands.TestMethodCommand.Execute(TestExecutionContext context)
   at NUnit.Framework.Internal.Execution.SimpleWorkItem.<>c__DisplayClass4_0.<PerformWork>b__0()
   at NUnit.Framework.Internal.ContextUtils.<>c__DisplayClass1_0`1.<DoIsolated>b__0(Object _)

1)    at Serval.Machine.Shared.Services.PreprocessBuildJobTests.<>c__DisplayClass7_0.<RunAsync_EnableKeyTerms>b__0() in /home/runner/work/serval/serval/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs:line 124
   at NUnit.Framework.Assert.Multiple(TestDelegate testDelegate)
   at Serval.Machine.Shared.Services.PreprocessBuildJobTests.RunAsync_EnableKeyTerms() in /home/runner/work/serval/serval/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs:line 119

{
Assert.That(src1Count, Is.EqualTo(0));
Assert.That(src1Count, Is.EqualTo(14));
Assert.That(src2Count, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(0));
Assert.That(termCount, Is.EqualTo(5726));
Assert.That(trgCount, Is.EqualTo(1));
Assert.That(termCount, Is.EqualTo(166));
});
}

Expand All @@ -136,9 +136,9 @@ public async Task RunAsync_DisableKeyTerms()
(int src1Count, int src2Count, int trgCount, int termCount) = await env.GetTrainCountAsync();
Assert.Multiple(() =>
{
Assert.That(src1Count, Is.EqualTo(0));
Assert.That(src1Count, Is.EqualTo(14));
Assert.That(src2Count, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(1));
Assert.That(termCount, Is.EqualTo(0));
});
}
Expand Down Expand Up @@ -853,8 +853,8 @@ public TestEnvironment()
Id = "src_1",
Language = "es",
Files = [ParatextFile("pt-source1")],
TrainOnTextIds = [],
PretranslateTextIds = []
TrainOnTextIds = null,
PretranslateTextIds = null
}
},
TargetCorpora = new List<MonolingualCorpus>()
Expand All @@ -864,7 +864,7 @@ public TestEnvironment()
Id = "trg_1",
Language = "en",
Files = [ParatextFile("pt-target1")],
TrainOnTextIds = []
TrainOnTextIds = null
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public IEnumerable<ITextCorpus> CreateTermCorpora(
IReadOnlyList<(CorpusFile File, Dictionary<string, HashSet<int>> Chapters)> corpora
)
{
foreach ((CorpusFile file, Dictionary<string, HashSet<int>> chapters) in corpora)
foreach ((CorpusFile file, Dictionary<string, HashSet<int>>? chapters) in corpora)
{
switch (file.Format)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,18 @@ public void Preprocess(
if (useKeyTerms)
{
ITextCorpus[]? sourceTermCorpora = _corpusService
.CreateTermCorpora(corpus.SourceCorpora.SelectMany(GetChaptersPerFile).ToArray())
.CreateTermCorpora(
sourceCorpora
.SelectMany(corpus => GetChaptersPerFile(corpus.Corpus, corpus.TextCorpus))
.ToArray()
)
.ToArray();
ITextCorpus[]? targetTermCorpora = _corpusService
.CreateTermCorpora(corpus.TargetCorpora.SelectMany(GetChaptersPerFile).ToArray())
.CreateTermCorpora(
targetCorpora
.SelectMany(corpus => GetChaptersPerFile(corpus.Corpus, corpus.TextCorpus))
.ToArray()
)
.ToArray();
if (sourceTermCorpora is not null && targetTermCorpora is not null)
{
Expand Down Expand Up @@ -114,15 +122,16 @@ public void Preprocess(
}

private static IEnumerable<(CorpusFile File, Dictionary<string, HashSet<int>> Chapters)> GetChaptersPerFile(
MonolingualCorpus mc
MonolingualCorpus mc,
ITextCorpus tc
)
{
Dictionary<string, HashSet<int>>? chapters = mc.TrainOnChapters;
if (chapters is null && mc.TrainOnTextIds is not null)
{
chapters = mc.TrainOnTextIds.Select(tid => (tid, new HashSet<int> { })).ToDictionary();
}
chapters ??= [];
chapters ??= tc.Texts.Select(t => (t.Id, new HashSet<int>() { })).ToDictionary();
return mc.Files.Select(f => (f, chapters));
}

Expand Down

0 comments on commit 788beb9

Please sign in to comment.