From e300f330013e29fb58a10eb64ce444e2df23a2d3 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 28 Nov 2024 07:21:52 -0500 Subject: [PATCH] A start --- .../AssessmentEnginesController.cs | 1 - .../Serval.Assessment/Models/CorpusFile.cs | 9 ----- .../Services/EngineService.cs | 4 +-- src/Serval/src/Serval.Client/Client.g.cs | 31 +++++++++++++--- .../Consumers/GetCorpusConsumer.cs | 11 ++---- .../Contracts/CorpusFileDto.cs | 2 +- .../Contracts/DataFileReferenceDto.cs | 9 +++++ .../Controllers/CorporaController.cs | 25 ++++++++----- .../src/Serval.DataFiles/Models/CorpusFile.cs | 2 +- .../Models/DataFileReference.cs | 8 +++++ .../Services/CorpusService.cs | 2 +- .../Services/ICorpusService.cs | 6 +++- .../Serval.Shared/Contracts/CorpusResult.cs | 2 +- .../src/Serval.Shared/Models/CorpusFile.cs | 35 +++++++++++++++++++ .../Serval.Shared/Models/MonolingualCorpus.cs | 20 +++++++++++ .../src/Serval.Shared/Serval.Shared.csproj | 1 + src/Serval/src/Serval.Shared/Usings.cs | 2 ++ .../TranslationEnginesController.cs | 18 ++++++---- .../src/Serval.Translation/Models/Corpus.cs | 14 ++++++++ .../Serval.Translation/Models/CorpusFile.cs | 9 ----- .../src/Serval.Translation/Models/Engine.cs | 13 +++++++ .../Models/MonolingualCorpus.cs | 9 ----- .../Models/ParallelCorpus.cs | 11 ++++++ .../Services/EngineService.cs | 16 ++++----- .../Services/PretranslationService.cs | 8 ++--- .../TranslationEngineTests.cs | 6 ++-- .../Services/CorpusServiceTests.cs | 6 ++-- 27 files changed, 198 insertions(+), 82 deletions(-) delete mode 100644 src/Serval/src/Serval.Assessment/Models/CorpusFile.cs create mode 100644 src/Serval/src/Serval.DataFiles/Contracts/DataFileReferenceDto.cs create mode 100644 src/Serval/src/Serval.DataFiles/Models/DataFileReference.cs create mode 100644 src/Serval/src/Serval.Shared/Models/CorpusFile.cs create mode 100644 src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs delete mode 100644 src/Serval/src/Serval.Translation/Models/CorpusFile.cs delete mode 100644 
src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs diff --git a/src/Serval/src/Serval.Assessment/Controllers/AssessmentEnginesController.cs b/src/Serval/src/Serval.Assessment/Controllers/AssessmentEnginesController.cs index 459d3b34..ff9b99ed 100644 --- a/src/Serval/src/Serval.Assessment/Controllers/AssessmentEnginesController.cs +++ b/src/Serval/src/Serval.Assessment/Controllers/AssessmentEnginesController.cs @@ -657,7 +657,6 @@ CancellationToken cancellationToken new CorpusFile { Id = fileConfig.FileId, - Filename = result.Message.Filename, TextId = fileConfig.TextId ?? result.Message.Name, Format = result.Message.Format } diff --git a/src/Serval/src/Serval.Assessment/Models/CorpusFile.cs b/src/Serval/src/Serval.Assessment/Models/CorpusFile.cs deleted file mode 100644 index fa491558..00000000 --- a/src/Serval/src/Serval.Assessment/Models/CorpusFile.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Serval.Assessment.Models; - -public record CorpusFile -{ - public required string Id { get; init; } - public required string Filename { get; init; } - public required FileFormat Format { get; init; } - public required string TextId { get; init; } -} diff --git a/src/Serval/src/Serval.Assessment/Services/EngineService.cs b/src/Serval/src/Serval.Assessment/Services/EngineService.cs index 01ced93a..972edccf 100644 --- a/src/Serval/src/Serval.Assessment/Services/EngineService.cs +++ b/src/Serval/src/Serval.Assessment/Services/EngineService.cs @@ -227,13 +227,13 @@ private V1.Corpus Map(Models.Corpus source) return new V1.Corpus { Language = source.Language, Files = { source.Files.Select(Map) } }; } - private V1.CorpusFile Map(Models.CorpusFile source) + private V1.CorpusFile Map(Shared.Models.CorpusFile source) { return new V1.CorpusFile { TextId = source.TextId, Format = (V1.FileFormat)source.Format, - Location = Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, source.Filename) + Location = Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, 
source.GetFilename()) }; } } diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index ee4ce398..8b8e2615 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -9293,7 +9293,7 @@ public partial class CorpusFile { [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] - public DataFile File { get; set; } = new DataFile(); + public DataFileReference File { get; set; } = new DataFileReference(); [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? TextId { get; set; } = default!; @@ -9301,7 +9301,7 @@ public partial class CorpusFile } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class DataFile + public partial class DataFileReference { [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -9319,9 +9319,6 @@ public partial class DataFile [Newtonsoft.Json.JsonConverter(typeof(Newtonsoft.Json.Converters.StringEnumConverter))] public FileFormat Format { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] - public int Revision { get; set; } = default!; - } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] @@ -9364,6 +9361,30 @@ public partial class CorpusFileConfig } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class DataFile + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = 
true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("format", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + [Newtonsoft.Json.JsonConverter(typeof(Newtonsoft.Json.Converters.StringEnumConverter))] + public FileFormat Format { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] + public int Revision { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationEngine { diff --git a/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs index c369d528..dd6b806b 100644 --- a/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs +++ b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs @@ -20,16 +20,11 @@ await context.RespondAsync( Name = corpus.Name, Language = corpus.Language, Files = corpus - .Files.Select(f => new CorpusFileResult + .Files.Select(f => new Shared.Models.CorpusFile { + Id = f.FileReference.Id, TextId = f.TextId!, - File = new DataFileResult - { - DataFileId = f.File.Id, - Filename = f.File.Filename, - Format = f.File.Format, - Name = f.File.Name - } + Format = f.FileReference.Format }) .ToList() } diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs index d2d175be..596c9599 100644 --- 
a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs +++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Contracts; public record CorpusFileDto { - public required DataFileDto File { get; init; } + public required DataFileReferenceDto File { get; init; } public string? TextId { get; init; } } diff --git a/src/Serval/src/Serval.DataFiles/Contracts/DataFileReferenceDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/DataFileReferenceDto.cs new file mode 100644 index 00000000..e996ddf3 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Contracts/DataFileReferenceDto.cs @@ -0,0 +1,9 @@ +namespace Serval.DataFiles.Contracts; + +public record DataFileReferenceDto +{ + public required string Id { get; init; } + public required string Url { get; init; } + public string? Name { get; init; } + public required FileFormat Format { get; init; } +} diff --git a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs index 29bf041e..1b4b307a 100644 --- a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs +++ b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs @@ -166,18 +166,18 @@ private async Task MapAsync(CorpusConfigDto corpusConfig, string id, Can }; } - private async Task> MapAsync( + private async Task> MapAsync( IReadOnlyList files, CancellationToken cancellationToken ) { - var dataFiles = new List(); + var dataFiles = new List(); foreach (CorpusFileConfigDto file in files) { DataFile? 
dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken); if (dataFile == null) throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist."); - dataFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId }); + dataFiles.Add(new Models.CorpusFile { FileReference = Map(dataFile), TextId = file.TextId }); } return dataFiles; } @@ -195,20 +195,29 @@ private CorpusDto Map(Corpus source) }; } - private CorpusFileDto Map(CorpusFile source) + private CorpusFileDto Map(Models.CorpusFile source) { - return new CorpusFileDto { File = Map(source.File), TextId = source.TextId }; + return new CorpusFileDto { File = Map(source.FileReference), TextId = source.TextId }; } - private DataFileDto Map(DataFile source) + private DataFileReferenceDto Map(DataFileReference source) { - return new DataFileDto + return new DataFileReferenceDto { Id = source.Id, Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }), Name = source.Name, Format = source.Format, - Revision = source.Revision + }; + } + + private static DataFileReference Map(DataFile source) + { + return new DataFileReference + { + Id = source.Id, + Format = source.Format, + Name = source.Name }; } } diff --git a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs index a4311e39..a08a5578 100644 --- a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs +++ b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Models; public record CorpusFile { - public required DataFile File { get; init; } + public required DataFileReference FileReference { get; init; } public string? 
TextId { get; init; } } diff --git a/src/Serval/src/Serval.DataFiles/Models/DataFileReference.cs b/src/Serval/src/Serval.DataFiles/Models/DataFileReference.cs new file mode 100644 index 00000000..ec59f865 --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Models/DataFileReference.cs @@ -0,0 +1,8 @@ +namespace Serval.DataFiles.Models; + +public record DataFileReference +{ + public string Id { get; set; } = ""; + public required string Name { get; init; } + public required FileFormat Format { get; init; } +} diff --git a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs index f5b8e4b6..a092f45b 100644 --- a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs @@ -12,7 +12,7 @@ public async Task GetAsync(string id, string owner, CancellationToken ca public async Task UpdateAsync( string id, - IReadOnlyList files, + IReadOnlyList files, CancellationToken cancellationToken = default ) { diff --git a/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs index a4f0e242..d5aa3645 100644 --- a/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs @@ -6,6 +6,10 @@ public interface ICorpusService Task GetAsync(string id, CancellationToken cancellationToken = default); Task GetAsync(string id, string owner, CancellationToken cancellationToken = default); Task CreateAsync(Corpus corpus, CancellationToken cancellationToken = default); - Task UpdateAsync(string id, IReadOnlyList files, CancellationToken cancellationToken = default); + Task UpdateAsync( + string id, + IReadOnlyList files, + CancellationToken cancellationToken = default + ); Task DeleteAsync(string id, CancellationToken cancellationToken = default); } diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs 
b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs index 0c0f8380..1e2d5376 100644 --- a/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs @@ -5,5 +5,5 @@ public record CorpusResult public required string CorpusId { get; init; } public required string Language { get; init; } public string? Name { get; init; } - public required IReadOnlyList Files { get; set; } + public required IReadOnlyList Files { get; set; } } diff --git a/src/Serval/src/Serval.Shared/Models/CorpusFile.cs b/src/Serval/src/Serval.Shared/Models/CorpusFile.cs new file mode 100644 index 00000000..33496227 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Models/CorpusFile.cs @@ -0,0 +1,35 @@ +namespace Serval.Shared.Models; + +public record CorpusFile +{ + public required string Id { get; set; } + public required FileFormat Format { get; set; } + public required string TextId { get; set; } + + private string? _filename; + + public async Task PopulateFilenameAsync( + IRequestClient getDataFileClient, + string owner, + CancellationToken cancellationToken + ) + { + Response response = await getDataFileClient.GetResponse< + DataFileResult, + DataFileNotFound + >(new GetDataFile { DataFileId = Id, Owner = owner }, cancellationToken); + if (!response.Is(out Response? result)) + { + throw new InvalidOperationException($"The data file {Id} cannot be found."); + } + _filename = result.Message.Filename; + } + + public string GetFilename() + { + return _filename + ?? throw new InvalidOperationException( + "The filename has not been populated. It is not stored in the database." 
+ ); + } +} diff --git a/src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs b/src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs new file mode 100644 index 00000000..d2c9ab4f --- /dev/null +++ b/src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs @@ -0,0 +1,20 @@ +namespace Serval.Shared.Models; + +public record MonolingualCorpus +{ + public required string Id { get; set; } + public string? Name { get; set; } + public required string Language { get; set; } + public required IReadOnlyList Files { get; set; } + + public async Task PopulateFilenamesAsync( + IRequestClient getDataFileClient, + string owner, + CancellationToken cancellationToken + ) + { + await Task.WhenAll( + Files.Select(file => file.PopulateFilenameAsync(getDataFileClient, owner, cancellationToken)) + ); + } +} diff --git a/src/Serval/src/Serval.Shared/Serval.Shared.csproj b/src/Serval/src/Serval.Shared/Serval.Shared.csproj index f2607b7b..b9d5a90c 100644 --- a/src/Serval/src/Serval.Shared/Serval.Shared.csproj +++ b/src/Serval/src/Serval.Shared/Serval.Shared.csproj @@ -21,6 +21,7 @@ + diff --git a/src/Serval/src/Serval.Shared/Usings.cs b/src/Serval/src/Serval.Shared/Usings.cs index 3e84144f..901c11a6 100644 --- a/src/Serval/src/Serval.Shared/Usings.cs +++ b/src/Serval/src/Serval.Shared/Usings.cs @@ -3,6 +3,7 @@ global using System.Text.Json.Serialization; global using Grpc.Core; global using Grpc.Net.ClientFactory; +global using MassTransit; global using Microsoft.AspNetCore.Authorization; global using Microsoft.AspNetCore.Http; global using Microsoft.AspNetCore.Mvc; @@ -12,6 +13,7 @@ global using Microsoft.Extensions.Logging; global using Microsoft.Extensions.Options; global using Serval.Shared.Configuration; +global using Serval.Shared.Contracts; global using Serval.Shared.Models; global using Serval.Shared.Services; global using Serval.Shared.Utils; diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs 
b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index 9b735a01..8fc5bac1 100644 --- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -852,6 +852,7 @@ CancellationToken cancellationToken /// The corpus id or parallel corpus id /// The text id /// The source[s] of the data to populate the USFM file with. + /// The data file client /// /// The book in USFM format /// The specified book does not exist in the source or target corpus. @@ -878,11 +879,13 @@ public async Task GetPretranslatedUsfmAsync( [NotNull] string textId, [FromQuery(Name = "text-origin")] PretranslationUsfmTextOrigin? textOrigin, [FromQuery] PretranslationUsfmTemplate? template, + [FromServices] IRequestClient getDataFileClient, CancellationToken cancellationToken ) { Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); + await engine.PopulateFilenamesAsync(getDataFileClient, cancellationToken); if (!engine.Corpora.Any(c => c.Id == corpusId) && !engine.ParallelCorpora.Any(c => c.Id == corpusId)) return NotFound(); if (engine.ModelRevision == 0) @@ -1031,6 +1034,7 @@ CancellationToken cancellationToken /// /// The translation engine id /// The build config (see remarks) + /// The data file client /// /// The new build job /// The build configuration was invalid. 
@@ -1051,6 +1055,7 @@ CancellationToken cancellationToken public async Task> StartBuildAsync( [NotNull] string id, [FromBody] TranslationBuildConfigDto buildConfig, + [FromServices] IRequestClient getDataFileClient, CancellationToken cancellationToken ) { @@ -1058,6 +1063,7 @@ CancellationToken cancellationToken Engine engine = await _engineService.GetAsync(id, cancellationToken); await AuthorizeAsync(engine); + await engine.PopulateFilenamesAsync(getDataFileClient, cancellationToken); Build build = Map(engine, buildConfig, deploymentVersion); await _engineService.StartBuildAsync(build, cancellationToken); @@ -1220,7 +1226,7 @@ CancellationToken cancellationToken } private async Task MapAsync( - IRequestClient getDataFileClient, + IRequestClient getCorpusClient, string corpusId, TranslationParallelCorpusConfigDto source, CancellationToken cancellationToken @@ -1229,8 +1235,8 @@ CancellationToken cancellationToken return new ParallelCorpus { Id = corpusId, - SourceCorpora = await MapAsync(getDataFileClient, source.SourceCorpusIds, cancellationToken), - TargetCorpora = await MapAsync(getDataFileClient, source.TargetCorpusIds, cancellationToken) + SourceCorpora = await MapAsync(getCorpusClient, source.SourceCorpusIds, cancellationToken), + TargetCorpora = await MapAsync(getCorpusClient, source.TargetCorpusIds, cancellationToken) }; } @@ -1253,7 +1259,6 @@ CancellationToken cancellationToken new CorpusFile { Id = fileConfig.FileId, - Filename = result.Message.Filename, TextId = fileConfig.TextId ?? 
result.Message.Name, Format = result.Message.Format } @@ -1291,9 +1296,8 @@ CancellationToken cancellationToken Files = result .Message.Files.Select(f => new CorpusFile { - Id = f.File.DataFileId, - Filename = f.File.Filename, - Format = f.File.Format, + Id = f.Id, + Format = f.Format, TextId = f.TextId }) .ToList(), diff --git a/src/Serval/src/Serval.Translation/Models/Corpus.cs b/src/Serval/src/Serval.Translation/Models/Corpus.cs index e8e3a4d4..7019dcbc 100644 --- a/src/Serval/src/Serval.Translation/Models/Corpus.cs +++ b/src/Serval/src/Serval.Translation/Models/Corpus.cs @@ -8,4 +8,18 @@ public record Corpus public required string TargetLanguage { get; set; } public required IReadOnlyList SourceFiles { get; set; } public required IReadOnlyList TargetFiles { get; set; } + + public async Task PopulateFilenamesAsync( + IRequestClient getDataFileClient, + string owner, + CancellationToken cancellationToken + ) + { + await Task.WhenAll( + SourceFiles.Select(file => file.PopulateFilenameAsync(getDataFileClient, owner, cancellationToken)) + ); + await Task.WhenAll( + TargetFiles.Select(file => file.PopulateFilenameAsync(getDataFileClient, owner, cancellationToken)) + ); + } } diff --git a/src/Serval/src/Serval.Translation/Models/CorpusFile.cs b/src/Serval/src/Serval.Translation/Models/CorpusFile.cs deleted file mode 100644 index 2672ba56..00000000 --- a/src/Serval/src/Serval.Translation/Models/CorpusFile.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Serval.Translation.Models; - -public record CorpusFile -{ - public required string Id { get; set; } - public required string Filename { get; set; } - public required FileFormat Format { get; set; } - public required string TextId { get; set; } -} diff --git a/src/Serval/src/Serval.Translation/Models/Engine.cs b/src/Serval/src/Serval.Translation/Models/Engine.cs index b4d0f55b..f3f84946 100644 --- a/src/Serval/src/Serval.Translation/Models/Engine.cs +++ b/src/Serval/src/Serval.Translation/Models/Engine.cs @@ -16,4 +16,17 @@ 
public record Engine : IOwnedEntity public int ModelRevision { get; init; } public double Confidence { get; init; } public int CorpusSize { get; init; } + + public async Task PopulateFilenamesAsync( + IRequestClient getDataFileClient, + CancellationToken cancellationToken + ) + { + await Task.WhenAll( + Corpora.Select(corpus => corpus.PopulateFilenamesAsync(getDataFileClient, Owner, cancellationToken)) + ); + await Task.WhenAll( + ParallelCorpora.Select(corpus => corpus.PopulateFilenamesAsync(getDataFileClient, Owner, cancellationToken)) + ); + } } diff --git a/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs b/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs deleted file mode 100644 index 0762e878..00000000 --- a/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Serval.Translation.Models; - -public record MonolingualCorpus -{ - public required string Id { get; set; } - public string? Name { get; set; } - public required string Language { get; set; } - public required IReadOnlyList Files { get; set; } -} diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs index 0fd059c7..e3ffad9d 100644 --- a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs @@ -5,4 +5,15 @@ public record ParallelCorpus public required string Id { get; set; } public IReadOnlyList SourceCorpora { get; set; } = new List(); public IReadOnlyList TargetCorpora { get; set; } = new List(); + + public async Task PopulateFilenamesAsync( + IRequestClient getDataFileClient, + string owner, + CancellationToken cancellationToken + ) + { + await Task.WhenAll( + SourceCorpora.Select(corpus => corpus.PopulateFilenamesAsync(getDataFileClient, owner, cancellationToken)) + ); + } } diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs 
b/src/Serval/src/Serval.Translation/Services/EngineService.cs index a8bb3a05..c3ef933c 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -385,11 +385,11 @@ public Task AddCorpusAsync(string engineId, Models.Corpus corpus, CancellationTo return Entities.UpdateAsync(engineId, u => u.Add(e => e.Corpora, corpus), cancellationToken: cancellationToken); } - public async Task UpdateCorpusAsync( + public async Task UpdateCorpusAsync( string engineId, string corpusId, - IReadOnlyList? sourceFiles, - IReadOnlyList? targetFiles, + IReadOnlyList? sourceFiles, + IReadOnlyList? targetFiles, CancellationToken cancellationToken = default ) { @@ -465,8 +465,8 @@ public Task AddParallelCorpusAsync( public async Task UpdateParallelCorpusAsync( string engineId, string parallelCorpusId, - IReadOnlyList? sourceCorpora, - IReadOnlyList? targetCorpora, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, CancellationToken cancellationToken = default ) { @@ -790,7 +790,7 @@ bool pretranslateOnAllCorpora } private V1.MonolingualCorpus Map( - Models.MonolingualCorpus inputCorpus, + Shared.Models.MonolingualCorpus inputCorpus, ParallelCorpusFilter? trainingFilter, ParallelCorpusFilter? pretranslateFilter, string? 
referenceFileLocation, @@ -880,13 +880,13 @@ pretranslateFilter is not null return returnCorpus; } - private V1.CorpusFile Map(Models.CorpusFile source) + private V1.CorpusFile Map(Shared.Models.CorpusFile source) { return new V1.CorpusFile { TextId = source.TextId, Format = (V1.FileFormat)source.Format, - Location = Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, source.Filename) + Location = Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, source.GetFilename()) }; } } diff --git a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs index 516e634e..4e0e6097 100644 --- a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs +++ b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs @@ -63,10 +63,10 @@ public async Task GetUsfmAsync( throw new InvalidOperationException("USFM format is not valid for non-Scripture corpora."); ParatextProjectSettings sourceSettings = _scriptureDataFileService.GetParatextProjectSettings( - sourceFile.Filename + sourceFile.GetFilename() ); ParatextProjectSettings targetSettings = _scriptureDataFileService.GetParatextProjectSettings( - targetFile.Filename + targetFile.GetFilename() ); IEnumerable<(IReadOnlyList Refs, string Translation)> pretranslations = ( @@ -90,7 +90,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken) ((IReadOnlyList)p.Refs.Select(r => r.ToRelaxed()).ToArray(), p.Translation) ); using Shared.Services.ZipParatextProjectTextUpdater updater = - _scriptureDataFileService.GetZipParatextProjectTextUpdater(targetFile.Filename); + _scriptureDataFileService.GetZipParatextProjectTextUpdater(targetFile.GetFilename()); string usfm = ""; switch (textOrigin) { @@ -139,7 +139,7 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken) if (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Source) { using 
Shared.Services.ZipParatextProjectTextUpdater updater = - _scriptureDataFileService.GetZipParatextProjectTextUpdater(sourceFile.Filename); + _scriptureDataFileService.GetZipParatextProjectTextUpdater(sourceFile.GetFilename()); // Copy and update the source book if it exists switch (textOrigin) diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs index d66b3557..5973751a 100644 --- a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs +++ b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs @@ -184,21 +184,21 @@ public async Task SetUp() Id = SOURCE_CORPUS_ID_1, Language = "en", Owner = "client1", - Files = [new() { File = srcFile, TextId = "all" }] + Files = [new() { FileReference = srcFile, TextId = "all" }] }; var srcCorpus2 = new DataFiles.Models.Corpus { Id = SOURCE_CORPUS_ID_2, Language = "en", Owner = "client1", - Files = [new() { File = srcFile, TextId = "all" }] + Files = [new() { FileReference = srcFile, TextId = "all" }] }; var trgCorpus = new DataFiles.Models.Corpus { Id = TARGET_CORPUS_ID, Language = "en", Owner = "client1", - Files = [new() { File = trgFile, TextId = "all" }] + Files = [new() { FileReference = trgFile, TextId = "all" }] }; await _env.Corpora.InsertAllAsync([srcCorpus, srcCorpus2, trgCorpus]); } diff --git a/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs index 22cdd14e..8c88699b 100644 --- a/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs +++ b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs @@ -5,13 +5,11 @@ public class CorpusServiceTests { private const string CorpusId = "c00000000000000000000001"; - private static readonly DataFile DefaultDataFile = + private static readonly DataFileReference DefaultDataFile = new() { Id = 
"df0000000000000000000001", - Owner = "owner1", Name = "file1", - Filename = "file1.txt", Format = FileFormat.Text }; private static readonly Corpus DefaultCorpus = @@ -21,7 +19,7 @@ public class CorpusServiceTests Owner = "owner1", Name = "corpus1", Language = "en", - Files = new List() { new() { File = DefaultDataFile } } + Files = new List() { new() { FileReference = DefaultDataFile } } }; [Test]