From 58a364663c95c5fc148d21e35db27e72f559e650 Mon Sep 17 00:00:00 2001 From: Mike Bryant Date: Wed, 24 Nov 2021 15:50:42 +0000 Subject: [PATCH] Add a way of 'harvesting' a set of files via a list of URLs Since there was too much duplication in the different methods for CRUDing harvest configurations these have been combined into a single controller which dispatches to the appropriate service as necessary. On the frontend there is also a new way of resizing certain modal dialogs and some other UI tweaks. Partial fix for #1403 --- conf/evolutions/default/1.sql | 14 + ...220207_add_import_url_set_config_table.sql | 12 + .../harvesting/ResourceSyncHarvester.scala | 2 +- .../actors/harvesting/UrlSetHarvester.scala | 135 +++++++++ modules/admin/app/assets/css/datasets.scss | 82 +++++- .../app/assets/js/datasets/__mocks__/api.ts | 4 +- modules/admin/app/assets/js/datasets/api.ts | 68 ++--- .../js/datasets/components/_drag-handle.vue | 24 +- .../js/datasets/components/_editor-urlset.vue | 140 +++++++++ .../js/datasets/components/_editor-xquery.vue | 14 +- .../components/_form-http-basic-auth.vue | 63 ++++ .../datasets/components/_manager-dataset.vue | 29 +- ...er-rs.spec.ts => _manager-harvest.spec.ts} | 8 +- ...anager-oaipmh.vue => _manager-harvest.vue} | 65 +++-- .../js/datasets/components/_manager-rs.vue | 229 --------------- .../js/datasets/components/_modal-alert.vue | 2 +- .../components/_modal-dataset-config.vue | 1 + .../components/_modal-oaipmh-config.vue | 60 +--- .../datasets/components/_modal-rs-config.vue | 26 +- .../components/_modal-urlset-config.vue | 169 +++++++++++ .../js/datasets/components/_modal-window.vue | 59 +++- modules/admin/app/assets/js/datasets/types.ts | 15 +- .../controllers/datasets/HarvestConfigs.scala | 270 ++++++++++++++++++ .../controllers/datasets/ImportDatasets.scala | 17 +- .../controllers/datasets/OaiPmhConfigs.scala | 83 ------ .../datasets/ResourceSyncConfigs.scala | 95 ------ .../admin/app/models/BasicAuthConfig.scala | 24 ++ modules/admin/app/models/HarvestConfig.scala | 23 ++ modules/admin/app/models/ImportDataset.scala | 1 + modules/admin/app/models/OaiPmhConfig.scala | 35 +-- .../admin/app/models/ResourceSyncConfig.scala | 22 +- modules/admin/app/models/UrlSetConfig.scala | 37 +++ .../harvesting/HarvestConfigService.scala | 15 + .../harvesting/ResourceSyncClient.scala | 5 +- .../ResourceSyncConfigService.scala | 2 +- .../SqlResourceSyncConfigService.scala | 12 +- .../harvesting/SqlUrlSetConfigService.scala | 50 ++++ .../harvesting/UrlSetConfigService.scala | 16 ++ .../harvesting/WSResourceSyncClient.scala | 20 +- modules/admin/conf/datasets.routes | 18 +- .../controllers/base/CoreActionBuilders.scala | 1 + .../harvesting/HarvesterManagerSpec.scala | 48 +++- .../harvesting/OaiPmhHarvesterSpec.scala | 4 +- .../harvesting/UrlSetHarvesterSpec.scala | 62 ++++ .../admin/HarvestConfigsSpec.scala | 46 +++ test/resources/import-dataset-fixtures.sql | 8 +- .../harvesting/MockResourceSyncClient.scala | 2 +- .../harvesting/WSOaiPmhClientSpec.scala | 4 +- 48 files changed, 1468 insertions(+), 673 deletions(-) create mode 100644 etc/db_migrations/20220207_add_import_url_set_config_table.sql create mode 100644 modules/admin/app/actors/harvesting/UrlSetHarvester.scala create mode 100644 modules/admin/app/assets/js/datasets/components/_editor-urlset.vue create mode 100644 modules/admin/app/assets/js/datasets/components/_form-http-basic-auth.vue rename modules/admin/app/assets/js/datasets/components/{_manager-rs.spec.ts => _manager-harvest.spec.ts} (57%) rename modules/admin/app/assets/js/datasets/components/{_manager-oaipmh.vue => _manager-harvest.vue} (80%) delete mode 100644 modules/admin/app/assets/js/datasets/components/_manager-rs.vue create mode 100644 modules/admin/app/assets/js/datasets/components/_modal-urlset-config.vue create mode 100644 modules/admin/app/controllers/datasets/HarvestConfigs.scala delete mode 100644 modules/admin/app/controllers/datasets/OaiPmhConfigs.scala delete mode 100644 modules/admin/app/controllers/datasets/ResourceSyncConfigs.scala create mode 100644 modules/admin/app/models/BasicAuthConfig.scala create mode 100644 modules/admin/app/models/HarvestConfig.scala create mode 100644 modules/admin/app/models/UrlSetConfig.scala create mode 100644 modules/admin/app/services/harvesting/HarvestConfigService.scala create mode 100644 modules/admin/app/services/harvesting/SqlUrlSetConfigService.scala create mode 100644 modules/admin/app/services/harvesting/UrlSetConfigService.scala create mode 100644 test/actors/harvesting/UrlSetHarvesterSpec.scala create mode 100644 test/integration/admin/HarvestConfigsSpec.scala diff --git a/conf/evolutions/default/1.sql b/conf/evolutions/default/1.sql index b106197311..a1e119e31b 100644 --- a/conf/evolutions/default/1.sql +++ b/conf/evolutions/default/1.sql @@ -159,6 +159,19 @@ CREATE TABLE resourcesync_config ( ON DELETE CASCADE ); +CREATE TABLE import_url_set_config ( + repo_id VARCHAR(50) NOT NULL, + import_dataset_id VARCHAR(50) NOT NULL, + url_map JSONB NOT NULL, + created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + comments TEXT, + PRIMARY KEY (repo_id, import_dataset_id), + CONSTRAINT import_url_set_config_repo_id_import_dataset_id + FOREIGN KEY (repo_id, import_dataset_id) + REFERENCES import_dataset (repo_id, id) + ON DELETE CASCADE +); + CREATE TABLE harvest_event ( id SERIAL PRIMARY KEY, repo_id VARCHAR(50) NOT NULL, @@ -357,6 +370,7 @@ DROP TABLE IF EXISTS import_config CASCADE; DROP TABLE IF EXISTS data_transformation CASCADE; DROP TABLE IF EXISTS transformation_config CASCADE; DROP TABLE IF EXISTS harvest_event CASCADE; +DROP TABLE IF EXISTS import_url_set_config CASCADE; DROP TABLE IF EXISTS resourcesync_config CASCADE; DROP TABLE IF EXISTS oaipmh_config CASCADE; DROP TABLE IF EXISTS import_dataset CASCADE; diff --git a/etc/db_migrations/20220207_add_import_url_set_config_table.sql b/etc/db_migrations/20220207_add_import_url_set_config_table.sql new file mode 100644 index 0000000000..d76a00b413 --- /dev/null +++ b/etc/db_migrations/20220207_add_import_url_set_config_table.sql @@ -0,0 +1,12 @@ +CREATE TABLE import_url_set_config ( + repo_id VARCHAR(50) NOT NULL, + import_dataset_id VARCHAR(50) NOT NULL, + url_map JSONB NOT NULL, + created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + comments TEXT, + PRIMARY KEY (repo_id, import_dataset_id), + CONSTRAINT import_url_set_config_repo_id_import_dataset_id + FOREIGN KEY (repo_id, import_dataset_id) + REFERENCES import_dataset (repo_id, id) + ON DELETE CASCADE +); diff --git a/modules/admin/app/actors/harvesting/ResourceSyncHarvester.scala b/modules/admin/app/actors/harvesting/ResourceSyncHarvester.scala index fc3bbcdbe8..8d446a1749 100644 --- a/modules/admin/app/actors/harvesting/ResourceSyncHarvester.scala +++ b/modules/admin/app/actors/harvesting/ResourceSyncHarvester.scala @@ -114,7 +114,7 @@ case class ResourceSyncHarvester (client: ResourceSyncClient, storage: FileStora // Either the hash doesn't match or the file's not there yet // so upload it now... case _ => - val bytes = client.get(item) + val bytes = client.get(job.data.config, item) storage.putBytes( path, bytes, diff --git a/modules/admin/app/actors/harvesting/UrlSetHarvester.scala b/modules/admin/app/actors/harvesting/UrlSetHarvester.scala new file mode 100644 index 0000000000..0ab502fa2f --- /dev/null +++ b/modules/admin/app/actors/harvesting/UrlSetHarvester.scala @@ -0,0 +1,135 @@ +package actors.harvesting + +import actors.LongRunningJob.Cancel +import actors.harvesting.Harvester.HarvestJob +import akka.actor.Status.Failure +import akka.actor.{Actor, ActorLogging, ActorRef} +import akka.stream.scaladsl.Source +import akka.util.ByteString +import models.{BasicAuthConfig, UrlNameMap, UrlSetConfig, UserProfile} +import play.api.http.HeaderNames +import play.api.libs.ws.{WSAuthScheme, WSClient} +import services.storage.FileStorage + +import java.time.{Duration, LocalDateTime} +import scala.concurrent.Future.{successful => immediate} +import scala.concurrent.{ExecutionContext, Future} + + +object UrlSetHarvester { + + // Internal message we send ourselves + sealed trait UrlSetAction + private case class Fetch(urls: List[UrlNameMap], count: Int, fresh: Int) extends UrlSetAction + + /** + * A description of an URL set harvest task. + * + * @param config the endpoint configuration + * @param prefix the path prefix on which to save files, after + * which the item identifier will be appended + */ + case class UrlSetHarvesterData( + config: UrlSetConfig, + prefix: String, + ) + + /** + * A single harvest job with a unique ID. + */ + case class UrlSetHarvesterJob(repoId: String, datasetId: String, jobId: String, data: UrlSetHarvesterData) + extends HarvestJob +} + +case class UrlSetHarvester (client: WSClient, storage: FileStorage)( + implicit userOpt: Option[UserProfile], ec: ExecutionContext) extends Actor with ActorLogging { + import Harvester._ + import UrlSetHarvester._ + import akka.pattern.pipe + + override def receive: Receive = { + // Start the initial harvest + case job: UrlSetHarvesterJob => + val msgTo = sender() + context.become(running(job, msgTo, 0, 0, LocalDateTime.now())) + msgTo ! Starting + msgTo ! ToDo(job.data.config.urlMap.size) + self ! Fetch(job.data.config.urls.toList, 0, 0) + } + + + // The harvest is running + def running(job: UrlSetHarvesterJob, msgTo: ActorRef, done: Int, fresh: Int, start: LocalDateTime): Receive = { + // Harvest an individual item + case Fetch(item :: rest, count, fresh) => + log.debug(s"Calling become with new total: $count") + context.become(running(job, msgTo, count, fresh, start)) + + copyItem(job, item).map { case (name, isFresh) => + msgTo ! DoneFile(name) + Fetch(rest, count + 1, if (isFresh) fresh + 1 else fresh) + }.pipeTo(self) + + // Finished harvesting this resource list + case Fetch(Nil, done, fresh) => + msgTo ! Completed(done, fresh, time(start)) + + // Cancel harvest + case Cancel => + msgTo ! Cancelled(done, fresh, time(start)) + + case Failure(e) => + msgTo ! e + + case m => + log.error(s"Unexpected message: $m: ${m.getClass}") + } + + private def copyItem(job: UrlSetHarvesterJob, item: UrlNameMap): Future[(String, Boolean)] = { + // Strip the hostname from the file URL but use the + // rest of the path + val name = item.name + val path = job.data.prefix + name + + val req = job.data.config.auth.fold(client.url(item.url)) { case BasicAuthConfig(username, password) => + client.url(item.url).withAuth(username, password, WSAuthScheme.BASIC) + } + + req.head().flatMap { headReq => + val etag: Option[String] = headReq.headerValues(HeaderNames.ETAG).headOption + val ct: Option[String] = headReq.headerValues(HeaderNames.CONTENT_TYPE).headOption + + // file metadata + val meta = Map( + "source" -> "download", + "download-endpoint" -> item.url, + "download-job-id" -> job.jobId, + ) ++ etag.map(tag => "hash" -> tag) + + log.debug(s"Item: $meta") + + storage.info(path).flatMap { + + // If it exists and matches we've got nowt to do.. + case Some((_, userMeta)) if userMeta.contains("hash") && userMeta.get("hash") == etag => + immediate(("~ " + name, false)) + + // Either the hash doesn't match or the file's not there yet + // so upload it now... + case _ => + val bytes: Future[Source[ByteString, _]] = req.get().map(r => r.bodyAsSource) + bytes.flatMap { src => + storage.putBytes( + path, + src, + ct, + meta = meta + ).map { _ => ("+ " + name, true) } + } + } + } + } + + private def time(from: LocalDateTime): Long = + Duration.between(from, LocalDateTime.now()).toMillis / 1000 +} diff --git a/modules/admin/app/assets/css/datasets.scss b/modules/admin/app/assets/css/datasets.scss index a1fc60d6b0..660ceefdc1 100644 --- a/modules/admin/app/assets/css/datasets.scss +++ b/modules/admin/app/assets/css/datasets.scss @@ -180,6 +180,41 @@ $active-table-row: #e7f1ff; background-color: rgba(0, 0, 0, 0.1); } +.modal-content.resizable { + min-height: 30rem; +} + +.modal-content.resizable > .modal-body { + display: flex; + flex-direction: column; + flex: 1; +} + +.modal-resize-handle { + position: absolute; + right: 0; + bottom: 0; + display: inline; + width: 1rem; + height: 1rem; + cursor: nwse-resize; +} + +.modal-resize-handle:after { + content:''; + display:block; + border-left: 5px solid transparent; + border-right: 5px solid transparent; + border-bottom: 5px solid $ehri-border-gray; + width: 2px; + height: 5px; + position: absolute; + pointer-events: none; + right: -1px; + bottom: 1px; + transform: rotate(135deg); +} + #stage-tabs, #dataset-manager-tabs { @@ -531,18 +566,18 @@ $active-table-row: #e7f1ff; border: 1px solid $ehri-border-gray; } -.xquery-editor-data { +.tabular-editor-data { @extend %expanding-column; @extend %overflow-contents; overflow-x: unset; } -.xquery-editor-toolbar { +.tabular-editor-toolbar { display: flex; justify-content: right; } -.xquery-editor-toolbar-info { +.tabular-editor-toolbar-info { align-self: center; margin-left: auto; font-size: $font-size-xs; @@ -550,7 +585,7 @@ $active-table-row: #e7f1ff; padding: $margin-xs; } -.xquery-editor-header { +.tabular-editor-header { position: sticky; top: 0; background-color: $white; @@ -560,18 +595,43 @@ $active-table-row: #e7f1ff; } } -.xquery-editor-header, -.xquery-editor-mappings { +.tabular-editor-header, +.tabular-editor-mappings +{ font-size: $font-size-xs; - display: grid; - grid-template-columns: 1fr 1fr 1fr 1fr; - .selected { background-color: lighten($blue, 60%); } } +.xquery-editor .tabular-editor-header, +.xquery-editor .tabular-editor-mappings { + display: grid; + grid-template-columns: repeat(4, 1fr); +} + + +.urlset-editor { + display: flex; + flex-direction: column; + flex: 1; +} + +.urlset-editor-input { + display: flex; + flex: 1; + background-color: $gray-100; + margin-bottom: $margin-sm; +} + + +.urlset-editor .tabular-editor-header, +.urlset-editor .tabular-editor-mappings { + display: grid; + grid-template-columns: 2fr 1fr; +} + .xslt-editor { position: relative; @extend %expanding-column; @@ -883,6 +943,10 @@ $active-table-row: #e7f1ff; padding: $margin-md; } +.resizable .options-form { + flex: 1; +} + .options-form .small.form-text { color: $text-muted; } diff --git a/modules/admin/app/assets/js/datasets/__mocks__/api.ts b/modules/admin/app/assets/js/datasets/__mocks__/api.ts index 0d4efcf67e..5560ae0bab 100644 --- a/modules/admin/app/assets/js/datasets/__mocks__/api.ts +++ b/modules/admin/app/assets/js/datasets/__mocks__/api.ts @@ -1,4 +1,4 @@ -import {ResourceSyncConfig, FileList} from "../types"; +import {FileList, HarvestConfig} from "../types"; export class DatasetManagerApi { constructor(service: object, repoId: string) { @@ -26,7 +26,7 @@ export class DatasetManagerApi { ) } - getSyncConfig(ds: string): Promise { + getHarvestConfig(ds: string): Promise { return Promise.resolve(null); } } diff --git a/modules/admin/app/assets/js/datasets/api.ts b/modules/admin/app/assets/js/datasets/api.ts index 0b1d8ef932..87e2780a26 100644 --- a/modules/admin/app/assets/js/datasets/api.ts +++ b/modules/admin/app/assets/js/datasets/api.ts @@ -1,16 +1,26 @@ - import axios from "axios"; import {apiCall} from "./common"; import { - Cleanup, CleanupSummary, - ConvertConfig, Coreference, - DataTransformation, DataTransformationInfo, FileInfo, + Cleanup, + CleanupSummary, + ConvertConfig, + Coreference, + DataTransformation, + DataTransformationInfo, + FileInfo, FileToUpload, - ImportConfig, ImportDataset, ImportDatasetInfo, ImportLog, ImportLogSummary, + HarvestConfig, + ImportConfig, + ImportDataset, + ImportDatasetInfo, + ImportLog, + ImportLogSummary, JobMonitor, - OaiPmhConfig, RepositoryDatasets, - ResourceSyncConfig, Snapshot, SnapshotInfo, ValidationResult + RepositoryDatasets, + Snapshot, + SnapshotInfo, + ValidationResult } from "./types"; @@ -106,48 +116,28 @@ export class DatasetManagerApi { }); } - sync(ds: string, config: ResourceSyncConfig): Promise { - return apiCall(this.service.ResourceSyncConfigs.sync(this.repoId, ds), config); - } - - getSyncConfig(ds: string): Promise { - return apiCall(this.service.ResourceSyncConfigs.get(this.repoId, ds)); - } - - saveSyncConfig(ds: string, config: ResourceSyncConfig): Promise { - return apiCall(this.service.ResourceSyncConfigs.save(this.repoId, ds), config); - } - - deleteSyncConfig(ds: string): Promise { - return apiCall(this.service.ResourceSyncConfigs.delete(this.repoId, ds)); - } - - testSyncConfig(ds: string, config: ResourceSyncConfig): Promise<{ok: true}> { - return apiCall(this.service.ResourceSyncConfigs.test(this.repoId, ds), config); - } - - cleanSyncConfig(ds: string, config: ResourceSyncConfig): Promise { - return apiCall(this.service.ResourceSyncConfigs.clean(this.repoId, ds), config); + harvest(ds: string, config: HarvestConfig, fromLast?: boolean): Promise { + return apiCall(this.service.HarvestConfigs.harvest(this.repoId, ds, fromLast), config); } - getOaiPmhConfig(ds: string): Promise { - return apiCall(this.service.OaiPmhConfigs.get(this.repoId, ds)); + getHarvestConfig(ds: string): Promise { + return apiCall(this.service.HarvestConfigs.get(this.repoId, ds)); } - saveOaiPmhConfig(ds: string, config: OaiPmhConfig): Promise { - return apiCall(this.service.OaiPmhConfigs.save(this.repoId, ds), config); + saveHarvestConfig(ds: string, config: HarvestConfig): Promise { + return apiCall(this.service.HarvestConfigs.save(this.repoId, ds), config); } - deleteOaiPmhConfig(ds: string): Promise { - return apiCall(this.service.OaiPmhConfigs.delete(this.repoId, ds)); + deleteHarvestConfig(ds: string): Promise { + return apiCall(this.service.HarvestConfigs.delete(this.repoId, ds)); } - testOaiPmhConfig(ds: string, config: OaiPmhConfig): Promise { - return apiCall(this.service.OaiPmhConfigs.test(this.repoId, ds), config); + testHarvestConfig(ds: string, config: HarvestConfig): Promise<{ok: true}> { + return apiCall(this.service.HarvestConfigs.test(this.repoId, ds), config); } - harvest(ds: string, config: OaiPmhConfig, fromLast: boolean): Promise { - return apiCall(this.service.OaiPmhConfigs.harvest(this.repoId, ds, fromLast), config); + cleanHarvestConfig(ds: string, config: HarvestConfig): Promise { + return apiCall(this.service.HarvestConfigs.clean(this.repoId, ds), config); } convert(ds: string, key: string|null, config: ConvertConfig): Promise { diff --git a/modules/admin/app/assets/js/datasets/components/_drag-handle.vue b/modules/admin/app/assets/js/datasets/components/_drag-handle.vue index 4df187b4ec..cdd79a5cfb 100644 --- a/modules/admin/app/assets/js/datasets/components/_drag-handle.vue +++ b/modules/admin/app/assets/js/datasets/components/_drag-handle.vue @@ -13,7 +13,8 @@ export default { }, methods: { - move: function (evt) { + move: function (evt: MouseEvent) { + evt.preventDefault(); // Calculate the height of the topmost panel in percent. let container = this.container(), p2 = this.p2(); @@ -29,7 +30,8 @@ export default { let perc = 100 - percentHeight; p2.style.flexBasis = perc + "%"; }, - startDrag: function (evt) { + startDrag: function (evt: MouseEvent) { + evt.preventDefault(); // Calculate the height of the topmost panel in percent. let container = this.container(), p2 = this.p2(); @@ -40,13 +42,15 @@ export default { container.addEventListener("mousemove", this.move); container.style.userSelect = "none"; container.style.cursor = "ns-resize"; - window.addEventListener("mouseup", () => { - console.debug("Stop resize"); - this.offset = 0; - this.$emit("resize", p2.clientHeight); - container.style.userSelect = us; - container.style.cursor = cursor; - container.removeEventListener("mousemove", this.move); + window.addEventListener("mouseup", (evt: MouseEvent) => { + if (evt.button === 0) { + console.debug("Stop resize"); + this.offset = 0; + this.$emit("resize", p2.clientHeight); + container.style.userSelect = us; + container.style.cursor = cursor; + container.removeEventListener("mousemove", this.move); + } }, {once: true}); }, }, @@ -54,6 +58,6 @@ export default { diff --git a/modules/admin/app/assets/js/datasets/components/_editor-urlset.vue b/modules/admin/app/assets/js/datasets/components/_editor-urlset.vue new file mode 100644 index 0000000000..c5bd170b76 --- /dev/null +++ b/modules/admin/app/assets/js/datasets/components/_editor-urlset.vue @@ -0,0 +1,140 @@ + + + + diff --git a/modules/admin/app/assets/js/datasets/components/_editor-xquery.vue b/modules/admin/app/assets/js/datasets/components/_editor-xquery.vue index 6620258d3b..4533b09750 100644 --- a/modules/admin/app/assets/js/datasets/components/_editor-xquery.vue +++ b/modules/admin/app/assets/js/datasets/components/_editor-xquery.vue @@ -25,7 +25,7 @@ export default { return Vue.nextTick(); }, focus: function(row, col): void { - let elem = this.$refs[_padStart(row, 4, 0) + '-' + col]; + let elem = this.$refs[_padStart(row, 4, '0') + '-' + col]; if (elem && elem[0]) { elem[0].focus(); } @@ -103,15 +103,15 @@ export default {