Skip to content

Commit

Permalink
Support POST requests for URL set harvesting
Browse files Browse the repository at this point in the history
Fixes #1492
  • Loading branch information
mikesname committed Jan 29, 2024
1 parent 8670fec commit 6c6286b
Show file tree
Hide file tree
Showing 14 changed files with 106 additions and 28 deletions.
1 change: 1 addition & 0 deletions conf/evolutions/default/1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ CREATE TABLE import_url_set_config (
repo_id VARCHAR(50) NOT NULL,
import_dataset_id VARCHAR(50) NOT NULL,
url_map JSONB NOT NULL,
method VARCHAR(10) NOT NULL DEFAULT 'GET',
headers JSONB,
created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
comments TEXT,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE import_url_set_config ADD COLUMN method VARCHAR(10) NOT NULL DEFAULT 'GET';

7 changes: 4 additions & 3 deletions modules/admin/app/actors/harvesting/UrlSetHarvester.scala
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ case class UrlSetHarvester (client: WSClient, storage: FileStorage)(
val name = item.name
val path = job.data.prefix + name

val withHeaders = job.data.config.headers.fold(client.url(item.url)) { headers =>
client.url(item.url).withHttpHeaders(headers: _*)
val withMethod = client.url(item.url).withMethod(job.data.config.method)
val withHeaders = job.data.config.headers.fold(withMethod) { headers =>
withMethod.withHttpHeaders(headers: _*)
}
val req = job.data.config.auth.fold(withHeaders) { case BasicAuthConfig(username, password) =>
withHeaders.withAuth(username, password, WSAuthScheme.BASIC)
Expand Down Expand Up @@ -120,7 +121,7 @@ case class UrlSetHarvester (client: WSClient, storage: FileStorage)(
// Either the hash doesn't match or the file's not there yet
// so upload it now...
case _ =>
val bytes: Future[Source[ByteString, _]] = req.get().map(r => r.bodyAsSource)
val bytes: Future[Source[ByteString, _]] = req.execute().map(r => r.bodyAsSource)
bytes.flatMap { src =>
storage.putBytes(
path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ export default {
</script>

<template>
<div class="http-header-params">
<div class="http-headers">
<div class="form-group">
<div class="form-check">
<input type="checkbox" class="form-check-input" id="opt-header" v-model="show"/>
<label class="form-check-label" for="opt-header">
HTTP Header
<input type="checkbox" class="form-check-input" id="opt-http-headers" v-model="show"/>
<label class="form-check-label" for="opt-http-headers">
HTTP Headers
</label>
</div>
</div>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<script lang="ts">
export default {
props: {
modelValue: String,
},
data: function () {
return {
str: this.modelValue ? this.modelValue : "GET",
}
},
methods: {
update: function () {
this.$emit("update:modelValue", this.str);
},
},
watch: {
str: function () {
this.update();
}
}
}
</script>

<template>
<div class="http-method">
<fieldset>
<div class="form-group">
<label class="form-label" for="http-method">
HTTP Method
</label>
<select id="http-method" class="form-control" v-model="str">
<option value="GET" selected>GET</option>
<option value="POST">POST</option>
</select>
</div>
</fieldset>
</div>
</template>
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ import ModalAlert from './_modal-alert';
import {DatasetManagerApi} from '../api';
import EditorUrlset from "./_editor-urlset.vue";
import FormHttpBasicAuth from "./_form-http-basic-auth";
import FormHttpMethod from "./_form-http-method";
import FormHttpHeaders from "./_form-http-headers";
import {decodeTsv, encodeTsv} from "../common";
export default {
components: {EditorUrlset, ModalAlert, ModalWindow, FormHttpBasicAuth, FormHttpHeaders},
components: {EditorUrlset, ModalAlert, ModalWindow, FormHttpBasicAuth, FormHttpHeaders, FormHttpMethod},
props: {
waiting: Boolean,
datasetId: String,
Expand All @@ -21,6 +22,7 @@ export default {
return {
urlMap: this.opts ? this.opts.urlMap : null,
filter: this.opts ? this.opts.filter : null,
method: this.opts ? this.opts.method : null,
auth: this.opts ? this.opts.auth : null,
headers: this.opts ? this.opts.headers : null,
tested: null,
Expand All @@ -37,6 +39,7 @@ export default {
isValidConfig: function () {
return this.urlMap !== null
&& this.urlMap.length > 0
&& (!this.method || this.method === "GET" || this.method === "POST")
&& (!this.auth || (this.auth.username !== "" && this.auth.password !== ""))
&& (!this.headers || (this.headers.length > 0));
},
Expand All @@ -59,6 +62,7 @@ export default {
try {
let data = await this.api.saveHarvestConfig(this.datasetId, {
urlMap: this.urlMap,
method: this.method,
auth: null,
headers: this.headers
});
Expand All @@ -75,7 +79,12 @@ export default {
this.testing = true;
this.error = null;
try {
await this.api.testHarvestConfig(this.datasetId, {urlMap: this.urlMap, auth: this.auth, headers: this.headers});
await this.api.testHarvestConfig(this.datasetId, {
urlMap: this.urlMap,
method: this.method,
auth: this.auth,
headers: this.headers
});
this.tested = true;
} catch (e) {
this.tested = false;
Expand All @@ -90,7 +99,11 @@ export default {
cleanEndpoint: async function () {
this.cleaning = true;
try {
this.orphanCheck = await this.api.cleanHarvestConfig(this.datasetId, {urlMap: this.urlMap, auth: null})
this.orphanCheck = await this.api.cleanHarvestConfig(this.datasetId, {
urlMap: this.urlMap,
method: this.method,
auth: null
})
} catch (e) {
this.error = e.message;
} finally {
Expand Down Expand Up @@ -184,6 +197,7 @@ export default {
<editor-urlset v-model.lazy="urlMapText"/>
</div>
<div v-else class="options-form">
<form-http-method v-model="method"/>
<form-http-basic-auth v-model="auth"/>
<form-http-headers v-model="headers"/>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ case class HarvestConfigs @Inject()(
} else {
// Check all rows response to a HEAD request with the right info...
def req(url: String): Future[Option[(String, String)]] = try {
authReq(url, config.auth, config.headers).head().map { r =>
authReq(url, auth = config.auth, headers = config.headers).head().map { r =>
checkRemoteFile(r).map(err => url -> err)
}
} catch {
Expand Down
5 changes: 4 additions & 1 deletion modules/admin/app/models/UrlSetConfig.scala
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package models

import play.api.libs.json.Reads
import akka.http.scaladsl.model.HttpMethods

case class UrlNameMap(url: String, name: String)

case class UrlSetConfig(
urlMap: Seq[(String, String)],
method: String = HttpMethods.GET.value,
auth: Option[BasicAuthConfig] = None,
headers: Option[Seq[(String, String)]] = None,
) extends HarvestConfig {
Expand All @@ -24,6 +25,7 @@ case class UrlSetConfig(

object UrlSetConfig {
val URLS = "urlMap"
val METHOD = "method"
val AUTH = "auth"
val HEADERS = "headers"

Expand All @@ -34,6 +36,7 @@ object UrlSetConfig {
(__ \ URLS).read[Seq[(String, String)]](Reads.seq(Reads.Tuple2R[String, String](
Reads.filter(JsonValidationError("errors.invalidUrl"))(forms.isValidUrl),
Reads.pattern("^[\\w.\\-_]+$".r, "harvesting.error.invalidFileName")))) and
(__ \ METHOD).readNullable[String].map(_.getOrElse(HttpMethods.GET.value)) and
(__ \ AUTH).readNullable[BasicAuthConfig] and
(__ \ HEADERS).readNullable[Seq[(String, String)]]
) (UrlSetConfig.apply _)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ case class SqlUrlSetConfigService @Inject()(db: Database, actorSystem: ActorSyst

private implicit val parser: RowParser[UrlSetConfig] = {
SqlParser.get("url_map")(fromJson[Seq[(String, String)]]) ~
SqlParser.str("method") ~
SqlParser.get("headers")(fromJson[Seq[(String, String)]]).?
}.map { case m ~ h => UrlSetConfig(m, headers = h)}
}.map { case um ~ m ~ h => UrlSetConfig(um, method = m, headers = h)}

override def get(id: String, ds: String): Future[Option[UrlSetConfig]] = Future {
db.withConnection { implicit conn =>
SQL"""SELECT url_map, headers
SQL"""SELECT url_map, method, headers
FROM import_url_set_config
WHERE repo_id = $id
AND import_dataset_id = $ds"""
Expand All @@ -42,17 +43,19 @@ case class SqlUrlSetConfigService @Inject()(db: Database, actorSystem: ActorSyst
override def save(id: String, ds: String, data: UrlSetConfig): Future[UrlSetConfig] = Future {
db.withTransaction { implicit conn =>
SQL"""INSERT INTO import_url_set_config
(repo_id, import_dataset_id, url_map, headers)
(repo_id, import_dataset_id, url_map, method, headers)
VALUES (
$id,
$ds,
${asJson(data.urlMap)},
${data.method},
${data.headers.map(kv => Json.toJson(kv))}
) ON CONFLICT (repo_id, import_dataset_id) DO UPDATE
SET
url_map = ${asJson(data.urlMap)},
method = ${data.method},
headers = ${data.headers.map(kv => Json.toJson(kv))}
RETURNING url_map, headers
RETURNING url_map, method, headers
""".executeInsert(parser.single)
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/actors/cleanup/CleanupRunnerManagerSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import actors.LongRunningJob
import actors.cleanup.CleanupRunner.CleanupJob
import akka.actor.{ActorContext, ActorRef, Props}
import com.google.inject.name.Names
import controllers.datasets.{CleanupConfirmation, LongRunningJobs}
import controllers.datasets.CleanupConfirmation
import helpers.IntegrationTestRunner
import mockdata.adminUserProfile
import models._
Expand Down
2 changes: 1 addition & 1 deletion test/actors/harvesting/HarvesterManagerSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class HarvesterManagerSpec extends IntegrationTestRunner {
val urls = itemIds.map(id => s"${serviceConfig.baseUrl}/classes/DocumentaryUnit/$id/ead" -> s"$id.xml")
UrlSetHarvesterJob("r1", datasetId, jobId, UrlSetHarvesterData(
// the URLs we're harvesting and the server auth
config = UrlSetConfig(urls, serviceConfig.credentials.map { case (u, pw) => BasicAuthConfig(u, pw)}),
config = UrlSetConfig(urls, auth = serviceConfig.credentials.map { case (u, pw) => BasicAuthConfig(u, pw)}),
prefix = "urlset/r1/"
))
}
Expand Down
23 changes: 22 additions & 1 deletion test/actors/harvesting/UrlSetHarvesterSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ class UrlSetHarvesterSpec extends IntegrationTestRunner {
val urls = itemIds.map(id => s"${serviceConfig.baseUrl}/classes/DocumentaryUnit/$id/ead" -> s"$id.xml")
UrlSetHarvesterJob("r1", datasetId, jobId, UrlSetHarvesterData(
// the URLs we're harvesting and the server auth
config = UrlSetConfig(urls, serviceConfig.credentials.map { case (u, pw) => BasicAuthConfig(u, pw)}),
config = UrlSetConfig(urls, auth = serviceConfig.credentials.map { case (u, pw) => BasicAuthConfig(u, pw) }),
prefix = "urlset/r1/"
))
}

private def postJob(implicit app: Application) = {
// harvest the OAI-PMH endpoint as a test, since it supports POST
val serviceConfig = ServiceConfig("ehridata", config)
val urls = Seq(s"${serviceConfig.baseUrl}/oaipmh" -> s"test.xml")
UrlSetHarvesterJob("r1", datasetId, jobId, UrlSetHarvesterData(
config = UrlSetConfig(urls, method = "POST", auth = serviceConfig.credentials.map { case (u, pw) => BasicAuthConfig(u, pw)}),
prefix = "urlset/r1/"
))
}
Expand Down Expand Up @@ -58,5 +68,16 @@ class UrlSetHarvesterSpec extends IntegrationTestRunner {
runner ! Cancel
expectMsgClass(classOf[Cancelled])
}

"support POST requests to endpoints" in new ITestAppWithAkka {
val runner = system.actorOf(Props(UrlSetHarvester(client, storage)))

runner ! postJob

expectMsg(Starting)
expectMsg(ToDo(1))
expectMsg(DoneFile("+ test.xml"))
expectMsgClass(classOf[Completed])
}
}
}
6 changes: 0 additions & 6 deletions test/integration/admin/HarvestConfigsSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ class HarvestConfigsSpec extends IntegrationTestRunner with ResourceUtils {
}


// Mock user who belongs to admin
private val userProfile = UserProfile(
data = UserProfileF(id = Some(privilegedUser.id), identifier = "test", name = "test user"),
groups = List(Group(GroupF(id = Some("admin"), identifier = "admin", name = "Administrators")))
)

"HarvestConfigs API" should {
"save oaipmh configs" in new DBTestApp("import-dataset-fixtures.sql") {
val c = OaiPmhConfig("https://foo.bar/baz", "oai_ead", Some("test"), from = Some(Instant.now().minusSeconds(3600)))
Expand Down
6 changes: 3 additions & 3 deletions test/resources/import-url-set-config-fixtures.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ VALUES
('r1', '2', '2', 'urlset', 'test 2'),
('r1', '3', '3', 'urlset', 'test 3');

INSERT INTO import_url_set_config
INSERT INTO import_url_set_config (repo_id, import_dataset_id, url_map, method, headers, created, comments)
VALUES
('r1', '1', '[["http://example.com", "test"]]'::jsonb, NULL, '2020-06-12 10:00:00', NULL),
('r1', '2', '[["http://example.com", "test"]]'::jsonb, '[["Accept", "text/xml"]]'::jsonb, '2020-06-12 10:00:00', NULL);
('r1', '1', '[["http://example.com", "test"]]'::jsonb, 'GET', NULL, '2020-06-12 10:00:00', NULL),
('r1', '2', '[["http://example.com", "test"]]'::jsonb, 'GET', '[["Accept", "text/xml"]]'::jsonb, '2020-06-12 10:00:00', NULL);

0 comments on commit 6c6286b

Please sign in to comment.