Skip to content

Commit

Permalink
allow tar upload (#1836)
Browse files Browse the repository at this point in the history
* allow tar upload

* fix tests

* remove tmp file location

* accept HTML files directly

* remove unused function and fix undefined var

* POST methods are now available in the API

* allow file upload from the UI
  • Loading branch information
deniak authored Jun 5, 2024
1 parent 38183cd commit 1ad9fc5
Show file tree
Hide file tree
Showing 10 changed files with 564 additions and 227 deletions.
3 changes: 3 additions & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"FPLC",
"FPWD",
"FPWG",
"fileupload",
"hellip",
"hilite",
"historyuri",
Expand Down Expand Up @@ -84,7 +85,9 @@
"uarr",
"Unmocked",
"unneutral",
"valfile",
"validateoptions",
"valuri",
"vcard",
"WHATWG",
"wcag",
Expand Down
30 changes: 21 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,17 +228,17 @@ Use either `url` or `file` to pass along the document (neither `source` nor `doc

Note: If you want to use the public W3C instance of Specberus, you can replace `<host>` with `https://www.w3.org/pubrules`.

There are three `GET` methods available.
The different endpoints are described below.

### `version`
### `version` (GET)

Returns the version string, e.g. `1.5.3`.

### `metadata`
### `metadata` (GET and POST)

Extract all known metadata from a document; see [below](#return-values) for information about the return value.

### `validate`
### `validate` (GET and POST)

Check the document ([syntax](#validateoptions)).
Many of [the options understood by the JS method `validate`](#validateoptions) are accepted.
Expand All @@ -249,21 +249,33 @@ The special profile `auto` is also available.

#### 1. Get API version of Pubrules

`https://www.w3.org/pubrules/api/version`

e.g. https://www.w3.org/pubrules/api/version
`curl https://www.w3.org/pubrules/api/version`

#### 2. Get metadata of one document.

`https://www.w3.org/pubrules/api/metadata?url=https://example.com/doc.html`
```sh
# GET
curl "https://www.w3.org/pubrules/api/metadata?url=https://example.com/doc.html"

# POST
curl "https://www.w3.org/pubrules/api/metadata" -F "file=@/tmp/foo.html"
```

Metadata is a set of data extracted from the document. It includes the type (profile) of the document, the publication date, the editors' names, the version of the Patent Policy the document is under, etc.

e.g. https://www.w3.org/pubrules/api/metadata?url=https://www.w3.org/TR/2021/WD-i18n-glossary-20210708/

#### 3. Validate the document using profile: auto

`https://www.w3.org/pubrules/api/validate?url=https://example.com/doc.html&profile=auto`
```sh
# GET
curl "https://www.w3.org/pubrules/api/validate?url=https://example.com/doc.html&profile=auto"

# POST
curl "https://www.w3.org/pubrules/api/validate" -F "file=@/tmp/foo.html" -F "profile=auto"
```

Note: The POST method skips some checks that require the document to be staged online, such as checking that [assets are in the same folder](https://github.com/w3c/specberus/blob/main/lib/rules/links/linkchecker.js).

`auto` profile is the easiest way to validate a document. The validation relies on the automatically extracted data.

Expand Down
119 changes: 80 additions & 39 deletions app.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
import compression from 'compression';
import cors from 'cors';
import express from 'express';
import fileUpload from 'express-fileupload';
import { writeFile } from 'fs';
import http from 'http';
import insafe from 'insafe';
import morgan from 'morgan';
import { Server } from 'socket.io';
import tmp from 'tmp';
import * as api from './lib/api.js';
import * as l10n from './lib/l10n.js';
import { Sink } from './lib/sink.js';
Expand All @@ -34,6 +37,13 @@ const io = new Server(server);
app.use(morgan('combined'));
app.use(compression());
app.use('/badterms.json', cors());
// Register the express-fileupload middleware application-wide (no mount path),
// so handlers registered after this point can receive uploaded files.
// NOTE(review): option meanings below follow the express-fileupload docs —
// confirm against the installed version.
app.use(
fileUpload({
// Presumably lets the middleware create missing directories when a file is moved.
createParentPath: true,
// Buffer uploads in temporary files on disk rather than in memory.
useTempFiles: true,
// Directory that receives those temporary files.
tempFileDir: '/tmp/',
})
);

app.use(express.static('public'));
api.setUp(app);
Expand All @@ -47,8 +57,10 @@ server.listen(process.argv[2] || process.env.PORT || DEFAULT_PORT);
io.on('connection', socket => {
socket.emit('handshake', { version });
socket.on('extractMetadata', data => {
if (!data.url)
return socket.emit('exception', { message: 'URL not provided.' });
if (!data.url && !data.file)
return socket.emit('exception', {
message: 'URL or file not provided.',
});
const specberus = new Specberus();
const handler = new Sink();
handler.on('err', (type, data) => {
Expand Down Expand Up @@ -88,14 +100,14 @@ io.on('connection', socket => {
handler.on('exception', data => {
socket.emit('exception', data);
});
specberus.extractMetadata({
url: data.url,
events: handler,
});
data.events = handler;
specberus.extractMetadata(data);
});
socket.on('validate', async data => {
if (!data.url)
return socket.emit('exception', { message: 'URL not provided.' });
if (!data.url && !data.file)
return socket.emit('exception', {
message: 'URL or file not provided.',
});
if (!data.profile)
return socket.emit('exception', {
message: 'Profile not provided.',
Expand Down Expand Up @@ -156,41 +168,70 @@ io.on('connection', socket => {
handler.on('exception', data => {
socket.emit('exception', data);
});
insafe
.check({
url: data.url,
statusCodesAccepted: ['301', '406'],
})
.then(res => {
if (res.status) {
try {
specberus.validate({
url: data.url,
profile,
events: handler,
validation: data.validation,
informativeOnly: data.informativeOnly,
echidnaReady: data.echidnaReady,
patentPolicy: data.patentPolicy,
});
} catch (e) {
socket.emit('exception', {
message: `Validation blew up: ${e}`,
});
socket.emit('finished');
if (data.url) {
insafe
.check({
url: data.url,
statusCodesAccepted: ['301', '406'],
})
.then(res => {
if (res.status) {
try {
specberus.validate({
url: data.url,
profile,
events: handler,
validation: data.validation,
informativeOnly: data.informativeOnly,
echidnaReady: data.echidnaReady,
patentPolicy: data.patentPolicy,
});
} catch (e) {
socket.emit('exception', {
message: `Validation blew up: ${e}`,
});
socket.emit('finished');
}
} else {
const message = `Error while resolving <a href="${data.url}"><code>${data.url}</code></a>;
check the spelling of the host, the protocol (<code>HTTP</code>, <code>HTTPS</code>)
and ensure that the page is accessible from the public internet.`;
socket.emit('exception', { message });
}
} else {
const message = `Error while resolving <a href="${data.url}"><code>${data.url}</code></a>;
check the spelling of the host, the protocol (<code>HTTP</code>, <code>HTTPS</code>)
and ensure that the page is accessible from the public internet.`;
socket.emit('exception', { message });
}
})
.catch(e => {
})
.catch(e => {
socket.emit('exception', {
message: `Insafe check blew up: ${e}`,
});
socket.emit('finished');
});
} else {
try {
specberus.validate({
file: data.file,
profile,
events: handler,
validation: data.validation,
informativeOnly: data.informativeOnly,
echidnaReady: data.echidnaReady,
patentPolicy: data.patentPolicy,
});
} catch (e) {
socket.emit('exception', {
message: `Insafe check blew up: ${e}`,
message: `Validation blew up: ${e}`,
});
socket.emit('finished');
}
}
});

/**
 * Handle the 'upload' socket event: write the received payload to a newly
 * created temporary file and acknowledge with the outcome.
 *
 * @param {string|Buffer} file - content to store, passed as-is to `writeFile`.
 * @param {Function} [callback] - acknowledgement, called with
 *   `{ status: 'success' | 'failure', filename }` where `filename` is the
 *   path of the temporary file (undefined if it could not be created).
 */
socket.on('upload', (file, callback) => {
    // A client may emit without an acknowledgement function; calling an
    // undefined callback inside the async write would throw out of band.
    const reply = typeof callback === 'function' ? callback : () => {};

    let tmpfile;
    try {
        // tmp.fileSync() opens the file and hands back a descriptor; only the
        // name is used here, so ask tmp to close the descriptor instead of
        // leaking one per upload.
        tmpfile = tmp.fileSync({ discardDescriptor: true }).name;
    } catch (err) {
        // Creating the temporary file failed: report it to the client rather
        // than letting the exception escape the event handler.
        reply({ status: 'failure', filename: undefined });
        return;
    }

    writeFile(tmpfile, file, err => {
        reply({
            status: err ? 'failure' : 'success',
            filename: tmpfile,
        });
    });
});
});
Loading

0 comments on commit 1ad9fc5

Please sign in to comment.