From c60ca246ca36732e0f7085f56f5d731c189a8139 Mon Sep 17 00:00:00 2001 From: ernestd Date: Fri, 16 Dec 2022 04:43:41 -0500 Subject: [PATCH] Add config file and logic --- index.mjs | 23 ++++++++++++++++++++++- links.json | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 links.json diff --git a/index.mjs b/index.mjs index 7077d7a..3517957 100644 --- a/index.mjs +++ b/index.mjs @@ -1,5 +1,6 @@ import fetch from 'node-fetch' import { load } from 'cheerio' +import links from './links.json' assert { type: 'json' } export async function check(url = new URL('http://example.com'), referer, seen = new Set(), depth = 10) { url.hash = ''; // since hash is client-side only, we remove it in order to avoid duplicate requests @@ -8,6 +9,26 @@ export async function check(url = new URL('http://example.com'), referer, seen = } seen.add(url.href); const res = await fetch(url, { headers: { 'user-agent': 'npm:links-awakening' } }).catch(e => e); + const html = res.ok ? await res.text() : null; + + // deal with config cases + for (const link of links.config) { + if (url.href.includes(link.url)) { // TODO: this check needs precision in url comparison (www vs no www), etc + if (res.status !== link.status) { + console.log(`🟨 ${url.href} differs from the expected value in the config (status: ${res.status}, referer: ${referer})`); + return; + } else if (link.status === 200) { + if (html.includes(link.body)) { + console.log(`🟨 ${url.href} final content is no longer available (status: ${res.status}, referer: ${referer})`); + return; + } + } else { + console.log(`🟨 ${url.href} is preset to have status: ${res.status}`); + return; + } + } + } + if (res.ok) { console.log(`✅ ${url.href}`); } else if (res.status) { @@ -20,7 +41,7 @@ export async function check(url = new URL('http://example.com'), referer, seen = if (depth === 0) { return; } - const html = await res.text(); + const $ = load(html); const hrefs = $('a[href]').map((_, el) => el.attribs.href).get(); for (const href of hrefs) { diff --git a/links.json b/links.json new file mode 100644 index 0000000..274f450 --- /dev/null +++ b/links.json @@ -0,0 +1,18 @@ +{ + "config": [ + { + "url": "https://www.youtube.com", + "body": "This video isn't available", + "status": 200 + }, + { + "url": "https://www.twitter.com", + "body": "this page doesn't exist", + "status": 200 + }, + { + "url": "https://vercel.com/docs", + "status": 302 + } + ] +}