From df74625d8090fec20d938eb9563340af3ddd79fb Mon Sep 17 00:00:00 2001
From: zeme-wana <15709674+zeme-wana@users.noreply.github.com>
Date: Wed, 9 Oct 2024 10:20:50 +0200
Subject: [PATCH] Add new ignored urls and fix logic in check-broken-links.sh (#6552)

---
Review notes (free text after the three-dash line; not part of the commit
message):

  * Layout: this patch had been collapsed onto two physical lines. The line
    structure below is restored from the diff markers and matches the hunk
    header (14 context, 16 removed, 22 added lines). Indentation inside the
    hunk is a reconstruction; if the context does not match the target file,
    apply with `git apply --ignore-whitespace`.
  * valid_links: `comm -3` suppresses only the lines common to both inputs.
    It still prints lines unique to the second input (tab-indented), so any
    IGNORE_URLS entry that is not found verbatim among the grepped links is
    itself sent to linkchecker. `comm -23` prints only the first column.
  * valid_links: comm compares whole lines, so an ignore entry now matches
    only an identical link. The removed --ignore-url patterns ended in `.*`
    and therefore also matched longer URLs under the same prefix.
  * valid_links: `sort | uniq` runs before `tr ' ' '\n'`. For the ignore
    list, echo prints every entry on one line, so sort sees a single line
    and the result is ordered only because the array literal already is.
    The grep pattern allows spaces inside a match, so the link list can be
    re-split after sorting as well. comm expects sorted input.
  * FAILED=0 is kept as context but is no longer read in this hunk.
  * The per-file "Checking <file>" and "<file> has broken links" messages
    are removed, so a failure no longer names the file it came from.
  * The subject says URLs were added; the hunk removes the img.shields.io
    entry and adds none.

 scripts/check-broken-links.sh | 38 ++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/scripts/check-broken-links.sh b/scripts/check-broken-links.sh
index 53ba5c377a7..53b5e4a36a6 100755
--- a/scripts/check-broken-links.sh
+++ b/scripts/check-broken-links.sh
@@ -5,30 +5,36 @@ TARGETS=(
     *.adoc
 )
 
-# For some reason linkchecker fails to check these URLs though they are valid
+# For some reason linkchecker fails to check these URLs though they are valid.
+# It's plausible that these domains are blocking the linkchecker user agent, or
+# that we are running into rate-limiting issues.
 IGNORE_URLS=(
-    --ignore-url="^https://pvp\.haskell\.org.*"
-    --ignore-url="^https://www\.haskell\.org/cabal.*"
-    --ignore-url="^https://img\.shields\.io/matrix/plutus-core%3Amatrix\.org.*"
+    https://pvp.haskell.org
+    https://www.haskell.org/cabal
 )
 
 FAILED=0
 
+check_links() {
+    linkchecker --no-warnings --recursion-level 0 --output failures --check-extern --stdin
+}
+
 grep_links() {
-    grep -oE "\b(https?://|www\.)[^\[\(\)\"]+\b" "$1"
+    for file in $(find "${TARGETS[@]}"); do
+        grep -oE "\b(https?://|www\.)[^\[\(\)\"]+\b" "${file}"
+    done
 }
 
-check_links() {
-    linkchecker --no-warnings --recursion-level 0 --output failures --check-extern "${IGNORE_URLS[@]}" --stdin
+valid_links() {
+    local all_links="$(grep_links | sort | uniq | tr ' ' '\n')"
+    local ignore_links="$(echo "${IGNORE_URLS[@]}" | sort | uniq | tr ' ' '\n')"
+    comm -3 <(echo "$all_links") <(echo "$ignore_links")
 }
 
-for file in $(find "${TARGETS[@]}"); do
-    echo "Checking ${file}"
-    grep_links "${file}" | check_links
-    if [ $? -ne 0 ]; then
-        echo "${file} has broken links, see output above"
-        FAILED=1
-    fi
-done
+check_links <<< "$(valid_links)"
+
+if [[ "$?" != "0" ]]; then
+    echo "Found broken links, see output above"
+    exit 1
+fi
 
-exit "${FAILED}"