From 6861d98d4da5e6f2eb50da57d790aa1a73eb608a Mon Sep 17 00:00:00 2001 From: plocket <52798256+plocket@users.noreply.github.com> Date: Sun, 14 Jul 2024 11:39:00 -0400 Subject: [PATCH] Improve log codes checker, close #920 --- CHANGELOG.md | 6 +- tests/log_codes/check_codes.sh | 270 +++++++++++------- tests/log_codes/deleted_codes.json | 8 - .../log_codes/log_code_expected_instances.txt | 209 ++++++++++++++ 4 files changed, 376 insertions(+), 117 deletions(-) delete mode 100644 tests/log_codes/deleted_codes.json create mode 100644 tests/log_codes/log_code_expected_instances.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index a35e1491..c077ce42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,11 @@ Format: - --> - +## [Unreleased] + +## Internal + +- Check log codes more robustly and flexibly. See [#920](https://github.com/SuffolkLITLab/ALKiln/issues/920). ## [5.13.0] - 2024-07-11 diff --git a/tests/log_codes/check_codes.sh b/tests/log_codes/check_codes.sh index b7691cab..e8b03b2f 100755 --- a/tests/log_codes/check_codes.sh +++ b/tests/log_codes/check_codes.sh @@ -1,139 +1,193 @@ #!/bin/bash -# Identify missing and duplicate message code issues +# Identify missing and/or duplicate log codes + +# Profiling performance +# set -x +# PS4='+ $EPOCHREALTIME ($LINENO) ' +# alias echo='time echo' exit_code=0 -# Check if the caller sent an argument to the script -if [ $# -eq 0 ]; then - # If there was no argument, set a default value - directory="../.." 
-else - # If there was an argument, use it - directory="$1" -fi +echo " ------------------------------------------------------- +| Usage: | +| bash $(basename $0) [dir] [c file] [c folder] [-l arg] +| dir: The directory in which to search for logs | +| c file: Name of the log instance counter file | +| c folder: Path to the folder of the log counter file | +| -l arg: \"1\" prints extra logs | + -------------------------------------------------------" + +# Get flags and their values +loudness="0" +while getopts ':l:' opt; do + case "${opt}" in + l) loudness="${OPTARG}";; + \?) script_args+=("-$OPTARG");; + esac +done -# The grepped files will include the list of all the codes that have been -# removed and are no longer used. Syntax used works with GitHub cli ([[:digit:]]) -# https://stackoverflow.com/a/6901221 -lines=$(grep -roh --exclude="tests/log_codes/check_codes.sh" --exclude="debug_log.txt" --exclude="cucumber-report.txt" --exclude-dir="ALKilnTests" --exclude-dir="node_modules" --exclude-dir='alkiln-*' --exclude-dir='_alkiln*' 'ALK[[:digit:]][[:digit:]][[:digit:]][[:digit:]]' "$directory") -sorted=$(echo "$lines" | sort -n) +# Use global "option index" to get the next args +where_to_look=${@:$OPTIND:1} # User's value +if [ "$where_to_look" = "" ]; then + where_to_look="../.." 
# Default value +fi -echo "=== Missing codes ===" +expected_instances_file=${@:$OPTIND+1:1} # User's value +if [ "$expected_instances_file" = "" ]; then + expected_instances_file="log_code_expected_instances.txt" # Default value +fi -# # -- Version 1 -- -# # More understandable version that doesn't yet come out with a final variable -# # Keeping it here for discussion -# nums=$(echo "$sorted" | sed 's/^ALK0*//') +expected_instances_folder=${@:$OPTIND+2:1} # User's value +if [ "$expected_instances_folder" = "" ]; then + expected_instances_folder="$where_to_look/tests/log_codes" # Default value +fi -# missing_numbers=() -# prev_number=0 -# echo "$nums" | while read -r current_number; do +expected_instances_path="$expected_instances_folder/$expected_instances_file" -# diff=$((current_number - prev_number)) -# # If the difference is more than 1, print the missing -# # numbers until we catch up with the current number -# if [ $diff -gt 1 ]; then -# for ((i=prev_number+1; i>> highest_code <<<: $highest_code" + echo "Paths of ALK0000: ${codes_unique_paths[0]}" +fi -# 0's in front of numbers confuses bash about the number format. Remove them. -just_numbers=$(echo "$sorted" | sed 's/^ALK0*//') +indx=0 +too_many=() +missing=() +while [ "$indx" -lt "$highest_code" ]; do + # Turn the index into a log code by prepending ALK and adding leading zeros + log_code=$(printf "ALK%04d" "$indx") + # Count ";" - a stand-in for the number of + # instances of the log code + num_paths=$(echo "${codes_unique_paths[$indx]}" | awk '{orig_len = length($0); gsub(/;/, "", $0); new_len = length($0); print orig_len - new_len}') + # The # of expected instances of the log code + # in all the relevant ALKiln files. + num_expected=($(grep -oh "$log_code:[[:digit:]]" "$expected_instances_path" | sed 's/^ALK[[:digit:]][[:digit:]][[:digit:]][[:digit:]]://')) + # The file tracking the # of expected instances + # may have a typo. 
One likely typo we can catch: + # Putting 2 entries for one log code in the file. + num_expectations_found=${#num_expected[@]} + if [[ "$num_expectations_found" -gt "1" ]]; then + echo "WARNING: Multiple entries of '$log_code' in $expected_instances_path. Using the first one because that's easiest: ${num_expected[0]}" + num_expected=${num_expected[0]} + fi + + if [[ "$num_expected" == "" ]]; then + num_expected="1" + fi + + # To add to the list of strings to print later + short_msg="$log_code $num_paths/$num_expected" + long_msg="$log_code act/exp " + long_msg+="$num_paths/$num_expected:" + long_msg+=$(echo "${codes_unique_paths[$indx]}" | sed 's/;/\n - /g') + + # missing + if [ "$num_paths" -lt "$num_expected" ]; then + if [[ "$loudness" != "0" ]]; then + missing+=("$long_msg") + else + missing+=("$short_msg") + fi + + # too_many + elif [ "$num_paths" -gt "$num_expected" ]; then + if [[ "$loudness" != "0" ]]; then + too_many+=("$long_msg") + else + too_many+=("$short_msg") + fi + fi + + let indx++ +done -# Convert the list into an array -IFS=$'\n' read -rd '' -a num_array <<<"$just_numbers" -# Find the minimum and maximum numbers in the array -min_num=$(printf "%s\n" "${num_array[@]}" | sort -n | head -n 1) -max_num=$(printf "%s\n" "${num_array[@]}" | sort -n | tail -n 1) -# Generate a sequence of numbers from min to max with leading zeros -expected_sequence="" -for ((i=min_num; i<=max_num; i++)); do - # Make sure these won't look like numbers to avoid confusing bash - expected_sequence+=$(printf "ALK%04d" $i)$'\n' -done -# # Alternative code for above. seq doesn't exist everywhere. 
Keep this -# till we look up installing seq to avoid loop) -# expected_sequence=$(printf "ALK%04d\n" $(seq $min_num $max_num)) - -# Compare the expected sequence with the actual numbers -missing_numbers=$(comm -23 <(printf "%s\n" "$expected_sequence" | sort -n) <(printf "%s\n" "$sorted")) -if [ -z "$missing_numbers" ]; then - echo "None" -else - echo "$missing_numbers" - ((exit_code+=2)) +if [[ "$loudness" != "0" ]]; then + echo "" + echo "Missing count: ${#missing[@]}" + echo "Too many count: ${#too_many[@]}" fi +# === Results === -echo " -=== Duplicate codes ===" - -# We know these duplicates are ok -accepted_duplicates="ALK0000 -ALK0002 -ALK0003 -ALK0006 -ALK0007 -ALK0008 -ALK0009 -ALK0010 -ALK0011 -ALK0012 -ALK0018 -ALK0019 -ALK0023 -ALK0024 -ALK0026 -ALK0027 -ALK0028 -ALK0030 -ALK0049 -ALK0184" - -# Use a sorted list. `uniq` only detects consecutive duplicates -duplicates=$(echo "$sorted" | uniq -d) -unaccepted_duplicates=$(echo "$duplicates" | grep -v -f <(echo "$accepted_duplicates")) - -if [ -z "$unaccepted_duplicates" ]; then - echo "None" +printf "\n=== Missing instances (actual/expected) ===\n" +if [[ "${#missing[@]}" > 0 ]]; then + printf '%s\n' "${missing[@]}" + ((exit_code+=2)) else - echo "$unaccepted_duplicates" - ((exit_code+=20)) + printf "None" fi -# === Final messages === -echo " -Exit code meanings: -- code 2: missing codes -- code 20: duplicate codes -- code 22: both -" +printf "\n\n=== Too many instances (actual/expected) ===\n" +if [[ "${#too_many[@]}" > 0 ]]; then + printf '%s\n' "${too_many[@]}" + ((exit_code+=20)) +else + printf "None" +fi if test $exit_code -eq 0; then - echo "šŸŒˆ Passed! The codes for logs are as they should be." - highest=$(echo "$sorted" | tail -n 1) - echo "The highest log code is $highest" + printf "\n\nšŸŒˆ Passed! The codes for logs are as they should be." + printf "\n\nšŸ’” Highest log code: $highest_code\n" else - echo "šŸ¤• ERROR: Log codes are messed up. Exited with exit code $exit_code. 
See above for more details." + printf "\n\nšŸ¤• ERROR: " + if [[ "$exit_code" == "2" ]]; then + printf "Missing codes" + elif [[ "$exit_code" == "20" ]]; then + printf "Duplicate codes" + elif [[ "$exit_code" == "22" ]]; then + printf "Both missing and duplicate codes" + fi + echo "" + echo "Exit code: $exit_code" fi + exit $exit_code diff --git a/tests/log_codes/deleted_codes.json b/tests/log_codes/deleted_codes.json deleted file mode 100644 index 5dd83155..00000000 --- a/tests/log_codes/deleted_codes.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - "== Actual deleted codes ==", - "None", - "== Tests ==", - "ALK test 0059", - "ALK test 0092", - "ALK test 0225" -] \ No newline at end of file diff --git a/tests/log_codes/log_code_expected_instances.txt b/tests/log_codes/log_code_expected_instances.txt new file mode 100644 index 00000000..8588a775 --- /dev/null +++ b/tests/log_codes/log_code_expected_instances.txt @@ -0,0 +1,209 @@ +This file lists the log codes we have or have had in our ALKiln code base and the number of times each log code should appear in our code base. We use it to make sure that we don't repeat codes where we shouldn't and that we don't leave out/skip code numbers. 0 means a code we have deleted from ALKiln. + +Why we need duplicate codes sometimes: GitHub actions sometimes don't have ways to avoid writing a code in both the `name` of a step and in the `run` value. Using it in both places is a decision we've currently made. Other codes are used in multiple places as defaults. These are a couple of examples of why this can be useful. + +The script that tests the log codes will ignore this file (or any file with this name), and any file with these names: check_codes.sh, debug_log.txt, cucumber-report.txt. It will also ignore files in folders with these names: node_modules, ALKilnTests, alkiln-*, _alkiln*. That list might get updated, so for an up-to-date list, always check the file check_codes.sh where we use `--exclude=` and such. 
+ +ALK0000:8 +ALK0001:1 +ALK0002:2 +ALK0003:2 +ALK0004:1 +ALK0005:1 +ALK0006:2 +ALK0007:3 +ALK0008:3 +ALK0009:2 +ALK0010:2 +ALK0011:3 +ALK0012:2 +ALK0013:1 +ALK0014:1 +ALK0015:1 +ALK0016:1 +ALK0017:1 +ALK0018:2 +ALK0019:2 +ALK0020:1 +ALK0021:1 +ALK0022:1 +ALK0023:2 +ALK0024:2 +ALK0025:1 +ALK0026:2 +ALK0027:2 +ALK0028:3 +ALK0029:1 +ALK0030:2 +ALK0031:1 +ALK0032:1 +ALK0033:1 +ALK0034:1 +ALK0035:1 +ALK0036:1 +ALK0037:1 +ALK0038:1 +ALK0039:1 +ALK0040:1 +ALK0041:1 +ALK0042:1 +ALK0043:1 +ALK0044:1 +ALK0045:1 +ALK0046:1 +ALK0047:1 +ALK0048:1 +ALK0049:1 +ALK0050:1 +ALK0051:1 +ALK0052:1 +ALK0053:1 +ALK0054:1 +ALK0055:1 +ALK0056:1 +ALK0057:1 +ALK0058:1 +ALK0059:1 +ALK0060:1 +ALK0061:1 +ALK0062:1 +ALK0063:1 +ALK0064:1 +ALK0065:1 +ALK0066:1 +ALK0067:1 +ALK0068:1 +ALK0069:1 +ALK0070:1 +ALK0071:1 +ALK0072:1 +ALK0073:1 +ALK0074:1 +ALK0075:1 +ALK0076:1 +ALK0077:1 +ALK0078:1 +ALK0079:1 +ALK0080:1 +ALK0081:1 +ALK0082:1 +ALK0083:1 +ALK0084:1 +ALK0085:1 +ALK0086:1 +ALK0087:1 +ALK0088:1 +ALK0089:1 +ALK0090:1 +ALK0091:1 +ALK0092:1 +ALK0093:1 +ALK0094:1 +ALK0095:1 +ALK0096:1 +ALK0097:1 +ALK0098:1 +ALK0099:1 +ALK0100:1 +ALK0101:1 +ALK0102:1 +ALK0103:1 +ALK0104:1 +ALK0105:1 +ALK0106:1 +ALK0107:1 +ALK0108:1 +ALK0109:1 +ALK0110:1 +ALK0111:1 +ALK0112:1 +ALK0113:1 +ALK0114:1 +ALK0115:1 +ALK0116:1 +ALK0117:1 +ALK0118:1 +ALK0119:1 +ALK0120:1 +ALK0121:1 +ALK0122:1 +ALK0123:1 +ALK0124:1 +ALK0125:1 +ALK0126:1 +ALK0127:1 +ALK0128:1 +ALK0129:1 +ALK0130:1 +ALK0131:1 +ALK0132:1 +ALK0133:1 +ALK0134:1 +ALK0135:1 +ALK0136:1 +ALK0137:1 +ALK0138:1 +ALK0139:1 +ALK0140:1 +ALK0141:1 +ALK0142:1 +ALK0143:1 +ALK0144:1 +ALK0145:1 +ALK0146:1 +ALK0147:1 +ALK0148:1 +ALK0149:1 +ALK0150:1 +ALK0151:1 +ALK0152:1 +ALK0153:1 +ALK0154:1 +ALK0155:1 +ALK0156:1 +ALK0157:1 +ALK0158:1 +ALK0159:1 +ALK0160:1 +ALK0161:1 +ALK0162:1 +ALK0163:1 +ALK0164:1 +ALK0165:1 +ALK0166:1 +ALK0167:1 +ALK0168:1 +ALK0169:1 +ALK0170:1 +ALK0171:1 +ALK0172:1 +ALK0173:1 +ALK0174:1 +ALK0175:1 +ALK0176:1 +ALK0177:1 +ALK0178:1 +ALK0179:1 +ALK0180:1 
+ALK0181:1 +ALK0182:1 +ALK0183:1 +ALK0184:2 +ALK0185:1 +ALK0186:1 +ALK0187:1 +ALK0188:1 +ALK0189:1 +ALK0190:1 +ALK0191:1 +ALK0192:1 +ALK0193:1 +ALK0194:1 +ALK0195:1 +ALK0196:1 +ALK0197:1 +ALK0198:1 +ALK0199:1 +ALK0200:1 +ALK0201:1 +ALK0202:1 \ No newline at end of file