From e3deedf097ed9c68edf8651f197a1ad14ac3f73e Mon Sep 17 00:00:00 2001
From: Stan Brubaker <120737309+stanbrub@users.noreply.github.com>
Date: Wed, 25 Sep 2024 11:50:20 -0600
Subject: [PATCH] Adhoc Auto Provisioning (#342)

---
Reviewer notes (this section is ignored when the patch is applied):

What the change does
- Adds .github/scripts/adhoc.sh (set-label, test-regex and scaling helpers
  plus Equinix Metal deploy/delete actions) and .github/scripts/base.sh
  (base-10 to radix conversion, replacing the base62.sh call in
  fetch-results-local.sh).
- Replaces the placeholder adhoc workflows with an auto-provisioned variant
  and an existing-server variant; deletes adhoc-remote-benchmarks.yml.
- remote-benchmarks.yml gains the test_device_addr and config_options inputs,
  which every caller now passes.

Adjustments made to added lines while restoring this patch
- adhoc.sh: the argument-count check uses -lt; "<" inside [[ ]] compares
  strings, not numbers.
- adhoc.sh: getSetLabel takes its prefix from $1 and its callers quote their
  arguments, so an empty prefix no longer shifts the image name into $1.
- adhoc.sh: usage examples name the real actions (deploy-metal, delete-metal);
  comment typos fixed.
- adhoc-auto-remote-benchmarks.yml: script arguments are quoted so that
  METAL_EXPIRE ("2 days") reaches adhoc.sh as a single argument.
- Indentation and blank context lines were re-derived from the hunk line
  counts; the index ids are those of the original commit.

 .../adhoc-benchmark-docker-compose.yml        |   2 +-
 .github/scripts/adhoc.sh                      | 155 +++++++++++++++++
 .github/scripts/base.sh                       |  16 ++
 .../build-server-distribution-remote.sh       |   2 +-
 .github/scripts/fetch-results-local.sh        |  19 ++-
 .github/scripts/manage-deephaven-remote.sh    |  28 +++-
 .github/scripts/setup-ssh-local.sh            |   4 +-
 .github/scripts/setup-test-server-remote.sh   |  26 +--
 .../adhoc-auto-remote-benchmarks.yml          | 157 +++++++++++++++++-
 .../adhoc-exist-remote-benchmarks.yml         |  86 +++++++++-
 .github/workflows/adhoc-remote-benchmarks.yml |  69 --------
 .../workflows/compare-remote-benchmarks.yml   |   2 +
 .../workflows/nightly-remote-benchmarks.yml   |   2 +
 .../workflows/release-remote-benchmarks.yml   |   2 +
 .github/workflows/remote-benchmarks.yml       |  32 ++--
 15 files changed, 477 insertions(+), 125 deletions(-)
 create mode 100755 .github/scripts/adhoc.sh
 create mode 100755 .github/scripts/base.sh
 delete mode 100644 .github/workflows/adhoc-remote-benchmarks.yml

diff --git a/.github/resources/adhoc-benchmark-docker-compose.yml b/.github/resources/adhoc-benchmark-docker-compose.yml
index 22803ada..8b52b615 100644
--- a/.github/resources/adhoc-benchmark-docker-compose.yml
+++ b/.github/resources/adhoc-benchmark-docker-compose.yml
@@ -6,7 +6,7 @@ services:
     volumes:
       - ./data:/data
     environment:
-      - "START_OPTS=-Xmx24g -DAuthHandlers=io.deephaven.auth.AnonymousAuthenticationHandler"
+      - "START_OPTS=-DAuthHandlers=io.deephaven.auth.AnonymousAuthenticationHandler ${CONFIG_OPTS}"
 
   redpanda:
     command:
diff --git a/.github/scripts/adhoc.sh b/.github/scripts/adhoc.sh
new file mode 100755
index 00000000..03c49f9a
--- /dev/null
+++ b/.github/scripts/adhoc.sh
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o pipefail
+
+# Provides what is needed to set up an adhoc benchmark run, including bare metal and labels
+# ex. adhoc.sh make-labels "where" "0.36.0" "user123:branch-name-123"
+# ex. adhoc.sh deploy-metal api-key project-id c3.small.x86 server-name "2 days"
+# ex. adhoc.sh delete-metal api-key device-id service-name
+
+if [[ $# -lt 2 ]]; then
+  echo "$0: Missing action or its arguments"
+  exit 1
+fi
+
+ACTION=$1
+SCRIPT_DIR=$(dirname "$0")
+OUTPUT_NAME=adhoc-${ACTION}.out
+
+rm -f ${OUTPUT_NAME}; touch ${OUTPUT_NAME}
+
+# Get metal device info including ip address
+getDeviceInfo() {
+  curl --no-progress-meter --max-time 10 -X GET -H "X-Auth-Token: $1" \
+    "https://api.equinix.com/metal/v1/devices/$2?include=ip_addresses,state&exclude=root_password,ssh_keys" \
+    | jq | tee get-device-response.json | jq -r "$3"
+}
+
+# Get the label part of an image/branch name
+# ex. edge@sha256:15ab331629805076cdf5ed6666186c6b578298ab493a980779338d153214640e
+# ex. user123:1111-my-pull-request
+# ex. 0.36.0 or edge
+getSetLabel() {
+  SUFFIX=$2
+  if [[ $2 == *"@sha"*":"* ]]; then
+    SUFFIX=$(echo "$2" | sed 's/@sha.*:/_/g' | head -c 20)
+  elif [[ $2 == *":"* ]]; then
+    SUFFIX=$(echo "$2" | sed 's/.*://g' | head -c 20)
+  fi
+  echo "${1}_${SUFFIX}" | sed -E 's/(^[0-9])/_\1/g' | sed 's/[^0-9a-zA-Z_]/_/g'
+}
+
+# Make set labels from a prefix and image/branch names
+if [[ ${ACTION} == "make-labels" ]]; then
+  PREFIX=$2
+  IMAGE1=$3
+  IMAGE2=$4
+  echo "Making Labels: ${PREFIX}"
+
+  LABEL1=$(getSetLabel "${PREFIX}" "${IMAGE1}")
+  LABEL2=$(getSetLabel "${PREFIX}" "${IMAGE2}")
+
+  echo "PREFIX=${PREFIX}" | tee -a ${OUTPUT_NAME}
+  echo "SET_LABEL_1=${LABEL1}" | tee -a ${OUTPUT_NAME}
+  echo "SET_LABEL_2=${LABEL2}" | tee -a ${OUTPUT_NAME}
+fi
+
+# Make a regex from a list of wildcarded test class names
+if [[ ${ACTION} == "make-test-regex" ]]; then
+  WILDCARDS=$2
+  echo "Making Test Regex: ${WILDCARDS}"
+
+  TEST_REGEX="^.*[.]("
+  for r in $(echo ${WILDCARDS} | sed 's/\s*,\s*/ /g'); do
+    TEST_REGEX="${TEST_REGEX}"$(echo "(${r}Test)|" | sed 's/\*/.*/g')
+  done
+  TEST_REGEX=$(echo ${TEST_REGEX} | sed -E 's/\|+$//g')
+  TEST_REGEX="${TEST_REGEX})$"
+
+  echo "WILDCARDS=${WILDCARDS}" | tee -a ${OUTPUT_NAME}
+  echo "TEST_CLASS_REGEX=${TEST_REGEX}" | tee -a ${OUTPUT_NAME}
+fi
+
+# Format some number used for scaling the tests
+if [[ ${ACTION} == "scale-nums" ]]; then
+  INPUT_ROW_COUNT=$2
+  INPUT_ITERATIONS=$3
+  echo "Scaling Numbers"
+
+  TEST_ROW_COUNT=$((${INPUT_ROW_COUNT} * 1000000))
+  TEST_ITERATIONS=${INPUT_ITERATIONS}
+  if [ $((${INPUT_ITERATIONS} % 2)) == 0 ]; then
+    TEST_ITERATIONS=$((${INPUT_ITERATIONS} + 1))
+  fi
+
+  echo "INPUT_ROW_COUNT=${INPUT_ROW_COUNT}" | tee -a ${OUTPUT_NAME}
+  echo "INPUT_ITERATIONS=${INPUT_ITERATIONS}" | tee -a ${OUTPUT_NAME}
+  echo "TEST_ROW_COUNT=${TEST_ROW_COUNT}" | tee -a ${OUTPUT_NAME}
+  echo "TEST_ITERATIONS=${TEST_ITERATIONS}" | tee -a ${OUTPUT_NAME}
+fi
+
+# Deploy a bare metal server using the Equinix ReST API
+if [[ ${ACTION} == "deploy-metal" ]]; then
+  API_KEY=$2
+  PROJECT_ID=$3
+  PLAN=$4
+  ACTOR=$(echo "adhoc-$5-"$(${SCRIPT_DIR}/base.sh $(date +%s%03N) 36) | tr '[:upper:]' '[:lower:]')
+  EXPIRE_WHEN=$6
+  echo "Deploying Server: ${ACTOR}"
+
+  BEGIN_SECS=$(date +%s)
+  DEVICE_ID=$(curl --fail-with-body -X POST \
+    -H "Content-Type: application/json" -H "X-Auth-Token: ${API_KEY}" \
+    "https://api.equinix.com/metal/v1/projects/${PROJECT_ID}/devices?exclude=plan,ssh_keys,provisioning_events,network_ports,operating_system" \
+    -d '{
+      "metro": "da",
+      "plan": "'${PLAN}'",
+      "operating_system": "ubuntu_22_04",
+      "hostname": "'${ACTOR}'",
+      "termination_time": "'$(date --iso-8601=seconds -d "+${EXPIRE_WHEN}")'"
+    }' | jq | tee create-device-response.json | jq -r '.id')
+
+  IP_ADDRESS="null"
+  for i in $(seq 100); do
+    echo -n "$i) Device Status: "
+    STATE=$(getDeviceInfo ${API_KEY} ${DEVICE_ID} ".state")
+    echo "${STATE}"
+    if [[ "${STATE}" == "active" ]]; then break; fi
+    sleep 6
+  done
+
+  DURATION=$(($(date +%s) - ${BEGIN_SECS}))
+
+  IP_ADDRESS=$(getDeviceInfo ${API_KEY} ${DEVICE_ID} ".ip_addresses[0].address")
+  STATE=$(getDeviceInfo ${API_KEY} ${DEVICE_ID} ".state")
+  if [[ "${IP_ADDRESS}" == "null" ]] || [[ "${STATE}" != "active" ]]; then
+    echo "Failed to provision device after ${DURATION} seconds"
+    exit 1
+  fi
+
+  echo "ACTION=${ACTION}" | tee -a ${OUTPUT_NAME}
+  echo "PROVISION_SECS=${DURATION}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_NAME=${ACTOR}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_ID=${DEVICE_ID}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_ADDR=${IP_ADDRESS}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_EXPIRE=${EXPIRE_WHEN}" | tee -a ${OUTPUT_NAME}
+fi
+
+# Delete a bare metal server using the Equinix ReST API
+if [[ ${ACTION} == "delete-metal" ]]; then
+  API_KEY=$2
+  DEVICE_ID=$3
+  DEVICE_NAME=$4
+
+  curl --no-progress-meter --max-time 10 --fail-with-body -X DELETE -H "X-Auth-Token: ${API_KEY}" \
+    "https://api.equinix.com/metal/v1/devices/${DEVICE_ID}" \
+    | jq | tee delete-device-response.json
+
+  echo "ACTION=${ACTION}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_NAME=${DEVICE_NAME}" | tee -a ${OUTPUT_NAME}
+  echo "DEVICE_ID=${DEVICE_ID}" | tee -a ${OUTPUT_NAME}
+fi
+
+
+
diff --git a/.github/scripts/base.sh b/.github/scripts/base.sh
new file mode 100755
index 00000000..0a40340a
--- /dev/null
+++ b/.github/scripts/base.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+set -o errexit
+set -o pipefail
+
+# Convert the given Base 10 number to the given radix up to base 62
+# ex. base.sh 1718738365297350992 62
+# ex. base.sh $(date +%s%03N) 36
+
+DECNUM=$1
+RADIX=$2
+BASE62=($(echo {0..9} {A..Z} {a..z}))
+
+for i in $(bc <<< "obase=${RADIX}; ${DECNUM}"); do
+  echo -n ${BASE62[$(( 10#$i ))]}
+done && echo
diff --git a/.github/scripts/build-server-distribution-remote.sh b/.github/scripts/build-server-distribution-remote.sh
index 1c517eef..497647c0 100755
--- a/.github/scripts/build-server-distribution-remote.sh
+++ b/.github/scripts/build-server-distribution-remote.sh
@@ -16,7 +16,7 @@ HOST=`hostname`
 GIT_DIR=/root/git
 DEEPHAVEN_DIR=/root/deephaven
 DOCKER_IMG=$1
-BRANCH_DELIM="::"
+BRANCH_DELIM=":"
 BUILD_JAVA=temurin-11-jdk-amd64
 
 if [ ! -d "${DEEPHAVEN_DIR}" ]; then
diff --git a/.github/scripts/fetch-results-local.sh b/.github/scripts/fetch-results-local.sh
index 9fdc8e7d..98f10c60 100755
--- a/.github/scripts/fetch-results-local.sh
+++ b/.github/scripts/fetch-results-local.sh
@@ -5,7 +5,8 @@ set -o pipefail
 set -o nounset
 
 # Fetches Benchmark results and logs from the remote test server and
-# compresses the runs before upload
+# compresses the runs before upload. Writes an output file with the
+# SET_LABEL that was used for the set directory name
 
 if [[ $# != 7 ]]; then
   echo "$0: Missing host, user, run type, script dir, actor, docker img, or run label arguments"
@@ -17,23 +18,27 @@ USER=$2
 SCRIPT_DIR=$3
 RUN_TYPE=$4
 ACTOR=$5
-RUN_LABEL=${6:-$(echo -n "set-"; ${SCRIPT_DIR}/base62.sh $(date +%s%03N))}
+SET_LABEL=${6:-$(echo -n "set-"; ${SCRIPT_DIR}/base.sh $(date +%s%03N) 62)}
 DOCKER_IMG=$7
 RUN_DIR=/root/run
+OUTPUT_NAME=fetch-results-local.out
+
+rm -f ${OUTPUT_NAME}; touch ${OUTPUT_NAME}
 
 # Get the date for the Set Label, since Github Workflows don't have 'with: ${{github.date}}'
-if [ "${RUN_LABEL}" = "" ]; then
-  RUN_LABEL=$(date '+%Y-%m-%d')
+if [ "${SET_LABEL}" = "" ]; then
+  SET_LABEL=$(date '+%Y-%m-%d')
 fi
 
 # Get the version for the Set Label, since Github Workflows don't have 'with: ${{github.date}}'
-if [ "${RUN_LABEL}" = "" ]; then
+if [ "${SET_LABEL}" = "" ]; then
   vers=${DOCKER_IMG}
   major=$(printf '%02d\n' $(echo ${vers} | cut -d "." -f 1))
   minor=$(printf '%03d\n' $(echo ${vers} | cut -d "." -f 2))
   patch=$(printf '%02d\n' $(echo ${vers} | cut -d "." -f 3))
-  RUN_LABEL="${major}.${minor}.${patch}"
+  SET_LABEL="${major}.${minor}.${patch}"
 fi
+echo "SET_LABEL=${SET_LABEL}" | tee -a ${OUTPUT_NAME}
 
 # Pull results from the benchmark server
 scp -r ${USER}@${HOST}:${RUN_DIR}/results .
@@ -41,7 +46,7 @@ scp -r ${USER}@${HOST}:${RUN_DIR}/logs .
 scp -r ${USER}@${HOST}:${RUN_DIR}/*.jar .
 
 # Move the results into the destination directory
-DEST_DIR=${RUN_TYPE}/${ACTOR}/${RUN_LABEL}
+DEST_DIR=${RUN_TYPE}/${ACTOR}/${SET_LABEL}
 mkdir -p ${DEST_DIR}
 rm -rf ${DEST_DIR}
 mv results/ ${DEST_DIR}/
diff --git a/.github/scripts/manage-deephaven-remote.sh b/.github/scripts/manage-deephaven-remote.sh
index d6b16861..4e04906d 100755
--- a/.github/scripts/manage-deephaven-remote.sh
+++ b/.github/scripts/manage-deephaven-remote.sh
@@ -7,16 +7,16 @@ set -o pipefail
 # The directives argument can be start or stop
 # The supplied image argument can be an image name or ::
 
-if [[ $# != 2 ]]; then
-  echo "$0: Missing docker directive or image/branch argument"
+if [[ $# -lt 3 ]]; then
+  echo "$0: Missing docker directive, image/branch, config options argument"
   exit 1
 fi
 
-HOST=`hostname`
-DEEPHAVEN_DIR=/root/deephaven
 DIRECTIVE=$1
 DOCKER_IMG=$2
-BRANCH_DELIM="::"
+CONFIG_OPTS="${@:3}"
+HOST=`hostname`
+DEEPHAVEN_DIR=/root/deephaven
 
 if [ ! -d "${DEEPHAVEN_DIR}" ]; then
   echo "$0: Missing one or more Benchmark setup directories"
@@ -29,11 +29,23 @@ title "- Setting up Remote Docker Image on ${HOST} -"
 
 cd ${DEEPHAVEN_DIR}
 
-if [[ ${DOCKER_IMG} != *"${BRANCH_DELIM}"* ]]; then
-  echo "DOCKER_IMG=ghcr.io/deephaven/server:${DOCKER_IMG}" > .env
+if [[ ${CONFIG_OPTS} == "" ]]; then
+  CONFIG_OPTS="-Xmx24g"
+fi
+echo "CONFIG_OPTS=${CONFIG_OPTS}" > .env
+
+IS_BRANCH="false"
+if [[ ${DOCKER_IMG} == *"@sha"*":"* ]]; then
+  IS_BRANCH="false"
+elif [[ ${DOCKER_IMG} == *":"* ]]; then
+  IS_BRANCH="true"
+fi
+
+if [[ ${IS_BRANCH} == "false" ]]; then
+  echo "DOCKER_IMG=ghcr.io/deephaven/server:${DOCKER_IMG}" >> .env
   docker compose pull
 else
-  echo "DOCKER_IMG=deephaven/server:benchmark-local" > .env
+  echo "DOCKER_IMG=deephaven/server:benchmark-local" >> .env
 fi
 
 if [[ ${DIRECTIVE} == 'start' ]]; then
diff --git a/.github/scripts/setup-ssh-local.sh b/.github/scripts/setup-ssh-local.sh
index 86ac9d36..64408671 100644
--- a/.github/scripts/setup-ssh-local.sh
+++ b/.github/scripts/setup-ssh-local.sh
@@ -11,8 +11,8 @@ PRIVATE_KEY=$2
 PRIVATE_FILE=~/.ssh/id_ed25519
 
 if [[ $# != 2 ]]; then
-	echo "$0: Missing host or private key arguments"
-	exit 1
+  echo "$0: Missing host or private key arguments"
+  exit 1
 fi
 
 mkdir -p logs
diff --git a/.github/scripts/setup-test-server-remote.sh b/.github/scripts/setup-test-server-remote.sh
index 181749c5..e511c806 100755
--- a/.github/scripts/setup-test-server-remote.sh
+++ b/.github/scripts/setup-test-server-remote.sh
@@ -32,17 +32,15 @@ title "-- Adding OS Applications --"
 UPDATED=$(update-alternatives --list java | grep -i temurin; echo $?)
 if [[ ${UPDATED} != 0 ]]; then
   title "-- Adding Adoptium to APT registry --"
-  apt install -y wget apt-transport-https gpg
-  wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | apt-key add -
+  apt -y install wget apt-transport-https gpg
+  wget -qO - https://packages.adoptium.net/artifactory/api/gpg/key/public | gpg --dearmor | tee /etc/apt/trusted.gpg.d/adoptium.gpg > /dev/null
   echo "deb https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list
-  apt update
+  apt -y update
 fi
 
 title "-- Installing JVMs --"
 apt -y install temurin-11-jdk
 apt -y install temurin-21-jdk
-# Look at installed packages: dpkg --list | grep jdk
-# Configure default java: update-alternatives --config java
 
 title "-- Installing Maven --"
 apt -y install maven
@@ -54,16 +52,18 @@ command_exists() {
 if command_exists docker; then
   echo "Docker already installed... skipping"
 else
-  apt-get install ca-certificates curl gnupg
-  install -m 0755 -d /etc/apt/keyringsA
-  rm -f /etc/apt/keyrings/docker.gpg
-  curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
-  chmod a+r /etc/apt/keyrings/docker.gpg
+  apt -y update
+  apt -y install ca-certificates curl
+  install -m 0755 -d /etc/apt/keyrings
+  curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+  chmod a+r /etc/apt/keyrings/docker.asc
+
   echo \
-    "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
-    "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
+    "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+    $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
     tee /etc/apt/sources.list.d/docker.list > /dev/null
-  apt-get --assume-yes install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+  apt -y update
+  apt -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
 fi
 
 title "-- Removing Git Benchmark Repositories --"
diff --git a/.github/workflows/adhoc-auto-remote-benchmarks.yml b/.github/workflows/adhoc-auto-remote-benchmarks.yml
index eb2b6859..9d6568fc 100644
--- a/.github/workflows/adhoc-auto-remote-benchmarks.yml
+++ b/.github/workflows/adhoc-auto-remote-benchmarks.yml
@@ -1,19 +1,164 @@
+# Provision a server and run benchmarks on that system, cleaning up after
+# - Deploys a new server on the fly
+# - Calls the reusable workflow remote-benchmarks.yml to run and upload benchmarks
+# - Runs a comparison between two images/branches
+# - Deletes the server on success or failure
 
 name: Adhoc Benchmarks (Auto-provisioned Server)
 
 on:
   workflow_dispatch:
     inputs:
-      message:
-        message: 'Message'
+      docker_image_1:
+        description: 'Deephaven Image or Core Branch'
         required: true
+        default: '0.35.0'
+        type: string
+      docker_image_2:
+        description: 'Deephaven Image or Core Branch'
+        required: true
+        default: '0.36.0'
+        type: string
+      set_label_prefix:
+        description: 'Set Label Prefix'
+        required: true
+        type: string
         default: ''
+      test_class_list:
+        description: 'Benchmark Test Classes'
+        required: true
+        default: 'Where, Avg*'
+        type: string
+      test_iterations:
+        description: 'Benchmark Iterations'
+        required: true
+        default: '1'
+        type: string
+      scale_row_count:
+        description: 'Benchmark Scale Row Count (Millions)'
+        required: true
+        default: '10'
         type: string
+      distribution:
+        description: 'Benchmark Data Distribution'
+        required: true
+        default: 'random'
+        type: choice
+        options:
+          - random
+          - ascending
+          - descending
+          - runlength
 
 jobs:
-  hello-world:
-    runs-on: ubuntu-22.04
+  setup-benchmark-system:
+    runs-on: ubuntu-22.04
+    outputs:
+      set_label_1: ${{ steps.make-labels.outputs.SET_LABEL_1 }}
+      set_label_2: ${{ steps.make-labels.outputs.SET_LABEL_2 }}
+      test_class_regex: "${{ steps.make-test-regex.outputs.TEST_CLASS_REGEX }}"
+      metal_device_id: ${{ steps.deploy-metal.outputs.DEVICE_ID }}
+      metal_device_name: ${{ steps.deploy-metal.outputs.DEVICE_NAME }}
+      metal_ip_addr: ${{ steps.deploy-metal.outputs.DEVICE_ADDR }}
+      test_row_count: ${{ steps.scale-nums.outputs.TEST_ROW_COUNT }}
+      test_iterations: ${{ steps.scale-nums.outputs.TEST_ITERATIONS }}
+    env:
+      SD: .github/scripts
+      METAL_EXPIRE: "2 days"
+      METAL_API_KEY: ${{ secrets.BENCHMARK_METAL_AUTH_TOKEN }}
+      METAL_PROJECT_ID: ${{ secrets.BENCHMARK_METAL_PROJECT_ID }}
+      METAL_PLAN: "c3.small.x86"
+      METAL_ACTOR: "${{ github.actor }}"
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Make Set Labels
+        id: make-labels
+        run: |
+          ${SD}/adhoc.sh make-labels "${{ inputs.set_label_prefix }}" "${{ inputs.docker_image_1 }}" "${{ inputs.docker_image_2 }}"
+          cat adhoc-make-labels.out >> "$GITHUB_OUTPUT"
+
+      - name: Make Wildcard Regex
+        id: make-test-regex
+        run: |
+          ${SD}/adhoc.sh make-test-regex "${{ inputs.test_class_list }}"
+          cat adhoc-make-test-regex.out >> "$GITHUB_OUTPUT"
+
+      - name: Scale Input Numbers
+        id: scale-nums
+        run: |
+          ${SD}/adhoc.sh scale-nums ${{ inputs.scale_row_count }} ${{ inputs.test_iterations }}
+          cat adhoc-scale-nums.out >> "$GITHUB_OUTPUT"
+
+      - name: Deploy Bare Metal
+        id: deploy-metal
+        run: |
+          ${SD}/adhoc.sh deploy-metal "${METAL_API_KEY}" "${METAL_PROJECT_ID}" "${METAL_PLAN}" "${METAL_ACTOR}" "${METAL_EXPIRE}"
+          cat adhoc-deploy-metal.out >> "$GITHUB_OUTPUT"
+
+      - name: Archive Setup Logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: Provisioning Setup Logs
+          path: |
+            *.out
+            *.json
+
+  process-adhoc-benchmarks-1:
+    needs: setup-benchmark-system
+    uses: ./.github/workflows/remote-benchmarks.yml
+    with:
+      run_type: adhoc
+      docker_image: ${{ inputs.docker_image_1 }}
+      run_label: ${{ needs.setup-benchmark-system.outputs.set_label_1 }}
+      test_package: "io.deephaven.benchmark.tests.standard"
+      test_class_regex: "${{ needs.setup-benchmark-system.outputs.test_class_regex }}"
+      test_iterations: ${{ needs.setup-benchmark-system.outputs.test_iterations }}
+      scale_row_count: ${{ needs.setup-benchmark-system.outputs.test_row_count }}
+      distribution: ${{ inputs.distribution }}
+      test_device_addr: ${{ needs.setup-benchmark-system.outputs.metal_ip_addr }}
+      config_options: ""
+    secrets: inherit
+
+  process-adhoc-benchmarks-2:
+    needs: [setup-benchmark-system, process-adhoc-benchmarks-1]
+    uses: ./.github/workflows/remote-benchmarks.yml
+    with:
+      run_type: adhoc
+      docker_image: ${{ inputs.docker_image_2 }}
+      run_label: ${{ needs.setup-benchmark-system.outputs.set_label_2 }}
+      test_package: "io.deephaven.benchmark.tests.standard"
+      test_class_regex: "${{ needs.setup-benchmark-system.outputs.test_class_regex }}"
+      test_iterations: ${{ needs.setup-benchmark-system.outputs.test_iterations }}
+      scale_row_count: ${{ needs.setup-benchmark-system.outputs.test_row_count }}
+      distribution: ${{ inputs.distribution }}
+      test_device_addr: ${{ needs.setup-benchmark-system.outputs.metal_ip_addr }}
+      config_options: ""
+    secrets: inherit
+
+  teardown-benchmark-system:
+    if: ${{ always() && needs.setup-benchmark-system.outputs.metal_device_id }}
+    needs: [setup-benchmark-system, process-adhoc-benchmarks-2]
+    runs-on: ubuntu-22.04
+    env:
+      SD: .github/scripts
+      METAL_API_KEY: ${{ secrets.BENCHMARK_METAL_AUTH_TOKEN }}
+      METAL_DEVICE_ID: ${{ needs.setup-benchmark-system.outputs.metal_device_id }}
+      METAL_DEVICE_NAME: ${{ needs.setup-benchmark-system.outputs.metal_device_name }}
+
     steps:
-      - name: Print Message
-        run: echo "Hello ${{ inputs.message }}"
+      - uses: actions/checkout@v4
+      - name: Delete Bare Metal
+        id: delete-metal
+        run: |
+          ${SD}/adhoc.sh delete-metal ${METAL_API_KEY} ${METAL_DEVICE_ID} ${METAL_DEVICE_NAME}
+          cat adhoc-delete-metal.out >> "$GITHUB_OUTPUT"
+
+      - name: Archive Teardown Logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: Provisioning Teardown Logs
+          path: |
+            *.out
+            *.json
 
diff --git a/.github/workflows/adhoc-exist-remote-benchmarks.yml b/.github/workflows/adhoc-exist-remote-benchmarks.yml
index 7a962a35..22ebb5f0 100644
--- a/.github/workflows/adhoc-exist-remote-benchmarks.yml
+++ b/.github/workflows/adhoc-exist-remote-benchmarks.yml
@@ -7,15 +7,89 @@ name: Adhoc Benchmarks (Existing Server)
 on:
   workflow_dispatch:
     inputs:
-      message:
-        message: 'Message'
+      docker_image:
+        description: 'Docker Image Name or DH Core Branch'
         required: true
+        default: 'edge'
+        type: string
+      config_options:
+        description: 'Deephaven JVM Options'
+        default: '-Xmx24g'
+        type: string
+      run_label:
+        description: 'Set Label'
+        required: true
+        type: string
         default: ''
+      test_package:
+        description: 'Benchmark Test Package'
+        required: true
+        default: 'io.deephaven.benchmark.tests.standard'
+        type: string
+      test_class_list:
+        description: 'Benchmark Test Classes'
+        required: true
+        default: 'Where, Avg*'
         type: string
+      test_iterations:
+        description: 'Benchmark Iterations'
+        required: true
+        default: '1'
+        type: string
+      scale_row_count:
+        description: 'Benchmark Scale Row Count (Millions)'
+        required: true
+        default: '10'
+        type: string
+      distribution:
+        description: 'Benchmark Data Distribution'
+        required: true
+        default: 'random'
+        type: choice
+        options:
+          - random
+          - ascending
+          - descending
+          - runlength
 
 jobs:
-  hello-world:
-    runs-on: ubuntu-22.04
+  setup-benchmark-system:
+    runs-on: ubuntu-22.04
+    outputs:
+      test_class_regex: "${{ steps.make-test-regex.outputs.TEST_CLASS_REGEX }}"
+      test_row_count: ${{ steps.scale-nums.outputs.TEST_ROW_COUNT }}
+      test_iterations: ${{ steps.scale-nums.outputs.TEST_ITERATIONS }}
+    env:
+      SD: .github/scripts
+
     steps:
-      - name: Print Message
-        run: echo "Hello ${{ inputs.message }}"
+      - uses: actions/checkout@v4
+      - name: Make Wildcard Regex
+        id: make-test-regex
+        run: |
+          ${SD}/adhoc.sh make-test-regex "${{ inputs.test_class_list }}"
+          cat adhoc-make-test-regex.out >> "$GITHUB_OUTPUT"
+
+      - name: Scale Input Numbers
+        id: scale-nums
+        run: |
+          ${SD}/adhoc.sh scale-nums ${{ inputs.scale_row_count }} ${{ inputs.test_iterations }}
+          cat adhoc-scale-nums.out >> "$GITHUB_OUTPUT"
+
+  process-adhoc-benchmarks:
+    needs: [setup-benchmark-system]
+    uses: ./.github/workflows/remote-benchmarks.yml
+    with:
+      run_type: adhoc
+      docker_image: ${{ inputs.docker_image }}
+      run_label: ${{ inputs.run_label }}
+      test_package: ${{ inputs.test_package }}
+      test_class_regex: "${{ needs.setup-benchmark-system.outputs.test_class_regex }}"
+      test_iterations: ${{ needs.setup-benchmark-system.outputs.test_iterations }}
+      scale_row_count: ${{ needs.setup-benchmark-system.outputs.test_row_count }}
+      distribution: ${{ inputs.distribution }}
+      test_device_addr: ""
+      config_options: "${{ inputs.config_options }}"
+
+    secrets: inherit
+
diff --git a/.github/workflows/adhoc-remote-benchmarks.yml b/.github/workflows/adhoc-remote-benchmarks.yml
deleted file mode 100644
index e7290ef9..00000000
--- a/.github/workflows/adhoc-remote-benchmarks.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Run benchmarks on a remote system according to
-# resources/release-benchmark-docker-compose.yml
-# - Calls the reusable worflow remote-benchmarks.yml
-# - Used for testing the full workflow without affecting
-#   permanent data in other categoriees (like release, nightly)
-# - Produces temporary data that is not preserved long term
-
-name: Adhoc Benchmark Test on Docker Deephaven
-
-on:
-  workflow_dispatch:
-    inputs:
-      docker_image:
-        description: 'Docker Image Name or DH Core Branch'
-        required: true
-        default: 'edge'
-        type: string
-      run_label:
-        description: 'Set Label'
-        required: true
-        type: string
-        default: ''
-      test_package:
-        description: 'Benchmark Test Package'
-        required: true
-        default: 'io.deephaven.benchmark.tests.standard'
-        type: string
-      test_class_regex:
-        description: 'Benchmark Test Class Pattern'
-        required: true
-        default: '^.*[.]MixedCombo.*Test.*$'
-        type: string
-      test_iterations:
-        description: 'Benchmark Iterations'
-        required: true
-        default: '1'
-        type: string
-      scale_row_count:
-        description: 'Benchmark Scale Row Count'
-        required: true
-        default: '1000000'
-        type: string
-      distribution:
-        description: 'Benchmark Data Distribution'
-        required: true
-        default: 'random'
-        type: choice
-        options:
-          - random
-          - ascending
-          - descending
-          - runlength
-
-jobs:
-  process-adhoc-benchmarks:
-    if: ${{github.repository_owner != 'deephaven'}}
-    uses: ./.github/workflows/remote-benchmarks.yml
-    with:
-      run_type: adhoc
-      docker_image: ${{ inputs.docker_image }}
-      run_label: ${{ inputs.run_label }}
-      test_package: ${{ inputs.test_package }}
-      test_class_regex: ${{ inputs.test_class_regex }}
-      test_iterations: ${{ inputs.test_iterations }}
-      scale_row_count: ${{ inputs.scale_row_count }}
-      distribution: ${{ inputs.distribution }}
-    secrets: inherit
-
-
diff --git a/.github/workflows/compare-remote-benchmarks.yml b/.github/workflows/compare-remote-benchmarks.yml
index a8e6f3ef..3f7c30fe 100644
--- a/.github/workflows/compare-remote-benchmarks.yml
+++ b/.github/workflows/compare-remote-benchmarks.yml
@@ -26,4 +26,6 @@ jobs:
       test_iterations: 5
       scale_row_count: 70000000
       distribution: random
+      test_device_addr: ""
+      config_options: ""
     secrets: inherit
diff --git a/.github/workflows/nightly-remote-benchmarks.yml b/.github/workflows/nightly-remote-benchmarks.yml
index fb46abeb..c3710fd8 100644
--- a/.github/workflows/nightly-remote-benchmarks.yml
+++ b/.github/workflows/nightly-remote-benchmarks.yml
@@ -20,4 +20,6 @@ jobs:
       test_iterations: 5
       scale_row_count: 10000000
       distribution: random
+      test_device_addr: ""
+      config_options: ""
     secrets: inherit
diff --git a/.github/workflows/release-remote-benchmarks.yml b/.github/workflows/release-remote-benchmarks.yml
index 5e07d8a1..74a355f3 100644
--- a/.github/workflows/release-remote-benchmarks.yml
+++ b/.github/workflows/release-remote-benchmarks.yml
@@ -25,4 +25,6 @@ jobs:
       test_iterations: 5
       scale_row_count: 10000000
       distribution: random
+      test_device_addr: ""
+      config_options: ""
     secrets: inherit
diff --git a/.github/workflows/remote-benchmarks.yml b/.github/workflows/remote-benchmarks.yml
index 5e3f432f..3b6ca4ff 100644
--- a/.github/workflows/remote-benchmarks.yml
+++ b/.github/workflows/remote-benchmarks.yml
@@ -32,6 +32,12 @@ on:
       distribution:
         required: true
         type: string
+      test_device_addr:
+        required: true
+        type: string
+      config_options:
+        required: true
+        type: string
 
 jobs:
   setup-benchmarks:
@@ -40,13 +46,14 @@ jobs:
       matrix-iterations: ${{ steps.matrix-iterations.outputs.matrix-iterations }}
     env:
       SD: .github/scripts
-      HOST: ${{secrets.BENCHMARK_HOST}}
+      HOST: "${{ inputs.test_device_addr == '' && secrets.BENCHMARK_HOST || inputs.test_device_addr }}"
       USER: ${{secrets.BENCHMARK_USER}}
       REPO: ${{github.repository}}
       BRANCH: ${{github.ref_name}}
       RUN_TYPE: ${{inputs.run_type}}
       DOCKER_IMG: ${{inputs.docker_image}}
       TEST_ITERS: ${{inputs.test_iterations}}
+      CONFIG_OPTS: "${{inputs.config_options}}"
 
     steps:
       - uses: actions/checkout@v4
@@ -55,7 +62,6 @@ jobs:
         with:
           java-version: '21'
           distribution: 'temurin'
-          cache: maven
 
       - name: Setup Local Scripts
         run: |
@@ -67,18 +73,18 @@ jobs:
           ${SD}/run-ssh-local.sh ${HOST} ${USER} ${SD} setup-test-server-remote ${REPO} ${BRANCH} ${RUN_TYPE} "${DOCKER_IMG}"
 
       - name: Run Remote Server Distribution Build
-        if: ${{ contains(env.DOCKER_IMG, '::') }}
+        if: ${{ !contains(env.DOCKER_IMG, '@sha256:') && contains(env.DOCKER_IMG, ':') }}
         run: |
           ${SD}/run-ssh-local.sh ${HOST} ${USER} ${SD} build-server-distribution-remote "${DOCKER_IMG}"
 
       - name: Run Remote Docker Image Build
-        if: ${{ contains(env.DOCKER_IMG, '::') }}
+        if: ${{ !contains(env.DOCKER_IMG, '@sha256:') && contains(env.DOCKER_IMG, ':') }}
         run: |
           ${SD}/run-ssh-local.sh ${HOST} ${USER} ${SD} build-docker-image-remote
 
       - name: Start Remote Deephaven Server
         run: |
-          ${SD}/run-ssh-local.sh ${HOST} ${USER} ${SD} manage-deephaven-remote start "${DOCKER_IMG}"
+          ${SD}/run-ssh-local.sh ${HOST} ${USER} ${SD} manage-deephaven-remote start "${DOCKER_IMG}" "${CONFIG_OPTS}"
 
       - name: Run Remote Benchmark Artifact Build
         run: |
@@ -98,11 +104,11 @@ jobs:
         tag: ${{ fromJson(needs.setup-benchmarks.outputs.matrix-iterations) }}
     env:
       SD: .github/scripts
-      HOST: ${{secrets.BENCHMARK_HOST}}
+      HOST: "${{ inputs.test_device_addr == '' && secrets.BENCHMARK_HOST || inputs.test_device_addr }}"
       USER: ${{secrets.BENCHMARK_USER}}
       RUN_TYPE: ${{inputs.run_type}}
       TEST_PKG: ${{inputs.test_package}}
-      TEST_RGX: ${{inputs.test_class_regex}}
+      TEST_RGX: "${{inputs.test_class_regex}}"
       ROW_CNT: ${{inputs.scale_row_count}}
       DISTRIB: ${{inputs.distribution}}
 
@@ -123,12 +129,12 @@ jobs:
     runs-on: ubuntu-22.04
     env:
       SD: .github/scripts
-      HOST: ${{secrets.BENCHMARK_HOST}}
+      HOST: "${{ inputs.test_device_addr == '' && secrets.BENCHMARK_HOST || inputs.test_device_addr }}"
       USER: ${{secrets.BENCHMARK_USER}}
       RUN_TYPE: ${{inputs.run_type}}
       DOCKER_IMG: ${{inputs.docker_image}}
       RUN_LABEL: ${{inputs.run_label}}
-      ACTOR: "${{ inputs.run_label == 'adhoc' && github.actor || github.repository_owner }}"
+      ACTOR: "${{ inputs.run_type == 'adhoc' && github.actor || github.repository_owner }}"
 
     steps:
       - uses: actions/checkout@v4
@@ -138,7 +144,6 @@ jobs:
         with:
           java-version: '21'
           distribution: 'temurin'
-          cache: maven
 
       - name: Setup Local Scripts
         run: |
@@ -146,8 +151,10 @@ jobs:
           ${SD}/setup-ssh-local.sh ${HOST} "${{secrets.BENCHMARK_KEY}}"
 
       - name: Fetch Benchmark Results and Prepare for Upload
+        id: fetch-results
         run: |
           ${SD}/fetch-results-local.sh ${HOST} ${USER} ${SD} ${RUN_TYPE} "${ACTOR}" "${RUN_LABEL}" "${DOCKER_IMG}"
+          cat fetch-results-local.out >> "$GITHUB_OUTPUT"
 
       - name: Authorize GCloud Credentials
         uses: google-github-actions/auth@v2
@@ -162,8 +169,10 @@ jobs:
         with:
           path: ${{env.RUN_TYPE}}
           destination: deephaven-benchmark
+          process_gcloudignore: false
 
       - name: Sync GCloud with Demo NFS
+        if: ${{ env.RUN_TYPE != 'adhoc' }}
         run: |
           gcloud compute ssh --zone "us-central1-a" --project "deephaven-oss" dhc-demo-nfs-client --command="sudo gsutil -m rsync -d -r gs://deephaven-benchmark /nfs/deephaven-benchmark"
           gcloud compute ssh --zone "us-central1-a" --project "deephaven-oss" dhc-demo-nfs-client --command="sudo find /nfs/deephaven-benchmark -mindepth 1 -type d -empty -delete"
@@ -175,9 +184,8 @@ jobs:
       - name: Archive Results
         uses: actions/upload-artifact@v4
         with:
-          name: Benchmarks and Logs
+          name: Benchmarks and Logs for Set ${{steps.fetch-results.outputs.SET_LABEL}}
           path: |
             ${{env.RUN_TYPE}}/**/*
             logs/**/*
 
-