From 4a0712e36a49b696112b64d8d4037147c4c760cd Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 15:41:43 -0700 Subject: [PATCH 01/56] Updated CI --- .github/workflows/ci.yml | 132 ++++++++++++++++++++++++ .github/workflows/docs.yml | 14 ++- .github/workflows/nightly.yml | 99 ++++++++++++++++++ .gitlab-ci.yml | 189 ---------------------------------- 4 files changed, 244 insertions(+), 190 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/nightly.yml delete mode 100644 .gitlab-ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f842563 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,132 @@ +# ======================================================================================== +# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. 
+# ======================================================================================== + +# This file was created in part or in whole by one of OpenAI's generative AI models + +name: Continuous Integration + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + ci_format_job: + if: > + ${{ !contains(github.event.pull_request.title, 'Draft:') && + !contains(github.event.pull_request.title, 'WIP:') }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Run format check + run: | + source env/bash + VERBOSE=1 ./style/format.sh + git diff --exit-code --ignore-submodules + + ci_cpu_job: + if: > + ${{ !contains(github.event.pull_request.title, 'Draft:') && + !contains(github.event.pull_request.title, 'WIP:') }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Set up environment + run: | + source env/bash + - name: Run CPU tests + run: | + cd tst + python3 run_tests.py regression.suite \ + --save_build \ + --make_nproc=32 \ + --cmake=-DCMAKE_C_COMPILER=gcc \ + --cmake=-DCMAKE_CXX_COMPILER=g++ \ + --log_file=ci_cpu_log.txt + - name: Upload CPU test log + if: always() + uses: actions/upload-artifact@v3 + with: + name: ci_cpu_log.txt + path: tst/ci_cpu_log.txt + retention-days: 3 + - name: Upload figures + if: always() + uses: actions/upload-artifact@v3 + with: + name: figs + path: tst/figs + retention-days: 3 + + ci_gpu_job: + if: > + ${{ !contains(github.event.pull_request.title, 'Draft:') && + !contains(github.event.pull_request.title, 'WIP:') }} + runs-on: ubuntu-latest # Update to a runner with GPU support if available + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Set up environment + run: | + source env/bash + - name: Run GPU tests + run: | + cd tst + python3 run_tests.py gpu.suite \ + --save_build \ + --make_nproc=32 \ + --cmake=-DARTEMIS_ENABLE_CUDA=On \ + --cmake=-DKokkos_ARCH_VOLTA70=On \ + 
--cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ + --log_file=ci_gpu_log.txt + - name: Upload GPU test log + if: always() + uses: actions/upload-artifact@v3 + with: + name: ci_gpu_log.txt + path: tst/ci_gpu_log.txt + retention-days: 3 + - name: Upload figures + if: always() + uses: actions/upload-artifact@v3 + with: + name: figs + path: tst/figs + retention-days: 3 + + ci_doc_job: + if: > + ${{ !contains(github.event.pull_request.title, 'Draft:') && + !contains(github.event.pull_request.title, 'WIP:') && + github.event.pull_request.head.ref != github.event.repository.default_branch }} + runs-on: ubuntu-latest + container: + image: python:latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Build documentation + run: | + cd doc + pip install -U sphinx-rtd-theme pyyaml + sphinx-build -b html . ../public + - name: Upload documentation + uses: actions/upload-artifact@v3 + with: + name: docs + path: public diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c04c77e..cdea1b4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,3 +1,15 @@ +# ======================================================================================== +# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. 
The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. +# ======================================================================================== name: Build And Deploy Docs @@ -31,5 +43,5 @@ jobs: uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./public + publish_dir: ./public force_orphan: true diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 0000000..db65130 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,99 @@ +# ======================================================================================== +# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. 
+# ======================================================================================== + +# This file was created in part or in whole by one of OpenAI's generative AI models + +name: Nightly Tests + +on: + schedule: + - cron: '0 0 * * *' # Runs daily at midnight; adjust as needed + +jobs: + ci_nightly_cpu_job: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Update Parthenon submodule + run: | + cd external/parthenon + git pull origin develop + echo "==> Current Parthenon commit hash:" + git rev-parse HEAD + - name: Set up environment + run: | + source env/bash + - name: Run CPU tests + run: | + cd tst + python3 run_tests.py regression.suite \ + --make_nproc=32 \ + --cmake=-DCMAKE_C_COMPILER=gcc \ + --cmake=-DCMAKE_CXX_COMPILER=g++ \ + --log_file=ci_cpu_log.txt + - name: Upload CPU test log + if: always() + uses: actions/upload-artifact@v3 + with: + name: ci_cpu_log.txt + path: tst/ci_cpu_log.txt + retention-days: 3 + - name: Upload figures + if: always() + uses: actions/upload-artifact@v3 + with: + name: figs + path: tst/figs + retention-days: 3 + + ci_nightly_gpu_job: + runs-on: ubuntu-latest # Update to a runner with GPU support if available + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Update Parthenon submodule + run: | + cd external/parthenon + git pull origin develop + echo "==> Current Parthenon commit hash:" + git rev-parse HEAD + - name: Set up environment + run: | + source env/bash + - name: Run GPU tests + run: | + cd tst + python3 run_tests.py gpu.suite \ + --save_build \ + --make_nproc=32 \ + --cmake=-DARTEMIS_ENABLE_CUDA=On \ + --cmake=-DKokkos_ARCH_VOLTA70=On \ + --cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ + --log_file=ci_gpu_log.txt + - name: Upload GPU test log + if: always() + uses: actions/upload-artifact@v3 + with: + name: ci_gpu_log.txt + path: tst/ci_gpu_log.txt + retention-days: 3 + - name: Upload figures + 
if: always() + uses: actions/upload-artifact@v3 + with: + name: figs + path: tst/figs + retention-days: 3 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index 1fd46c7..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,189 +0,0 @@ -# ======================================================================================== -# (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. -# -# This program was produced under U.S. Government contract 89233218CNA000001 for Los -# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -# for the U.S. Department of Energy/National Nuclear Security Administration. All rights -# in the program are reserved by Triad National Security, LLC, and the U.S. Department -# of Energy/National Nuclear Security Administration. The Government is granted for -# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -# license in this material to reproduce, prepare derivative works, distribute copies to -# the public, perform publicly and display publicly, and to permit others to do so. 
-# ======================================================================================== - -variables: - GIT_SUBMODULE_STRATEGY: recursive - -stages: - - ci_doc - - ci_format - - ci_test - - deploy - -.default-job: - tags: - - darwin-slurm-shared - id_tokens: - SITE_ID_TOKEN: - aud: https://gitlab.lanl.gov - -.default-ci-job: - extends: .default-job - rules: - - if: '$CI_MERGE_REQUEST_TITLE =~ /Draft:/ || $CI_MERGE_REQUEST_TITLE =~ /WIP:/' - when: never - - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - -.default-nightly-job: - extends: .default-job - rules: - - if: '$CI_PIPELINE_SOURCE == "schedule"' - - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' - when: never - -ci_format-job: - extends: .default-ci-job - stage: ci_format - variables: - SCHEDULER_PARAMETERS: "--nodes=1 --partition=skylake-gold --time=04:00:00" - script: - - cd $CI_PROJECT_DIR - - source env/bash - - VERBOSE=1 ./style/format.sh - - git diff --exit-code --ignore-submodules - -ci_cpu-job: - extends: .default-ci-job - stage: ci_test - variables: - SCHEDULER_PARAMETERS: "--nodes=1 --partition=skylake-gold --time=04:00:00" - script: - - cd $CI_PROJECT_DIR - - source env/bash - - cd $CI_PROJECT_DIR/tst - - python3 run_tests.py - regression.suite - --save_build - --make_nproc=32 - --cmake=-DCMAKE_C_COMPILER=gcc - --cmake=-DCMAKE_CXX_COMPILER=g++ - --log_file=ci_cpu_log.txt - artifacts: - when: always - expire_in: 3 days - paths: - - tst/ci_cpu_log.txt - - tst/figs - -ci_gpu-job: - extends: .default-ci-job - stage: ci_test - variables: - SCHEDULER_PARAMETERS: "--nodes=1 --partition=volta-x86 --time=04:00:00" - script: - - cd $CI_PROJECT_DIR - - source env/bash - - cd $CI_PROJECT_DIR/tst - - python3 run_tests.py - gpu.suite - --save_build - --make_nproc=32 - --cmake=-DARTEMIS_ENABLE_CUDA=On - --cmake=-DKokkos_ARCH_VOLTA70=On - --cmake=-DCMAKE_CXX_COMPILER=$CI_PROJECT_DIR/external/parthenon/external/Kokkos/bin/nvcc_wrapper - --log_file=ci_gpu_log.txt - artifacts: - when: always - expire_in: 
3 days - paths: - - tst/ci_gpu_log.txt - - tst/figs - -ci_nightly-cpu-job: - extends: .default-nightly-job - stage: ci_test - variables: - SCHEDULER_PARAMETERS: "--nodes=1 --partition=skylake-gold --time=08:00:00" - script: - - cd $CI_PROJECT_DIR - - cd external/parthenon - - git pull origin develop - - echo "==> Current Parthenon commit hash:" - - git rev-parse HEAD - - cd ../../ - - source env/bash - - cd $CI_PROJECT_DIR/tst - - python3 run_tests.py - regression.suite - --make_nproc=32 - --cmake=-DCMAKE_C_COMPILER=gcc - --cmake=-DCMAKE_CXX_COMPILER=g++ - --log_file=ci_cpu_log.txt - artifacts: - when: always - expire_in: 3 days - paths: - - tst/ci_cpu_log.txt - - tst/figs - -ci_nightly-gpu-job: - extends: .default-nightly-job - stage: ci_test - variables: - SCHEDULER_PARAMETERS: "--nodes=1 --partition=volta-x86 --time=08:00:00" - script: - - cd $CI_PROJECT_DIR - - cd external/parthenon - - git pull origin develop - - echo "==> Current Parthenon commit hash:" - - git rev-parse HEAD - - cd ../../ - - source env/bash - - cd $CI_PROJECT_DIR/tst - - python3 run_tests.py - gpu.suite - --save_build - --make_nproc=32 - --make_nproc=32 - --cmake=-DARTEMIS_ENABLE_CUDA=On - --cmake=-DKokkos_ARCH_VOLTA70=On - --cmake=-DCMAKE_CXX_COMPILER=$CI_PROJECT_DIR/external/parthenon/external/Kokkos/bin/nvcc_wrapper - --log_file=ci_gpu_log.txt - artifacts: - when: always - expire_in: 3 days - paths: - - tst/ci_gpu_log.txt - - tst/figs - - -ci-doc-job: - extends: .default-ci-job - image: python:latest - stage: ci_doc - script: - - cd $CI_PROJECT_DIR/doc - - pip install -U sphinx-rtd-theme - - pip install -U pyyaml - - sphinx-build -b html . ../public - artifacts: - paths: - - public - rules: - - if: $CI_COMMIT_REF_NAME != $CI_DEFAULT_BRANCH - - -pages: - extends: .default-job - image: python:latest - stage: deploy - script: - - cd $CI_PROJECT_DIR/doc - - pip install -U sphinx-rtd-theme - - pip install -U pyyaml - - sphinx-build -b html . 
../public - artifacts: - paths: - - public - rules: - - if: $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH From b4c129003c9df93935f3c1419f4df68712ff8fc0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 15:49:05 -0700 Subject: [PATCH 02/56] python dependencies --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f842563..865b026 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,13 @@ jobs: - uses: actions/checkout@v3 with: submodules: recursive + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' # Specify the Python version you need + - name: Install dependencies + run: | + pip install black - name: Run format check run: | source env/bash @@ -44,6 +51,13 @@ jobs: - uses: actions/checkout@v3 with: submodules: recursive + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' # Specify the Python version you need + - name: Install dependencies + run: | + pip install numpy matplotlib h5py - name: Set up environment run: | source env/bash @@ -80,6 +94,13 @@ jobs: - uses: actions/checkout@v3 with: submodules: recursive + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' # Specify the Python version you need + - name: Install dependencies + run: | + pip install numpy matplotlib h5py - name: Set up environment run: | source env/bash From efc7a354c6cc94fc3b4fe4aaa17fe1b16f2a3ded Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 15:53:14 -0700 Subject: [PATCH 03/56] different dependnecies approach --- .github/workflows/ci.yml | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 865b026..035d7d6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,13 +51,15 @@ jobs: - uses: actions/checkout@v3 with: 
submodules: recursive - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' # Specify the Python version you need - name: Install dependencies run: | - pip install numpy matplotlib h5py + sudo apt-get update -qq + sudo apt-get install -qq --no-install-recommends tzdata + sudo apt-get install -qq git + sudo apt-get install -qq make cmake g++ + sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev + sudo apt-get install -qq openssh-client + sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - name: Set up environment run: | source env/bash @@ -94,13 +96,15 @@ jobs: - uses: actions/checkout@v3 with: submodules: recursive - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' # Specify the Python version you need - name: Install dependencies run: | - pip install numpy matplotlib h5py + sudo apt-get update -qq + sudo apt-get install -qq --no-install-recommends tzdata + sudo apt-get install -qq git + sudo apt-get install -qq make cmake g++ + sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev + sudo apt-get install -qq openssh-client + sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - name: Set up environment run: | source env/bash From 43378702da897f55973512f6215a9ca309c2dd79 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 15:54:39 -0700 Subject: [PATCH 04/56] No redundant doc jobs --- .github/workflows/ci.yml | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 035d7d6..228ae01 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -132,26 +132,3 @@ jobs: name: figs path: tst/figs retention-days: 3 - - ci_doc_job: - if: > - ${{ !contains(github.event.pull_request.title, 'Draft:') && - !contains(github.event.pull_request.title, 'WIP:') && - github.event.pull_request.head.ref != github.event.repository.default_branch }} - 
runs-on: ubuntu-latest - container: - image: python:latest - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Build documentation - run: | - cd doc - pip install -U sphinx-rtd-theme pyyaml - sphinx-build -b html . ../public - - name: Upload documentation - uses: actions/upload-artifact@v3 - with: - name: docs - path: public From da94f1f4c08be0ed4c5e8c9800ccff77da9b7d5d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 16:04:00 -0700 Subject: [PATCH 05/56] try getting nvidia on gpu runner --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 228ae01..03eaf29 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -105,6 +105,8 @@ jobs: sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev sudo apt-get install -qq openssh-client sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib + sudo apt-get install -qq -y cuda + sudo apt-get install -qq -y nvidia-driver-525 - name: Set up environment run: | source env/bash @@ -116,7 +118,7 @@ jobs: --make_nproc=32 \ --cmake=-DARTEMIS_ENABLE_CUDA=On \ --cmake=-DKokkos_ARCH_VOLTA70=On \ - --cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ + --cmake=-DCMAKE_CXX_COMPILER=$GITHUB_WORKSPACE/external/parthenon/external/Kokkos/bin/nvcc_wrapper \ --log_file=ci_gpu_log.txt - name: Upload GPU test log if: always() From 20cf3254b5a974f84d5f6dddbb1f44851fe94565 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 16:13:15 -0700 Subject: [PATCH 06/56] Fewer nproc during make --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03eaf29..1cc3e24 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,7 +68,7 @@ jobs: cd tst python3 run_tests.py regression.suite \ --save_build \ - --make_nproc=32 \ 
+ --make_nproc=4 \ --cmake=-DCMAKE_C_COMPILER=gcc \ --cmake=-DCMAKE_CXX_COMPILER=g++ \ --log_file=ci_cpu_log.txt From d7962da983f324cbdbfee692a1fec52e6bd4197e Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 16:27:31 -0700 Subject: [PATCH 07/56] Remove GPU jobs for now --- .github/workflows/ci.yml | 94 +++++++++++++++++------------------ .github/workflows/nightly.yml | 87 +++++++++++++++++--------------- 2 files changed, 95 insertions(+), 86 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1cc3e24..85a2702 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,50 +87,50 @@ jobs: path: tst/figs retention-days: 3 - ci_gpu_job: - if: > - ${{ !contains(github.event.pull_request.title, 'Draft:') && - !contains(github.event.pull_request.title, 'WIP:') }} - runs-on: ubuntu-latest # Update to a runner with GPU support if available - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Install dependencies - run: | - sudo apt-get update -qq - sudo apt-get install -qq --no-install-recommends tzdata - sudo apt-get install -qq git - sudo apt-get install -qq make cmake g++ - sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev - sudo apt-get install -qq openssh-client - sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - sudo apt-get install -qq -y cuda - sudo apt-get install -qq -y nvidia-driver-525 - - name: Set up environment - run: | - source env/bash - - name: Run GPU tests - run: | - cd tst - python3 run_tests.py gpu.suite \ - --save_build \ - --make_nproc=32 \ - --cmake=-DARTEMIS_ENABLE_CUDA=On \ - --cmake=-DKokkos_ARCH_VOLTA70=On \ - --cmake=-DCMAKE_CXX_COMPILER=$GITHUB_WORKSPACE/external/parthenon/external/Kokkos/bin/nvcc_wrapper \ - --log_file=ci_gpu_log.txt - - name: Upload GPU test log - if: always() - uses: actions/upload-artifact@v3 - with: - name: ci_gpu_log.txt - path: tst/ci_gpu_log.txt - retention-days: 3 - - name: Upload figures - 
if: always() - uses: actions/upload-artifact@v3 - with: - name: figs - path: tst/figs - retention-days: 3 +# ci_gpu_job: +# if: > +# ${{ !contains(github.event.pull_request.title, 'Draft:') && +# !contains(github.event.pull_request.title, 'WIP:') }} +# runs-on: ubuntu-latest # Update to a runner with GPU support if available +# steps: +# - uses: actions/checkout@v3 +# with: +# submodules: recursive +# - name: Install dependencies +# run: | +# sudo apt-get update -qq +# sudo apt-get install -qq --no-install-recommends tzdata +# sudo apt-get install -qq git +# sudo apt-get install -qq make cmake g++ +# sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev +# sudo apt-get install -qq openssh-client +# sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib +# sudo apt-get install -qq -y cuda +# sudo apt-get install -qq -y nvidia-driver-525 +# - name: Set up environment +# run: | +# source env/bash +# - name: Run GPU tests +# run: | +# cd tst +# python3 run_tests.py gpu.suite \ +# --save_build \ +# --make_nproc=32 \ +# --cmake=-DARTEMIS_ENABLE_CUDA=On \ +# --cmake=-DKokkos_ARCH_VOLTA70=On \ +# --cmake=-DCMAKE_CXX_COMPILER=$GITHUB_WORKSPACE/external/parthenon/external/Kokkos/bin/nvcc_wrapper \ +# --log_file=ci_gpu_log.txt +# - name: Upload GPU test log +# if: always() +# uses: actions/upload-artifact@v3 +# with: +# name: ci_gpu_log.txt +# path: tst/ci_gpu_log.txt +# retention-days: 3 +# - name: Upload figures +# if: always() +# uses: actions/upload-artifact@v3 +# with: +# name: figs +# path: tst/figs +# retention-days: 3 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index db65130..ebeec0b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -26,6 +26,15 @@ jobs: - uses: actions/checkout@v3 with: submodules: recursive + - name: Install dependencies + run: | + sudo apt-get update -qq + sudo apt-get install -qq --no-install-recommends tzdata + sudo apt-get install -qq git + sudo apt-get 
install -qq make cmake g++ + sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev + sudo apt-get install -qq openssh-client + sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - name: Update Parthenon submodule run: | cd external/parthenon @@ -58,42 +67,42 @@ jobs: path: tst/figs retention-days: 3 - ci_nightly_gpu_job: - runs-on: ubuntu-latest # Update to a runner with GPU support if available - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Update Parthenon submodule - run: | - cd external/parthenon - git pull origin develop - echo "==> Current Parthenon commit hash:" - git rev-parse HEAD - - name: Set up environment - run: | - source env/bash - - name: Run GPU tests - run: | - cd tst - python3 run_tests.py gpu.suite \ - --save_build \ - --make_nproc=32 \ - --cmake=-DARTEMIS_ENABLE_CUDA=On \ - --cmake=-DKokkos_ARCH_VOLTA70=On \ - --cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ - --log_file=ci_gpu_log.txt - - name: Upload GPU test log - if: always() - uses: actions/upload-artifact@v3 - with: - name: ci_gpu_log.txt - path: tst/ci_gpu_log.txt - retention-days: 3 - - name: Upload figures - if: always() - uses: actions/upload-artifact@v3 - with: - name: figs - path: tst/figs - retention-days: 3 +# ci_nightly_gpu_job: +# runs-on: ubuntu-latest # Update to a runner with GPU support if available +# steps: +# - uses: actions/checkout@v3 +# with: +# submodules: recursive +# - name: Update Parthenon submodule +# run: | +# cd external/parthenon +# git pull origin develop +# echo "==> Current Parthenon commit hash:" +# git rev-parse HEAD +# - name: Set up environment +# run: | +# source env/bash +# - name: Run GPU tests +# run: | +# cd tst +# python3 run_tests.py gpu.suite \ +# --save_build \ +# --make_nproc=32 \ +# --cmake=-DARTEMIS_ENABLE_CUDA=On \ +# --cmake=-DKokkos_ARCH_VOLTA70=On \ +# --cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ +# 
--log_file=ci_gpu_log.txt +# - name: Upload GPU test log +# if: always() +# uses: actions/upload-artifact@v3 +# with: +# name: ci_gpu_log.txt +# path: tst/ci_gpu_log.txt +# retention-days: 3 +# - name: Upload figures +# if: always() +# uses: actions/upload-artifact@v3 +# with: +# name: figs +# path: tst/figs +# retention-days: 3 From c8319c32c620fdd11fca4b82efb2ff45a2b7580b Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 19:47:56 -0700 Subject: [PATCH 08/56] Smaller disk problems --- tst/scripts/disk/disk.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index 3058751..605e693 100644 --- a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -60,6 +60,9 @@ def run(**kwargs): g, int(10 * gam), b ), "problem/polytropic_index={:.2f}".format(gam), + "parthenon/mesh/nx1=64", + "parthenon/mesh/nx2=64", + "parthenon/mesh/nx3=64", ], ) artemis.run( From d71d7a1fb178890b5ab2e6c0d103e794c763ad53 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 19:56:55 -0700 Subject: [PATCH 09/56] Only change cartesian disk --- tst/scripts/disk/disk.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index 605e693..a03c367 100644 --- a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -47,6 +47,13 @@ def run(**kwargs): for b in _bc: for g in _geom: bc_args = [] + geom_args = [] + if g == "cart": + geom_args = [ + "parthenon/mesh/nx1=64", + "parthenon/mesh/nx2=64", + "parthenon/mesh/nx3=64", + ] for d in directions[g]: bc_args.append("parthenon/mesh/i{}_bc={}".format(d, b)) bc_args.append("parthenon/mesh/o{}_bc={}".format(d, b)) @@ -60,10 +67,7 @@ def run(**kwargs): g, int(10 * gam), b ), "problem/polytropic_index={:.2f}".format(gam), - "parthenon/mesh/nx1=64", - "parthenon/mesh/nx2=64", - "parthenon/mesh/nx3=64", - ], + ] + geom_args, ) artemis.run( _nranks, From c4371f26ece9247983cf4f34ee0f9a07cd05d1a4 Mon Sep 
17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 20:29:54 -0700 Subject: [PATCH 10/56] Working on this --- run_local_ci.py | 124 +++++++++++++++++++++++++++++++++++++++ tst/scripts/disk/disk.py | 3 +- 2 files changed, 126 insertions(+), 1 deletion(-) create mode 100755 run_local_ci.py diff --git a/run_local_ci.py b/run_local_ci.py new file mode 100755 index 0000000..d10d327 --- /dev/null +++ b/run_local_ci.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# ======================================================================================== +# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. 
+# ======================================================================================== + +# This file was created in part or in whole by one of OpenAI's generative AI models + +import sys +import os +import subprocess +import requests +import json +import tempfile +import shutil + +# Replace with your GitHub username and repository name +#GITHUB_USER = '' +#GITHUB_REPO = '' + +# The personal access token (PAT) with 'repo:status' permission +# Store your token securely and do not hardcode it in the script +GITHUB_TOKEN = os.environ.get('ARTEMIS_GITHUB_TOKEN') + +if GITHUB_TOKEN is None: + print("Error: GITHUB_TOKEN environment variable is not set.") + sys.exit(1) + +def get_pr_info(pr_number): + url = f'https://api.github.com/repos/lanl/artemis/pulls/{pr_number}' + headers = {'Authorization': f'token {GITHUB_TOKEN}'} + response = requests.get(url, headers=headers) + if response.status_code != 200: + print(f"Error fetching PR info: {response.status_code}") + print(response.text) + sys.exit(1) + return response.json() + +def update_status(commit_sha, state, description, context='chicoma'): + url = f'https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}' + headers = {'Authorization': f'token {GITHUB_TOKEN}'} + data = { + 'state': state, + 'description': description, + 'context': context + } + response = requests.post(url, headers=headers, data=json.dumps(data)) + if response.status_code != 201: + print(f"Error setting status: {response.status_code}") + print(response.text) + sys.exit(1) + +def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + # Create a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + print(f"Using temporary directory: {temp_dir}") + + # Clone the repository into the temporary directory + subprocess.run(['git', 'clone', head_repo, temp_dir], check=True) + os.chdir(temp_dir) + + # Checkout the PR branch + subprocess.run(['git', 'fetch', 'origin', head_ref], check=True) + subprocess.run(['git', 
'checkout', head_ref], check=True) + + # Update submodules + subprocess.run(['git', 'submodule', 'update', '--init', '--recursive'], check=True) + + # Run the tests + try: + os.chdir(os.path.join(temp_dir, 'tst')) + test_command = [ + 'python3', 'run_tests.py', 'regression.suite', + '--save_build', + '--make_nproc=4', + '--cmake=-DCMAKE_C_COMPILER=gcc', + '--cmake=-DCMAKE_CXX_COMPILER=g++', + '--log_file=ci_cpu_log.txt' + ] + subprocess.run(test_command, check=True) + # Update the status to success + update_status(commit_sha, 'success', 'All tests passed.') + print("Tests passed.") + except subprocess.CalledProcessError: + # Update the status to failure + update_status(commit_sha, 'failure', 'Tests failed.') + print("Tests failed.") + sys.exit(1) + +def main(): + if len(sys.argv) != 2: + print("Usage: run_ci.py [PR number]") + sys.exit(1) + + pr_number = sys.argv[1] + + # Fetch PR information + pr_info = get_pr_info(pr_number) + head_repo = pr_info['head']['repo']['clone_url'] + head_ref = pr_info['head']['ref'] + commit_sha = pr_info['head']['sha'] + + print(f"PR #{pr_number} info:") + print(f"- Repository: {head_repo}") + print(f"- Branch: {head_ref}") + print(f"- Commit SHA: {commit_sha}") + + # Update status to 'pending' + update_status(commit_sha, 'pending', 'CI tests are running...') + + # Run the tests in a temporary directory + run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) + +if __name__ == '__main__': + main() + diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index a03c367..da53603 100644 --- a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -67,7 +67,8 @@ def run(**kwargs): g, int(10 * gam), b ), "problem/polytropic_index={:.2f}".format(gam), - ] + geom_args, + ] + + geom_args, ) artemis.run( _nranks, From 4fc31f54e3cb525b56f76dfc0048d21738dc2dea Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 21:46:52 -0700 Subject: [PATCH 11/56] Formatting --- run_local_ci.py | 68 
++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/run_local_ci.py b/run_local_ci.py index d10d327..bec82af 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -23,20 +23,21 @@ import shutil # Replace with your GitHub username and repository name -#GITHUB_USER = '' -#GITHUB_REPO = '' +# GITHUB_USER = '' +# GITHUB_REPO = '' # The personal access token (PAT) with 'repo:status' permission # Store your token securely and do not hardcode it in the script -GITHUB_TOKEN = os.environ.get('ARTEMIS_GITHUB_TOKEN') +GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") if GITHUB_TOKEN is None: print("Error: GITHUB_TOKEN environment variable is not set.") sys.exit(1) + def get_pr_info(pr_number): - url = f'https://api.github.com/repos/lanl/artemis/pulls/{pr_number}' - headers = {'Authorization': f'token {GITHUB_TOKEN}'} + url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} response = requests.get(url, headers=headers) if response.status_code != 200: print(f"Error fetching PR info: {response.status_code}") @@ -44,57 +45,60 @@ def get_pr_info(pr_number): sys.exit(1) return response.json() -def update_status(commit_sha, state, description, context='chicoma'): - url = f'https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}' - headers = {'Authorization': f'token {GITHUB_TOKEN}'} - data = { - 'state': state, - 'description': description, - 'context': context - } + +def update_status(commit_sha, state, description, context="Continuous Integration / chicoma-gpu"): + url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} + data = {"state": state, "description": description, "context": context} response = requests.post(url, headers=headers, data=json.dumps(data)) if response.status_code != 201: print(f"Error setting status: {response.status_code}") print(response.text) 
sys.exit(1) + def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: print(f"Using temporary directory: {temp_dir}") # Clone the repository into the temporary directory - subprocess.run(['git', 'clone', head_repo, temp_dir], check=True) + subprocess.run(["git", "clone", head_repo, temp_dir], check=True) os.chdir(temp_dir) # Checkout the PR branch - subprocess.run(['git', 'fetch', 'origin', head_ref], check=True) - subprocess.run(['git', 'checkout', head_ref], check=True) + subprocess.run(["git", "fetch", "origin", head_ref], check=True) + subprocess.run(["git", "checkout", head_ref], check=True) # Update submodules - subprocess.run(['git', 'submodule', 'update', '--init', '--recursive'], check=True) + subprocess.run( + ["git", "submodule", "update", "--init", "--recursive"], check=True + ) # Run the tests try: - os.chdir(os.path.join(temp_dir, 'tst')) + os.chdir(os.path.join(temp_dir, "tst")) test_command = [ - 'python3', 'run_tests.py', 'regression.suite', - '--save_build', - '--make_nproc=4', - '--cmake=-DCMAKE_C_COMPILER=gcc', - '--cmake=-DCMAKE_CXX_COMPILER=g++', - '--log_file=ci_cpu_log.txt' + "python3", + "run_tests.py", + "regression.suite", + "--save_build", + "--make_nproc=4", + "--cmake=-DCMAKE_C_COMPILER=gcc", + "--cmake=-DCMAKE_CXX_COMPILER=g++", + "--log_file=ci_cpu_log.txt", ] subprocess.run(test_command, check=True) # Update the status to success - update_status(commit_sha, 'success', 'All tests passed.') + update_status(commit_sha, "success", "All tests passed.") print("Tests passed.") except subprocess.CalledProcessError: # Update the status to failure - update_status(commit_sha, 'failure', 'Tests failed.') + update_status(commit_sha, "failure", "Tests failed.") print("Tests failed.") sys.exit(1) + def main(): if len(sys.argv) != 2: print("Usage: run_ci.py [PR number]") @@ -104,9 +108,9 @@ def main(): # Fetch PR information pr_info = 
get_pr_info(pr_number) - head_repo = pr_info['head']['repo']['clone_url'] - head_ref = pr_info['head']['ref'] - commit_sha = pr_info['head']['sha'] + head_repo = pr_info["head"]["repo"]["clone_url"] + head_ref = pr_info["head"]["ref"] + commit_sha = pr_info["head"]["sha"] print(f"PR #{pr_number} info:") print(f"- Repository: {head_repo}") @@ -114,11 +118,11 @@ def main(): print(f"- Commit SHA: {commit_sha}") # Update status to 'pending' - update_status(commit_sha, 'pending', 'CI tests are running...') + update_status(commit_sha, "pending", "CI tests are running...") # Run the tests in a temporary directory run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() From 32edddf0c3492096da1c695db5b9d7330a7b938c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 22:48:51 -0700 Subject: [PATCH 12/56] Only two ranks for MPI --- run_local_ci.py | 12 ++++++++++-- tst/scripts/advection/advection_mpi.py | 2 +- tst/scripts/binary/binary_mpi.py | 2 +- tst/scripts/binary_adi/binary_adi_mpi.py | 2 +- tst/scripts/collisions/collisions_mpi.py | 2 +- tst/scripts/coords/blast_mpi.py | 2 +- tst/scripts/diffusion/alpha_disk_mpi.py | 2 +- tst/scripts/diffusion/thermal_diffusion_mpi.py | 2 +- tst/scripts/diffusion/viscous_diffusion_mpi.py | 2 +- tst/scripts/disk/disk_mpi.py | 2 +- tst/scripts/disk_nbody/disk_nbody_mpi.py | 2 +- tst/scripts/drag/drag_mpi.py | 2 +- tst/scripts/hydro/linwave_mpi.py | 2 +- tst/scripts/nbody/nbody_mpi.py | 2 +- tst/scripts/ssheet/ssheet_mpi.py | 2 +- 15 files changed, 24 insertions(+), 16 deletions(-) diff --git a/run_local_ci.py b/run_local_ci.py index bec82af..d6d6f19 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -46,7 +46,9 @@ def get_pr_info(pr_number): return response.json() -def update_status(commit_sha, state, description, context="Continuous Integration / chicoma-gpu"): +def update_status( + commit_sha, state, description, context="Continuous 
Integration / chicoma-gpu" +): url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" headers = {"Authorization": f"token {GITHUB_TOKEN}"} data = {"state": state, "description": description, "context": context} @@ -75,11 +77,17 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): ["git", "submodule", "update", "--init", "--recursive"], check=True ) + # Set up environment + # Run the tests try: os.chdir(os.path.join(temp_dir, "tst")) test_command = [ - "python3", + "bash", + "-c", + "source", + "../env/bash", + "&&" "python3", "run_tests.py", "regression.suite", "--save_build", diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index b724605..d8c1df6 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -23,7 +23,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = 4 +advection._nranks = 2 advection._file_id = "advection_mpi" diff --git a/tst/scripts/binary/binary_mpi.py b/tst/scripts/binary/binary_mpi.py index e88f41c..20139c8 100644 --- a/tst/scripts/binary/binary_mpi.py +++ b/tst/scripts/binary/binary_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = 16 +binary._nranks = 2 binary._file_id = "binary_mpi" diff --git a/tst/scripts/binary_adi/binary_adi_mpi.py b/tst/scripts/binary_adi/binary_adi_mpi.py index 2c7032e..defa022 100644 --- a/tst/scripts/binary_adi/binary_adi_mpi.py +++ b/tst/scripts/binary_adi/binary_adi_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = 16 +binary._nranks = 2 binary._file_id = "binary_mpi" diff --git a/tst/scripts/collisions/collisions_mpi.py b/tst/scripts/collisions/collisions_mpi.py index d4d1992..9dd2ea0 100644 --- a/tst/scripts/collisions/collisions_mpi.py +++ 
b/tst/scripts/collisions/collisions_mpi.py @@ -21,7 +21,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -collisions._nranks = 16 +collisions._nranks = 2 collisions._file_id = "collisions_mpi" diff --git a/tst/scripts/coords/blast_mpi.py b/tst/scripts/coords/blast_mpi.py index 0d2f5d5..5cf0e13 100644 --- a/tst/scripts/coords/blast_mpi.py +++ b/tst/scripts/coords/blast_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -blast._nranks = 8 +blast._nranks = 2 blast._file_id = "blast_mpi" diff --git a/tst/scripts/diffusion/alpha_disk_mpi.py b/tst/scripts/diffusion/alpha_disk_mpi.py index 3d899e8..3db6056 100644 --- a/tst/scripts/diffusion/alpha_disk_mpi.py +++ b/tst/scripts/diffusion/alpha_disk_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -alpha_disk._nranks = 4 +alpha_disk._nranks = 2 alpha_disk._file_id = "alpha_disk_mpi" diff --git a/tst/scripts/diffusion/thermal_diffusion_mpi.py b/tst/scripts/diffusion/thermal_diffusion_mpi.py index 93c5a30..13d6623 100644 --- a/tst/scripts/diffusion/thermal_diffusion_mpi.py +++ b/tst/scripts/diffusion/thermal_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -thermal_diffusion._nranks = 8 +thermal_diffusion._nranks = 2 thermal_diffusion._file_id = "thermal_diffusion_mpi" diff --git a/tst/scripts/diffusion/viscous_diffusion_mpi.py b/tst/scripts/diffusion/viscous_diffusion_mpi.py index 388ad91..faa42b9 100644 --- a/tst/scripts/diffusion/viscous_diffusion_mpi.py +++ b/tst/scripts/diffusion/viscous_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -viscous_diffusion._nranks = 4 +viscous_diffusion._nranks = 2 viscous_diffusion._nd = [2] 
viscous_diffusion._file_id = "viscous_diffusion_mpi" diff --git a/tst/scripts/disk/disk_mpi.py b/tst/scripts/disk/disk_mpi.py index d827254..16c5ded 100644 --- a/tst/scripts/disk/disk_mpi.py +++ b/tst/scripts/disk/disk_mpi.py @@ -23,7 +23,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = 8 +disk._nranks = 2 disk._file_id = "disk_mpi" diff --git a/tst/scripts/disk_nbody/disk_nbody_mpi.py b/tst/scripts/disk_nbody/disk_nbody_mpi.py index eefd547..b4a5e04 100644 --- a/tst/scripts/disk_nbody/disk_nbody_mpi.py +++ b/tst/scripts/disk_nbody/disk_nbody_mpi.py @@ -23,7 +23,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = 8 +disk._nranks = 2 disk._file_id = "disk_nbody_mpi" diff --git a/tst/scripts/drag/drag_mpi.py b/tst/scripts/drag/drag_mpi.py index 50d15e5..90061df 100644 --- a/tst/scripts/drag/drag_mpi.py +++ b/tst/scripts/drag/drag_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -drag._nranks = 4 +drag._nranks = 2 drag._file_id = "drag_mpi" diff --git a/tst/scripts/hydro/linwave_mpi.py b/tst/scripts/hydro/linwave_mpi.py index b013ec8..b2e4017 100644 --- a/tst/scripts/hydro/linwave_mpi.py +++ b/tst/scripts/hydro/linwave_mpi.py @@ -23,7 +23,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -linwave._nranks = 4 +linwave._nranks = 2 linwave._file_id = "linear_wave_mpi" diff --git a/tst/scripts/nbody/nbody_mpi.py b/tst/scripts/nbody/nbody_mpi.py index a121cee..471c026 100644 --- a/tst/scripts/nbody/nbody_mpi.py +++ b/tst/scripts/nbody/nbody_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -nbody._nranks = 8 +nbody._nranks = 2 nbody._file_id = "nbody_mpi" diff --git a/tst/scripts/ssheet/ssheet_mpi.py 
b/tst/scripts/ssheet/ssheet_mpi.py index d79d278..b042a58 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -ssheet._nranks = 8 +ssheet._nranks = 2 ssheet._file_id = "ssheet_mpi" From 46f18798cbdef9a8b6b26c2bb1d9b7f312d33efb Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 22:50:50 -0700 Subject: [PATCH 13/56] Oops typo --- run_local_ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run_local_ci.py b/run_local_ci.py index d6d6f19..97ded04 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -87,7 +87,8 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): "-c", "source", "../env/bash", - "&&" "python3", + "&&", + "python3", "run_tests.py", "regression.suite", "--save_build", From 1becc6e1aca800d8a0e1d5cfe7e4627128a50ce0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 23:00:13 -0700 Subject: [PATCH 14/56] Testing CPU count --- run_local_ci.py | 16 ++++------------ tst/scripts/advection/advection_mpi.py | 3 ++- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/run_local_ci.py b/run_local_ci.py index 97ded04..48c90e6 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -77,24 +77,16 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): ["git", "submodule", "update", "--init", "--recursive"], check=True ) - # Set up environment - # Run the tests try: os.chdir(os.path.join(temp_dir, "tst")) test_command = [ "bash", "-c", - "source", - "../env/bash", - "&&", - "python3", - "run_tests.py", - "regression.suite", - "--save_build", - "--make_nproc=4", - "--cmake=-DCMAKE_C_COMPILER=gcc", - "--cmake=-DCMAKE_CXX_COMPILER=g++", + "source ../env/bash && python3 run_tests.py regression.suite " + "--save_build --make_nproc=4 " + "--cmake=-DCMAKE_C_COMPILER=gcc " + "--cmake=-DCMAKE_CXX_COMPILER=g++ " 
"--log_file=ci_cpu_log.txt", ] subprocess.run(test_command, check=True) diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index d8c1df6..ab9f5e8 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -18,12 +18,13 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis import scripts.advection.advection as advection logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = 2 +advection._nranks = min(os.cpu_count(), 4) advection._file_id = "advection_mpi" From 9e661a0aedb1562ecaeb5946db8080721741e32c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 00:07:31 -0700 Subject: [PATCH 15/56] binary test takes too long on two slow ranks -- leave for GPU --- tst/scripts/advection/advection_mpi.py | 2 +- tst/scripts/binary/binary_mpi.py | 2 +- tst/scripts/binary_adi/binary_adi_mpi.py | 2 +- tst/scripts/collisions/collisions_mpi.py | 2 +- tst/scripts/coords/blast_mpi.py | 2 +- tst/scripts/diffusion/alpha_disk_mpi.py | 2 +- tst/scripts/diffusion/thermal_diffusion_mpi.py | 2 +- tst/scripts/diffusion/viscous_diffusion_mpi.py | 2 +- tst/scripts/disk/disk_mpi.py | 2 +- tst/scripts/disk_nbody/disk_nbody_mpi.py | 2 +- tst/scripts/drag/drag_mpi.py | 2 +- tst/scripts/hydro/linwave_mpi.py | 2 +- tst/scripts/nbody/nbody_mpi.py | 2 +- tst/scripts/ssheet/ssheet_mpi.py | 2 +- tst/suites/parallel.suite | 4 ++-- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index ab9f5e8..9ed23e3 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -24,7 +24,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = min(os.cpu_count(), 4) +advection._nranks = min(max(2, os.cpu_count()), 4) advection._file_id = "advection_mpi" diff --git 
a/tst/scripts/binary/binary_mpi.py b/tst/scripts/binary/binary_mpi.py index 20139c8..5707807 100644 --- a/tst/scripts/binary/binary_mpi.py +++ b/tst/scripts/binary/binary_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = 2 +binary._nranks = min(max(2, os.cpu_count()), 16) binary._file_id = "binary_mpi" diff --git a/tst/scripts/binary_adi/binary_adi_mpi.py b/tst/scripts/binary_adi/binary_adi_mpi.py index defa022..58881c5 100644 --- a/tst/scripts/binary_adi/binary_adi_mpi.py +++ b/tst/scripts/binary_adi/binary_adi_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = 2 +binary._nranks = min(max(2, os.cpu_count()), 16) binary._file_id = "binary_mpi" diff --git a/tst/scripts/collisions/collisions_mpi.py b/tst/scripts/collisions/collisions_mpi.py index 9dd2ea0..3a99d89 100644 --- a/tst/scripts/collisions/collisions_mpi.py +++ b/tst/scripts/collisions/collisions_mpi.py @@ -21,7 +21,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -collisions._nranks = 2 +collisions._nranks = min(max(2, os.cpu_count()), 16) collisions._file_id = "collisions_mpi" diff --git a/tst/scripts/coords/blast_mpi.py b/tst/scripts/coords/blast_mpi.py index 5cf0e13..aa7c49a 100644 --- a/tst/scripts/coords/blast_mpi.py +++ b/tst/scripts/coords/blast_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -blast._nranks = 2 +blast._nranks = min(max(2, os.cpu_count()), 8) blast._file_id = "blast_mpi" diff --git a/tst/scripts/diffusion/alpha_disk_mpi.py b/tst/scripts/diffusion/alpha_disk_mpi.py index 3db6056..2ae5d28 100644 --- a/tst/scripts/diffusion/alpha_disk_mpi.py +++ b/tst/scripts/diffusion/alpha_disk_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) 
logging.getLogger("matplotlib").setLevel(logging.WARNING) -alpha_disk._nranks = 2 +alpha_disk._nranks = min(max(2, os.cpu_count()), 4) alpha_disk._file_id = "alpha_disk_mpi" diff --git a/tst/scripts/diffusion/thermal_diffusion_mpi.py b/tst/scripts/diffusion/thermal_diffusion_mpi.py index 13d6623..29a440b 100644 --- a/tst/scripts/diffusion/thermal_diffusion_mpi.py +++ b/tst/scripts/diffusion/thermal_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -thermal_diffusion._nranks = 2 +thermal_diffusion._nranks = min(max(2, os.cpu_count()), 8) thermal_diffusion._file_id = "thermal_diffusion_mpi" diff --git a/tst/scripts/diffusion/viscous_diffusion_mpi.py b/tst/scripts/diffusion/viscous_diffusion_mpi.py index faa42b9..d08035a 100644 --- a/tst/scripts/diffusion/viscous_diffusion_mpi.py +++ b/tst/scripts/diffusion/viscous_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -viscous_diffusion._nranks = 2 +viscous_diffusion._nranks = min(max(2, os.cpu_count()), 4) viscous_diffusion._nd = [2] viscous_diffusion._file_id = "viscous_diffusion_mpi" diff --git a/tst/scripts/disk/disk_mpi.py b/tst/scripts/disk/disk_mpi.py index 16c5ded..8ad7295 100644 --- a/tst/scripts/disk/disk_mpi.py +++ b/tst/scripts/disk/disk_mpi.py @@ -23,7 +23,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = 2 +disk._nranks = min(max(2, os.cpu_count()), 8) disk._file_id = "disk_mpi" diff --git a/tst/scripts/disk_nbody/disk_nbody_mpi.py b/tst/scripts/disk_nbody/disk_nbody_mpi.py index b4a5e04..fee68ba 100644 --- a/tst/scripts/disk_nbody/disk_nbody_mpi.py +++ b/tst/scripts/disk_nbody/disk_nbody_mpi.py @@ -23,7 +23,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = 2 
+disk._nranks = min(max(2, os.cpu_count()), 8) disk._file_id = "disk_nbody_mpi" diff --git a/tst/scripts/drag/drag_mpi.py b/tst/scripts/drag/drag_mpi.py index 90061df..27ba082 100644 --- a/tst/scripts/drag/drag_mpi.py +++ b/tst/scripts/drag/drag_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -drag._nranks = 2 +drag._nranks = min(max(2, os.cpu_count()), 4) drag._file_id = "drag_mpi" diff --git a/tst/scripts/hydro/linwave_mpi.py b/tst/scripts/hydro/linwave_mpi.py index b2e4017..c18191f 100644 --- a/tst/scripts/hydro/linwave_mpi.py +++ b/tst/scripts/hydro/linwave_mpi.py @@ -23,7 +23,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -linwave._nranks = 2 +linwave._nranks = min(max(2, os.cpu_count()), 4) linwave._file_id = "linear_wave_mpi" diff --git a/tst/scripts/nbody/nbody_mpi.py b/tst/scripts/nbody/nbody_mpi.py index 471c026..efb6cbf 100644 --- a/tst/scripts/nbody/nbody_mpi.py +++ b/tst/scripts/nbody/nbody_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -nbody._nranks = 2 +nbody._nranks = min(max(2, os.cpu_count()), 8) nbody._file_id = "nbody_mpi" diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index b042a58..2708a45 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -ssheet._nranks = 2 +ssheet._nranks = min(max(2, os.cpu_count()), 8) ssheet._file_id = "ssheet_mpi" diff --git a/tst/suites/parallel.suite b/tst/suites/parallel.suite index d755c83..777de21 100644 --- a/tst/suites/parallel.suite +++ b/tst/suites/parallel.suite @@ -14,8 +14,8 @@ # parallel suite advection/advection_mpi -binary/binary_mpi -binary_adi/binary_adi_mpi +#binary/binary_mpi +#binary_adi/binary_adi_mpi 
coords/blast_mpi disk/disk_mpi nbody/nbody_mpi From b9b610b225fa6b8a04801bb1b7bb125c2fbbe3e4 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 00:10:51 -0700 Subject: [PATCH 16/56] Oops missing os --- tst/scripts/collisions/collisions_mpi.py | 1 + tst/scripts/disk/disk_mpi.py | 1 + tst/scripts/disk_nbody/disk_nbody_mpi.py | 1 + tst/scripts/hydro/linwave_mpi.py | 1 + tst/suites/parallel.suite | 2 -- 5 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tst/scripts/collisions/collisions_mpi.py b/tst/scripts/collisions/collisions_mpi.py index 3a99d89..1812019 100644 --- a/tst/scripts/collisions/collisions_mpi.py +++ b/tst/scripts/collisions/collisions_mpi.py @@ -16,6 +16,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis import scripts.collisions.collisions as collisions diff --git a/tst/scripts/disk/disk_mpi.py b/tst/scripts/disk/disk_mpi.py index 8ad7295..989ce88 100644 --- a/tst/scripts/disk/disk_mpi.py +++ b/tst/scripts/disk/disk_mpi.py @@ -16,6 +16,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis import scripts.disk.disk as disk diff --git a/tst/scripts/disk_nbody/disk_nbody_mpi.py b/tst/scripts/disk_nbody/disk_nbody_mpi.py index fee68ba..e675495 100644 --- a/tst/scripts/disk_nbody/disk_nbody_mpi.py +++ b/tst/scripts/disk_nbody/disk_nbody_mpi.py @@ -16,6 +16,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis import scripts.disk_nbody.disk_nbody as disk diff --git a/tst/scripts/hydro/linwave_mpi.py b/tst/scripts/hydro/linwave_mpi.py index c18191f..cfe0be6 100644 --- a/tst/scripts/hydro/linwave_mpi.py +++ b/tst/scripts/hydro/linwave_mpi.py @@ -18,6 +18,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis import scripts.hydro.linwave as linwave diff --git a/tst/suites/parallel.suite b/tst/suites/parallel.suite index 777de21..f2356db 100644 --- 
a/tst/suites/parallel.suite +++ b/tst/suites/parallel.suite @@ -14,8 +14,6 @@ # parallel suite advection/advection_mpi -#binary/binary_mpi -#binary_adi/binary_adi_mpi coords/blast_mpi disk/disk_mpi nbody/nbody_mpi From 5c24d952a48e17e5e6aa2fd25baf9b0fae13d1f1 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 00:15:56 -0700 Subject: [PATCH 17/56] Update names --- .github/workflows/ci.yml | 52 ++--------------------------------- .github/workflows/nightly.yml | 42 +--------------------------- 2 files changed, 3 insertions(+), 91 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85a2702..dd4ea18 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ on: types: [opened, synchronize, reopened] jobs: - ci_format_job: + format: if: > ${{ !contains(github.event.pull_request.title, 'Draft:') && !contains(github.event.pull_request.title, 'WIP:') }} @@ -42,7 +42,7 @@ jobs: VERBOSE=1 ./style/format.sh git diff --exit-code --ignore-submodules - ci_cpu_job: + cpu: if: > ${{ !contains(github.event.pull_request.title, 'Draft:') && !contains(github.event.pull_request.title, 'WIP:') }} @@ -86,51 +86,3 @@ jobs: name: figs path: tst/figs retention-days: 3 - -# ci_gpu_job: -# if: > -# ${{ !contains(github.event.pull_request.title, 'Draft:') && -# !contains(github.event.pull_request.title, 'WIP:') }} -# runs-on: ubuntu-latest # Update to a runner with GPU support if available -# steps: -# - uses: actions/checkout@v3 -# with: -# submodules: recursive -# - name: Install dependencies -# run: | -# sudo apt-get update -qq -# sudo apt-get install -qq --no-install-recommends tzdata -# sudo apt-get install -qq git -# sudo apt-get install -qq make cmake g++ -# sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev -# sudo apt-get install -qq openssh-client -# sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib -# sudo apt-get install -qq -y cuda -# sudo apt-get install -qq -y 
nvidia-driver-525 -# - name: Set up environment -# run: | -# source env/bash -# - name: Run GPU tests -# run: | -# cd tst -# python3 run_tests.py gpu.suite \ -# --save_build \ -# --make_nproc=32 \ -# --cmake=-DARTEMIS_ENABLE_CUDA=On \ -# --cmake=-DKokkos_ARCH_VOLTA70=On \ -# --cmake=-DCMAKE_CXX_COMPILER=$GITHUB_WORKSPACE/external/parthenon/external/Kokkos/bin/nvcc_wrapper \ -# --log_file=ci_gpu_log.txt -# - name: Upload GPU test log -# if: always() -# uses: actions/upload-artifact@v3 -# with: -# name: ci_gpu_log.txt -# path: tst/ci_gpu_log.txt -# retention-days: 3 -# - name: Upload figures -# if: always() -# uses: actions/upload-artifact@v3 -# with: -# name: figs -# path: tst/figs -# retention-days: 3 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index ebeec0b..57aee74 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -20,7 +20,7 @@ on: - cron: '0 0 * * *' # Runs daily at midnight; adjust as needed jobs: - ci_nightly_cpu_job: + cpu: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 @@ -66,43 +66,3 @@ jobs: name: figs path: tst/figs retention-days: 3 - -# ci_nightly_gpu_job: -# runs-on: ubuntu-latest # Update to a runner with GPU support if available -# steps: -# - uses: actions/checkout@v3 -# with: -# submodules: recursive -# - name: Update Parthenon submodule -# run: | -# cd external/parthenon -# git pull origin develop -# echo "==> Current Parthenon commit hash:" -# git rev-parse HEAD -# - name: Set up environment -# run: | -# source env/bash -# - name: Run GPU tests -# run: | -# cd tst -# python3 run_tests.py gpu.suite \ -# --save_build \ -# --make_nproc=32 \ -# --cmake=-DARTEMIS_ENABLE_CUDA=On \ -# --cmake=-DKokkos_ARCH_VOLTA70=On \ -# --cmake=-DCMAKE_CXX_COMPILER=external/parthenon/external/Kokkos/bin/nvcc_wrapper \ -# --log_file=ci_gpu_log.txt -# - name: Upload GPU test log -# if: always() -# uses: actions/upload-artifact@v3 -# with: -# name: ci_gpu_log.txt -# path: tst/ci_gpu_log.txt -# 
retention-days: 3 -# - name: Upload figures -# if: always() -# uses: actions/upload-artifact@v3 -# with: -# name: figs -# path: tst/figs -# retention-days: 3 From fa64e711fdd5dcea79d7560f0e3dabfaf5a5491d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 00:19:14 -0700 Subject: [PATCH 18/56] Add slow version of parallel suite --- tst/suites/parallel_slow.suite | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tst/suites/parallel_slow.suite diff --git a/tst/suites/parallel_slow.suite b/tst/suites/parallel_slow.suite new file mode 100644 index 0000000..ea9ea11 --- /dev/null +++ b/tst/suites/parallel_slow.suite @@ -0,0 +1,27 @@ +# ======================================================================================== +# (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. 
+# ======================================================================================== + +# parallel suite + +advection/advection_mpi +binary/binary_mpi +binary_aid/binary_adi_mpi +coords/blast_mpi +disk/disk_mpi +nbody/nbody_mpi +hydro/linwave_mpi +ssheet/ssheet_mpi +diffusion/viscous_diffusion_mpi +diffusion/alpha_disk_mpi +diffusion/thermal_diffusion_mpi +drag/drag_mpi From 2f31a3daaefef0482ff48dacafd2860826b464ba Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 07:58:17 -0700 Subject: [PATCH 19/56] debug weird issue with cpu count --- tst/scripts/ssheet/ssheet_mpi.py | 1 + tst/suites/parallel.suite | 18 +++++++++--------- tst/suites/regression.suite | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index 2708a45..b9c14be 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -25,6 +25,7 @@ logging.getLogger("matplotlib").setLevel(logging.WARNING) ssheet._nranks = min(max(2, os.cpu_count()), 8) +print('ranks: {ssheet._nranks}') ssheet._file_id = "ssheet_mpi" diff --git a/tst/suites/parallel.suite b/tst/suites/parallel.suite index f2356db..cda6c7e 100644 --- a/tst/suites/parallel.suite +++ b/tst/suites/parallel.suite @@ -13,13 +13,13 @@ # parallel suite -advection/advection_mpi -coords/blast_mpi -disk/disk_mpi -nbody/nbody_mpi -hydro/linwave_mpi +#advection/advection_mpi +#coords/blast_mpi +#disk/disk_mpi +#nbody/nbody_mpi +#hydro/linwave_mpi ssheet/ssheet_mpi -diffusion/viscous_diffusion_mpi -diffusion/alpha_disk_mpi -diffusion/thermal_diffusion_mpi -drag/drag_mpi +#diffusion/viscous_diffusion_mpi +#diffusion/alpha_disk_mpi +#diffusion/thermal_diffusion_mpi +#drag/drag_mpi diff --git a/tst/suites/regression.suite b/tst/suites/regression.suite index 84b9b0c..6d84656 100644 --- a/tst/suites/regression.suite +++ b/tst/suites/regression.suite @@ -13,5 +13,5 @@ # regression suite -serial.suite +#serial.suite parallel.suite From 
cbe5152e12ca621f9874fe193080dfc410722623 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 08:33:25 -0700 Subject: [PATCH 20/56] typo in debug prints --- tst/scripts/ssheet/ssheet_mpi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index b9c14be..046c844 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -25,7 +25,8 @@ logging.getLogger("matplotlib").setLevel(logging.WARNING) ssheet._nranks = min(max(2, os.cpu_count()), 8) -print('ranks: {ssheet._nranks}') +print(f'os cpu count: {os.cpu_count()}') +print(f'ranks: {ssheet._nranks}') ssheet._file_id = "ssheet_mpi" From ddcc735912c75d69a2e17b1d61ab9da97d3e30b0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 10:23:11 -0700 Subject: [PATCH 21/56] New attempt at counting mpi slots --- tst/scripts/ssheet/ssheet_mpi.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index 046c844..8bae7de 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -24,7 +24,21 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -ssheet._nranks = min(max(2, os.cpu_count()), 8) +def get_mpi_slots(): + try: + result = subprocess.run(['lscpu'], stdout=subprocess.PIPE, text=True, check=True) + sockets = 0 + for line in result.stdout.split('\n'): + if 'Socket(s):' in line: + sockets = int(line.split(':')[1].strip()) + return sockets * 2 # Assuming 2 ranks per socket + except Exception as e: + print(f"Error getting MPI slot info: {e}") + return 2 # Default to 2 slots if detection fails + +#ssheet._nranks = min(max(2, os.cpu_count()), 8) +ssheet._nranks = min(max(2, get_mpi_slots()), 8) +print(f'slots: {get_mpi_slots()}') print(f'os cpu count: {os.cpu_count()}') print(f'ranks: {ssheet._nranks}') 
ssheet._file_id = "ssheet_mpi" From c03669572d155a65e7abd9e3ba7b142f6eb6e2bf Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 10:25:23 -0700 Subject: [PATCH 22/56] Trying again with mpi oversubscribe --- tst/scripts/ssheet/ssheet_mpi.py | 4 +++- tst/scripts/utils/artemis.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index 8bae7de..a1cfd60 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -29,6 +29,7 @@ def get_mpi_slots(): result = subprocess.run(['lscpu'], stdout=subprocess.PIPE, text=True, check=True) sockets = 0 for line in result.stdout.split('\n'): + print(line) if 'Socket(s):' in line: sockets = int(line.split(':')[1].strip()) return sockets * 2 # Assuming 2 ranks per socket @@ -37,7 +38,8 @@ def get_mpi_slots(): return 2 # Default to 2 slots if detection fails #ssheet._nranks = min(max(2, os.cpu_count()), 8) -ssheet._nranks = min(max(2, get_mpi_slots()), 8) +#ssheet._nranks = min(max(2, get_mpi_slots()), 8) +ssheet._nranks = 8 print(f'slots: {get_mpi_slots()}') print(f'os cpu count: {os.cpu_count()}') print(f'ranks: {ssheet._nranks}') diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index 4e00313..de140fa 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -66,7 +66,7 @@ def run(nproc, input_filename, arguments, restart=None): os.chdir(exe_dir) try: input_filename_full = "../../" + artemis_rel_path + "inputs/" + input_filename - run_command = ["mpiexec", "-n", str(nproc), "./artemis"] + run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), "./artemis"] if restart is not None: run_command += ["-r", restart] run_command += ["-i", input_filename_full] From 16c4c8c26ae4d52298cdadf185bc6c74a0368ef0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 10:51:00 -0700 Subject: [PATCH 23/56] OK lets just use oversubscribe --- 
tst/scripts/advection/advection_mpi.py | 2 +- tst/scripts/binary/binary_mpi.py | 2 +- tst/scripts/binary_adi/binary_adi_mpi.py | 2 +- tst/scripts/collisions/collisions_mpi.py | 2 +- tst/scripts/coords/blast_mpi.py | 2 +- tst/scripts/diffusion/alpha_disk_mpi.py | 2 +- tst/scripts/diffusion/thermal_diffusion_mpi.py | 2 +- tst/scripts/diffusion/viscous_diffusion_mpi.py | 2 +- tst/scripts/disk_nbody/disk_nbody_mpi.py | 2 +- tst/scripts/drag/drag_mpi.py | 2 +- tst/scripts/hydro/linwave_mpi.py | 2 +- tst/scripts/ssheet/ssheet_mpi.py | 18 ------------------ tst/suites/parallel.suite | 18 +++++++++--------- 13 files changed, 20 insertions(+), 38 deletions(-) diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index 9ed23e3..0e66c17 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -24,7 +24,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = min(max(2, os.cpu_count()), 4) +advection._nranks = 4 advection._file_id = "advection_mpi" diff --git a/tst/scripts/binary/binary_mpi.py b/tst/scripts/binary/binary_mpi.py index 5707807..e88f41c 100644 --- a/tst/scripts/binary/binary_mpi.py +++ b/tst/scripts/binary/binary_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = min(max(2, os.cpu_count()), 16) +binary._nranks = 16 binary._file_id = "binary_mpi" diff --git a/tst/scripts/binary_adi/binary_adi_mpi.py b/tst/scripts/binary_adi/binary_adi_mpi.py index 58881c5..2c7032e 100644 --- a/tst/scripts/binary_adi/binary_adi_mpi.py +++ b/tst/scripts/binary_adi/binary_adi_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -binary._nranks = min(max(2, os.cpu_count()), 16) +binary._nranks = 16 binary._file_id = "binary_mpi" diff --git a/tst/scripts/collisions/collisions_mpi.py 
b/tst/scripts/collisions/collisions_mpi.py index 1812019..608dee6 100644 --- a/tst/scripts/collisions/collisions_mpi.py +++ b/tst/scripts/collisions/collisions_mpi.py @@ -22,7 +22,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -collisions._nranks = min(max(2, os.cpu_count()), 16) +collisions._nranks = 16 collisions._file_id = "collisions_mpi" diff --git a/tst/scripts/coords/blast_mpi.py b/tst/scripts/coords/blast_mpi.py index aa7c49a..0d2f5d5 100644 --- a/tst/scripts/coords/blast_mpi.py +++ b/tst/scripts/coords/blast_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -blast._nranks = min(max(2, os.cpu_count()), 8) +blast._nranks = 8 blast._file_id = "blast_mpi" diff --git a/tst/scripts/diffusion/alpha_disk_mpi.py b/tst/scripts/diffusion/alpha_disk_mpi.py index 2ae5d28..3d899e8 100644 --- a/tst/scripts/diffusion/alpha_disk_mpi.py +++ b/tst/scripts/diffusion/alpha_disk_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -alpha_disk._nranks = min(max(2, os.cpu_count()), 4) +alpha_disk._nranks = 4 alpha_disk._file_id = "alpha_disk_mpi" diff --git a/tst/scripts/diffusion/thermal_diffusion_mpi.py b/tst/scripts/diffusion/thermal_diffusion_mpi.py index 29a440b..93c5a30 100644 --- a/tst/scripts/diffusion/thermal_diffusion_mpi.py +++ b/tst/scripts/diffusion/thermal_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -thermal_diffusion._nranks = min(max(2, os.cpu_count()), 8) +thermal_diffusion._nranks = 8 thermal_diffusion._file_id = "thermal_diffusion_mpi" diff --git a/tst/scripts/diffusion/viscous_diffusion_mpi.py b/tst/scripts/diffusion/viscous_diffusion_mpi.py index d08035a..388ad91 100644 --- a/tst/scripts/diffusion/viscous_diffusion_mpi.py +++ 
b/tst/scripts/diffusion/viscous_diffusion_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -viscous_diffusion._nranks = min(max(2, os.cpu_count()), 4) +viscous_diffusion._nranks = 4 viscous_diffusion._nd = [2] viscous_diffusion._file_id = "viscous_diffusion_mpi" diff --git a/tst/scripts/disk_nbody/disk_nbody_mpi.py b/tst/scripts/disk_nbody/disk_nbody_mpi.py index e675495..0ab31f5 100644 --- a/tst/scripts/disk_nbody/disk_nbody_mpi.py +++ b/tst/scripts/disk_nbody/disk_nbody_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = min(max(2, os.cpu_count()), 8) +disk._nranks = 8 disk._file_id = "disk_nbody_mpi" diff --git a/tst/scripts/drag/drag_mpi.py b/tst/scripts/drag/drag_mpi.py index 27ba082..50d15e5 100644 --- a/tst/scripts/drag/drag_mpi.py +++ b/tst/scripts/drag/drag_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -drag._nranks = min(max(2, os.cpu_count()), 4) +drag._nranks = 4 drag._file_id = "drag_mpi" diff --git a/tst/scripts/hydro/linwave_mpi.py b/tst/scripts/hydro/linwave_mpi.py index cfe0be6..4ac361b 100644 --- a/tst/scripts/hydro/linwave_mpi.py +++ b/tst/scripts/hydro/linwave_mpi.py @@ -24,7 +24,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -linwave._nranks = min(max(2, os.cpu_count()), 4) +linwave._nranks = 4 linwave._file_id = "linear_wave_mpi" diff --git a/tst/scripts/ssheet/ssheet_mpi.py b/tst/scripts/ssheet/ssheet_mpi.py index a1cfd60..d79d278 100644 --- a/tst/scripts/ssheet/ssheet_mpi.py +++ b/tst/scripts/ssheet/ssheet_mpi.py @@ -24,25 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -def get_mpi_slots(): - try: - result = subprocess.run(['lscpu'], stdout=subprocess.PIPE, text=True, 
check=True) - sockets = 0 - for line in result.stdout.split('\n'): - print(line) - if 'Socket(s):' in line: - sockets = int(line.split(':')[1].strip()) - return sockets * 2 # Assuming 2 ranks per socket - except Exception as e: - print(f"Error getting MPI slot info: {e}") - return 2 # Default to 2 slots if detection fails - -#ssheet._nranks = min(max(2, os.cpu_count()), 8) -#ssheet._nranks = min(max(2, get_mpi_slots()), 8) ssheet._nranks = 8 -print(f'slots: {get_mpi_slots()}') -print(f'os cpu count: {os.cpu_count()}') -print(f'ranks: {ssheet._nranks}') ssheet._file_id = "ssheet_mpi" diff --git a/tst/suites/parallel.suite b/tst/suites/parallel.suite index cda6c7e..f2356db 100644 --- a/tst/suites/parallel.suite +++ b/tst/suites/parallel.suite @@ -13,13 +13,13 @@ # parallel suite -#advection/advection_mpi -#coords/blast_mpi -#disk/disk_mpi -#nbody/nbody_mpi -#hydro/linwave_mpi +advection/advection_mpi +coords/blast_mpi +disk/disk_mpi +nbody/nbody_mpi +hydro/linwave_mpi ssheet/ssheet_mpi -#diffusion/viscous_diffusion_mpi -#diffusion/alpha_disk_mpi -#diffusion/thermal_diffusion_mpi -#drag/drag_mpi +diffusion/viscous_diffusion_mpi +diffusion/alpha_disk_mpi +diffusion/thermal_diffusion_mpi +drag/drag_mpi From c922286eaab25bc7660a64f3953b47c126637bbb Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 13:26:51 -0700 Subject: [PATCH 24/56] Change how run dir is created --- tst/run_tests.py | 31 ++++++++++++- tst/scripts/advection/advection.py | 11 ++++- tst/scripts/binary/binary.py | 2 +- tst/scripts/binary_adi/binary_adi.py | 2 +- tst/scripts/collisions/collisions.py | 5 +- tst/scripts/coords/blast.py | 5 +- tst/scripts/diffusion/alpha_disk.py | 2 +- tst/scripts/diffusion/thermal_diffusion.py | 2 +- tst/scripts/diffusion/viscous_diffusion.py | 2 +- tst/scripts/disk/disk.py | 22 +++++---- tst/scripts/disk_nbody/disk_nbody.py | 16 +++++-- tst/scripts/drag/drag.py | 7 +-- tst/scripts/hydro/linwave.py | 10 +++- tst/scripts/nbody/nbody.py | 2 +- 
tst/scripts/nbody/nbody_mpi.py | 2 +- tst/scripts/ssheet/ssheet.py | 2 +- tst/scripts/utils/artemis.py | 53 ++++++++++++++-------- 17 files changed, 126 insertions(+), 50 deletions(-) mode change 100644 => 100755 tst/run_tests.py diff --git a/tst/run_tests.py b/tst/run_tests.py old mode 100644 new mode 100755 index 0a486cb..d0081d3 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -12,6 +12,8 @@ # the public, perform publicly and display publicly, and to permit others to do so. # ======================================================================================== +# This file was created in part by one of OpenAI's generative AI models + # Regression test script for Artemis. # Usage: From this directory, call this script with python: @@ -112,6 +114,10 @@ def main(**kwargs): test_times = [] test_results = [] test_errors = [] + + # Extract arguments + artemis_exe_path = kwargs.pop("exe") + try: # Check that required modules are installed for all test dependencies deps_installed = True @@ -130,8 +136,21 @@ def main(**kwargs): deps_installed = False if not deps_installed: logger.warning("WARNING! 
Not all required Python modules " "are available") + + # Set the executable path if provided + if artemis_exe_path is not None: + artemis.artemis_executable = os.path.abspath(artemis_exe_path) + # Check that path is valid + if not ( + os.path.exists(artemis.artemis_executable) + and os.access(artemis.artemis_executable, os.X_OK) + ): + logger.error("Exception occurred", exc_info=True) + test_errors.append("make()") + raise TestError('Provided executable "{artemis_exe_path}" not found!') + # Build Artemis - if not kwargs.pop("reuse_build"): + if artemis_exe_path is None and not kwargs.pop("reuse_build"): try: os.system("rm -rf {0}/build".format(current_dir)) # insert arguments for artemis.make() @@ -142,6 +161,7 @@ def main(**kwargs): logger.error("Exception occurred", exc_info=True) test_errors.append("make()") raise TestError("Unable to build Artemis") + # Run each test for name in test_names: t0 = timer() @@ -178,7 +198,7 @@ def main(**kwargs): # For CI, print after every individual test has finished logger.info("{} test: run(), analyze() finished".format(name)) finally: - if not kwargs.pop("save_build"): + if not kwargs.pop("save_build") and artemis_exe_path is None: os.system("rm -rf {0}/build".format(current_dir)) # Report test results @@ -275,6 +295,13 @@ def log_init(args): help="do not recompile the code and reuse the build directory.", ) + parser.add_argument( + "--exe", + type=str, + default=None, + help="path to pre-built executable", + ) + args = parser.parse_args() log_init(args) diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 4bb3d6c..2d6436a 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -18,6 +18,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -71,8 +72,14 @@ def analyze(): # error convergence rates, and error identicality between L- and 
R-going # advection. logger.debug("Analyzing test " + __name__) - data = np.loadtxt("build/src/" + _file_id + "-errs.dat", dtype=np.float64, ndmin=2) - history = np.loadtxt("build/src/" + _file_id + ".out0.hst") + data = np.loadtxt( + os.path.join(artemis.get_run_directory(), _file_id + "-errs.dat"), + dtype=np.float64, + ndmin=2, + ) + history = np.loadtxt( + os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") + ) analyze_status = True if np.isnan(data).any() or np.isnan(history).any(): logger.warning("NaN encountered") diff --git a/tst/scripts/binary/binary.py b/tst/scripts/binary/binary.py index ef5852e..335c3d2 100644 --- a/tst/scripts/binary/binary.py +++ b/tst/scripts/binary/binary.py @@ -53,7 +53,7 @@ def analyze(): analyze_status = True time, r, phi, z, [d, u, v, w, T] = load_level( - "final", base="{}.out1".format(_file_id), dir="build/src" + "final", base="{}.out1".format(_file_id), dir=artemis.get_run_directory() ) rc = 0.5 * (r[1:] + r[:-1]) pc = 0.5 * (phi[1:] + phi[:-1]) diff --git a/tst/scripts/binary_adi/binary_adi.py b/tst/scripts/binary_adi/binary_adi.py index 31b0ba9..8fb80c1 100644 --- a/tst/scripts/binary_adi/binary_adi.py +++ b/tst/scripts/binary_adi/binary_adi.py @@ -69,7 +69,7 @@ def analyze(): for dv in _de_switch: problem_id = _file_id + "_{}_de{:d}_{}".format(fv, int(10 * dv), cv) time, r, phi, z, [d, u, v, w, T] = load_level( - "final", dir="build/src", base=problem_id + ".out1" + "final", dir=artemis.get_run_directory(), base=problem_id + ".out1" ) rc = 0.5 * (r[1:] + r[:-1]) pc = 0.5 * (phi[1:] + phi[:-1]) diff --git a/tst/scripts/collisions/collisions.py b/tst/scripts/collisions/collisions.py index 9e273a9..10aca79 100644 --- a/tst/scripts/collisions/collisions.py +++ b/tst/scripts/collisions/collisions.py @@ -18,6 +18,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -58,7 +59,9 @@ def analyze(): 
logger.debug("Analyzing test " + __name__) - fname = "build/src/{}_{:d}.reb".format(_file_id, _nranks) + fname = os.path.join( + artemis.get_run_directory(), "{}_{:d}.reb".format(_file_id, _nranks) + ) logger.debug("Reading" + fname) d = np.loadtxt(fname) with open(fname, "r") as f: diff --git a/tst/scripts/coords/blast.py b/tst/scripts/coords/blast.py index 9849bf6..edc9cd8 100644 --- a/tst/scripts/coords/blast.py +++ b/tst/scripts/coords/blast.py @@ -117,7 +117,10 @@ def analyze(): else interp1d(dat3[:, 0], dat3[:, 3]) ) res = load_snap( - "build/src/" + _file_id + "_{}{:d}.out1.final.phdf".format(g, 2) + os.path.join( + artemis.get_run_directory(), + _file_id + "_{}{:d}.out1.final.phdf".format(g, 2), + ) ) pres = res[4][-1] xc = 0.5 * (res[1][:, 1:] + res[1][:, :-1]) diff --git a/tst/scripts/diffusion/alpha_disk.py b/tst/scripts/diffusion/alpha_disk.py index 07b033b..0271c57 100644 --- a/tst/scripts/diffusion/alpha_disk.py +++ b/tst/scripts/diffusion/alpha_disk.py @@ -83,7 +83,7 @@ def analyze(): os.makedirs(artemis.artemis_fig_dir, exist_ok=True) time, x, y, z, [dens, u, v, w, T] = binary.load_level( - "final", dir="build/src", base=base + ".out1" + "final", dir=artemis.get_run_directory(), base=base + ".out1" ) r = 0.5 * (x[1:] + x[:-1]) diff --git a/tst/scripts/diffusion/thermal_diffusion.py b/tst/scripts/diffusion/thermal_diffusion.py index f4b22bc..9a98a2c 100644 --- a/tst/scripts/diffusion/thermal_diffusion.py +++ b/tst/scripts/diffusion/thermal_diffusion.py @@ -90,7 +90,7 @@ def analyze(): for ax, g in zip(axes, _geom): name = "{}_{}".format(_file_id, g[:3]) time, x, y, z, [d, u, v, w, T] = binary.load_level( - "final", dir="build/src", base="{}.out1".format(name) + "final", dir=artemis.get_run_directory(), base="{}.out1".format(name) ) xc = 0.5 * (x[1:] + x[:-1]) ans = Tans(xc.ravel(), f=_flux, T0=_gtemp, x0=1.2, xi=0.2, d=dind[g], k=_kcond) diff --git a/tst/scripts/diffusion/viscous_diffusion.py b/tst/scripts/diffusion/viscous_diffusion.py index 
5ad55fd..04b5267 100644 --- a/tst/scripts/diffusion/viscous_diffusion.py +++ b/tst/scripts/diffusion/viscous_diffusion.py @@ -90,7 +90,7 @@ def analyze(): os.makedirs(artemis.artemis_fig_dir, exist_ok=True) time, x, y, z, [dens, u, v, w, T] = binary.load_level( - "final", dir="build/src", base=base + ".out1" + "final", dir=artemis.get_run_directory(), base=base + ".out1" ) xc = 0.5 * (x[1:] + x[:-1]) yc = 0.5 * (y[1:] + y[:-1]) diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index da53603..e5d9367 100644 --- a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -17,6 +17,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -98,15 +99,24 @@ def analyze(): for gam in _gamma: logger.debug("Analyzing test {}_{}".format(__name__, g)) logger.debug( - "build/src/disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b) + os.path.join( + artemis.get_run_directory(), + "disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + ) ) _, (x, y, z), (d0, _, _, _, _), sys, _ = loadf( 0, - base="build/src/disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + base=os.path.join( + artemis.get_run_directory(), + "disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + ), ) time, (x, y, z), (d, T, u, v, w), sys, dt = loadf( "final", - base="build/src/disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + base=os.path.join( + artemis.get_run_directory(), + "disk_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + ), ) mybad = False mybad |= np.any(np.isnan(d)) @@ -189,8 +199,4 @@ def loadf(n, base="disk.out1"): w = f["gas.prim.velocity_0"][...][:, 2, :, :, :] x = f["Locations/x"][...] y = f["Locations/y"][...] - z = f["Locations/z"][...] 
- sys = f["Params"].attrs["artemis/coord_sys"] - time = f["Info"].attrs["Time"] - dt = f["Info"].attrs["dt"] - return time, (x, y, z), (d, T, u, v, w), sys, dt + \ No newline at end of file diff --git a/tst/scripts/disk_nbody/disk_nbody.py b/tst/scripts/disk_nbody/disk_nbody.py index 657c553..daf9ba9 100644 --- a/tst/scripts/disk_nbody/disk_nbody.py +++ b/tst/scripts/disk_nbody/disk_nbody.py @@ -17,6 +17,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -87,18 +88,23 @@ def analyze(): for gam in _gamma: logger.debug("Analyzing test {}_{}".format(__name__, g)) logger.debug( - "build/src/disk_nbody_{}_{:d}_{}.out1".format(g, int(10 * gam), b) + os.path.join( + artemis.get_run_directory(), + "disk_nbody_{}_{:d}_{}.out1".format(g, int(10 * gam), b), + ) ) _, (x, y, z), (d0, _, _, _, _), sys, _ = loadf( 0, - base="build/src/disk_nbody_{}_{:d}_{}.out1".format( - g, int(10 * gam), b + base=os.path.join( + artemis.get_run_directory(), + "/disk_nbody_{}_{:d}_{}.out1".format(g, int(10 * gam), b), ), ) time, (x, y, z), (d, T, u, v, w), sys, dt = loadf( "final", - base="build/src/disk_nbody_{}_{:d}_{}.out1".format( - g, int(10 * gam), b + base=os.path.join( + artemis.get_run_directory(), + "disk_nbody_{}_{:d}_{}.out1".format(g, int(10 * gam), b), ), ) mybad = False diff --git a/tst/scripts/drag/drag.py b/tst/scripts/drag/drag.py index a3fa189..37dbe9d 100644 --- a/tst/scripts/drag/drag.py +++ b/tst/scripts/drag/drag.py @@ -60,7 +60,9 @@ def analyze(): mom_tot = [] errors = [] for n in range(1, int(_tlim / 0.05)): - fname = "build/src/{}.out1.{:05d}.phdf".format(_file_id, n) + fname = os.path.join( + artemis.get_run_directory(), "{}.out1.{:05d}.phdf".format(_file_id, n) + ) with h5py.File(fname, "r") as f: t = f["Info"].attrs["Time"] vg = f["gas.prim.velocity_0"][...][:, 0, :].ravel() @@ -118,5 +120,4 @@ def analyze(): errors = np.array(errors).ravel() 
fail = np.any(errors > _tol) - fail |= np.max(mom_err) > 1e-13 - return not fail + fail |= np.max(mom_err) > 1e-1 \ No newline at end of file diff --git a/tst/scripts/hydro/linwave.py b/tst/scripts/hydro/linwave.py index ef719d8..54009eb 100644 --- a/tst/scripts/hydro/linwave.py +++ b/tst/scripts/hydro/linwave.py @@ -18,6 +18,7 @@ # Modules import logging import numpy as np +import os import scripts.utils.artemis as artemis logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -77,7 +78,14 @@ def analyze(): # error convergence rates, and error identicality between L- and R-going # sound waves. logger.debug("Analyzing test " + __name__) - data = np.loadtxt("build/src/" + _file_id + "-errs.dat", dtype=np.float64, ndmin=2) + data = np.loadtxt( + os.path.join( + artemis.get_run_directory(), + _file_id + "-errs.dat", + dtype=np.float64, + ndmin=2, + ) + ) analyze_status = True if np.isnan(data).any(): logger.warning("NaN encountered") diff --git a/tst/scripts/nbody/nbody.py b/tst/scripts/nbody/nbody.py index 3e06203..9b78caa 100644 --- a/tst/scripts/nbody/nbody.py +++ b/tst/scripts/nbody/nbody.py @@ -61,7 +61,7 @@ def analyze(): analyze_status = True time, r, phi, z, [d, u, v, w, T] = load_level( - "final", base="{}.out1".format(_file_id), dir="build/src" + "final", base="{}.out1".format(_file_id), dir=artemis.get_run_directory() ) rc = 0.5 * (r[1:] + r[:-1]) pc = 0.5 * (phi[1:] + phi[:-1]) diff --git a/tst/scripts/nbody/nbody_mpi.py b/tst/scripts/nbody/nbody_mpi.py index efb6cbf..a121cee 100644 --- a/tst/scripts/nbody/nbody_mpi.py +++ b/tst/scripts/nbody/nbody_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -nbody._nranks = min(max(2, os.cpu_count()), 8) +nbody._nranks = 8 nbody._file_id = "nbody_mpi" diff --git a/tst/scripts/ssheet/ssheet.py b/tst/scripts/ssheet/ssheet.py index 7bc8e11..b643d43 100644 --- a/tst/scripts/ssheet/ssheet.py +++ 
b/tst/scripts/ssheet/ssheet.py @@ -51,7 +51,7 @@ def analyze(): analyze_status = True time, x, y, z, [d, u, v, w, T] = binary.load_level( - "final", dir="build/src", base="{}.out1".format(_file_id) + "final", dir=artemis.get_run_directory(), base="{}.out1".format(_file_id) ) xc = 0.5 * (x[1:] + x[:-1]) yc = 0.5 * (y[1:] + y[:-1]) diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index de140fa..a14419a 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -20,13 +20,28 @@ import logging import os import subprocess +import datetime from timeit import default_timer as timer from .log_pipe import LogPipe # Global variables -artemis_rel_path = "../" +current_dir = os.getcwd() +artemis_dir = os.path.abspath(os.path.join(current_dir, "..")) +artemis_executable = os.path.join(artemis_dir, "build", "src", "artemis") +artemis_inputs_dir = os.path.join(artemis_dir, "inputs") artemis_fig_dir = "./figs/" +# Create run directory for this invocation of the test framework +now = datetime.datetime.now() +run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) +run_directory = os.path.join(current_dir, run_directory_name) +os.makedirs(run_directory, exist_ok=True) + + +# Function for returning the path to the run directory for this set of tests +def get_run_directory(): + return run_directory + # Function for compiling Artemis def make(cmake_args, make_nproc): @@ -60,27 +75,27 @@ def make(cmake_args, make_nproc): # Function for running Artemis (with MPI) def run(nproc, input_filename, arguments, restart=None): + global run_directory out_log = LogPipe("artemis.run", logging.INFO) - current_dir = os.getcwd() - exe_dir = current_dir + "/build/src/" - os.chdir(exe_dir) + + # Build the run command + run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), artemis_executable] + if restart is not None: + run_command += ["-r", restart] + input_filename_full = os.path.join(artemis_inputs_dir, input_filename) + run_command += 
["-i", input_filename_full] + try: - input_filename_full = "../../" + artemis_rel_path + "inputs/" + input_filename - run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), "./artemis"] - if restart is not None: - run_command += ["-r", restart] - run_command += ["-i", input_filename_full] - try: - cmd = run_command + arguments - logging.getLogger("artemis.run").debug("Executing: " + " ".join(cmd)) - subprocess.check_call(cmd, stdout=out_log) - except subprocess.CalledProcessError as err: - raise ArtemisError( - "Return code {0} from command '{1}'".format( - err.returncode, " ".join(err.cmd) - ) + os.chdir(run_directory) + cmd = run_command + arguments + logging.getLogger("artemis.run").debug("Executing: " + " ".join(cmd)) + subprocess.check_call(cmd, stdout=out_log) + except subprocess.CalledProcessError as err: + raise ArtemisError( + "Return code {0} from command '{1}'".format( + err.returncode, " ".join(err.cmd) ) - + ) finally: out_log.close() os.chdir(current_dir) From 17de819a23d0650890b031e52cbbfd24c4e0451d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 13:43:28 -0700 Subject: [PATCH 25/56] I need this branch to be up to date on github lol --- env/bash | 8 ++++++-- run_local_ci.py | 9 +++++---- tst/scripts/disk/disk.py | 6 +++++- tst/scripts/drag/drag.py | 3 ++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/env/bash b/env/bash index fd8f4e5..148158d 100644 --- a/env/bash +++ b/env/bash @@ -160,7 +160,7 @@ function build_artemis { DEBUG_BUILD=false # Whether to configure for Debug build (default is cmake's RelWithDebInfo) FULL_BUILD=false # Whether to completely wipe the build directory, if non-empty ASAN_BUILD=false # Whether to configure with ASAN support for error checking - options='hb:cdfa' + options='hb:cdfaj:' while getopts $options opt; do case $opt in h) @@ -172,6 +172,7 @@ function build_artemis { echo " -c : Force re-configuration of build" echo " -d : Switch to debug build from default RelWithDebInfo" echo " 
-f : Force complete re-build" + echo " -j [N] : Number of ranks N to use for make" echo " WARNING uses rm -rf to remove build directory if it exists" return ;; @@ -190,6 +191,9 @@ function build_artemis { echo "Full build requested" FULL_BUILD=true ;; + j) + BUILD_RANKS="$OPTARG" + ;; a) echo "ASAN build requested" ASAN_BUILD=true @@ -259,7 +263,7 @@ function build_artemis { configure_artemis fi - make -j + make -j$BUILD_RANKS make_status=$? echo "${ABS_BUILD_DIR}" diff --git a/run_local_ci.py b/run_local_ci.py index 48c90e6..3e82192 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -83,12 +83,13 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): test_command = [ "bash", "-c", - "source ../env/bash && python3 run_tests.py regression.suite " - "--save_build --make_nproc=4 " - "--cmake=-DCMAKE_C_COMPILER=gcc " - "--cmake=-DCMAKE_CXX_COMPILER=g++ " + "source ../env/bash && build_artemis -b " + temp_dir + " -j 4 && python3 run_tests.py regression.suite " + #"--save_build --make_nproc=4 " + #"--cmake=-DCMAKE_C_COMPILER=gcc " + #"--cmake=-DCMAKE_CXX_COMPILER=g++ " "--log_file=ci_cpu_log.txt", ] + print(test_command) subprocess.run(test_command, check=True) # Update the status to success update_status(commit_sha, "success", "All tests passed.") diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index e5d9367..f1340ee 100644 --- a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -199,4 +199,8 @@ def loadf(n, base="disk.out1"): w = f["gas.prim.velocity_0"][...][:, 2, :, :, :] x = f["Locations/x"][...] y = f["Locations/y"][...] - \ No newline at end of file + z = f["Locations/z"][...] 
+ sys = f["Params"].attrs["artemis/coord_sys"] + time = f["Info"].attrs["Time"] + dt = f["Info"].attrs["dt"] + return time, (x, y, z), (d, T, u, v, w), sys, dt diff --git a/tst/scripts/drag/drag.py b/tst/scripts/drag/drag.py index 37dbe9d..4a149eb 100644 --- a/tst/scripts/drag/drag.py +++ b/tst/scripts/drag/drag.py @@ -120,4 +120,5 @@ def analyze(): errors = np.array(errors).ravel() fail = np.any(errors > _tol) - fail |= np.max(mom_err) > 1e-1 \ No newline at end of file + fail |= np.max(mom_err) > 1e-13 + return not fail From e5917af4bb666ef63908ada078809a51e453de1c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 13:48:57 -0700 Subject: [PATCH 26/56] build code separately from run_tests --- .github/workflows/ci.yml | 6 ++---- tst/scripts/disk/disk_mpi.py | 2 +- tst/suites/regression.suite | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd4ea18..0f6ee58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,11 +66,9 @@ jobs: - name: Run CPU tests run: | cd tst + build_artemis -b tst/build -j 4 python3 run_tests.py regression.suite \ - --save_build \ - --make_nproc=4 \ - --cmake=-DCMAKE_C_COMPILER=gcc \ - --cmake=-DCMAKE_CXX_COMPILER=g++ \ + --exe tst/build/artemis --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/tst/scripts/disk/disk_mpi.py b/tst/scripts/disk/disk_mpi.py index 989ce88..fb97cf0 100644 --- a/tst/scripts/disk/disk_mpi.py +++ b/tst/scripts/disk/disk_mpi.py @@ -24,7 +24,7 @@ logging.getLogger("h5py").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) -disk._nranks = min(max(2, os.cpu_count()), 8) +disk._nranks = 8 disk._file_id = "disk_mpi" diff --git a/tst/suites/regression.suite b/tst/suites/regression.suite index 6d84656..84b9b0c 100644 --- a/tst/suites/regression.suite +++ b/tst/suites/regression.suite @@ -13,5 +13,5 @@ # regression suite -#serial.suite +serial.suite 
parallel.suite From 8678efa94c9bbf63bcf9694eedf76b5471b82bf0 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 13:50:36 -0700 Subject: [PATCH 27/56] update --- .github/workflows/ci.yml | 2 +- .github/workflows/nightly.yml | 5 ++--- run_local_ci.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f6ee58..9fd825e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,7 +66,7 @@ jobs: - name: Run CPU tests run: | cd tst - build_artemis -b tst/build -j 4 + build_artemis -b tst/build -j 4 -f python3 run_tests.py regression.suite \ --exe tst/build/artemis --log_file=ci_cpu_log.txt diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 57aee74..2239551 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -47,10 +47,9 @@ jobs: - name: Run CPU tests run: | cd tst + build_artemis -b tst/build -j 4 -f python3 run_tests.py regression.suite \ - --make_nproc=32 \ - --cmake=-DCMAKE_C_COMPILER=gcc \ - --cmake=-DCMAKE_CXX_COMPILER=g++ \ + --exe tst/build/artemis --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/run_local_ci.py b/run_local_ci.py index 3e82192..8845f00 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -83,7 +83,7 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): test_command = [ "bash", "-c", - "source ../env/bash && build_artemis -b " + temp_dir + " -j 4 && python3 run_tests.py regression.suite " + "source ../env/bash && build_artemis -b " + temp_dir + " -j 4 -f && python3 run_tests.py regression.suite " #"--save_build --make_nproc=4 " #"--cmake=-DCMAKE_C_COMPILER=gcc " #"--cmake=-DCMAKE_CXX_COMPILER=g++ " From 0ed3e6e89a24ee340d0bd916e433ddfa7ade1f0e Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Sat, 9 Nov 2024 14:06:04 -0700 Subject: [PATCH 28/56] Dont use build_artemis command on unknown CI runner platforms --- .github/workflows/ci.yml | 11 
++++++----- .github/workflows/nightly.yml | 11 ++++++----- run_local_ci.py | 9 +++++---- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9fd825e..3dbefb5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,15 +60,16 @@ jobs: sudo apt-get install -qq libopenmpi-dev libhdf5-openmpi-dev sudo apt-get install -qq openssh-client sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - - name: Set up environment - run: | - source env/bash - name: Run CPU tests run: | cd tst - build_artemis -b tst/build -j 4 -f + mkdir -p build + cd build + cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc ../../ + make -j 4 + cd .. python3 run_tests.py regression.suite \ - --exe tst/build/artemis + --exe build/src/artemis --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 2239551..ce8e05f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -41,15 +41,16 @@ jobs: git pull origin develop echo "==> Current Parthenon commit hash:" git rev-parse HEAD - - name: Set up environment - run: | - source env/bash - name: Run CPU tests run: | cd tst - build_artemis -b tst/build -j 4 -f + mkdir -p build + cd build + cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc ../../ + make -j 4 + cd .. 
python3 run_tests.py regression.suite \ - --exe tst/build/artemis + --exe build/src/artemis --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/run_local_ci.py b/run_local_ci.py index 8845f00..9f7685b 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -80,13 +80,14 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): # Run the tests try: os.chdir(os.path.join(temp_dir, "tst")) + build_dir = os.path.join(temp_dir, "build") test_command = [ "bash", "-c", - "source ../env/bash && build_artemis -b " + temp_dir + " -j 4 -f && python3 run_tests.py regression.suite " - #"--save_build --make_nproc=4 " - #"--cmake=-DCMAKE_C_COMPILER=gcc " - #"--cmake=-DCMAKE_CXX_COMPILER=g++ " + "source ../env/bash && build_artemis -b " + + build_dir + + " -j 4 -f && python3 run_tests.py regression.suite " + "--exe " + build_dir + " " "--log_file=ci_cpu_log.txt", ] print(test_command) From 9c1cd4117699846b734e9690f2aede5f1d01e422 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Mon, 11 Nov 2024 13:27:01 -0700 Subject: [PATCH 29/56] Fix paths --- run_local_ci.py | 6 ++++-- tst/run_tests.py | 7 ++++--- tst/scripts/utils/artemis.py | 10 +++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/run_local_ci.py b/run_local_ci.py index 9f7685b..9ff1404 100755 --- a/run_local_ci.py +++ b/run_local_ci.py @@ -60,6 +60,8 @@ def update_status( def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + current_dir = os.getcwd() + # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: print(f"Using temporary directory: {temp_dir}") @@ -86,8 +88,8 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): "-c", "source ../env/bash && build_artemis -b " + build_dir - + " -j 4 -f && python3 run_tests.py regression.suite " - "--exe " + build_dir + " " + + " -j 4 -f && cd " + os.path.join(temp_dir, "tst") + " && python3 run_tests.py regression.suite " + "--exe " + os.path.join(build_dir, 
"src", "artemis") + " " "--log_file=ci_cpu_log.txt", ] print(test_command) diff --git a/tst/run_tests.py b/tst/run_tests.py index d0081d3..8cd96e6 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -66,7 +66,8 @@ def process_suite(filename): dir_test_names = [ name for _, name, _ in iter_modules( - path=["scripts/" + test_name], prefix=test_name + "." + #path=["scripts/" + test_name], prefix=test_name + "." + path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", test_name)], prefix=test_name + "." ) ] tests += dir_test_names @@ -84,7 +85,7 @@ def main(**kwargs): dir_test_names = [ name for _, name, _ in iter_modules( - path=["scripts/" + directory], prefix=directory + "." + path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", directory)], prefix=directory + "." ) ] test_names.extend(dir_test_names) @@ -100,7 +101,7 @@ def main(**kwargs): dir_test_names = [ name for _, name, _ in iter_modules( - path=["scripts/" + test], prefix=test + "." + path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", test)], prefix=test + "." 
) ] test_names.extend(dir_test_names) diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index a14419a..e7f0187 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -26,7 +26,7 @@ # Global variables current_dir = os.getcwd() -artemis_dir = os.path.abspath(os.path.join(current_dir, "..")) +artemis_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..")) artemis_executable = os.path.join(artemis_dir, "build", "src", "artemis") artemis_inputs_dir = os.path.join(artemis_dir, "inputs") artemis_fig_dir = "./figs/" @@ -34,7 +34,7 @@ # Create run directory for this invocation of the test framework now = datetime.datetime.now() run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) -run_directory = os.path.join(current_dir, run_directory_name) +run_directory = os.path.join(artemis_dir, "tst", run_directory_name) os.makedirs(run_directory, exist_ok=True) @@ -42,6 +42,10 @@ def get_run_directory(): return run_directory +# Provide base directory of artemis source tree +def get_source_directory(): + return artemis_dir + # Function for compiling Artemis def make(cmake_args, make_nproc): @@ -52,7 +56,7 @@ def make(cmake_args, make_nproc): subprocess.check_call(["mkdir", "build"], stdout=out_log) build_dir = current_dir + "/build/" os.chdir(build_dir) - cmake_command = ["cmake", "../" + artemis_rel_path] + cmake_args + cmake_command = ["cmake", artemis_dir] + cmake_args make_command = ["make", "-j" + str(make_nproc)] try: t0 = timer() From 6656fee05b6ce1e0966a4b63ef1657e278f98b69 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Mon, 11 Nov 2024 13:45:55 -0700 Subject: [PATCH 30/56] Cleanup, move file --- run_local_ci.py => tst/run_local_ci.py | 4 ---- 1 file changed, 4 deletions(-) rename run_local_ci.py => tst/run_local_ci.py (97%) diff --git a/run_local_ci.py b/tst/run_local_ci.py similarity index 97% rename from run_local_ci.py rename to tst/run_local_ci.py index 9ff1404..674483c 
100755 --- a/run_local_ci.py +++ b/tst/run_local_ci.py @@ -22,10 +22,6 @@ import tempfile import shutil -# Replace with your GitHub username and repository name -# GITHUB_USER = '' -# GITHUB_REPO = '' - # The personal access token (PAT) with 'repo:status' permission # Store your token securely and do not hardcode it in the script GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") From 824b47e6b67f02c62f6c043276c54b61c99489d5 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 08:22:21 -0700 Subject: [PATCH 31/56] shifting to darwin --- temp_ci_script.slurm | 33 ++++++++++ tst/ci_runner.py | 136 ++++++++++++++++++++++++++++++++++++++++ tst/ci_runner.slurm | 33 ++++++++++ tst/launch_ci_runner.py | 58 +++++++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 temp_ci_script.slurm create mode 100755 tst/ci_runner.py create mode 100644 tst/ci_runner.slurm create mode 100755 tst/launch_ci_runner.py diff --git a/temp_ci_script.slurm b/temp_ci_script.slurm new file mode 100644 index 0000000..efe8592 --- /dev/null +++ b/temp_ci_script.slurm @@ -0,0 +1,33 @@ +#!/bin/bash +#SBATCH -A t24_ngpfc_g +#SBATCH --job-name=artemis_ci_chicoma_gpu_job +#SBATCH --output=ci_gpu_%j.out +#SBATCH --error=ci_gpu_%j.err +#SBATCH --time=00:10:00 +#SBATCH -N 1 +#SBATCH -p gpu +#SBATCH --qos=standard +#SBATCH -C gpu40 + +# Node options +#SBATCH --tasks-per-node=4 +#SBATCH --exclusive +#SBATCH --mem=0 + +# Set artemis path + +# Check github token +echo "Github token:" +echo $ARTEMIS_GITHUB_TOKEN + +# Load environment +source env/bash + +# Ensure GITHUB_TOKEN is set +#export GITHUB_TOKEN= # Or source it from a secure file + +module list + +# Run the CI Python script +#srun python3 ci_runner.py:w + diff --git a/tst/ci_runner.py b/tst/ci_runner.py new file mode 100755 index 0000000..289038b --- /dev/null +++ b/tst/ci_runner.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +# ======================================================================================== +# (C) (or 
copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. +# ======================================================================================== + +# This file was created in part or in whole by one of OpenAI's generative AI models + +import socket +import fnmatch +import os +import subprocess +import requests +import sys + +# The personal access token (PAT) with 'repo:status' permission +# Store your token securely and do not hardcode it in the script +GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") + +def get_pr_info(pr_number): + url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} + response = requests.get(url, headers=headers) + if response.status_code != 200: + print(f"Error fetching PR info: {response.status_code}") + print(response.text) + sys.exit(1) + return response.json() + +def update_status( + commit_sha, state, description, context="Continuous Integration / chicoma-gpu" +): + url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} + data = {"state": state, "description": description, "context": context} + response = requests.post(url, headers=headers, data=json.dumps(data)) + if response.status_code 
!= 201: + print(f"Error setting status: {response.status_code}") + print(response.text) + sys.exit(1) + +#def run_tests(): +# try: +# subprocess.run(['python3', 'run_tests.py', 'regression.suite', +# '--save_build', '--make_nproc=4', +# '--cmake=-DCMAKE_C_COMPILER=gcc', +# '--cmake=-DCMAKE_CXX_COMPILER=g++', +# '--log_file=ci_cpu_log.txt'], check=True) +# return True +# except subprocess.CalledProcessError: +# return False +def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + current_dir = os.getcwd() + + # Create a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + print(f"Using temporary directory: {temp_dir}") + + # Clone the repository into the temporary directory + subprocess.run(["git", "clone", head_repo, temp_dir], check=True) + os.chdir(temp_dir) + + # Checkout the PR branch + subprocess.run(["git", "fetch", "origin", head_ref], check=True) + subprocess.run(["git", "checkout", head_ref], check=True) + + # Update submodules + subprocess.run( + ["git", "submodule", "update", "--init", "--recursive"], check=True + ) + + # Run the tests + try: + os.chdir(os.path.join(temp_dir, "tst")) + build_dir = os.path.join(temp_dir, "build") + test_command = [ + "bash", + "-c", + "source ../env/bash && build_artemis -b " + + build_dir + + " -j 4 -f && cd " + os.path.join(temp_dir, "tst") + " && python3 run_tests.py regression.suite " + "--exe " + os.path.join(build_dir, "src", "artemis") + " " + "--log_file=ci_cpu_log.txt", + ] + #print(test_command) + subprocess.run(test_command, check=True) + return True + # Update the status to success + #update_status(commit_sha, "success", "All tests passed.") + #print("Tests passed.") + except subprocess.CalledProcessError: + return False + # Update the status to failure + #update_status(commit_sha, "failure", "Tests failed.") + #print("Tests failed.") + #sys.exit(1) + +def main(): + if len(sys.argv) != 3: + print("Usage: ci_runner.py [PR number] [checkout dir]") + sys.exit(1) + + pr_number = 
sys.argv[1] + + # Check that we are on the right system + #hostname = socket.gethostname() + #if not fnmatch.fnmatch(hostname, "ch-fe*"): + # print("ERROR script must be run from Chicoma frontend node!") + # sys.exit(1) + + # Fetch PR information + pr_info = get_pr_info(pr_number) + head_repo = pr_info["head"]["repo"]["clone_url"] + head_ref = pr_info["head"]["ref"] + commit_sha = pr_info["head"]["sha"] + + update_status(commit_sha, 'pending', 'CI Slurm job running...') + + # Run the tests in a temporary directory + test_success = run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) + + if test_success: + update_status(commit_sha, 'success', 'All tests passed.') + else: + update_status(commit_sha, 'failure', 'Tests failed.') + +if __name__ == '__main__': + main() diff --git a/tst/ci_runner.slurm b/tst/ci_runner.slurm new file mode 100644 index 0000000..bd814ef --- /dev/null +++ b/tst/ci_runner.slurm @@ -0,0 +1,33 @@ +#!/bin/bash +#SBATCH -A t24_ngpfc_g +#SBATCH --job-name=artemis_ci_chicoma_gpu_job +#SBATCH --output=ci_gpu_%j.out +#SBATCH --error=ci_gpu_%j.err +#SBATCH --time=00:10:00 +#SBATCH -N 1 +#SBATCH -p gpu +#SBATCH --qos=standard +#SBATCH -C gpu40 + +# Node options +#SBATCH --tasks-per-node=4 +#SBATCH --exclusive +#SBATCH --mem=0 + +# Set artemis path + +# Check github token +echo "Github token:" +echo $ARTEMIS_GITHUB_TOKEN + +# Load environment +#source env/bash + +# Ensure GITHUB_TOKEN is set +#export GITHUB_TOKEN= # Or source it from a secure file + +#module list + +# Run the CI Python script +srun python3 ci_runner.py + diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py new file mode 100755 index 0000000..4ebee11 --- /dev/null +++ b/tst/launch_ci_runner.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# ======================================================================================== +# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. +# +# This program was produced under U.S. 
Government contract 89233218CNA000001 for Los +# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +# for the U.S. Department of Energy/National Nuclear Security Administration. All rights +# in the program are reserved by Triad National Security, LLC, and the U.S. Department +# of Energy/National Nuclear Security Administration. The Government is granted for +# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +# license in this material to reproduce, prepare derivative works, distribute copies to +# the public, perform publicly and display publicly, and to permit others to do so. +# ======================================================================================== + +# This file was created in part or in whole by one of OpenAI's generative AI models + +import subprocess +import os +import sys +import subprocess + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: ci_runner.py [PR number]") + sys.exit(1) + pr_number = sys.argv[1] + + sbatch_command = [ + 'sbatch', + '-A', 't24_ngpfc_g', + '--job-name=ci_gpu_job', + '--output=ci_gpu_job.out', + '--error=ci_gpu_job.err', + '--time=00:10:00', + '-N', '1', + '-p', 'gpu', + '--qos=standard', + '-C', 'gpu40', + '--tasks-per-node=4', + '--exclusive', + '--mem=0', + '--wrap', # Wraps the following command as a single string + f'python3 ci_runner.py {pr_number}' + ] + print(sbatch_command) + + # Execute the sbatch command + result = subprocess.run(sbatch_command, stdout=subprocess.PIPE, check=True) + + # Print the job ID + print(result) + for line in result.stdout.splitlines(): + if "Submitted batch job" in line: + job_id = line.split()[-1] + print(f"Job submitted with ID: {job_id}") + + #raise RuntimeError("Failed to submit Slurm job.") + From 918608a0b986ed0e4110ed73d058f188fec9a61d Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 08:51:13 -0700 Subject: [PATCH 32/56] Move old files --- tst/ci_runner.py | 43 
+++++++----- tst/ci_runner.slurm | 33 --------- tst/launch_ci_runner.py | 61 ++++++++++------- tst/run_local_ci.py | 129 ----------------------------------- tst/run_tests.py | 29 ++++++-- tst/scripts/utils/artemis.py | 5 +- 6 files changed, 93 insertions(+), 207 deletions(-) delete mode 100644 tst/ci_runner.slurm delete mode 100755 tst/run_local_ci.py diff --git a/tst/ci_runner.py b/tst/ci_runner.py index 289038b..fe16520 100755 --- a/tst/ci_runner.py +++ b/tst/ci_runner.py @@ -20,11 +20,14 @@ import subprocess import requests import sys +import json +import tempfile # The personal access token (PAT) with 'repo:status' permission # Store your token securely and do not hardcode it in the script GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") + def get_pr_info(pr_number): url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" headers = {"Authorization": f"token {GITHUB_TOKEN}"} @@ -35,8 +38,9 @@ def get_pr_info(pr_number): sys.exit(1) return response.json() + def update_status( - commit_sha, state, description, context="Continuous Integration / chicoma-gpu" + commit_sha, state, description, context="Continuous Integration / darwin_volta-x86" ): url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" headers = {"Authorization": f"token {GITHUB_TOKEN}"} @@ -47,7 +51,8 @@ def update_status( print(response.text) sys.exit(1) -#def run_tests(): + +# def run_tests(): # try: # subprocess.run(['python3', 'run_tests.py', 'regression.suite', # '--save_build', '--make_nproc=4', @@ -86,33 +91,36 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): "-c", "source ../env/bash && build_artemis -b " + build_dir - + " -j 4 -f && cd " + os.path.join(temp_dir, "tst") + " && python3 run_tests.py regression.suite " + + " -j 4 -f && cd " + + os.path.join(temp_dir, "tst") + + " && python3 run_tests.py regression.suite " "--exe " + os.path.join(build_dir, "src", "artemis") + " " "--log_file=ci_cpu_log.txt", ] - #print(test_command) + # 
print(test_command) subprocess.run(test_command, check=True) return True # Update the status to success - #update_status(commit_sha, "success", "All tests passed.") - #print("Tests passed.") + # update_status(commit_sha, "success", "All tests passed.") + # print("Tests passed.") except subprocess.CalledProcessError: return False # Update the status to failure - #update_status(commit_sha, "failure", "Tests failed.") - #print("Tests failed.") - #sys.exit(1) + # update_status(commit_sha, "failure", "Tests failed.") + # print("Tests failed.") + # sys.exit(1) + def main(): - if len(sys.argv) != 3: - print("Usage: ci_runner.py [PR number] [checkout dir]") + if len(sys.argv) != 2: + print("Usage: ci_runner.py [PR number]") sys.exit(1) pr_number = sys.argv[1] # Check that we are on the right system - #hostname = socket.gethostname() - #if not fnmatch.fnmatch(hostname, "ch-fe*"): + # hostname = socket.gethostname() + # if not fnmatch.fnmatch(hostname, "ch-fe*"): # print("ERROR script must be run from Chicoma frontend node!") # sys.exit(1) @@ -122,15 +130,16 @@ def main(): head_ref = pr_info["head"]["ref"] commit_sha = pr_info["head"]["sha"] - update_status(commit_sha, 'pending', 'CI Slurm job running...') + update_status(commit_sha, "pending", "CI Slurm job running...") # Run the tests in a temporary directory test_success = run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) if test_success: - update_status(commit_sha, 'success', 'All tests passed.') + update_status(commit_sha, "success", "All tests passed.") else: - update_status(commit_sha, 'failure', 'Tests failed.') + update_status(commit_sha, "failure", "Tests failed.") + -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/tst/ci_runner.slurm b/tst/ci_runner.slurm deleted file mode 100644 index bd814ef..0000000 --- a/tst/ci_runner.slurm +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH -A t24_ngpfc_g -#SBATCH --job-name=artemis_ci_chicoma_gpu_job -#SBATCH --output=ci_gpu_%j.out 
-#SBATCH --error=ci_gpu_%j.err -#SBATCH --time=00:10:00 -#SBATCH -N 1 -#SBATCH -p gpu -#SBATCH --qos=standard -#SBATCH -C gpu40 - -# Node options -#SBATCH --tasks-per-node=4 -#SBATCH --exclusive -#SBATCH --mem=0 - -# Set artemis path - -# Check github token -echo "Github token:" -echo $ARTEMIS_GITHUB_TOKEN - -# Load environment -#source env/bash - -# Ensure GITHUB_TOKEN is set -#export GITHUB_TOKEN= # Or source it from a secure file - -#module list - -# Run the CI Python script -srun python3 ci_runner.py - diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 4ebee11..20335f8 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -15,6 +15,8 @@ # This file was created in part or in whole by one of OpenAI's generative AI models import subprocess +import socket +import fnmatch import os import sys import subprocess @@ -25,34 +27,47 @@ sys.exit(1) pr_number = sys.argv[1] + # Check that we are on the right system + hostname = socket.gethostname() + if not fnmatch.fnmatch(hostname, "darwin-fe*"): + print("ERROR script must be run from Darwin frontend node!") + sys.exit(1) + + # sbatch_command = [ + # 'sbatch', + # '-A', 't24_ngpfc_g', + # '--job-name=ci_gpu_job', + # '--output=ci_gpu_job.out', + # '--error=ci_gpu_job.err', + # '--time=00:10:00', + # '-N', '1', + # '-p', 'gpu', + # '--qos=standard', + # '-C', 'gpu40', + # '--tasks-per-node=4', + # '--exclusive', + # '--mem=0', + # '--wrap', # Wraps the following command as a single string + # f'python3 ci_runner.py {pr_number}' + # ] + # print(sbatch_command) sbatch_command = [ - 'sbatch', - '-A', 't24_ngpfc_g', - '--job-name=ci_gpu_job', - '--output=ci_gpu_job.out', - '--error=ci_gpu_job.err', - '--time=00:10:00', - '-N', '1', - '-p', 'gpu', - '--qos=standard', - '-C', 'gpu40', - '--tasks-per-node=4', - '--exclusive', - '--mem=0', - '--wrap', # Wraps the following command as a single string - f'python3 ci_runner.py {pr_number}' + "sbatch", + "--job-name=artemis_ci_darwin_volta-x86", + 
"--partition=volta-x86", + "--time=04:00:00", + "--wrap", + f"python3 ci_runner.py {pr_number}", ] - print(sbatch_command) # Execute the sbatch command result = subprocess.run(sbatch_command, stdout=subprocess.PIPE, check=True) # Print the job ID - print(result) - for line in result.stdout.splitlines(): - if "Submitted batch job" in line: - job_id = line.split()[-1] - print(f"Job submitted with ID: {job_id}") - - #raise RuntimeError("Failed to submit Slurm job.") + # print(result) + # for line in result.stdout.splitlines(): + # if "Submitted batch job" in line: + # job_id = line.split()[-1] + # print(f"Job submitted with ID: {job_id}") + # raise RuntimeError("Failed to submit Slurm job.") diff --git a/tst/run_local_ci.py b/tst/run_local_ci.py deleted file mode 100755 index 674483c..0000000 --- a/tst/run_local_ci.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python3 -# ======================================================================================== -# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. -# -# This program was produced under U.S. Government contract 89233218CNA000001 for Los -# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -# for the U.S. Department of Energy/National Nuclear Security Administration. All rights -# in the program are reserved by Triad National Security, LLC, and the U.S. Department -# of Energy/National Nuclear Security Administration. The Government is granted for -# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -# license in this material to reproduce, prepare derivative works, distribute copies to -# the public, perform publicly and display publicly, and to permit others to do so. 
-# ======================================================================================== - -# This file was created in part or in whole by one of OpenAI's generative AI models - -import sys -import os -import subprocess -import requests -import json -import tempfile -import shutil - -# The personal access token (PAT) with 'repo:status' permission -# Store your token securely and do not hardcode it in the script -GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") - -if GITHUB_TOKEN is None: - print("Error: GITHUB_TOKEN environment variable is not set.") - sys.exit(1) - - -def get_pr_info(pr_number): - url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - response = requests.get(url, headers=headers) - if response.status_code != 200: - print(f"Error fetching PR info: {response.status_code}") - print(response.text) - sys.exit(1) - return response.json() - - -def update_status( - commit_sha, state, description, context="Continuous Integration / chicoma-gpu" -): - url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - data = {"state": state, "description": description, "context": context} - response = requests.post(url, headers=headers, data=json.dumps(data)) - if response.status_code != 201: - print(f"Error setting status: {response.status_code}") - print(response.text) - sys.exit(1) - - -def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): - current_dir = os.getcwd() - - # Create a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - print(f"Using temporary directory: {temp_dir}") - - # Clone the repository into the temporary directory - subprocess.run(["git", "clone", head_repo, temp_dir], check=True) - os.chdir(temp_dir) - - # Checkout the PR branch - subprocess.run(["git", "fetch", "origin", head_ref], check=True) - subprocess.run(["git", "checkout", head_ref], check=True) - - # 
Update submodules - subprocess.run( - ["git", "submodule", "update", "--init", "--recursive"], check=True - ) - - # Run the tests - try: - os.chdir(os.path.join(temp_dir, "tst")) - build_dir = os.path.join(temp_dir, "build") - test_command = [ - "bash", - "-c", - "source ../env/bash && build_artemis -b " - + build_dir - + " -j 4 -f && cd " + os.path.join(temp_dir, "tst") + " && python3 run_tests.py regression.suite " - "--exe " + os.path.join(build_dir, "src", "artemis") + " " - "--log_file=ci_cpu_log.txt", - ] - print(test_command) - subprocess.run(test_command, check=True) - # Update the status to success - update_status(commit_sha, "success", "All tests passed.") - print("Tests passed.") - except subprocess.CalledProcessError: - # Update the status to failure - update_status(commit_sha, "failure", "Tests failed.") - print("Tests failed.") - sys.exit(1) - - -def main(): - if len(sys.argv) != 2: - print("Usage: run_ci.py [PR number]") - sys.exit(1) - - pr_number = sys.argv[1] - - # Fetch PR information - pr_info = get_pr_info(pr_number) - head_repo = pr_info["head"]["repo"]["clone_url"] - head_ref = pr_info["head"]["ref"] - commit_sha = pr_info["head"]["sha"] - - print(f"PR #{pr_number} info:") - print(f"- Repository: {head_repo}") - print(f"- Branch: {head_ref}") - print(f"- Commit SHA: {commit_sha}") - - # Update status to 'pending' - update_status(commit_sha, "pending", "CI tests are running...") - - # Run the tests in a temporary directory - run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) - - -if __name__ == "__main__": - main() diff --git a/tst/run_tests.py b/tst/run_tests.py index 8cd96e6..fc55cd5 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -66,8 +66,16 @@ def process_suite(filename): dir_test_names = [ name for _, name, _ in iter_modules( - #path=["scripts/" + test_name], prefix=test_name + "." - path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", test_name)], prefix=test_name + "." 
+ # path=["scripts/" + test_name], prefix=test_name + "." + path=[ + os.path.join( + artemis.get_source_directory(), + "tst", + "scripts", + test_name, + ) + ], + prefix=test_name + ".", ) ] tests += dir_test_names @@ -85,7 +93,15 @@ def main(**kwargs): dir_test_names = [ name for _, name, _ in iter_modules( - path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", directory)], prefix=directory + "." + path=[ + os.path.join( + artemis.get_source_directory(), + "tst", + "scripts", + directory, + ) + ], + prefix=directory + ".", ) ] test_names.extend(dir_test_names) @@ -101,7 +117,12 @@ def main(**kwargs): dir_test_names = [ name for _, name, _ in iter_modules( - path=[os.path.join(artemis.get_source_directory(), "tst", "scripts", test)], prefix=test + "." + path=[ + os.path.join( + artemis.get_source_directory(), "tst", "scripts", test + ) + ], + prefix=test + ".", ) ] test_names.extend(dir_test_names) diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index e7f0187..60d2f2e 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -26,7 +26,9 @@ # Global variables current_dir = os.getcwd() -artemis_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..")) +artemis_dir = os.path.abspath( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..") +) artemis_executable = os.path.join(artemis_dir, "build", "src", "artemis") artemis_inputs_dir = os.path.join(artemis_dir, "inputs") artemis_fig_dir = "./figs/" @@ -42,6 +44,7 @@ def get_run_directory(): return run_directory + # Provide base directory of artemis source tree def get_source_directory(): return artemis_dir From 56435b5e342e5d109dddf724f611406ecea367a2 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 11:05:34 -0700 Subject: [PATCH 33/56] formatting, clean up CI script --- .github/PULL_REQUEST_TEMPLATE.md | 9 + .github/workflows/ci.yml | 5 + README.md | 12 +- tst/ci_runner.py | 145 
-------------- tst/launch_ci_runner.py | 212 ++++++++++++++++----- tst/scripts/advection/advection.py | 9 +- tst/scripts/diffusion/viscous_diffusion.py | 2 +- tst/scripts/disk/disk.py | 3 +- tst/scripts/hydro/linwave.py | 9 +- tst/suites/parallel.suite | 1 - 10 files changed, 202 insertions(+), 205 deletions(-) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md delete mode 100755 tst/ci_runner.py diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..a7df87b --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,9 @@ +## Background + +## Description of Changes + +## Checklist + +- [ ] New features are documented +- [ ] Tests added for bug fixes and new features +- [ ] (@lanl.gov employees) Update copyright on changed files diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3dbefb5..4bdd6db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,11 @@ on: pull_request: types: [opened, synchronize, reopened] +# Cancel outdated workflows +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + jobs: format: if: > diff --git a/README.md b/README.md index 82482f0..5b2a477 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,6 @@ Currently supported computers/partitions are: skylake-gold volta-x86 (gpu) - power9-rhel7 (gpu) # Installation @@ -97,6 +96,17 @@ with an empty commit, do git commit --allow-empty -m "trigger pipeline" && git push +A portion of the CI is run on LANL's internal Darwin platform. To launch this CI job, someone with +Darwin access (usually a LANL employee) must first create a Github Personal Access Token and store +it securely in their own environment as `ARTEMIS_GITHUB_TOKEN`, e.g. 
in their `~/.bashrc`: + + export ARTEMIS_GITHUB_TOKEN=[token] + +and then log in to Darwin and manually launch the CI runner: + + cd artemis + ./tst/launcher_ci_runner.py [Number of the github PR] + ## Release Artemis is released under the BSD 3-Clause License. For more details see the LICENSE.md diff --git a/tst/ci_runner.py b/tst/ci_runner.py deleted file mode 100755 index fe16520..0000000 --- a/tst/ci_runner.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# ======================================================================================== -# (C) (or copyright) 2024. Triad National Security, LLC. All rights reserved. -# -# This program was produced under U.S. Government contract 89233218CNA000001 for Los -# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC -# for the U.S. Department of Energy/National Nuclear Security Administration. All rights -# in the program are reserved by Triad National Security, LLC, and the U.S. Department -# of Energy/National Nuclear Security Administration. The Government is granted for -# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide -# license in this material to reproduce, prepare derivative works, distribute copies to -# the public, perform publicly and display publicly, and to permit others to do so. 
-# ======================================================================================== - -# This file was created in part or in whole by one of OpenAI's generative AI models - -import socket -import fnmatch -import os -import subprocess -import requests -import sys -import json -import tempfile - -# The personal access token (PAT) with 'repo:status' permission -# Store your token securely and do not hardcode it in the script -GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") - - -def get_pr_info(pr_number): - url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - response = requests.get(url, headers=headers) - if response.status_code != 200: - print(f"Error fetching PR info: {response.status_code}") - print(response.text) - sys.exit(1) - return response.json() - - -def update_status( - commit_sha, state, description, context="Continuous Integration / darwin_volta-x86" -): - url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" - headers = {"Authorization": f"token {GITHUB_TOKEN}"} - data = {"state": state, "description": description, "context": context} - response = requests.post(url, headers=headers, data=json.dumps(data)) - if response.status_code != 201: - print(f"Error setting status: {response.status_code}") - print(response.text) - sys.exit(1) - - -# def run_tests(): -# try: -# subprocess.run(['python3', 'run_tests.py', 'regression.suite', -# '--save_build', '--make_nproc=4', -# '--cmake=-DCMAKE_C_COMPILER=gcc', -# '--cmake=-DCMAKE_CXX_COMPILER=g++', -# '--log_file=ci_cpu_log.txt'], check=True) -# return True -# except subprocess.CalledProcessError: -# return False -def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): - current_dir = os.getcwd() - - # Create a temporary directory - with tempfile.TemporaryDirectory() as temp_dir: - print(f"Using temporary directory: {temp_dir}") - - # Clone the repository into the temporary directory - 
subprocess.run(["git", "clone", head_repo, temp_dir], check=True) - os.chdir(temp_dir) - - # Checkout the PR branch - subprocess.run(["git", "fetch", "origin", head_ref], check=True) - subprocess.run(["git", "checkout", head_ref], check=True) - - # Update submodules - subprocess.run( - ["git", "submodule", "update", "--init", "--recursive"], check=True - ) - - # Run the tests - try: - os.chdir(os.path.join(temp_dir, "tst")) - build_dir = os.path.join(temp_dir, "build") - test_command = [ - "bash", - "-c", - "source ../env/bash && build_artemis -b " - + build_dir - + " -j 4 -f && cd " - + os.path.join(temp_dir, "tst") - + " && python3 run_tests.py regression.suite " - "--exe " + os.path.join(build_dir, "src", "artemis") + " " - "--log_file=ci_cpu_log.txt", - ] - # print(test_command) - subprocess.run(test_command, check=True) - return True - # Update the status to success - # update_status(commit_sha, "success", "All tests passed.") - # print("Tests passed.") - except subprocess.CalledProcessError: - return False - # Update the status to failure - # update_status(commit_sha, "failure", "Tests failed.") - # print("Tests failed.") - # sys.exit(1) - - -def main(): - if len(sys.argv) != 2: - print("Usage: ci_runner.py [PR number]") - sys.exit(1) - - pr_number = sys.argv[1] - - # Check that we are on the right system - # hostname = socket.gethostname() - # if not fnmatch.fnmatch(hostname, "ch-fe*"): - # print("ERROR script must be run from Chicoma frontend node!") - # sys.exit(1) - - # Fetch PR information - pr_info = get_pr_info(pr_number) - head_repo = pr_info["head"]["repo"]["clone_url"] - head_ref = pr_info["head"]["ref"] - commit_sha = pr_info["head"]["sha"] - - update_status(commit_sha, "pending", "CI Slurm job running...") - - # Run the tests in a temporary directory - test_success = run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha) - - if test_success: - update_status(commit_sha, "success", "All tests passed.") - else: - update_status(commit_sha, 
"failure", "Tests failed.") - - -if __name__ == "__main__": - main() diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 20335f8..acea4c1 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -18,56 +18,176 @@ import socket import fnmatch import os +import requests import sys +import json import subprocess +import argparse +import tempfile +import shlex -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: ci_runner.py [PR number]") +# The personal access token (PAT) with 'repo:status' permission +# Store your token securely and do not hardcode it in the script +GITHUB_TOKEN = os.environ.get("ARTEMIS_GITHUB_TOKEN") + + +def get_pr_info(pr_number): + url = f"https://api.github.com/repos/lanl/artemis/pulls/{pr_number}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} + response = requests.get(url, headers=headers) + if response.status_code != 200: + print(f"Error fetching PR info: {response.status_code}") + print(response.text) sys.exit(1) - pr_number = sys.argv[1] + return response.json() + - # Check that we are on the right system - hostname = socket.gethostname() - if not fnmatch.fnmatch(hostname, "darwin-fe*"): - print("ERROR script must be run from Darwin frontend node!") +def update_status( + commit_sha, state, description, context="Continuous Integration / darwin_volta-x86" +): + url = f"https://api.github.com/repos/lanl/artemis/statuses/{commit_sha}" + headers = {"Authorization": f"token {GITHUB_TOKEN}"} + data = {"state": state, "description": description, "context": context} + response = requests.post(url, headers=headers, data=json.dumps(data)) + if response.status_code != 201: + print(f"Error setting status: {response.status_code}") + print(response.text) sys.exit(1) - # sbatch_command = [ - # 'sbatch', - # '-A', 't24_ngpfc_g', - # '--job-name=ci_gpu_job', - # '--output=ci_gpu_job.out', - # '--error=ci_gpu_job.err', - # '--time=00:10:00', - # '-N', '1', - # '-p', 'gpu', - # '--qos=standard', - # '-C', 
'gpu40', - # '--tasks-per-node=4', - # '--exclusive', - # '--mem=0', - # '--wrap', # Wraps the following command as a single string - # f'python3 ci_runner.py {pr_number}' - # ] - # print(sbatch_command) - sbatch_command = [ - "sbatch", - "--job-name=artemis_ci_darwin_volta-x86", - "--partition=volta-x86", - "--time=04:00:00", - "--wrap", - f"python3 ci_runner.py {pr_number}", - ] - - # Execute the sbatch command - result = subprocess.run(sbatch_command, stdout=subprocess.PIPE, check=True) - - # Print the job ID - # print(result) - # for line in result.stdout.splitlines(): - # if "Submitted batch job" in line: - # job_id = line.split()[-1] - # print(f"Job submitted with ID: {job_id}") - - # raise RuntimeError("Failed to submit Slurm job.") + +def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + current_dir = os.getcwd() + + # Create a temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + print(f"Using temporary directory: {temp_dir}") + + # Clone the repository into the temporary directory + subprocess.run(["git", "clone", head_repo, temp_dir], check=True) + os.chdir(temp_dir) + + # Checkout the PR branch + subprocess.run(["git", "fetch", "origin", head_ref], check=True) + subprocess.run(["git", "checkout", head_ref], check=True) + + # Update submodules + subprocess.run( + ["git", "submodule", "update", "--init", "--recursive"], check=True + ) + + # Run the tests + try: + os.chdir(os.path.join(temp_dir, "tst")) + build_dir = os.path.join(temp_dir, "build") + + # Run subprocess command to compile code and launch run_tests.py + test_command = [ + "bash", + "-c", + "source ../env/bash && build_artemis -b " + + build_dir + + " -j 20 -f && cd " + + os.path.join(temp_dir, "tst") + + " && python3 run_tests.py gpu.suite " + "--exe " + os.path.join(build_dir, "src", "artemis") + " " + "--log_file=ci_cpu_log.txt", + ] + subprocess.run(test_command, check=True) + + # CI apparently succeeded; indicate that + return True + except 
subprocess.CalledProcessError: + # If CI failed, indicate that + return False + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Run CI tasks with optional Slurm submission." + ) + parser.add_argument( + "pr_number", type=int, help="Pull request number for the CI run." + ) + parser.add_argument( + "--submission", + action="store_true", + help="Flag to indicate the script is running as a Slurm submission job.", + ) + args = parser.parse_args() + + # Fetch PR information + pr_info = get_pr_info(args.pr_number) + head_repo = pr_info["head"]["repo"]["clone_url"] + head_ref = pr_info["head"]["ref"] + commit_sha = pr_info["head"]["sha"] + + if args.submission: + # Update github PR status to indicate we have begun testing + update_status(commit_sha, "pending", "CI Slurm job running...") + + # Run the tests in a temporary directory + test_success = run_tests_in_temp_dir( + args.pr_number, head_repo, head_ref, commit_sha + ) + + # Update github PR status to indicate that testing has concluded + if test_success: + update_status(commit_sha, "success", "All tests passed.") + else: + update_status(commit_sha, "failure", "Tests failed.") + else: + # Check that we are on the right system + hostname = socket.gethostname() + if not fnmatch.fnmatch(hostname, "darwin-fe*"): + print("ERROR script must be run from Darwin frontend node!") + sys.exit(1) + + # Execute the sbatch command + try: + # Submit batch job with ci_runner script that will checkout and build the code and run + # tests + job_name = f"artemis_ci_darwin_volta-x86_PR{args.pr_number}" + + # Clean up existing jobs for same PR + squeue_command = f"squeue --name={shlex.quote(job_name)} --user=$(whoami) --noheader --format=%i" + squeue_result = subprocess.run( + squeue_command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + + job_ids = squeue_result.stdout.strip().split() + print("Canceling jobs:") + for job_id in job_ids: + print(f" 
{job_id}") + + # Use scancel to cancel the jobs + scancel_command = ["scancel"] + job_ids + scancel_result = subprocess.run(scancel_command, universal_newlines=True) + + sbatch_command = [ + "sbatch", + f"--job-name={job_name}", + f"--output={job_name}_%j.out", + f"--error={job_name}_%j.err", + "--partition=volta-x86", + "--time=04:00:00", + "--wrap", + # f"python3 ci_runner.py {pr_number}", + f"python3 {sys.argv[0]} {args.pr_number} --submission", + ] + result = subprocess.run( + sbatch_command, + stdout=subprocess.PIPE, + check=True, + universal_newlines=True, + ) + print(result.stdout.strip()) + + # Update PR status that we have successfully submitted to SLURM job + update_status(commit_sha, "pending", "CI SLURM job submitted...") + except subprocess.CalledProcessError: + # Update PR status that we have failed to submit the SLURM job + update_status(commit_sha, "failure", "SLURM job submission failed.") diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 2d6436a..30baa77 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -96,10 +96,11 @@ def history_equiv(a, b, tol=1.0e-4): return True history_expected = [ - 1.00000e00, - 1.11612e-02, - 5.60000e01, - 1.60000e01, + 1.00000e00, # unused + -1e100, # unused + 1.11612e-02, # unused + 5.60000e01, # unused + 1.60000e01, # unused 6.75000e00, 2.25000e00, 4.50000e00, diff --git a/tst/scripts/diffusion/viscous_diffusion.py b/tst/scripts/diffusion/viscous_diffusion.py index 04b5267..579be17 100644 --- a/tst/scripts/diffusion/viscous_diffusion.py +++ b/tst/scripts/diffusion/viscous_diffusion.py @@ -96,7 +96,7 @@ def analyze(): yc = 0.5 * (y[1:] + y[:-1]) time0, x0, y0, z0, [dens0, u0, v0, w0, T0] = binary.load_level( - 0, dir="build/src", base=base + ".out1" + 0, dir=artemis.get_run_directory(), base=base + ".out1" ) vx3 = w[0, :] diff --git a/tst/scripts/disk/disk.py b/tst/scripts/disk/disk.py index f1340ee..1f97404 100644 --- 
a/tst/scripts/disk/disk.py +++ b/tst/scripts/disk/disk.py @@ -80,7 +80,8 @@ def run(**kwargs): g, int(10 * gam), b ), "problem/polytropic_index={:.2f}".format(gam), - ], + ] + + geom_args, restart="disk_{}_{:d}_{}.out2.final.rhdf".format( g, int(10 * gam), b ), diff --git a/tst/scripts/hydro/linwave.py b/tst/scripts/hydro/linwave.py index 54009eb..2952e7d 100644 --- a/tst/scripts/hydro/linwave.py +++ b/tst/scripts/hydro/linwave.py @@ -79,12 +79,9 @@ def analyze(): # sound waves. logger.debug("Analyzing test " + __name__) data = np.loadtxt( - os.path.join( - artemis.get_run_directory(), - _file_id + "-errs.dat", - dtype=np.float64, - ndmin=2, - ) + os.path.join(artemis.get_run_directory(), _file_id + "-errs.dat"), + dtype=np.float64, + ndmin=2, ) analyze_status = True if np.isnan(data).any(): diff --git a/tst/suites/parallel.suite b/tst/suites/parallel.suite index f2356db..75304c3 100644 --- a/tst/suites/parallel.suite +++ b/tst/suites/parallel.suite @@ -21,5 +21,4 @@ hydro/linwave_mpi ssheet/ssheet_mpi diffusion/viscous_diffusion_mpi diffusion/alpha_disk_mpi -diffusion/thermal_diffusion_mpi drag/drag_mpi From e6bd93859b3b2126e56a68a0aae28dc5f264da42 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 11:38:35 -0700 Subject: [PATCH 34/56] Try to fix logfile output --- tst/run_tests.py | 2 +- tst/scripts/advection/advection.py | 9 ++++----- tst/scripts/utils/artemis.py | 8 +++++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tst/run_tests.py b/tst/run_tests.py index fc55cd5..23be39b 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -272,7 +272,7 @@ def log_init(args): c_handler.setFormatter(logging.Formatter("%(message)s")) # only show msg logger.addHandler(c_handler) # setup log_file - log_fn = kwargs.pop("log_file") + log_fn = os.path.join(artemis.artemis_log_dir, kwargs.pop("log_file")) if log_fn: f_handler = logging.FileHandler(log_fn) f_handler.setLevel(0) # log everything diff --git a/tst/scripts/advection/advection.py 
b/tst/scripts/advection/advection.py index 30baa77..2d6436a 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -96,11 +96,10 @@ def history_equiv(a, b, tol=1.0e-4): return True history_expected = [ - 1.00000e00, # unused - -1e100, # unused - 1.11612e-02, # unused - 5.60000e01, # unused - 1.60000e01, # unused + 1.00000e00, + 1.11612e-02, + 5.60000e01, + 1.60000e01, 6.75000e00, 2.25000e00, 4.50000e00, diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index 60d2f2e..1289368 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -25,13 +25,15 @@ from .log_pipe import LogPipe # Global variables -current_dir = os.getcwd() +# current_dir = os.getcwd() artemis_dir = os.path.abspath( os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..") ) -artemis_executable = os.path.join(artemis_dir, "build", "src", "artemis") +artemis_executable = os.path.join(artemis_dir, "tst", "build", "src", "artemis") artemis_inputs_dir = os.path.join(artemis_dir, "inputs") -artemis_fig_dir = "./figs/" +# artemis_fig_dir = "./figs/" +artemis_fig_dir = os.path.join(artemis_dir, "tst", "figs") +artemis_log_dir = os.path.join(artemis_dir, "tst") # Create run directory for this invocation of the test framework now = datetime.datetime.now() From db2068bb0f95c1027aed41cbfd3fa09855720141 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 11:59:45 -0700 Subject: [PATCH 35/56] advection history output seems out of order -- lets test (also log artifact upload) --- env/bash | 1 - tst/suites/regression.suite | 2 +- tst/suites/serial.suite | 10 +++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/env/bash b/env/bash index 148158d..f69ad81 100644 --- a/env/bash +++ b/env/bash @@ -266,6 +266,5 @@ function build_artemis { make -j$BUILD_RANKS make_status=$? 
- echo "${ABS_BUILD_DIR}" return $make_status } diff --git a/tst/suites/regression.suite b/tst/suites/regression.suite index 84b9b0c..1dd139d 100644 --- a/tst/suites/regression.suite +++ b/tst/suites/regression.suite @@ -14,4 +14,4 @@ # regression suite serial.suite -parallel.suite +#parallel.suite diff --git a/tst/suites/serial.suite b/tst/suites/serial.suite index c89ae0c..5823dab 100644 --- a/tst/suites/serial.suite +++ b/tst/suites/serial.suite @@ -14,9 +14,9 @@ # serial suite advection/advection -coords/blast -disk/disk +#coords/blast +#disk/disk hydro/linwave -ssheet/ssheet -diffusion/alpha_disk -drag/drag +#ssheet/ssheet +#diffusion/alpha_disk +#drag/drag From 3bebfb03fe726541d3f3fb8a878693f721645392 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:03:52 -0700 Subject: [PATCH 36/56] Switch to cmake preset? --- .github/workflows/ci.yml | 2 +- temp_ci_script.slurm | 33 --------------------------------- 2 files changed, 1 insertion(+), 34 deletions(-) delete mode 100644 temp_ci_script.slurm diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4bdd6db..1b0a0b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,7 +70,7 @@ jobs: cd tst mkdir -p build cd build - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc ../../ + cmake --preset=cpu-release ../../ make -j 4 cd .. 
python3 run_tests.py regression.suite \ diff --git a/temp_ci_script.slurm b/temp_ci_script.slurm deleted file mode 100644 index efe8592..0000000 --- a/temp_ci_script.slurm +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH -A t24_ngpfc_g -#SBATCH --job-name=artemis_ci_chicoma_gpu_job -#SBATCH --output=ci_gpu_%j.out -#SBATCH --error=ci_gpu_%j.err -#SBATCH --time=00:10:00 -#SBATCH -N 1 -#SBATCH -p gpu -#SBATCH --qos=standard -#SBATCH -C gpu40 - -# Node options -#SBATCH --tasks-per-node=4 -#SBATCH --exclusive -#SBATCH --mem=0 - -# Set artemis path - -# Check github token -echo "Github token:" -echo $ARTEMIS_GITHUB_TOKEN - -# Load environment -source env/bash - -# Ensure GITHUB_TOKEN is set -#export GITHUB_TOKEN= # Or source it from a secure file - -module list - -# Run the CI Python script -#srun python3 ci_runner.py:w - From 88ff9bc8826169a833cd59dc8eb05e9ca5b9a409 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:12:37 -0700 Subject: [PATCH 37/56] Is this sufficient --- CMakePresets.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakePresets.json b/CMakePresets.json index 0499c29..e0773fb 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -10,6 +10,7 @@ "name": "cpu-debug", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "Debug", "Kokkos_ENABLE_DEBUG_BOUNDS_CHECK": "ON" } @@ -18,6 +19,7 @@ "name": "cpu-release", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, From f23eca785d81b057988cbadac8a0163a6eb62036 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:31:05 -0700 Subject: [PATCH 38/56] OK also provide C compiler by default --- CMakePresets.json | 2 ++ tst/launch_ci_runner.py | 17 +++++++++-------- tst/suites/gpu.suite | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index e0773fb..46502c2 100644 --- 
a/CMakePresets.json +++ b/CMakePresets.json @@ -10,6 +10,7 @@ "name": "cpu-debug", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_C_COMPILER": "gcc", "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "Debug", "Kokkos_ENABLE_DEBUG_BOUNDS_CHECK": "ON" @@ -19,6 +20,7 @@ "name": "cpu-release", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_C_COMPILER": "gcc", "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "RelWithDebInfo" } diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index acea4c1..a9bf04b 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -159,28 +159,29 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): ) job_ids = squeue_result.stdout.strip().split() - print("Canceling jobs:") - for job_id in job_ids: - print(f" {job_id}") + if len(job_ids) >= 1: + print("Canceling jobs:") + for job_id in job_ids: + print(f" {job_id}") - # Use scancel to cancel the jobs - scancel_command = ["scancel"] + job_ids - scancel_result = subprocess.run(scancel_command, universal_newlines=True) + # Use scancel to cancel the jobs + scancel_command = ["scancel"] + job_ids + scancel_result = subprocess.run(scancel_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) sbatch_command = [ "sbatch", f"--job-name={job_name}", f"--output={job_name}_%j.out", - f"--error={job_name}_%j.err", + f"--error={job_name}_%j.out", "--partition=volta-x86", "--time=04:00:00", "--wrap", - # f"python3 ci_runner.py {pr_number}", f"python3 {sys.argv[0]} {args.pr_number} --submission", ] result = subprocess.run( sbatch_command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, check=True, universal_newlines=True, ) diff --git a/tst/suites/gpu.suite b/tst/suites/gpu.suite index 93d32a1..9d18b97 100644 --- a/tst/suites/gpu.suite +++ b/tst/suites/gpu.suite @@ -19,4 +19,4 @@ binary/binary binary_adi/binary_adi nbody/nbody diffusion/viscous_diffusion -diffusion/thermal_diffusion 
+#diffusion/thermal_diffusion From ec7991d5227c27ccd873ae16476daa9a5378d151 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:36:16 -0700 Subject: [PATCH 39/56] default to make for github CI --- CMakeLists.txt | 5 +++++ CMakePresets.json | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40c1035..5499327 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,11 @@ project(artemis LANGUAGES C CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS On) +# Default to make +if not (MAKE_PROGRAM) + find_program(MAKE_PROGRAM names make) +endif() + # Options option(ARTEMIS_ENABLE_CUDA "Enable cuda for artemis and all dependencies" OFF) option(ARTEMIS_ENABLE_HDF5 "Enable HDF5 for artemis and all dependencies" ON) diff --git a/CMakePresets.json b/CMakePresets.json index 46502c2..0499c29 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -10,8 +10,6 @@ "name": "cpu-debug", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", - "CMAKE_C_COMPILER": "gcc", - "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "Debug", "Kokkos_ENABLE_DEBUG_BOUNDS_CHECK": "ON" } @@ -20,8 +18,6 @@ "name": "cpu-release", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", - "CMAKE_C_COMPILER": "gcc", - "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, From a985542e8ec274b12f235eb002f357777dafc945 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:39:41 -0700 Subject: [PATCH 40/56] cmake syntax --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5499327..b1401e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS On) # Default to make -if not (MAKE_PROGRAM) +if (NOT MAKE_PROGRAM) find_program(MAKE_PROGRAM names make) endif() From bd7ba7ccbec15becfd0cfafb362d23e9256e57a5 Mon Sep 17 00:00:00 2001 From: Ben 
Ryan Date: Tue, 12 Nov 2024 12:44:16 -0700 Subject: [PATCH 41/56] ok we actually do need these compiler flags apparently --- CMakePresets.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakePresets.json b/CMakePresets.json index 0499c29..46502c2 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -10,6 +10,8 @@ "name": "cpu-debug", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_C_COMPILER": "gcc", + "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "Debug", "Kokkos_ENABLE_DEBUG_BOUNDS_CHECK": "ON" } @@ -18,6 +20,8 @@ "name": "cpu-release", "cacheVariables": { "CMAKE_MAKE_PROGRAM": "$env{MAKE_PROGRAM}", + "CMAKE_C_COMPILER": "gcc", + "CMAKE_CXX_COMPILER": "g++", "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, From 31f726cd3e388963a34deb4129a529086a838480 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 12:46:40 -0700 Subject: [PATCH 42/56] trying again... --- .github/workflows/ci.yml | 2 ++ .github/workflows/nightly.yml | 2 ++ CMakeLists.txt | 5 ----- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1b0a0b1..85ee4ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,8 @@ jobs: ${{ !contains(github.event.pull_request.title, 'Draft:') && !contains(github.event.pull_request.title, 'WIP:') }} runs-on: ubuntu-latest + env: + MAKE_PROGRAM: ${{ env.MAKE_PROGRAM || 'make' }} steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index ce8e05f..73b0173 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -22,6 +22,8 @@ on: jobs: cpu: runs-on: ubuntu-latest + env: + MAKE_PROGRAM: ${{ env.MAKE_PROGRAM || 'make' }} steps: - uses: actions/checkout@v3 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index b1401e9..40c1035 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,11 +17,6 @@ project(artemis LANGUAGES C CXX) 
set(CMAKE_CXX_STANDARD 17) set(CMAKE_EXPORT_COMPILE_COMMANDS On) -# Default to make -if (NOT MAKE_PROGRAM) - find_program(MAKE_PROGRAM names make) -endif() - # Options option(ARTEMIS_ENABLE_CUDA "Enable cuda for artemis and all dependencies" OFF) option(ARTEMIS_ENABLE_HDF5 "Enable HDF5 for artemis and all dependencies" ON) From 2ecaedbbc3f03174eba84338d431ebdec290b717 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 14:55:02 -0700 Subject: [PATCH 43/56] Did env break the CI? --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85ee4ad..f387b4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,8 +52,6 @@ jobs: ${{ !contains(github.event.pull_request.title, 'Draft:') && !contains(github.event.pull_request.title, 'WIP:') }} runs-on: ubuntu-latest - env: - MAKE_PROGRAM: ${{ env.MAKE_PROGRAM || 'make' }} steps: - uses: actions/checkout@v3 with: @@ -69,6 +67,7 @@ jobs: sudo apt-get install -qq python3 python3-numpy python3-h5py python3-matplotlib - name: Run CPU tests run: | + export MAKE_PROGRAM=make cd tst mkdir -p build cd build From 3a89e1dc4dd56103ddf1f04d42d13fee87a31bdc Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 15:08:13 -0700 Subject: [PATCH 44/56] annoying nonetype issue --- tst/run_tests.py | 4 ++-- tst/scripts/utils/artemis.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tst/run_tests.py b/tst/run_tests.py index 23be39b..8c5e825 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -272,9 +272,9 @@ def log_init(args): c_handler.setFormatter(logging.Formatter("%(message)s")) # only show msg logger.addHandler(c_handler) # setup log_file - log_fn = os.path.join(artemis.artemis_log_dir, kwargs.pop("log_file")) + log_fn = kwargs.pop("log_file") if log_fn: - f_handler = logging.FileHandler(log_fn) + f_handler = logging.FileHandler(os.path.join(artemis.artemis_log_dir, log_fn)) 
f_handler.setLevel(0) # log everything f_format = logging.Formatter( "%(asctime)s|%(levelname)s" ":%(name)s: %(message)s" diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index 1289368..d2240f4 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -25,7 +25,7 @@ from .log_pipe import LogPipe # Global variables -# current_dir = os.getcwd() +current_dir = os.getcwd() artemis_dir = os.path.abspath( os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..") ) From 0426be3dac9c94a67d0c9681f832951c2ee02b06 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 15:39:31 -0700 Subject: [PATCH 45/56] Trying with two ranks... --- tst/scripts/advection/advection_mpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index 0e66c17..bed8976 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -24,7 +24,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = 4 +advection._nranks = 2 advection._file_id = "advection_mpi" From 0e3b88f91db1e2fd94e14d5440867f1557f83f94 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 16:10:28 -0700 Subject: [PATCH 46/56] OK explicitly turn off oversubscribe and see if that fixes it --- tst/launch_ci_runner.py | 7 ++++++- tst/scripts/utils/artemis.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index a9bf04b..2c47b40 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -166,7 +166,12 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): # Use scancel to cancel the jobs scancel_command = ["scancel"] + job_ids - scancel_result = subprocess.run(scancel_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + scancel_result = subprocess.run( + 
scancel_command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) sbatch_command = [ "sbatch", diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index d2240f4..9bc5bbd 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -88,7 +88,7 @@ def run(nproc, input_filename, arguments, restart=None): out_log = LogPipe("artemis.run", logging.INFO) # Build the run command - run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), artemis_executable] + run_command = ["mpiexec", "-n", str(nproc), artemis_executable] if restart is not None: run_command += ["-r", restart] input_filename_full = os.path.join(artemis_inputs_dir, input_filename) From cb3387bff4c3ddbc99965c347a106c52b7199721 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 16:34:48 -0700 Subject: [PATCH 47/56] Another try... sigh --- tst/scripts/advection/advection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 2d6436a..13de141 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -88,6 +88,11 @@ def analyze(): if history.shape != (44, 18): analyze_status = False history_line = history[-1] + # DEBUG + print("OK what is in this file") + with open(os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst"), 'r') as file: + contents = file.read() + print(contents) def history_equiv(a, b, tol=1.0e-4): if 2.0 * (np.fabs(a - b)) / (np.fabs(a) + np.fabs(b)) > tol: From 91591ebd307a7cf15803db142451f5176f07c5e1 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 20:07:05 -0700 Subject: [PATCH 48/56] Merge into develop, fix advection, back to oversubscribe --- tst/launch_ci_runner.py | 3 +- tst/scripts/advection/advection.py | 78 +++++++++++++------------- tst/scripts/advection/advection_mpi.py | 2 +- tst/scripts/utils/artemis.py | 2 +- tst/suites/regression.suite | 2 +- 
tst/suites/serial.suite | 10 ++-- 6 files changed, 47 insertions(+), 50 deletions(-) diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 2c47b40..9d7d96f 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -67,8 +67,7 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): os.chdir(temp_dir) # Checkout the PR branch - subprocess.run(["git", "fetch", "origin", head_ref], check=True) - subprocess.run(["git", "checkout", head_ref], check=True) + subprocess.run(["git", "pull", "--no-rebase", "origin", head_ref], check=True) # Update submodules subprocess.run( diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 13de141..43c9d6d 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -20,6 +20,9 @@ import numpy as np import os import scripts.utils.artemis as artemis +import sys +sys.path.append(os.path.join(artemis.artemis_dir, "analysis")) +from ahistory import ahistory logger = logging.getLogger("artemis" + __name__[7:]) # set logger name @@ -77,22 +80,12 @@ def analyze(): dtype=np.float64, ndmin=2, ) - history = np.loadtxt( - os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") - ) + history = ahistory(os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst")) analyze_status = True - if np.isnan(data).any() or np.isnan(history).any(): + if np.isnan(data).any(): logger.warning("NaN encountered") analyze_status = False raise FloatingPointError("NaN encountered") - if history.shape != (44, 18): - analyze_status = False - history_line = history[-1] - # DEBUG - print("OK what is in this file") - with open(os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst"), 'r') as file: - contents = file.read() - print(contents) def history_equiv(a, b, tol=1.0e-4): if 2.0 * (np.fabs(a - b)) / (np.fabs(a) + np.fabs(b)) > tol: @@ -100,37 +93,42 @@ def history_equiv(a, b, tol=1.0e-4): else: return True - history_expected = [ - 
1.00000e00, - 1.11612e-02, - 5.60000e01, - 1.60000e01, - 6.75000e00, - 2.25000e00, - 4.50000e00, - 4.50000e00, - 9.45000e00, - 6.07500e00, - 6.75000e00, - 6.75000e00, - 2.25000e00, - -2.25000e00, - 4.50000e00, - -4.50000e00, - 4.50000e00, - -4.50000e00, - ] - if len(history_line) != len(history_expected): - print( - f"Number of history rows ({len(history_line)}) do not equal expectation ({len(history_expected)})!" - ) - analyze_status = False - for n, val in enumerate(history_expected): - if not history_equiv(history_line[n], val): + history_expected = { + "time": 1.0, + "dt": 1.11612e-02, + "cycle": 56, + "nbtotal": 16, + "gas_mass_0": 6.75, + "gas_momentum_x1_0": 2.25, + "gas_momentum_x2_0": 4.5, + "gas_momentum_x3_0": 4.5, + "gas_energy_0": 9.45, + "gas_internal_energy_0": 6.075, + "dust_mass_0": 6.75, + "dust_mass_1": 6.75, + "dust_momentum_x1_0": 2.25, + "dust_momentum_x1_1": -2.25, + "dust_momentum_x2_0": 4.5, + "dust_momentum_x2_1": -4.5, + "dust_momentum_x3_0": 4.5, + "dust_momentum_x3_1": -4.5 + } + + for key in history_expected.keys(): + values = history.Get(key) + if len(values) != 11: + analyze_status=False + for value in values: + if np.isnan(value): + logger.warning("NaN encountered") + analyze_status = False + raise FloatingPointError("NaN encountered") + if not history_equiv(values[-1], history_expected[key]): print( - f"History entry {n} = {history_line[n]} does not match expectation = {val}!" + f"History entry {key} = {values[-1]} does not match expectation = {history_expected[key]}!" 
) analyze_status = False + data = data.reshape([len(_int), len(_recon), len(_flux), 2, data.shape[-1]]) for ii, iv in enumerate(_int): for ri, rv in enumerate(_recon): diff --git a/tst/scripts/advection/advection_mpi.py b/tst/scripts/advection/advection_mpi.py index bed8976..0e66c17 100644 --- a/tst/scripts/advection/advection_mpi.py +++ b/tst/scripts/advection/advection_mpi.py @@ -24,7 +24,7 @@ logger = logging.getLogger("artemis" + __name__[7:]) # set logger name -advection._nranks = 2 +advection._nranks = 4 advection._file_id = "advection_mpi" diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index 9bc5bbd..d2240f4 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -88,7 +88,7 @@ def run(nproc, input_filename, arguments, restart=None): out_log = LogPipe("artemis.run", logging.INFO) # Build the run command - run_command = ["mpiexec", "-n", str(nproc), artemis_executable] + run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), artemis_executable] if restart is not None: run_command += ["-r", restart] input_filename_full = os.path.join(artemis_inputs_dir, input_filename) diff --git a/tst/suites/regression.suite b/tst/suites/regression.suite index 1dd139d..84b9b0c 100644 --- a/tst/suites/regression.suite +++ b/tst/suites/regression.suite @@ -14,4 +14,4 @@ # regression suite serial.suite -#parallel.suite +parallel.suite diff --git a/tst/suites/serial.suite b/tst/suites/serial.suite index 5823dab..c89ae0c 100644 --- a/tst/suites/serial.suite +++ b/tst/suites/serial.suite @@ -14,9 +14,9 @@ # serial suite advection/advection -#coords/blast -#disk/disk +coords/blast +disk/disk hydro/linwave -#ssheet/ssheet -#diffusion/alpha_disk -#drag/drag +ssheet/ssheet +diffusion/alpha_disk +drag/drag From 49d8ac96be3951aeafbf2b32f54c453216d86995 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Tue, 12 Nov 2024 21:13:37 -0700 Subject: [PATCH 49/56] logfile issue --- tst/launch_ci_runner.py | 5 ++-- 
tst/scripts/advection/advection.py | 43 ++++++++++++++++-------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 9d7d96f..95f77c2 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -88,8 +88,9 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + " -j 20 -f && cd " + os.path.join(temp_dir, "tst") + " && python3 run_tests.py gpu.suite " - "--exe " + os.path.join(build_dir, "src", "artemis") + " " - "--log_file=ci_cpu_log.txt", + + "--exe " + + os.path.join(build_dir, "src", "artemis") + + " --log_file=ci_cpu_log.txt", ] subprocess.run(test_command, check=True) diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 43c9d6d..6578187 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -21,6 +21,7 @@ import os import scripts.utils.artemis as artemis import sys + sys.path.append(os.path.join(artemis.artemis_dir, "analysis")) from ahistory import ahistory @@ -80,7 +81,9 @@ def analyze(): dtype=np.float64, ndmin=2, ) - history = ahistory(os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst")) + history = ahistory( + os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") + ) analyze_status = True if np.isnan(data).any(): logger.warning("NaN encountered") @@ -94,30 +97,30 @@ def history_equiv(a, b, tol=1.0e-4): return True history_expected = { - "time": 1.0, - "dt": 1.11612e-02, - "cycle": 56, - "nbtotal": 16, - "gas_mass_0": 6.75, - "gas_momentum_x1_0": 2.25, - "gas_momentum_x2_0": 4.5, - "gas_momentum_x3_0": 4.5, - "gas_energy_0": 9.45, - "gas_internal_energy_0": 6.075, - "dust_mass_0": 6.75, - "dust_mass_1": 6.75, - "dust_momentum_x1_0": 2.25, - "dust_momentum_x1_1": -2.25, - "dust_momentum_x2_0": 4.5, - "dust_momentum_x2_1": -4.5, - "dust_momentum_x3_0": 4.5, - "dust_momentum_x3_1": -4.5 + "time": 1.0, + "dt": 1.11612e-02, + "cycle": 56, + "nbtotal": 16, + 
"gas_mass_0": 6.75, + "gas_momentum_x1_0": 2.25, + "gas_momentum_x2_0": 4.5, + "gas_momentum_x3_0": 4.5, + "gas_energy_0": 9.45, + "gas_internal_energy_0": 6.075, + "dust_mass_0": 6.75, + "dust_mass_1": 6.75, + "dust_momentum_x1_0": 2.25, + "dust_momentum_x1_1": -2.25, + "dust_momentum_x2_0": 4.5, + "dust_momentum_x2_1": -4.5, + "dust_momentum_x3_0": 4.5, + "dust_momentum_x3_1": -4.5, } for key in history_expected.keys(): values = history.Get(key) if len(values) != 11: - analyze_status=False + analyze_status = False for value in values: if np.isnan(value): logger.warning("NaN encountered") From b08b2cc79739cce06b0b4ddc972f9e4dc884fbb2 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 08:43:08 -0700 Subject: [PATCH 50/56] Added run_tests behaviors --- .github/workflows/ci.yml | 2 +- .github/workflows/nightly.yml | 2 +- tst/run_tests.py | 12 +++++++++--- tst/scripts/advection/advection.py | 8 ++++++-- tst/scripts/utils/artemis.py | 26 +++++++++++++++++++------- 5 files changed, 36 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f387b4f..fb593f8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: make -j 4 cd .. python3 run_tests.py regression.suite \ - --exe build/src/artemis + --exe build/src/artemis \ --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 73b0173..7609d8f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -52,7 +52,7 @@ jobs: make -j 4 cd .. python3 run_tests.py regression.suite \ - --exe build/src/artemis + --exe build/src/artemis \ --log_file=ci_cpu_log.txt - name: Upload CPU test log if: always() diff --git a/tst/run_tests.py b/tst/run_tests.py index 8c5e825..1cdf993 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -159,9 +159,7 @@ def main(**kwargs): if not deps_installed: logger.warning("WARNING! 
Not all required Python modules " "are available") - # Set the executable path if provided if artemis_exe_path is not None: - artemis.artemis_executable = os.path.abspath(artemis_exe_path) # Check that path is valid if not ( os.path.exists(artemis.artemis_executable) @@ -170,9 +168,17 @@ def main(**kwargs): logger.error("Exception occurred", exc_info=True) test_errors.append("make()") raise TestError('Provided executable "{artemis_exe_path}" not found!') + # Set the valid provided executable path + artemis.set_executable(os.path.abspath(artemis_exe_path)) + else: + # If we are in a directory with an executable, default to using that + local_path = os.path.join(os.getcwd(), "artemis") + if os.path.exists(local_path) and os.access(local_path, os.X_OK): + print(f"Found local executable {local_path}") + artemis.set_executable(local_path) # Build Artemis - if artemis_exe_path is None and not kwargs.pop("reuse_build"): + if not artemis.custom_exe and not kwargs.pop("reuse_build"): try: os.system("rm -rf {0}/build".format(current_dir)) # insert arguments for artemis.make() diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index 6578187..c97d0d7 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -76,14 +76,18 @@ def analyze(): # error convergence rates, and error identicality between L- and R-going # advection. 
logger.debug("Analyzing test " + __name__) + err_path = os.path.join(artemis.get_run_directory(), _file_id + "-errs.dat") data = np.loadtxt( - os.path.join(artemis.get_run_directory(), _file_id + "-errs.dat"), + err_path, dtype=np.float64, ndmin=2, ) + hist_path = os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") history = ahistory( - os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") + hist_path ) + os.system(f"rm {err_path}") + os.system(f"rm {hist_path}") analyze_status = True if np.isnan(data).any(): logger.warning("NaN encountered") diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index d2240f4..73f6ae9 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -32,19 +32,29 @@ artemis_executable = os.path.join(artemis_dir, "tst", "build", "src", "artemis") artemis_inputs_dir = os.path.join(artemis_dir, "inputs") # artemis_fig_dir = "./figs/" +artemis_run_dir = os.path.join(artemis_dir, "tst", "build", "src", "tst") artemis_fig_dir = os.path.join(artemis_dir, "tst", "figs") artemis_log_dir = os.path.join(artemis_dir, "tst") +custom_exe = False # Create run directory for this invocation of the test framework -now = datetime.datetime.now() -run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) -run_directory = os.path.join(artemis_dir, "tst", run_directory_name) -os.makedirs(run_directory, exist_ok=True) +#now = datetime.datetime.now() +#run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) +#run_directory = os.path.join(artemis_dir, "tst", run_directory_name) +#os.makedirs(run_directory, exist_ok=True) + +def set_executable(executable_path): + global artemis_executable + global artemis_run_dir + global custom_exe + artemis_executable = executable_path + artemis_run_dir = os.path.join(os.path.dirname(artemis_executable), 'tst') + custom_exe = True # Function for returning the path to the run directory for this set of tests def get_run_directory(): - return run_directory 
+ return artemis_run_dir # Provide base directory of artemis source tree @@ -84,8 +94,9 @@ def make(cmake_args, make_nproc): # Function for running Artemis (with MPI) def run(nproc, input_filename, arguments, restart=None): - global run_directory + #global run_directory out_log = LogPipe("artemis.run", logging.INFO) + os.makedirs(artemis_run_dir, exist_ok=True) # Build the run command run_command = ["mpiexec", "--oversubscribe", "-n", str(nproc), artemis_executable] @@ -95,7 +106,8 @@ def run(nproc, input_filename, arguments, restart=None): run_command += ["-i", input_filename_full] try: - os.chdir(run_directory) + #os.chdir(run_directory) + os.chdir(artemis_run_dir) cmd = run_command + arguments logging.getLogger("artemis.run").debug("Executing: " + " ".join(cmd)) subprocess.check_call(cmd, stdout=out_log) From 7e406d0ab7e39265e4c8fac949a55f84cefce3f7 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 08:52:25 -0700 Subject: [PATCH 51/56] Formatting --- tst/scripts/advection/advection.py | 4 +--- tst/scripts/utils/artemis.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tst/scripts/advection/advection.py b/tst/scripts/advection/advection.py index c97d0d7..608892f 100644 --- a/tst/scripts/advection/advection.py +++ b/tst/scripts/advection/advection.py @@ -83,9 +83,7 @@ def analyze(): ndmin=2, ) hist_path = os.path.join(artemis.get_run_directory(), _file_id + ".out0.hst") - history = ahistory( - hist_path - ) + history = ahistory(hist_path) os.system(f"rm {err_path}") os.system(f"rm {hist_path}") analyze_status = True diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index 73f6ae9..d20cdde 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -38,17 +38,18 @@ custom_exe = False # Create run directory for this invocation of the test framework -#now = datetime.datetime.now() -#run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) -#run_directory = 
os.path.join(artemis_dir, "tst", run_directory_name) -#os.makedirs(run_directory, exist_ok=True) +# now = datetime.datetime.now() +# run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) +# run_directory = os.path.join(artemis_dir, "tst", run_directory_name) +# os.makedirs(run_directory, exist_ok=True) + def set_executable(executable_path): global artemis_executable global artemis_run_dir global custom_exe artemis_executable = executable_path - artemis_run_dir = os.path.join(os.path.dirname(artemis_executable), 'tst') + artemis_run_dir = os.path.join(os.path.dirname(artemis_executable), "tst") custom_exe = True @@ -94,7 +95,7 @@ def make(cmake_args, make_nproc): # Function for running Artemis (with MPI) def run(nproc, input_filename, arguments, restart=None): - #global run_directory + # global run_directory out_log = LogPipe("artemis.run", logging.INFO) os.makedirs(artemis_run_dir, exist_ok=True) @@ -106,7 +107,7 @@ def run(nproc, input_filename, arguments, restart=None): run_command += ["-i", input_filename_full] try: - #os.chdir(run_directory) + # os.chdir(run_directory) os.chdir(artemis_run_dir) cmd = run_command + arguments logging.getLogger("artemis.run").debug("Executing: " + " ".join(cmd)) From 2b4351836aca4466c7b66d733bd831a9d3791e79 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 10:41:36 -0700 Subject: [PATCH 52/56] Need to commit this to repo to test --- external/parthenon | 2 +- tst/launch_ci_runner.py | 6 +++++- tst/run_tests.py | 3 ++- tst/scripts/utils/artemis.py | 8 ++------ 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/external/parthenon b/external/parthenon index 79d5d30..0968c3e 160000 --- a/external/parthenon +++ b/external/parthenon @@ -1 +1 @@ -Subproject commit 79d5d301068cf34603c5194a796cdcbd3a134dae +Subproject commit 0968c3ea8c4e0877041b2c5ee154fa8c15c5ec92 diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 95f77c2..535a7ab 100755 --- a/tst/launch_ci_runner.py +++ 
b/tst/launch_ci_runner.py @@ -92,7 +92,10 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + os.path.join(build_dir, "src", "artemis") + " --log_file=ci_cpu_log.txt", ] - subprocess.run(test_command, check=True) + print("tst cmd:\n") + print(test_command) + ret = subprocess.run(test_command, + check=True) # CI apparently succeeded; indicate that return True @@ -129,6 +132,7 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): test_success = run_tests_in_temp_dir( args.pr_number, head_repo, head_ref, commit_sha ) + printf("now here") # Update github PR status to indicate that testing has concluded if test_success: diff --git a/tst/run_tests.py b/tst/run_tests.py index 1cdf993..b65e136 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -160,6 +160,7 @@ def main(**kwargs): logger.warning("WARNING! Not all required Python modules " "are available") if artemis_exe_path is not None: + print(f'path: {artemis_exe_path}') # Check that path is valid if not ( os.path.exists(artemis.artemis_executable) @@ -167,7 +168,7 @@ def main(**kwargs): ): logger.error("Exception occurred", exc_info=True) test_errors.append("make()") - raise TestError('Provided executable "{artemis_exe_path}" not found!') + raise TestError(f'Provided executable \"{artemis_exe_path}\" not found!') # Set the valid provided executable path artemis.set_executable(os.path.abspath(artemis_exe_path)) else: diff --git a/tst/scripts/utils/artemis.py b/tst/scripts/utils/artemis.py index d20cdde..2e90e16 100644 --- a/tst/scripts/utils/artemis.py +++ b/tst/scripts/utils/artemis.py @@ -37,13 +37,9 @@ artemis_log_dir = os.path.join(artemis_dir, "tst") custom_exe = False -# Create run directory for this invocation of the test framework -# now = datetime.datetime.now() -# run_directory_name = "tests_run_{0:%Y%m%d_%H%M%S}".format(now) -# run_directory = os.path.join(artemis_dir, "tst", run_directory_name) -# os.makedirs(run_directory, exist_ok=True) - +# Optionally set 
custom path for executable, and update other variables related to where +# we run the code def set_executable(executable_path): global artemis_executable global artemis_run_dir From c59bca2a47b950e38f93da25c3df6d158876b001 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 11:10:25 -0700 Subject: [PATCH 53/56] Oops found the bug --- tst/launch_ci_runner.py | 3 --- tst/run_tests.py | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index 535a7ab..c139cf3 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -92,8 +92,6 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + os.path.join(build_dir, "src", "artemis") + " --log_file=ci_cpu_log.txt", ] - print("tst cmd:\n") - print(test_command) ret = subprocess.run(test_command, check=True) @@ -132,7 +130,6 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): test_success = run_tests_in_temp_dir( args.pr_number, head_repo, head_ref, commit_sha ) - printf("now here") # Update github PR status to indicate that testing has concluded if test_success: diff --git a/tst/run_tests.py b/tst/run_tests.py index b65e136..9787775 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -160,11 +160,10 @@ def main(**kwargs): logger.warning("WARNING! 
Not all required Python modules " "are available") if artemis_exe_path is not None: - print(f'path: {artemis_exe_path}') # Check that path is valid if not ( - os.path.exists(artemis.artemis_executable) - and os.access(artemis.artemis_executable, os.X_OK) + os.path.exists(artemis_exe_path) + and os.access(artemis_exe_path, os.X_OK) ): logger.error("Exception occurred", exc_info=True) test_errors.append("make()") From 16545ffffd3e50ab9df6b8508335895148a90e9f Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 11:12:21 -0700 Subject: [PATCH 54/56] Better gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 71ab273..c36f8b0 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,10 @@ debug* doc/_build/* doc/src/parameters.rst +# CI files +artemis_ci_*.out +tst/figs + # Visualization package files .smhist From ab4e8001682a5329db537e25f978babee7c2e4b4 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 11:17:10 -0700 Subject: [PATCH 55/56] revert parthenon --- external/parthenon | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/parthenon b/external/parthenon index 0968c3e..79d5d30 160000 --- a/external/parthenon +++ b/external/parthenon @@ -1 +1 @@ -Subproject commit 0968c3ea8c4e0877041b2c5ee154fa8c15c5ec92 +Subproject commit 79d5d301068cf34603c5194a796cdcbd3a134dae From b44c50ca88e98d3931a08d4a653a311fdda27038 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 19:40:01 -0700 Subject: [PATCH 56/56] Formatting and update readme --- README.md | 30 ++++++++++++++++++++++++++++-- tst/launch_ci_runner.py | 3 +-- tst/run_tests.py | 2 +- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5b2a477..727e229 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,16 @@ Currently supported computers/partitions are: skylake-gold volta-x86 (gpu) +## Chicoma + + cpu + gpu + +## Venado + + gg (cpu) + gh (gpu) + # Installation git submodule 
update --init --recursive @@ -82,7 +92,23 @@ script: ## Testing -There is a suite of tests in the `tst/` directory. To run the full regression suite, do +There is a suite of tests in the `tst/` directory. Tests are run with the included `run_tests.py` +script. This script can be run in three ways: + +1. With default arguments, where the current version of the source will be built. The resulting +executable can be saved for reuse with `--save_build`, and if saved can be reused in subsequent test +runs with `--reuse_build`. Note that `--save_build` must continue to be supplied as well to avoid +the reused build being deleted after the tests are run. +2. If the `run_tests.py` script is called from a directory with a valid `artemis` executable, that +executable will be used for testing and will not be cleaned up afterwards. +3. If the path to an `artemis` executable is provided to the `--exe` option of `run_tests.py`, that +executable will be used for testing and will not be cleaned up afterwards. + +In all cases, the tests will be run from a `tst` directory created in the same folder as the +executable being used. Figures will be created in `artemis/tst/figs` and the log file in +`artemis/tst`. + +To run the full regression suite, do python3 run_tests.py regression.suite @@ -90,7 +116,7 @@ You can also pass a list of individual tests to the script, or create your own s ## CI -We use the gitlab CI for regression testing. The CI will not run if the PR is marked "Draft:" or +We use the GitHub CI for regression testing. The CI will not run if the PR is marked "Draft:" or "WIP:". Removing these labels from the title will not automatically launch the CI. 
To launch the CI with an empty commit, do diff --git a/tst/launch_ci_runner.py b/tst/launch_ci_runner.py index c139cf3..d9ac746 100755 --- a/tst/launch_ci_runner.py +++ b/tst/launch_ci_runner.py @@ -92,8 +92,7 @@ def run_tests_in_temp_dir(pr_number, head_repo, head_ref, commit_sha): + os.path.join(build_dir, "src", "artemis") + " --log_file=ci_cpu_log.txt", ] - ret = subprocess.run(test_command, - check=True) + ret = subprocess.run(test_command, check=True) # CI apparently succeeded; indicate that return True diff --git a/tst/run_tests.py b/tst/run_tests.py index 9787775..5888117 100755 --- a/tst/run_tests.py +++ b/tst/run_tests.py @@ -167,7 +167,7 @@ def main(**kwargs): ): logger.error("Exception occurred", exc_info=True) test_errors.append("make()") - raise TestError(f'Provided executable \"{artemis_exe_path}\" not found!') + raise TestError(f'Provided executable "{artemis_exe_path}" not found!') # Set the valid provided executable path artemis.set_executable(os.path.abspath(artemis_exe_path)) else: