Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to CI #359

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/.parallelworks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# .parallelworks Directory

The .parallelworks directory stores the CI scripts that reside on Parallelworks
These scripts are executed via the GitHub Actions Workflows in .github/workflows

On Parallelworks these scripts are installed at: /contrib/fv3/GFDL_atmos_cubed_sphere_CI
59 changes: 59 additions & 0 deletions .github/.parallelworks/checkout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
##############################################################################
## checkout.sh - prepare a clean source tree for one CI run.
##
## Recreates the per-branch/per-commit test directory, clones SHiELD_build,
## runs its CHECKOUT_code script, then fetches the ref under test into
## GFDL_atmos_cubed_sphere and merges it.
##
## Usage: checkout.sh [-b|--branch <ref>] [-h|--hash <commit>]
##   -b, --branch  ref fetched from origin and merged (default: main)
##   -h, --hash    commit id, only used to name the test directory
##                 (default: none)
##############################################################################
# bash is required ([[ ]] and |& are used below). Options are set here rather
# than on the shebang line so they still apply under `bash checkout.sh`.
# pipefail: without it `cmd |& tee log` reports tee's status and a failed
# checkout would go unnoticed.
set -xeo pipefail

##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  case "$1" in
    -b|--branch)
      [[ $# -ge 2 ]] || { echo "missing value for $1"; exit 1; }
      branch="$2"
      shift 2 # past argument and value
      ;;
    -h|--hash)
      [[ $# -ge 2 ]] || { echo "missing value for $1"; exit 1; }
      commit="$2"
      shift 2 # past argument and value
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
done

echo "branch is $branch"
echo "commit is $commit"

## Set up the directories
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"
export MODULESHOME=/usr/share/lmod/lmod

## create directories
# Start from an empty tree; :? aborts instead of expanding to an empty path.
rm -rf -- "${testDir:?}"
mkdir -p -- "${logDir}"

# salloc commands to start up
#2 tests layout 8,8 (16 nodes)
#2 tests layout 4,8 (8 nodes)
#9 tests layout 4,4 (18 nodes)
#5 tests layout 4,1 (5 nodes)
#17 tests layout 2,2 (17 nodes)
#salloc --partition=p2 -N 64 -J ${branch} sleep 20m &

## clone code
# Each step is its own statement: inside an && chain, set -e ignores the
# failure of every command except the last one.
cd "${testDir}"
git clone --recursive https://github.com/NOAA-GFDL/SHiELD_build.git
cd SHiELD_build
./CHECKOUT_code |& tee "${logDir}/checkout.log"

## Check out the PR
cd "${testDir}/SHiELD_SRC/GFDL_atmos_cubed_sphere"
git fetch origin "${branch}:toMerge"
git merge toMerge
84 changes: 84 additions & 0 deletions .github/.parallelworks/compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
##############################################################################
## compile.sh - build one SHiELD configuration inside the HPC-ME container.
##
## Expects the source tree created by checkout.sh for the same branch/commit.
##
## Usage: compile.sh [-b|--branch <ref>] [-h|--hash <commit>]
##                   -c|--config <config> --hydro <hydro> --bit <bit>
##                   -m|--mode <mode>
##   -b, --branch  ref used to locate the test directory (default: main)
##   -h, --hash    commit id used to locate the test directory (default: none)
##   -c, --config  passed to COMPILE as its 1st argument (required)
##   --hydro       passed to COMPILE as its 2nd argument (required)
##   --bit         passed to COMPILE as its 3rd argument (required)
##   -m, --mode    passed to COMPILE as its 4th argument (required)
##
## The combination hydro=sw with config=shield is skipped and exits 0.
##############################################################################
# bash is required ([[ ]] and |& are used below). Options are set here rather
# than on the shebang line so they still apply under `bash compile.sh`.
# pipefail makes `COMPILE |& tee log` fail when COMPILE fails.
set -xeo pipefail

##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################
## HPC-ME container
container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif
container_env_script=/contrib/containers/load_spack_noaa-intel.sh
##############################################################################

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  # Every recognised option takes a value; report a missing one explicitly
  # instead of letting the shift fail silently under set -e.
  case "$1" in
    -b|--branch|-h|--hash|-c|--config|--hydro|--bit|-m|--mode)
      [[ $# -ge 2 ]] || { echo "missing value for $1"; exit 1; }
      ;;
  esac
  case "$1" in
    -b|--branch)
      branch="$2"
      ;;
    -h|--hash)
      commit="$2"
      ;;
    -c|--config)
      config="$2"
      ;;
    --hydro)
      hydro="$2"
      ;;
    --bit)
      bit="$2"
      ;;
    -m|--mode)
      mode="$2"
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
  shift 2 # past argument and value
done

# Quoted tests: the unquoted `[ -z $var ]` form only worked by accident for
# empty values and broke on values containing whitespace.
if [[ -z "$mode" || -z "$bit" || -z "$hydro" || -z "$config" ]]; then
  echo "must specify config, hydro, bit, and mode options for compile"
  exit 1
fi

echo "branch is $branch"
echo "commit is $commit"
echo "mode is $mode"
echo "bit is $bit"
echo "hydro is $hydro"
echo "config is $config"

if [[ "$hydro" == "sw" && "$config" == "shield" ]]; then
  # Not an error: the job is reported as successful without building.
  echo "this combination should not be tested"
  exit 0
fi

## Set up the directories
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"

# Set up build
cd "${testDir}/SHiELD_build/Build"

#Define External Libs path
export EXTERNAL_LIBS="${dirRoot}/externallibs"

# Build SHiELD
# The COMPILE command line is handed to the container environment script as
# a single string argument.
singularity exec -B /contrib "${container}" "${container_env_script}" \
  "./COMPILE ${config} ${hydro} ${bit} ${mode} intel clean" \
  |& tee "${logDir}/compile_${config}_${hydro}_${bit}_${mode}_intel.out"
79 changes: 79 additions & 0 deletions .github/.parallelworks/run_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/bash
##############################################################################
## run_test.sh - run one CI test and compare its restart files to baselines.
##
## Expects the tree created by checkout.sh and built by compile.sh for the
## same branch/commit.
##
## Usage: run_test.sh -t|--test <name> [-b|--branch <ref>] [-h|--hash <commit>]
##   -t, --test    name of the test script under RTS/CI (required)
##   -b, --branch  ref used to locate the test directory (default: main)
##   -h, --hash    commit id used to locate the test directory and to name
##                 the job (default: none)
##
## Exits non-zero if the test script fails, if no baseline files exist for
## the test, or if nccmp reports a difference.
##############################################################################
# Options are set here rather than on the shebang line so they still apply
# under `bash run_test.sh`. pipefail makes `test |& tee log` fail when the
# test itself fails.
set -xeo pipefail
ulimit -s unlimited

##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################
## HPC-ME container
container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif
container_env_script=/contrib/containers/load_spack_noaa-intel-mlong.sh

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  # Every recognised option takes a value; report a missing one explicitly
  # instead of letting the shift fail silently under set -e.
  case "$1" in
    -b|--branch|-h|--hash|-t|--test)
      [[ $# -ge 2 ]] || { echo "missing value for $1"; exit 1; }
      ;;
  esac
  case "$1" in
    -b|--branch)
      branch="$2"
      ;;
    -h|--hash)
      commit="$2"
      ;;
    -t|--test)
      testname="$2"
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
  shift 2 # past argument and value
done

if [[ -z "$testname" ]]; then
  echo "must specify a test name with -t"
  exit 1
fi

echo "branch is $branch"
echo "commit is $commit"
echo "test is $testname"

## Set up the directories
MODULESHOME=/usr/share/lmod/lmod
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"
baselineDir="${dirRoot}/baselines/intel/${intelVersion}"

## Run the CI Test
# BUILDDIR is exported because the test scripts read it; the test script
# directory and the run directory are derived from it.
export BUILDDIR="${testDir}/SHiELD_build"
testscriptDir="${BUILDDIR}/RTS/CI"
runDir="${BUILDDIR}/CI/BATCH-CI"

# Run CI test scripts
cd "${testscriptDir}"
# Execute the test piping output to log file. The launcher options and the
# container command are passed to the test script as one string argument.
"./${testname}" " --partition=compute --mpi=pmi2 --job-name=${commit}_${testname} singularity exec -B /contrib -B /apps ${container} ${container_env_script}" |& tee "${logDir}/run_${testname}.log"

## Compare Restarts to Baseline
source "${MODULESHOME}/init/sh"
export MODULEPATH=/mnt/shared/manual_modules:/usr/share/modulefiles/Linux:/usr/share/modulefiles/Core:/usr/share/lmod/lmod/modulefiles/Core:/apps/modules/modulefiles:/apps/modules/modulefamilies/intel
# NOTE(review): intel/2022.1.2 differs from intelVersion above - confirm
# that this is the intended toolchain for nccmp.
module load intel/2022.1.2
module load netcdf
module load nccmp

# Collect the baseline files with a glob instead of parsing `ls` output.
# nullglob makes a missing or empty directory yield an empty list.
shopt -s nullglob
baselineFiles=("${baselineDir}/${testname}"/*)
shopt -u nullglob

# Zero baseline files would mean zero comparisons and a false "pass".
if [[ ${#baselineFiles[@]} -eq 0 ]]; then
  echo "no baseline files found in ${baselineDir}/${testname}" >&2
  exit 1
fi

for baselineFile in "${baselineFiles[@]}"; do
  resFile="${baselineFile##*/}"
  # set -e stops the script at the first file that differs.
  nccmp -d "${baselineFile}" "${runDir}/${testname}/RESTART/${resFile}"
done
98 changes: 52 additions & 46 deletions .github/workflows/SHiELD_parallelworks_intel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
# so this salloc will prompt 46 nodes to startup and stay active for 20 min
# this is enough nodes for the first 17 tests to run in parallel, and we
# have 17 runners configured.
- run: salloc --partition=p2 -N 46 -J $GITHUB_SHA sleep 20m &
- run: /contrib/fv3/GFDL_atmos_cubed_sphere_CI/checkout.sh $GITHUB_REF $GITHUB_SHA
- run: salloc --partition=compute -N 46 -J $GITHUB_SHA sleep 20m &
- run: /contrib/fv3/GFDL_atmos_cubed_sphere_CI/checkout.sh -b $GITHUB_REF -h $GITHUB_SHA

build:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
Expand All @@ -43,15 +43,21 @@ jobs:
needs: [checkout]
strategy:
fail-fast: true
max-parallel: 3
max-parallel: 17
matrix:
runpath: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/]
runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh]
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/compile.sh]
config: [solo]
hydro: [sw, nh, hydro]
bit: [64bit]
mode: [repro]
steps:
- env:
RUNPATH: ${{ matrix.runpath }}
RUNSCRIPT: ${{ matrix.runscript }}
run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA
CONFIG: ${{ matrix.config }}
HYDRO: ${{ matrix.hydro }}
BIT: ${{ matrix.bit }}
MODE: ${{ matrix.mode }}
run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG --hydro $HYDRO --bit $BIT -m $MODE

test:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
Expand All @@ -62,55 +68,55 @@ jobs:
fail-fast: false
max-parallel: 17
matrix:
runpath: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/]
runscript:
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/run_test.sh]
argument:
# These are placed in order of largest to smallest jobs
#layout 8,8 needs 8 nodes on dvcimultiintel cluster
- C512r20.solo.superC.sh
- C768.sw.BTwave.sh
- C512r20.solo.superC
- C768.sw.BTwave
#layout 4,8 needs 4 nodes on dvcimultiintel cluster
- C256r20.solo.superC.sh
- C384.sw.BLvortex.sh
- C256r20.solo.superC
- C384.sw.BLvortex
#layout 4,4 needs 2 nodes on dvcimultiintel cluster
- C128r20.solo.superC.sh
- C128r3.solo.TC.d1.sh
- C128r3.solo.TC.h6.sh
- C128r3.solo.TC.sh
- C128r3.solo.TC.tr8.sh
- C192.sw.BLvortex.sh
- C192.sw.BTwave.sh
- C192.sw.modon.sh
- C384.sw.BTwave.sh
- C128r20.solo.superC
- C128r3.solo.TC.d1
- C128r3.solo.TC.h6
- C128r3.solo.TC
- C128r3.solo.TC.tr8
- C192.sw.BLvortex
- C192.sw.BTwave
- C192.sw.modon
- C384.sw.BTwave
#layout 4,1 and 2,2 need 1 node on dvcimultiintel cluster
- C96.solo.BCdry.hyd.sh
- C96.solo.BCdry.sh
- C96.solo.BCmoist.hyd.d3.sh
- C96.solo.BCmoist.hyd.sh
- C96.solo.BCmoist.nhK.sh
- C96.solo.BCmoist.sh
- C96.solo.mtn_rest.hyd.diff2.sh
- C96.solo.mtn_rest.hyd.sh
- C96.solo.mtn_rest.nonmono.diff2.sh
- C96.solo.mtn_rest.sh
- C96.sw.BLvortex.sh
- C96.sw.BTwave.sh
- C96.sw.modon.sh
- C96.sw.RHwave.sh
- d96_1k.solo.mtn_rest_shear.olddamp.sh
- d96_1k.solo.mtn_rest_shear.sh
- d96_1k.solo.mtn_schar.mono.sh
- d96_1k.solo.mtn_schar.sh
- d96_2k.solo.bubble.n0.sh
- d96_2k.solo.bubble.nhK.sh
- d96_2k.solo.bubble.sh
- d96_500m.solo.mtn_schar.sh
- C96.solo.BCdry.hyd
- C96.solo.BCdry
- C96.solo.BCmoist.hyd.d3
- C96.solo.BCmoist.hyd
- C96.solo.BCmoist.nhK
- C96.solo.BCmoist
- C96.solo.mtn_rest.hyd.diff2
- C96.solo.mtn_rest.hyd
- C96.solo.mtn_rest.nonmono.diff2
- C96.solo.mtn_rest
- C96.sw.BLvortex
- C96.sw.BTwave
- C96.sw.modon
- C96.sw.RHwave
- d96_1k.solo.mtn_rest_shear.olddamp
- d96_1k.solo.mtn_rest_shear
- d96_1k.solo.mtn_schar.mono
- d96_1k.solo.mtn_schar
- d96_2k.solo.bubble.n0
- d96_2k.solo.bubble.nhK
- d96_2k.solo.bubble
- d96_500m.solo.mtn_schar
steps:
# This will end the slurm job started in the checkout job
- run: scancel -n $GITHUB_SHA
- env:
RUNPATH: ${{ matrix.runpath }}
RUNSCRIPT: ${{ matrix.runscript }}
run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA
ARG1: ${{ matrix.argument }}
run: $RUNSCRIPT -t $ARG1 -b $GITHUB_REF -h $GITHUB_SHA
shutdown:
if: always() && github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/daily_cleanup_parallelworks.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Old Build Cleanup

# This GitHub Action Workflow is runing on the GFDL_ACS_CIINTEL cluster
# This GitHub Action Workflow is runing on the gclustercigfdlacs cluster
# This will delete all build directories older than 30 days
# Build directories are on the cloud at /contrib/fv3/2023.2.0

Expand All @@ -16,3 +16,4 @@ jobs:
name: Delete Builds
steps:
- run: find /contrib/fv3/2023.2.0/GFDL_atmos_cubed_sphere/refs/pull -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -exec rm -rf "{}" \;
- run: find /contrib/fv3/2023.2.0/GFDL_atmos_cubed_sphere/refs/heads -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -exec rm -rf "{}" \;
Loading