Upgrade CI: new cloud cluster, new container, more tests, still intel only #1
name: Compile SHiELD SOLO and run tests
# This GitHub Actions workflow runs on the cloud devcimultiintel cluster.
# The tests are run inside of a container with the following software/libraries:
# - intel: 2023.2.0
# - hdf5: 1.14.0
# - netcdf-c: 4.9.2
# - netcdf-fortran: 4.6.0
# - cmake
# - libyaml
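# The workflow fans out into four jobs: checkout checks out the code and
# pre-allocates cluster nodes, build compiles the SOLO configurations,
# test runs the regression suite, and shutdown cancels any remaining slurm jobs.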
on:
  pull_request:
    branches:
      - main
jobs:
  checkout:
    runs-on: [self-hosted, devcimultiintel]
    name: Checkout Code
    steps:
      # It can take a long time (5-15 minutes) to spin up nodes,
      # so this salloc prompts 46 nodes to start up and stay active for 20 min.
      # This is enough nodes for the first 17 tests to run in parallel, and we
      # have 17 runners configured.
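      # The trailing '&' backgrounds salloc so this step returns immediately;
      # the 'sleep 20m' holds the allocation (named after the PR ref) open
      # until the test or shutdown job cancels it.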
      - run: salloc --partition=p2 -N 46 -J $GITHUB_REF sleep 20m &
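      # checkout.sh is handed the PR ref; the build and test scripts below
      # take the same argument.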
      - run: /contrib/fv3/intelFV3CIScripts/checkout.sh $GITHUB_REF
  build:
    runs-on: [self-hosted, devcimultiintel]
    name: SOLO SHiELD build
    needs: [checkout]
    strategy:
      fail-fast: true
      max-parallel: 3
      matrix:
        runpath: [/contrib/fv3/intelFV3CIScripts/]
        runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh]
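        # One compile script per matrix entry; sw, nh, and hydro appear to be the
        # shallow-water, non-hydrostatic, and hydrostatic builds. max-parallel: 3
        # lets all three run at once.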
    steps:
      - env:
          RUNPATH: ${{ matrix.runpath }}
          RUNSCRIPT: ${{ matrix.runscript }}
        run: $RUNPATH/$RUNSCRIPT $GITHUB_REF
  test:
    runs-on: [self-hosted, devcimultiintel]
    name: SOLO SHiELD test suite
    needs: [checkout, build]
    strategy:
      fail-fast: false
      max-parallel: 17
      matrix:
        runpath: [/contrib/fv3/intelFV3CIScripts/]
        runscript:
          # These are placed in order of largest to smallest jobs.
          # layout 8,8 needs 8 nodes on the devcimultiintel cluster
          - C512r20.solo.superC.sh
          - C768.sw.BTwave.sh
          # layout 4,8 needs 4 nodes on the devcimultiintel cluster
          - C256r20.solo.superC.sh
          - C384.sw.BLvortex.sh
          # layout 4,4 needs 2 nodes on the devcimultiintel cluster
          - C128r20.solo.superC.sh
          - C128r3.solo.TC.d1.sh
          - C128r3.solo.TC.h6.sh
          - C128r3.solo.TC.sh
          - C128r3.solo.TC.tr8.sh
          - C192.sw.BLvortex.sh
          - C192.sw.BTwave.sh
          - C192.sw.modon.sh
          - C384.sw.BTwave.sh
          # layouts 4,1 and 2,2 need 1 node on the devcimultiintel cluster
          - C96.solo.BCdry.hyd.sh
          - C96.solo.BCdry.sh
          - C96.solo.BCmoist.hyd.d3.sh
          - C96.solo.BCmoist.hyd.sh
          - C96.solo.BCmoist.nhK.sh
          - C96.solo.BCmoist.sh
          - C96.solo.mtn_rest.hyd.diff2.sh
          - C96.solo.mtn_rest.hyd.sh
          - C96.solo.mtn_rest.nonmono.diff2.sh
          - C96.solo.mtn_rest.sh
          - C96.sw.BLvortex.sh
          - C96.sw.BTwave.sh
          - C96.sw.modon.sh
          - C96.sw.RHwave.sh
          - d96_1k.solo.mtn_rest_shear.olddamp.sh
          - d96_1k.solo.mtn_rest_shear.sh
          - d96_1k.solo.mtn_schar.mono.sh
          - d96_1k.solo.mtn_schar.sh
          - d96_2k.solo.bubble.n0.sh
          - d96_2k.solo.bubble.nhK.sh
          - d96_2k.solo.bubble.sh
          - d96_500m.solo.mtn_schar.sh
    steps:
      # This will end the slurm job started in the checkout job
      - run: scancel -n $GITHUB_REF
      - env:
          RUNPATH: ${{ matrix.runpath }}
          RUNSCRIPT: ${{ matrix.runscript }}
        run: $RUNPATH/$RUNSCRIPT $GITHUB_REF
  shutdown:
    runs-on: [self-hosted, devcimultiintel]
    name: Shutdown Processes
    if: always()
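    # always() makes this cleanup job run even when checkout, build, or test
    # fails or is cancelled.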
    needs: [checkout, build, test]
    strategy:
      fail-fast: false
      max-parallel: 17
      matrix:
        test:
          - C512r20.solo.superC
          - C768.sw.BTwave
          - C256r20.solo.superC
          - C384.sw.BLvortex
          - C128r20.solo.superC
          - C128r3.solo.TC.d1
          - C128r3.solo.TC.h6
          - C128r3.solo.TC
          - C128r3.solo.TC.tr8
          - C192.sw.BLvortex
          - C192.sw.BTwave
          - C192.sw.modon
          - C384.sw.BTwave
          - C96.solo.BCdry.hyd
          - C96.solo.BCdry
          - C96.solo.BCmoist.hyd.d3
          - C96.solo.BCmoist.hyd
          - C96.solo.BCmoist.nhK
          - C96.solo.BCmoist
          - C96.solo.mtn_rest.hyd.diff2
          - C96.solo.mtn_rest.hyd
          - C96.solo.mtn_rest.nonmono.diff2
          - C96.solo.mtn_rest
          - C96.sw.BLvortex
          - C96.sw.BTwave
          - C96.sw.modon
          - C96.sw.RHwave
          - d96_1k.solo.mtn_rest_shear.olddamp
          - d96_1k.solo.mtn_rest_shear
          - d96_1k.solo.mtn_schar.mono
          - d96_1k.solo.mtn_schar
          - d96_2k.solo.bubble.n0
          - d96_2k.solo.bubble.nhK
          - d96_2k.solo.bubble
          - d96_500m.solo.mtn_schar
    steps:
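      # Cancel the node pre-allocation (named after the PR ref) if it is still
      # running, then cancel any leftover slurm job submitted under this test's name.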
      - run: scancel -n $GITHUB_REF
      - env:
          TEST: ${{ matrix.test }}
        run: scancel -n $TEST