Skip to content

Commit

Permalink
[develop] Changes for Derecho, a new platform (#894)
Browse files Browse the repository at this point in the history
Modulefiles and other configuration files have been added to adapt the SRW App to the Derecho system.

---------

Co-authored-by: Natalie Perlin <perlin.natalie@gmail.com>
Co-authored-by: Michael Kavulich <kavulich@ucar.edu>
  • Loading branch information
3 people authored Sep 19, 2023
1 parent 5b59e01 commit fc0403e
Show file tree
Hide file tree
Showing 34 changed files with 169 additions and 19 deletions.
3 changes: 3 additions & 0 deletions etc/lmod-setup.csh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ else if ( "$L_MACHINE" == singularity ) then
else if ( "$L_MACHINE" == gaea ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh

else if ( "$L_MACHINE" == derecho ) then
module reset

else if ( "$L_MACHINE" == odin ) then
module unload modules
unset -f module
Expand Down
5 changes: 4 additions & 1 deletion etc/lmod-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Usage: source etc/lmod-setup.sh PLATFORM
OPTIONS:
PLATFORM - name of machine you are building on
(e.g. cheyenne | hera | jet | orion | wcoss2 )
(e.g. cheyenne | hera | jet | orion | wcoss2 )
EOF_USAGE
exit 1
else
Expand Down Expand Up @@ -47,6 +47,9 @@ elif [ "$L_MACHINE" = singularity ]; then
elif [ "$L_MACHINE" = gaea ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh

elif [ "$L_MACHINE" = derecho ]; then
module reset

elif [ "$L_MACHINE" = odin ]; then
module unload modules
unset -f module
Expand Down
35 changes: 35 additions & 0 deletions modulefiles/build_derecho_intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
help([[
This module loads libraries for building the UFS SRW App on
the CISL machine Derecho (Cray) using Intel-classic-2023.0.0
]])

-- One-line description of this modulefile; it must name Derecho, the machine
-- this build environment targets.
whatis([===[Loads libraries needed for building the UFS SRW App on Derecho ]===])

-- Every versioned load below takes its version from the matching <name>_ver
-- environment variable when it is set, and otherwise uses the default given here.
load(pathJoin("cmake", os.getenv("cmake_ver") or "3.26.3"))
load(pathJoin("ncarenv", os.getenv("ncarenv_ver") or "23.06"))
load(pathJoin("craype", os.getenv("craype_ver") or "2.7.20"))

-- Drop any netcdf/hdf5 modules that are already loaded before choosing the
-- compiler and MPI.
-- NOTE(review): presumably so the hpc-stack builds are used instead -- confirm.
unload("netcdf")
unload("hdf5")
load(pathJoin("intel-classic", os.getenv("intel_classic_ver") or "2023.0.0"))
load(pathJoin("cray-mpich", os.getenv("cray_mpich_ver") or "8.1.25"))

-- Put the EPIC hpc-stack module tree first on MODULEPATH, then load the stack
-- meta-modules matching the compiler and MPI chosen above.
prepend_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/hpc-stack/intel-classic-2023.0.0/modulefiles/stack")
load(pathJoin("hpc", os.getenv("hpc_ver") or "1.2.0"))
load(pathJoin("hpc-intel-classic", os.getenv("hpc_intel_classic_ver") or "2023.0.0"))
load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "8.1.25"))

load(pathJoin("ncarcompilers", os.getenv("ncarcompilers_ver") or "1.0.0"))
load(pathJoin("mkl", os.getenv("mkl_ver") or "2023.0.0"))

-- Library set defined in the separate srw_common modulefile (not shown here).
load("srw_common")

-- Point both the generic and the CMake-specific compiler variables at the
-- cc / CC / ftn compiler drivers.
setenv("CC","cc")
setenv("FC","ftn")
setenv("CXX","CC")

setenv("CMAKE_C_COMPILER","cc")
setenv("CMAKE_CXX_COMPILER","CC")
setenv("CMAKE_Fortran_COMPILER","ftn")
setenv("CMAKE_Platform","derecho.intel")

3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/aqm_ics.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Task-local modules for the aqm_ics task on Derecho.
-- The cmake version is taken from $cmake_ver when set, otherwise 3.22.0.
-- NOTE(review): build_derecho_intel.lua defaults cmake to 3.26.3 -- confirm
-- that 3.22.0 is the intended default here.
local cmake_version = os.getenv("cmake_ver") or "3.22.0"
load(pathJoin("cmake", cmake_version))

-- Remaining modules are loaded in this fixed order.
for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/aqm_lbcs.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Task-local modules for the aqm_lbcs task on Derecho.
-- The cmake version is taken from $cmake_ver when set, otherwise 3.22.0.
-- NOTE(review): build_derecho_intel.lua defaults cmake to 3.26.3 -- confirm
-- that 3.22.0 is the intended default here.
local cmake_version = os.getenv("cmake_ver") or "3.22.0"
load(pathJoin("cmake", cmake_version))

-- Remaining modules are loaded in this fixed order.
for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/fire_emission.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Task-local modules for the fire_emission task on Derecho, loaded in order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Sets up the miniconda3 module and selects the regional_workflow_cmaq
-- environment name through SRW_ENV.
local conda_modulefiles = "/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles"

-- Remove any loaded python module first, then bring in conda.
unload("python")
load("conda")

-- Make the EPIC miniconda3 module tree visible before loading from it.
prepend_path("MODULEPATH", conda_modulefiles)

-- Version comes from $miniconda3_ver when set, otherwise 4.12.0.
local miniconda_version = os.getenv("miniconda3_ver") or "4.12.0"
load(pathJoin("miniconda3", miniconda_version))

setenv("SRW_ENV", "regional_workflow_cmaq")
4 changes: 4 additions & 0 deletions modulefiles/tasks/derecho/nexus_emission.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Task-local modules for the nexus_emission task on Derecho, loaded in order.
local task_modules = {
    "nco/5.0.6",
    "ncarenv",
    "miniconda_regional_workflow_cmaq",
}

for _, module_name in ipairs(task_modules) do
    load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/nexus_gfs_sfc.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Task-local modules for the nexus_gfs_sfc task on Derecho, loaded in order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/nexus_post_split.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Task-local modules for the nexus_post_split task on Derecho.
-- The nco version is taken from $nco_ver when set, otherwise 5.0.6.
local nco_version = os.getenv("nco_ver") or "5.0.6"
load(pathJoin("nco", nco_version))

-- Remaining modules are loaded in this fixed order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
5 changes: 5 additions & 0 deletions modulefiles/tasks/derecho/plot_allvars.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Task-local modules for the plot_allvars task on Derecho: miniconda3 plus the
-- regional_workflow environment name in SRW_ENV.
-- NOTE(review): unlike the other Derecho modulefiles, this MODULEPATH entry has
-- no "derecho/" component (.../contrib/derecho/miniconda3/modulefiles) --
-- confirm this path is the intended one for Derecho.
local conda_modulefiles = "/glade/work/epicufsrt/contrib/miniconda3/modulefiles"

-- Remove any loaded python module before switching to miniconda3.
unload("python")
prepend_path("MODULEPATH", conda_modulefiles)

-- Version comes from $miniconda3_ver when set, otherwise 4.12.0.
local miniconda_version = os.getenv("miniconda3_ver") or "4.12.0"
load(pathJoin("miniconda3", miniconda_version))

setenv("SRW_ENV", "regional_workflow")
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/point_source.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Task-local modules for the point_source task on Derecho, loaded in order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/pre_post_stat.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Task-local modules for the pre_post_stat task on Derecho, loaded in order.
for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
    load(module_name)
end
5 changes: 5 additions & 0 deletions modulefiles/tasks/derecho/python_srw.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Python setup for SRW tasks on Derecho: miniconda3 plus the workflow_tools
-- environment name in SRW_ENV.
local conda_modulefiles = "/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles"

-- Remove any loaded python module before switching to miniconda3.
unload("python")
prepend_path("MODULEPATH", conda_modulefiles)

-- Version comes from $miniconda3_ver when set, otherwise 4.12.0.
local miniconda_version = os.getenv("miniconda3_ver") or "4.12.0"
load(pathJoin("miniconda3", miniconda_version))

setenv("SRW_ENV", "workflow_tools")
6 changes: 6 additions & 0 deletions modulefiles/tasks/derecho/run_vx.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--[[
Modules for the run_vx task on Derecho.
The met and metplus libraries come from compiler-specific modules.
--]]

-- Build "<name>/<version>", taking the version from the given environment
-- variable when it is set and falling back to the supplied default otherwise.
local function versioned_module(name, env_var, default_version)
    return pathJoin(name, os.getenv(env_var) or default_version)
end

load(versioned_module("met", "met_ver", "10.1.2"))
load(versioned_module("metplus", "metplus_ver", "4.1.3"))
load("python_srw")
24 changes: 24 additions & 0 deletions modulefiles/wflow_derecho.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
help([[
This module loads the python environment for running the UFS SRW App
on the CISL machine Derecho (Cray)
]])

whatis([===[Loads libraries for running the UFS SRW Workflow on Derecho ]===])

load("ncarenv")

-- Rocoto comes from the EPIC module tree, which is appended to (not prepended
-- to) MODULEPATH.
append_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/rocoto/modulefiles")
load("rocoto")

-- Remove any loaded python module before switching to miniconda3.
unload("python")

load("set_pythonpath")
-- The miniconda3 module tree is prepended so it takes precedence on MODULEPATH.
-- The version comes from $miniconda3_ver when set, otherwise 4.12.0.
prepend_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

-- Print the activation reminder only when the module is being loaded.
if mode() == "load" then
LmodMsgRaw([===[Please do the following to activate conda:
> conda activate workflow_tools
]===])
end

1 change: 0 additions & 1 deletion parm/wflow/aqm_post.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ default_aqm_task: &default_aqm
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/aqm_prep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ default_aqm_task: &default_aqm
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
2 changes: 0 additions & 2 deletions parm/wflow/coldstart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ default_task: &default_task
ENSMEM_INDX: '#mem#'
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
walltime: 00:30:00
Expand Down Expand Up @@ -149,7 +148,6 @@ metatask_run_ensemble:
SLASH_ENSMEM_SUBDIR: '&SLASH_ENSMEM_SUBDIR;'
nprocs:
join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
nodesize: '&NCORES_PER_NODE;'
nnodes: '{{ task_run_fcst.NNODES_RUN_FCST // 1 }}'
partition: '{% if platform.get("PARTITION_FCST") %}&PARTITION_FCST;{% else %}None{% endif %}'
ppn: '{{ task_run_fcst.PPN_RUN_FCST // 1 }}'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/plot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ default_task_plot: &default_task
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 24
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/post.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ default_task_post: &default_task
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nnodes: 2
ppn: 24
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
walltime: 00:15:00
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/prdgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ metatask_run_prdgen:
join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
nnodes: 1
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 22
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/prep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ default_task_prep: &default_task
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nnodes: 1
nodesize: "&NCORES_PER_NODE;"
ppn: 24
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_det.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_det: &default_task_verify_det
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_ens.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_ens: &default_task_verify_ens
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_pre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_pre: &default_task_verify_pre
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
6 changes: 3 additions & 3 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ for the current code in the users ufs-srweather-app directory. It consists of t

Currently, the following configurations are supported:

Machine | Cheyenne | Hera | Jet | Orion | wcoss2 |
------------| ------------|--------|--------|--------|---------|
Compiler(s) | Intel, GNU | Intel | Intel | Intel | Intel |
Machine | Derecho | Cheyenne | Hera | Jet | Orion | wcoss2 |
------------|---------|-------------|--------|--------|--------|---------|
Compiler(s) | Intel | Intel, GNU | Intel | Intel | Intel | Intel |

The CMake build is done in the ``build_${compiler}`` directory.
The executables for each build are installed under the ``bin_${compiler}`` directory.
Expand Down
1 change: 1 addition & 0 deletions tests/WE2E/machine_suites/comprehensive.derecho
9 changes: 9 additions & 0 deletions tests/WE2E/machine_suites/coverage.derecho
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
custom_ESGgrid_IndianOcean_6km
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot
grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_GFS_v16
grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_HRRR
grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_HRRR_suite_HRRR
nco_grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_timeoffset_suite_GFS_v16
pregen_grid_orog_sfc_climo
specify_template_filenames
2 changes: 1 addition & 1 deletion tests/WE2E/setup_WE2E_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function usage {

}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud )

if [ "$1" = "-h" ] ; then usage ; fi
[[ $# -le 2 ]] && usage
Expand Down
2 changes: 1 addition & 1 deletion tests/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function usage() {
exit 1
}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud )

[[ $# -gt 4 ]] && usage

Expand Down
2 changes: 1 addition & 1 deletion ush/get_crontab_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_crontab_contents(called_from_cron, machine, debug):
# themselves being called as cron jobs. In that case, we must instead
# call the system version of crontab at /usr/bin/crontab.
#
if machine == "CHEYENNE":
if machine == "CHEYENNE" or machine == "DERECHO":
if called_from_cron:
crontab_cmd = "/usr/bin/crontab"
else:
Expand Down
43 changes: 43 additions & 0 deletions ush/machine/derecho.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Machine-specific settings for Derecho (ush/machine/derecho.yaml).
platform:
WORKFLOW_MANAGER: rocoto
NCORES_PER_NODE: 128
SCHED: pbspro
TEST_CCPA_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc
TEST_MRMS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/mrms/proc
TEST_NDAS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ndas/proc
DOMAIN_PREGEN_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/FV3LAM_pregen
# All three queue settings point at the same queue, "main".
QUEUE_DEFAULT: main
QUEUE_FCST: main
QUEUE_HPSS: main
# Launch commands: each parallel command is "mpiexec -n <count>", with the count
# taken from a shell variable; the serial command is just "time".
RUN_CMD_FCST: mpiexec -n ${PE_MEMBER01}
RUN_CMD_POST: mpiexec -n $nprocs
RUN_CMD_PRDGEN: mpiexec -n $nprocs
RUN_CMD_SERIAL: time
RUN_CMD_UTILS: mpiexec -n $nprocs
RUN_CMD_NEXUS: mpiexec -n $nprocs
RUN_CMD_AQMLBC: mpiexec -n ${NUMTS}
# Removes the stack-size limit, then prints all limits.
PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }'
TEST_EXTRN_MDL_SOURCE_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data
TEST_AQM_INPUT_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/aqm_data
TEST_PREGEN_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/FV3LAM_pregen
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
# Template: ".../output_data/fcst_" + "ens" when global.NUM_ENS_MEMBERS > 0, else "det",
# followed by the literal text "/{{workflow.PREDEF_GRID_NAME}}".
# NOTE(review): the trailing literal is presumably expanded in a later rendering pass -- confirm.
TEST_VX_FCST_INPUT_BASEDIR: '{{ "/glade/work/epicufsrt/contrib/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}'
# FIX* entries: directories under .../develop/fix, except FIXshp, which points at NaturalEarth.
FIXaer: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_aer
FIXgsi: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_gsi
FIXgsm: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_am
FIXlut: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_lut
FIXorg: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_orog
FIXsfc: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo
FIXshp: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/NaturalEarth
EXTRN_MDL_DATA_STORES: aws
# Per-model input data paths; each ends in ${yyyymmdd}${hh} placeholders.
# NOTE(review): presumably substituted with the cycle date and hour by the workflow -- confirm.
data:
ics_lbcs:
FV3GFS:
nemsio: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
grib2: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
netcdf: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/netcdf/${yyyymmdd}${hh}
NAM: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/NAM/${yyyymmdd}${hh}
HRRR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh}
RAP: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
GSMGFS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh}
2 changes: 1 addition & 1 deletion ush/valid_param_vals.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
valid_vals_RUN_ENVIR: ["nco", "community"]
valid_vals_VERBOSE: [True, False]
valid_vals_DEBUG: [True, False]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"]
valid_vals_FCST_MODEL: ["ufs-weather-model"]
valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]
Expand Down

0 comments on commit fc0403e

Please sign in to comment.