From fa50321f0639bd0111d8ac70c4e4bdd5263f5baa Mon Sep 17 00:00:00 2001 From: Alex Carpenter Date: Thu, 25 Jul 2024 15:50:39 -0700 Subject: [PATCH] Ocean Submit Script Update Co-authored-by: Kyle Pannone --- support/Environments/ocean_gcc.sh | 2 + support/Machines/Ocean.yaml | 12 +++ support/SubmitScripts/Ocean.sh | 105 +++++++++------------ support/SubmitScripts/OceanClang.sh | 137 ---------------------------- 4 files changed, 58 insertions(+), 198 deletions(-) create mode 100644 support/Machines/Ocean.yaml delete mode 100755 support/SubmitScripts/OceanClang.sh diff --git a/support/Environments/ocean_gcc.sh b/support/Environments/ocean_gcc.sh index ccd517be74f2..84b8667e5b1a 100755 --- a/support/Environments/ocean_gcc.sh +++ b/support/Environments/ocean_gcc.sh @@ -92,7 +92,9 @@ spectre_run_cmake() { -D CMAKE_Fortran_COMPILER=${GCC_HOME}/gfortran \ -D USE_PCH=ON \ -D BUILD_PYTHON_BINDINGS=ON \ + -D MACHINE=Ocean \ -D BOOTSTRAP_PY_DEPS=ON \ + -D DEBUG_SYMBOLS=OFF \ "$@" \ $SPECTRE_HOME } diff --git a/support/Machines/Ocean.yaml b/support/Machines/Ocean.yaml new file mode 100644 index 000000000000..2a01d3182a99 --- /dev/null +++ b/support/Machines/Ocean.yaml @@ -0,0 +1,12 @@ +# Distributed under the MIT License. +# See LICENSE.txt for details. + +Machine: + Name: Ocean + Description: | + Supercomputer at Cal State Fullerton hosted by Geoffrey Lovelace. + DefaultTasksPerNode: 1 + DefaultProcsPerTasks: 20 + DefaultQueue: "orca-1" + DefaultTimeLimit: "1-00:00:00" + LaunchCommandSingleNode: [] diff --git a/support/SubmitScripts/Ocean.sh b/support/SubmitScripts/Ocean.sh index d82d395a3148..f32b0d6b459d 100755 --- a/support/SubmitScripts/Ocean.sh +++ b/support/SubmitScripts/Ocean.sh @@ -1,69 +1,52 @@ -#!/bin/bash - -#SBATCH -o spectre.stdout -#SBATCH -e spectre.stderr -#SBATCH --ntasks-per-node 20 -#SBATCH -J KerrSchild -#SBATCH --nodes 2 -#SBATCH -p orca-1 -#SBATCH -t 12:00:00 -#SBATCH -D . +{% extends "SubmitTemplateBase.sh" %} # Distributed under the MIT License. # See LICENSE.txt for details. -# To run a job on Ocean: -# - Set the -J, --nodes, and -t options above, which correspond to job name, -# number of nodes, and wall time limit in HH:MM:SS, respectively. -# - Set the build directory, run directory, executable name, -# and input file below. -# -# NOTE: The executable will not be copied from the build directory, so if you -# update your build directory this file will use the updated executable. -# -# Optionally, if you need more control over how SpECTRE is launched on -# Ocean you can edit the launch command at the end of this file directly. -# -# To submit the script to the queue run: -# sbatch Ocean.sh +# Ocean is a supercomputer at Cal State, Fullerton. +# More information: +# https://github.com/sxs-collaboration/WelcomeToSXS/wiki/Ocean -# Replace these paths with the path to your build directory and to the -# directory where you want the output to appear, i.e. the run directory -# E.g., if you cloned spectre in your home directory, set -# SPECTRE_BUILD_DIR to ${HOME}/spectre/build. 
If you want to run in a -# directory called "Run" in the current directory, set -# SPECTRE_RUN_DIR to ${PWD}/Run -export SPECTRE_BUILD_DIR=${HOME}/Codes/spectre/spectre/build_singularity_release -export SPECTRE_RUN_DIR=${PWD}/Run +{% block head %} +{{ super() -}} +#SBATCH --nodes {{ num_nodes | default(1) }} +#SBATCH --ntasks-per-node 1 +#SBATCH --cpus-per-task 20 +#SBATCH -p {{ queue | default("orca-1") }} +#SBATCH -t {{ time_limit | default("1-00:00:00") }} +{% endblock %} -# Choose the executable and input file to run -# To use an input file in the current directory, set -# SPECTRE_INPUT_FILE to ${PWD}/InputFileName.yaml -export SPECTRE_EXECUTABLE=${SPECTRE_BUILD_DIR}/bin/EvolveGeneralizedHarmonic -export SPECTRE_INPUT_FILE=${PWD}/KerrSchild.yaml - -# These commands load the relevant modules and cd into the run directory, -# creating it if it doesn't exist -module load ohpc -mkdir -p ${SPECTRE_RUN_DIR} -cd ${SPECTRE_RUN_DIR} - -# Copy the input file into the run directory, to preserve it -cp ${SPECTRE_INPUT_FILE} ${SPECTRE_RUN_DIR}/ - -# Set desired permissions for files created with this script -umask 0022 - -# Set the path to include the build directory's bin directory -export PATH=${SPECTRE_BUILD_DIR}/bin:$PATH - -# Flag to stop blas in CCE from parallelizing without charm++ +{% block run_command %} export OPENBLAS_NUM_THREADS=1 -# The 19 is there because Charm++ uses one thread per node for communication -# Here, -np should take the number of nodes (must be the same as --nodes -# in the #SBATCH options above). -SPECTRE_COMMAND="${SPECTRE_EXECUTABLE} +ppn 19 +pemap 0-18 +commap 19" - -mpirun -np ${SLURM_JOB_NUM_NODES} --map-by ppr:1:node singularity exec \ -/opt/ohpc/pub/containers/spectre_ocean.sif \ -bash -c "${SPECTRE_COMMAND} --input-file ${SPECTRE_INPUT_FILE}" +# Generate nodelist file +echo "Running on the following nodes:" +echo ${SLURM_NODELIST} +touch nodelist.$SLURM_JOBID +for node in $(echo $SLURM_NODELIST | scontrol show hostnames); do + echo "host ${node}" >> nodelist.$SLURM_JOBID +done + +# Set worker threads and run command +WORKER_THREADS=$((SLURM_NTASKS * CHARM_PPN)) +SPECTRE_COMMAND="${SPECTRE_EXECUTABLE} ++np ${SLURM_NTASKS} \ +++p ${WORKER_THREADS} ++ppn ${CHARM_PPN} \ +++nodelist nodelist.${SLURM_JOBID}" + + +# When invoking through `charmrun`, charm will initiate remote sessions which +# will wipe out environment settings unless it is forced to re-initialize the +# spectre environment between the start of the remote session and starting the +# spectre executable +echo "#!/bin/sh +source ${SPECTRE_HOME}/support/Environments/ocean_gcc.sh +spectre_load_modules +\$@ +" > ${RUN_DIR}/runscript.${SLURM_JOBID} +chmod u+x ${RUN_DIR}/runscript.${SLURM_JOBID} + +# Run +charmrun ++runscript ${RUN_DIR}/runscript.${SLURM_JOBID} \ + ${SPECTRE_COMMAND} --input-file ${SPECTRE_INPUT_FILE} \ + ${SPECTRE_CHECKPOINT:+ +restart "${SPECTRE_CHECKPOINT}"} +{% endblock %} diff --git a/support/SubmitScripts/OceanClang.sh b/support/SubmitScripts/OceanClang.sh deleted file mode 100755 index b99e0426a141..000000000000 --- a/support/SubmitScripts/OceanClang.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/bash - -#SBATCH -o spectre.stdout -#SBATCH -e spectre.stderr -#SBATCH --ntasks-per-node 20 -#SBATCH -J KerrSchild -#SBATCH --nodes 2 -#SBATCH -p orca-1 -#SBATCH -t 12:00:00 -#SBATCH -D . - -# Distributed under the MIT License. -# See LICENSE.txt for details. 
- -# To run a job on Ocean: -# - Set the -J, --nodes, and -t options above, which correspond to job name, -# number of nodes, and wall time limit in HH:MM:SS, respectively. -# - Set the build directory, run directory, executable name, -# and input file below. -# - Add a file ${HOME}/.charmrunrc that does the following (replace -# ${SPECTRE_BUILD_DIR}/../ with the path to your spectre checkout): -# source /etc/profile.d/lmod.sh -# source ${SPECTRE_BUILD_DIR}/../support/Environments/ocean_clang.sh -# spectre_load_modules -# -# NOTE: The executable will not be copied from the build directory, so if you -# update your build directory this file will use the updated executable. -# -# Optionally, if you need more control over how SpECTRE is launched on -# Ocean you can edit the launch command at the end of this file directly. -# -# To submit the script to the queue run: -# sbatch Ocean.sh - -# Replace these paths with the path to your build directory, to the source root -# directory, the spectre dependencies module directory, and to the directory -# where you want the output to appear, i.e. the run directory. -# E.g., if you cloned spectre in your home directory, set -# SPECTRE_BUILD_DIR to ${HOME}/spectre/build. If you want to run in a -# directory called "Run" in the current directory, set -# SPECTRE_RUN_DIR to ${PWD}/Run -export SPECTRE_BUILD_DIR=${HOME}/Codes/spectre/spectre/build_clang -export SPECTRE_MODULE_DIR=${HOME}/Codes/spectre_deps/modules/ -export SPECTRE_RUN_DIR=${PWD}/Run - -# Choose the executable and input file to run -# To use an input file in the current directory, set -# SPECTRE_INPUT_FILE to ${PWD}/InputFileName.yaml -export SPECTRE_EXECUTABLE=${SPECTRE_BUILD_DIR}/bin/EvolveKerrSchild -export SPECTRE_INPUT_FILE=${PWD}/KerrSchild.yaml - -# These commands load the relevant modules and cd into the run directory, -# creating it if it doesn't exist -module load ohpc -source ${SPECTRE_BUILD_DIR}/../support/Environments/ocean_clang.sh -module use ${SPECTRE_MODULE_DIR} -spectre_load_modules -module list - -mkdir -p ${SPECTRE_RUN_DIR} -cd ${SPECTRE_RUN_DIR} - -# Copy the input file into the run directory, to preserve it -cp ${SPECTRE_INPUT_FILE} ${SPECTRE_RUN_DIR}/ - -# Set desired permissions for files created with this script -umask 0022 - -# Set the path to include the build directory's bin directory -export PATH=${SPECTRE_BUILD_DIR}/bin:$PATH - -# Flag to stop blas in CCE from parallelizing without charm++ -export OPENBLAS_NUM_THREADS=1 - -# Generate the nodefile -echo "Running on the following nodes:" -echo ${SLURM_NODELIST} -touch nodelist.$SLURM_JOBID -for node in $(echo $SLURM_NODELIST | scontrol show hostnames); do - echo "host ${node}" >> nodelist.$SLURM_JOBID -done - -# The (SLURM_NTASKS_PER_NODE - 1) is there because Charm++ uses one thread per -# node for communication -# Here, ++np should take the number of nodes (must be the same as --nodes -# in the #SBATCH options above). 
-WORKER_THREADS_PER_NODE=$((SLURM_NTASKS_PER_NODE - 1)) -WORKER_THREADS=$((SLURM_NPROCS - SLURM_NNODES)) -SPECTRE_COMMAND="${SPECTRE_EXECUTABLE} ++np ${SLURM_NNODES} \ -++p ${WORKER_THREADS} ++ppn ${WORKER_THREADS_PER_NODE} \ -++nodelist nodelist.${SLURM_JOBID}" - - -# When invoking through `charmrun`, charm will initiate remote sessions which -# will wipe out environment settings unless it is forced to re-initialize the -# spectre environment between the start of the remote session and starting the -# spectre executable -echo "#!/bin/sh -source ${SPECTRE_BUILD_DIR}/../support/Environments/ocean_clang.sh -module use ${SPECTRE_MODULE_DIR} -spectre_load_modules -\$@ -" > ${SPECTRE_RUN_DIR}/runscript.${SLURM_JOBID} - -chmod u+x ${SPECTRE_RUN_DIR}/runscript.${SLURM_JOBID} - -checkpoints=0 -current_checkpoint=0000 -if [[ $checkpoints == 0 ]]; then - charmrun ++runscript ${SPECTRE_RUN_DIR}/runscript.${SLURM_JOBID} \ - ${SPECTRE_COMMAND} --input-file ${SPECTRE_INPUT_FILE} - sleep 10s - # If a checkpoint is found add one to checkpoint and sumbit next job - if test -e "${PWD}/Checkpoints/Checkpoint_$current_checkpoint"; then - cp ../OceanClang.sh . - sed -i "s/^checkpoints=0/checkpoints=1/" OceanClang.sh - sbatch OceanClang.sh - fi -# Section to start from checkpoint -elif [[ $checkpoints -gt 0 && $checkpoints -lt 10000 ]]; then - ln -s ${PWD}/../Checkpoints/Checkpoint_$current_checkpoint . - charmrun ${SPECTRE_COMMAND} \ - +restart Checkpoints/Checkpoint_$current_checkpoint \ - --input-file ${SPECTRE_INPUT_FILE} - sleep 10s - # If the next checkpoint was created modify variables and submit next job - printf -v next_checkpoint %04d $checkpoints - if test -e "${PWD}/Checkpoints/Checkpoint_$next_checkpoint"; then - cp ../OceanClang.sh . - next_num_of_checkpoints=$(($checkpoints + 1)) - #Updating variables for the next possible checkpoint - sed -i "s/^checkpoints=$checkpoints/"\ -"checkpoints=$next_num_of_checkpoints/" OceanClang.sh - sed -i "s/^current_checkpoint=$current_checkpoint/"\ -"current_checkpoint=$next_checkpoint/" OceanClang.sh - sbatch OceanClang.sh - fi -fi
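For reference, with the default values in the template above and in
support/Machines/Ocean.yaml, the Ocean-specific part of the "{% block head %}"
section renders to the following SBATCH directives (whatever SubmitTemplateBase.sh
contributes through "{{ super() }}" is not shown here, since that file is not part
of this patch):

    #SBATCH --nodes 1
    #SBATCH --ntasks-per-node 1
    #SBATCH --cpus-per-task 20
    #SBATCH -p orca-1
    #SBATCH -t 1-00:00:00

The rendered script can still be submitted by hand with sbatch, or generated and
submitted by the scheduling tooling that reads support/Machines/Ocean.yaml and
these Jinja templates.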
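As a worked example of the launch assembled in the new run_command block: CHARM_PPN
is expected to be provided by SubmitTemplateBase.sh, which is not shown in this
patch. Assuming CHARM_PPN=19, i.e. one of the 20 cores per node reserved for the
Charm++ communication thread as in the old script, a 2-node job has SLURM_NTASKS=2
and WORKER_THREADS=2*19=38, so the command reduces to roughly

    charmrun ++runscript ${RUN_DIR}/runscript.${SLURM_JOBID} \
        ${SPECTRE_EXECUTABLE} ++np 2 ++p 38 ++ppn 19 \
        ++nodelist nodelist.${SLURM_JOBID} \
        --input-file ${SPECTRE_INPUT_FILE}

with "+restart <checkpoint>" appended only when SPECTRE_CHECKPOINT is set.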