diff --git a/python/TestHarness/schedulers/RunHPC.py b/python/TestHarness/schedulers/RunHPC.py index ec0d1baa4844..a925c0ad0a9f 100644 --- a/python/TestHarness/schedulers/RunHPC.py +++ b/python/TestHarness/schedulers/RunHPC.py @@ -482,20 +482,14 @@ def submitJob(self, job, hold, lock=True): # Write the script open(submission_script, 'w').write(script) + # Path to the hpc submit script, which waits for the file to exist + # and then submits it + hpc_submit = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'hpc_submit') + # Submission command. Here we have a simple bash loop # that will try to wait for the file if it doesn't exist yet submission_command = self.getHPCSubmissionCommand() - cmd = [f'cd {tester.getTestDir()}', - f'FILE="{submission_script}"', - 'for i in {1..40}', - 'do if [ -e "$FILE" ]', - f'then {self.getHPCSubmissionCommand()} $FILE', - 'exit $?', - 'else sleep 0.25', - 'fi', - 'done', - 'exit 1'] - cmd = '; '.join(cmd) + cmd = f'{hpc_submit} {submission_command} {submission_script}' # Do the submission; this is thread safe exit_code, result, full_cmd = self.callHPC(self.CallHPCPoolType.submit, cmd, num_retries=5) diff --git a/python/TestHarness/schedulers/hpc_submit b/python/TestHarness/schedulers/hpc_submit new file mode 100755 index 000000000000..1e5848359804 --- /dev/null +++ b/python/TestHarness/schedulers/hpc_submit @@ -0,0 +1,36 @@ +#!/bin/bash +# Helper script for RunHPC that attempts to submit a HPC +# job until success. This is needed in the event that the +# qsub/slurm script that we wrote from the TestHarness +# is not immediately readable due to a networked file +# system on the (possibly) other host we're submitting from. 

# Usage: hpc_submit SUBMIT_COMMAND FILE
#   SUBMIT_COMMAND  executable used to submit the job; it is run as a
#                   single word (no word splitting is applied to it)
#   FILE            path to the submission script handed to SUBMIT_COMMAND
#
# Exit status:
#   0    the submission succeeded (first attempt or the retry)
#   1    an argument was missing, or FILE never appeared
#   else the exit status of the retried submission
SUBMIT_COMMAND="$1"
FILE="$2"

if [ -z "$SUBMIT_COMMAND" ]; then
    echo "First argument (submit command) not provided"
    exit 1
fi
if [ -z "$FILE" ]; then
    echo "Second argument (file) not provided"
    exit 1
fi

# Full command line; FILE is already the last element, so it must not
# be appended again when the command is invoked.
COMMAND=("${SUBMIT_COMMAND}" "${FILE}")

# Try for 10 seconds (40 polls, 0.25s apart) to find the file
for attempt in {1..40}; do
    if [ -e "$FILE" ]; then
        # First attempt: done as soon as the submission succeeds
        "${COMMAND[@]}" && exit 0

        # The first attempt failed; wait a moment and retry exactly once
        # with the very same command line, reporting the retry's status.
        sleep 1
        "${COMMAND[@]}"
        exit $?
    fi
    sleep 0.25
done

echo "Failed to find ${FILE}"
exit 1