From e97a46c3551160b5696b13aae672597496d374db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 13:57:48 -0600 Subject: [PATCH 001/208] Bug fix to support the %H format in METplus via printf. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 245369509b..0f2c4c0716 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -163,9 +163,12 @@ cannot be empty: #----------------------------------------------------------------------- # case "${METplus_time_fmt}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S"|"%H") + "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") fmt="${METplus_time_fmt}" ;; + "%H") + fmt="%02.0f" + ;; "%HHH") # # Print format assumes that the argument to printf (i.e. the number to From 815c941f291a764c86e7a0e0c6d2996b2e94ec9a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 14:42:20 -0600 Subject: [PATCH 002/208] Bug fix to the bug fix! --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 0f2c4c0716..572f7c68c4 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -167,7 +167,18 @@ cannot be empty: fmt="${METplus_time_fmt}" ;; "%H") - fmt="%02.0f" +# +# The "%H" format needs to be treated differently depending on if it's +# formatting a "lead" time type or another (e.g. "init" or "vald") because +# for "lead", the printf function is used below (which doesn't understand +# the "%H" format) whereas for the others, the date utility is used (which +# does understand "%H"). +# + if [ "${METplus_time_type}" = "lead" ]; then + fmt="%02.0f" + else + fmt="${METplus_time_fmt}" + fi ;; "%HHH") # From bc8548060558ecc9e3f8b2a8f64f9bc7910ac608 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:10:29 -0600 Subject: [PATCH 003/208] Bug fix from Michelle H. for prepbufr files: "On May 22, the name of the tar file where the prepbufr files live changed" --- parm/data_locations.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index 7901f4c085..dd3b5ddd17 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -351,11 +351,13 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.2_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" From 81d61b8ea2d233eece6abcbd08086dec393f1ba3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:22:09 -0600 Subject: [PATCH 004/208] Bug fix for removing phantom 00-hour tasks from workflow. Bug found by Michelle Harrold, solution by Michael Kavulich. 
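A note on why this fixes the phantom tasks: the GridStat/PointStat metatasks in verify_det.yaml override the inherited "attrs:" mapping in order to raise "maxtries", and a YAML override replaces the whole mapping rather than deep-merging it, so "cycledefs: forecast" was silently dropped from those tasks. Without that restriction the tasks were evidently generated for the default cycle definition as well, which appears to be where the extra 00-hour tasks came from. Anchoring the default mapping ("attrs: &default_attrs") and merging it back in with "<<: *default_attrs" keeps "cycledefs" while still overriding "maxtries". The stand-alone sketch below (not part of the patch) illustrates the shallow-merge behavior; it assumes python3 with PyYAML is available:

python3 -c '
import yaml
doc = """
default_task: &default_task
  attrs: &default_attrs
    cycledefs: forecast
    maxtries: "1"

task_without_merge:
  <<: *default_task
  attrs:
    maxtries: "2"

task_with_merge:
  <<: *default_task
  attrs:
    <<: *default_attrs
    maxtries: "2"
"""
cfg = yaml.safe_load(doc)
print(cfg["task_without_merge"]["attrs"])  # only maxtries; cycledefs was dropped
print(cfg["task_with_merge"]["attrs"])     # cycledefs kept alongside maxtries
'
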
--- parm/wflow/verify_det.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index e82d7c61e1..3acfa3e836 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -1,6 +1,6 @@ default_task_verify_det: &default_task_verify_det account: '&ACCOUNT;' - attrs: + attrs: &default_attrs cycledefs: forecast maxtries: '1' envars: &default_vars @@ -30,6 +30,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: @@ -62,6 +63,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: From 35530abd74948fc50d1e6ebc25a25a8a7cd2f8c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 11:01:43 -0600 Subject: [PATCH 005/208] Bug fix: Append cycle date to names of deterministic GridStat and PointStat tasks' METplus log files. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 4f871e6e1b..91c5a7896b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -277,7 +277,7 @@ fi # metplus_config_tmpl_bn="GridStat_or_PointStat" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${ensmem_name}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # From 6c548ceeb17f60d7fa11417ae8ef7451a5269321 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 14:05:06 -0600 Subject: [PATCH 006/208] Version of ex-script for pulling obs that works for multiple overlapping cycles for CCPA and MRMS but not yet for NDAS or NOHRSC. --- scripts/exregional_get_verif_obs.sh | 295 +++++++++++++++++++--------- 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a74f11cd3a..564860899f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -123,6 +123,11 @@ imm=$(echo ${PDY} | cut -c5-6) idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} +echo +echo "HELLO GGGGGGGG" +iyyyymmddhh=${PDY}${cyc} +echo "iyyyymmddhh = ${iyyyymmddhh}" + # Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" @@ -144,126 +149,184 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) +echo +echo "HELLO HHHHHHHH" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "ihh = ${ihh}" +#exit + #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) - - # Retrieve CCPA observations +# +#----------------------------------------------------------------------- +# +# Retrieve CCPA observations. 
+# +#----------------------------------------------------------------------- +# if [[ ${OBTYPE} == "CCPA" ]]; then - #CCPA is accumulation observations, so none to retrieve for hour zero + # CCPA is accumulation observations, so for hour 0 there are no files + # to retrieve. if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Staging location for raw CCPA data from HPSS - ccpa_raw=${OBS_DIR}/raw + # Accumulation is for accumulation of CCPA data to pull (hardcoded to + # 01h, see note above). + accum=01 - # Reorganized CCPA location + # Directory in which the daily subdirectories containing the CCPA grib2 + # files will appear after this script is done. Make sure this exists. ccpa_proc=${OBS_DIR} + if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_proc}/${vyyyymmdd} + fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to 01h, see note above.) - accum=01 + # File name within the HPSS archive file. Note that this only includes + # the valid hour in its name; the year, month, and day are specified in + # the name of the directory in which it is located within the archive. + ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" + + # Full path to final location of the CCPA grib2 file for the current valid + # time. Note that this path includes the valid date (year, month, and day) + # information in the name of a subdirectory and the valid hour-of-day in + # the name of the file. + ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" + + # Temporary staging directory for raw CCPA files from HPSS. These "raw" + # directories are temporary directories in which archive files from HPSS + # are placed and files within those archives extracted. Note that the + # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa + # workflow tasks (i.e. those corresponding to cycles other than the current + # one) writing into the same directory. + ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" # Check if file exists on disk; if not, pull it. - ccpa_file="$ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - if [[ -f "${ccpa_file}" ]]; then + if [[ -f "${ccpa_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ccpa_file}" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else - echo "${OBTYPE} file does not exist on disk:" - echo "${ccpa_file}" - echo "Will attempt to retrieve from remote locations" - # Create necessary raw and prop directories - if [[ ! -d "$ccpa_raw/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd} - fi - if [[ ! -d "$ccpa_raw/${vyyyymmdd_p1}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd_p1} + echo "${OBTYPE} file does not exist on disk:" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." + + # Create the necessary raw (sub)directories on disk. Note that we need + # to create a subdirectory for 1 day + the current valid date because + # that is needed to get around a metadata error in the CCPA files on HPSS + # (in particular, one hour CCPA files have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504). + if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd} fi - if [[ ! -d "$ccpa_proc/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_proc/${vyyyymmdd} + if [[ ! 
-d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} fi - # Check if valid hour is 00 + + valid_time=${vyyyymmdd}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd}" if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd_p1} \ - --summary_file ${logfile}" + valid_time=${vyyyymmdd_p1}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd_p1}" + fi - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + # The retrieve_data.py script below uses the current working directory as + # the location into which to extract the contents of the HPSS archive (tar) + # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. ones + # for different cycles), they will be extracting files into the same (current) + # directory. That causes errors in the workflow. To avoid this, change + # location to the raw directory. This will avoid such errors because the + # raw directory has a cycle-specific name. + cd ${ccpa_raw} + + # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file + # corresponding to the current valid time (valid_time). + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${output_path} \ + --summary_file ${logfile}" - The following command exited with a non-zero exit status: - ${cmd} + echo "CALLING: ${cmd}" + $cmd || print_err_msg_exit "\ + Could not retrieve CCPA data from HPSS. + + The following command exited with a non-zero exit status: + ${cmd} " - else - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd} \ - --summary_file ${logfile}" + # Move CCPA file to its final location. + # + # Since this script is part of a workflow, other tasks (for other cycles) + # that call this script may have extracted and placed the current file + # in its final location between the time we checked for its existence + # above above (and didn't find it) and now. This can happen because + # there can be overlap between the verification times for the current + # cycle and those of other cycles. For this reason, check again for the + # existence of the file in its final location. If it's already been + # created by another task, don't bother to move it from its raw location + # to its final location. + if [[ -f "${ccpa_fp_proc}" ]]; then - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + echo "${OBTYPE} file exists on disk:" + echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" + echo "It was likely created by a get_obs_ccpa workflow task for another cycle." + echo "NOT moving file from its temporary (raw) location to its final location." - The following command exited with a non-zero exit status: - ${cmd} -" - fi + else - # One hour CCPA files have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - # After data is pulled, reorganize into correct valid yyyymmdd structure. 
- if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 6 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 7 && ${vhh_noZero} -le 12 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 13 && ${vhh_noZero} -le 18 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - fi - elif [[ ${vhh_noZero} -eq 0 ]]; then - # One hour CCPA files on HPSS have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} + # Full path to the CCPA file that was pulled and extracted above and + # placed in the raw directory. + ccpa_fp_raw="${output_path}/${ccpa_fn}" + + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. + if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then + wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s + else + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + fi fi + fi fi - # Retrieve MRMS observations +# +#----------------------------------------------------------------------- +# +# Retrieve MRMS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "MRMS" ]]; then + # Top-level MRMS directory - # raw MRMS data from HPSS - mrms_raw=${OBS_DIR}/raw # Reorganized MRMS location mrms_proc=${OBS_DIR} + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. for field in ${VAR[@]}; do + if [ "${field}" = "REFC" ]; then field_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -279,32 +342,53 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do " fi - mrms_file="$mrms_proc/${vyyyymmdd}/${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_day_dir="${mrms_proc}/${vyyyymmdd}" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + +# if [[ -f "${mrms_fp}" ]]; then +# +# echo "${OBTYPE} file for field \"${field}\" exists on disk:" +# echo " mrms_fp = \"${mrms_fp}\"" +# echo "Will NOT attempt to retrieve from remote locations." 
+ + if [[ -d "${mrms_day_dir}" ]]; then + + echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir = \"${mrms_day_dir}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." + echo "Will NOT attempt to retrieve the current file" + echo " mrms_fp = \"${mrms_fp}\"" + echo "from remote locations." - if [[ -f "${mrms_file}" ]]; then - echo "${OBTYPE} file exists on disk for field ${field}:\n${mrms_file}" else - echo "${OBTYPE} file does not exist on disk for field ${field}:\n${mrms_file}" - echo "Will attempt to retrieve from remote locations" + + echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" + echo " mrms_fp = \"${mrms_fp}\"" + echo "Will attempt to retrieve from remote locations." + # Create directories if necessary - if [[ ! -d "$mrms_raw/${vyyyymmdd}" ]]; then - mkdir -p $mrms_raw/${vyyyymmdd} + if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_raw}/${vyyyymmdd} fi if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then mkdir -p $mrms_proc/${vyyyymmdd} fi + valid_time=${vyyyymmdd}${vhh} + output_path="${mrms_raw}/${vyyyymmdd}" + cd ${mrms_raw} # Pull MRMS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ + --cycle_date ${valid_time} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path $mrms_raw/${vyyyymmdd} \ + --output_path ${output_path} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -326,8 +410,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi done - - # Retrieve NDAS observations +# +#----------------------------------------------------------------------- +# +# Retrieve NDAS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "NDAS" ]]; then # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw @@ -363,9 +452,17 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do continue fi +echo "" +echo "HELLO AAAAA" +echo "vhh_noZero = ${vhh_noZero}" + if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then +echo "" +echo "HELLO BBBBB" if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then +echo "" +echo "HELLO CCCCC" mkdir -p $ndas_raw/${vyyyymmdd}${vhh} fi @@ -459,8 +556,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi fi - - # Retrieve NOHRSC observations +# +#----------------------------------------------------------------------- +# +# Retrieve NOHRSC observations. 
+# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "NOHRSC" ]]; then #NOHRSC is accumulation observations, so none to retrieve for hour zero @@ -534,7 +636,8 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC " fi # Increment to next forecast hour - # Increment to next forecast hour + + # Increment to next forecast hour echo "Finished fcst hr=${current_fcst}" current_fcst=$((${current_fcst} + 1)) @@ -542,7 +645,7 @@ done # Clean up raw, unprocessed observation files -rm -rf ${OBS_DIR}/raw +#rm -rf ${OBS_DIR}/raw # #----------------------------------------------------------------------- From 307f92ee1f998f303d93859ebd0b26bc63db1385 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 17 Jul 2024 09:53:55 -0600 Subject: [PATCH 007/208] Changes to make get_obs_mrms tasks to work for mulitple cycles and without performing unnecessary repeated pulls. --- scripts/exregional_get_verif_obs.sh | 87 ++++++++++++++--------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 564860899f..c86ba6796a 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -89,7 +89,7 @@ set -x # hh (00 through 05). If using custom staged data, you will have to # rename the files accordingly. # -# If data is retrieved from HPSS, it will automatically staged by this +# If data is retrieved from HPSS, it will be automatically staged by this # this script. # # @@ -293,13 +293,15 @@ echo "ihh = ${ihh}" # One hour CCPA files have incorrect metadata in the files under the "00" # directory from 20180718 to 20210504. After data is pulled, reorganize # into correct valid yyyymmdd structure. + #mv_or_cp="mv" + mv_or_cp="cp" if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then - mv ${ccpa_fp_raw} ${ccpa_fp_proc} + ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s else - mv ${ccpa_fp_raw} ${ccpa_fp_proc} + ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} fi fi @@ -320,59 +322,53 @@ echo "ihh = ${ihh}" # Reorganized MRMS location mrms_proc=${OBS_DIR} - # raw MRMS data from HPSS - #mrms_raw=${OBS_DIR}/raw - mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" - - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. - for field in ${VAR[@]}; do + mrms_day_dir="${mrms_proc}/${vyyyymmdd}" - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} + if [[ -d "${mrms_day_dir}" ]]; then - Valid options are 'REFC', 'RETOP'. -" - fi + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir = \"${mrms_day_dir}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." 
+ echo "Thus, we will NOT attempt to retrieve the current data from remote locations" - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" - -# if [[ -f "${mrms_fp}" ]]; then -# -# echo "${OBTYPE} file for field \"${field}\" exists on disk:" -# echo " mrms_fp = \"${mrms_fp}\"" -# echo "Will NOT attempt to retrieve from remote locations." - - if [[ -d "${mrms_day_dir}" ]]; then + else - echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Will NOT attempt to retrieve the current file" - echo " mrms_fp = \"${mrms_fp}\"" - echo "from remote locations." + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + for field in ${VAR[@]}; do + + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + + if [ "${field}" = "REFC" ]; then + field_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + field_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - else + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" echo " mrms_fp = \"${mrms_fp}\"" echo "Will attempt to retrieve from remote locations." - # Create directories if necessary + # Create directories if necessary. if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then mkdir -p ${mrms_raw}/${vyyyymmdd} fi - if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then - mkdir -p $mrms_proc/${vyyyymmdd} + if [[ ! -d "${mrms_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_proc}/${vyyyymmdd} fi valid_time=${vyyyymmdd}${vhh} @@ -408,8 +404,9 @@ echo "ihh = ${ihh}" hour=$((${hour} + 1)) # hourly increment done - fi - done + done + + fi # #----------------------------------------------------------------------- # From be542168f738cb7f3b93594bd62413ca30d4428b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 17 Jul 2024 23:20:49 -0600 Subject: [PATCH 008/208] Minor improvement for consistency. 
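A note on the pull_completed.txt flag file introduced in the previous patch: the wait loop that polls for it has no upper bound, so if the task that created the raw daily directory dies before touching the flag file, every overlapping get_obs_mrms task will sleep forever. A bounded wait is one way to guard against that. The sketch below is only an illustration (not part of these patches); it reuses variable names and the print_err_msg_exit helper from exregional_get_verif_obs.sh, and the 3600-second limit is an arbitrary choice:

max_wait_secs=3600
num_secs_waited=0
while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do
  if [[ ${num_secs_waited} -ge ${max_wait_secs} ]]; then
    print_err_msg_exit "\
Timed out after ${max_wait_secs} seconds waiting for the MRMS retrieval for
valid day ${vyyyymmdd} to complete (flag file never appeared):
  ${mrms_day_dir_raw}/pull_completed.txt"
  fi
  echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..."
  sleep 5s
  num_secs_waited=$((num_secs_waited + 5))
done
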
--- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index b7511bf63f..da43336a0d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -64,12 +64,12 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' From af2ab4c531aa5ca80a513f8fd164485862217b68 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 01:39:37 -0600 Subject: [PATCH 009/208] New version of CCPA obs fetching (rename variables, include lots more comments). --- scripts/exregional_get_verif_obs.sh | 264 ++++++++++++++++++++-------- 1 file changed, 193 insertions(+), 71 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c86ba6796a..957770e5f1 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -42,8 +42,8 @@ set -x # # {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 # -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# If data is retrieved from HPSS, it will be automatically staged by this +# script. # # Notes about the data and how it's used for verification: # @@ -53,7 +53,7 @@ set -x # 2. There is a problem with the valid time in the metadata for files # valid from 19 - 00 UTC (or files under the '00' directory). This is # accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accouned +# have manually staged data on disk you should be sure this is accounted # for. See in-line comments below for details. # # @@ -167,44 +167,45 @@ echo "ihh = ${ihh}" # if [[ ${OBTYPE} == "CCPA" ]]; then - # CCPA is accumulation observations, so for hour 0 there are no files - # to retrieve. + # CCPA is accumulation observations. We do not need to retrieve any + # observed accumulations at forecast hour 0 because there aren't yet + # any accumulations in the forecast(s) to compare it to. if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to - # 01h, see note above). + # CCPA accumulation period to consider. Here, we only retrieve data for + # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) + # are obtained elsewhere in the workflow by adding up these 01h accumulations. accum=01 - # Directory in which the daily subdirectories containing the CCPA grib2 - # files will appear after this script is done. Make sure this exists. - ccpa_proc=${OBS_DIR} - if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_proc}/${vyyyymmdd} - fi - - # File name within the HPSS archive file. Note that this only includes - # the valid hour in its name; the year, month, and day are specified in - # the name of the directory in which it is located within the archive. 
+ # Base directory in which the daily subdirectories containing the CCPA + # grib2 files will appear after this script is done, and the daily such + # subdirectory for the current valid time (year, month, and day). We + # refer to these as the "processed" base and daily subdirectories because + # they contain the final files after all processing by this script is + # complete. + ccpa_basedir_proc=${OBS_DIR} + ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" + # Make sure these directories exist. + mkdir -p ${ccpa_day_dir_proc} + + # Name of the grib2 file to extract from the archive (tar) file. Note + # that this only contains the valid hour; the valid year, month, and day + # are specified in the name of the directory within the archive in which + # the file is located. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - # Full path to final location of the CCPA grib2 file for the current valid - # time. Note that this path includes the valid date (year, month, and day) - # information in the name of a subdirectory and the valid hour-of-day in - # the name of the file. - ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" - - # Temporary staging directory for raw CCPA files from HPSS. These "raw" - # directories are temporary directories in which archive files from HPSS - # are placed and files within those archives extracted. Note that the - # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa - # workflow tasks (i.e. those corresponding to cycles other than the current - # one) writing into the same directory. - ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" - - # Check if file exists on disk; if not, pull it. + # Full path to the location of the processed CCPA grib2 file for the + # current valid time. Note that this path includes the valid date (year, + # month, and day) information in the name of a subdirectory and the valid + # hour-of-day in the name of the file. + ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + + # Check if the CCPA grib2 file for the current valid time already exists + # at its procedded location on disk. If so, skip and go to the next valid + # time. If not, pull it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -216,34 +217,152 @@ echo "ihh = ${ihh}" echo "${OBTYPE} file does not exist on disk:" echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" echo "Will attempt to retrieve from remote locations." + # + #----------------------------------------------------------------------- + # + # Below, we will use the retrieve_data.py script to retrieve the CCPA + # grib2 file from a data store (e.g. HPSS). Before doing so, note the + # following: + # + # * The daily archive (tar) file containing CCPA obs has a name of the + # form + # + # [PREFIX].YYYYMMDD.tar + # + # where YYYYMMDD is a given year, month, and day combination, and + # [PREFIX] is a string that is not relevant to the discussion here + # (the value it can take on depends on which of several time periods + # YYYYMMDD falls in, and the retrieve_data.py tries various values + # until it finds one for which a tar file exists). Unintuitively, this + # archive file contains accumulation data for valid times starting at + # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current + # day (YYYYMMDD). In other words, the valid times of the contents of + # this archive file are shifted back by 6 hours relative to the time + # string appearing in the name of the file. See section "DETAILS..." 
+ # for a detailed description of the directory structure in the CCPA + # archive files. + # + # * We call retrieve_data.py in a temporary cycle-specific subdirectory + # in order to prevent get_obs_ccpa tasks for different cycles from + # clobbering each other's output. We refer to this as the "raw" CCPA + # base directory because it contains files as they are found in the + # archives before any processing by this script. + # + # * In each (cycle-specific) raw base directory, the data is arranged in + # daily subdirectories with the same timing as in the archive (tar) + # files (which are described in the section "DETAILS..." below). In + # particular, each daily subdirectory has the form YYYYMDD, and it may + # contain CCPA grib2 files for accumulations valid at hour 19 of the + # previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). + # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the + # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer + # to these as raw daily (sub)directories to distinguish them from the + # processed daily subdirectories under the processed (final) CCPA base + # directory (ccpa_basedir_proc). + # + # * For a given cycle, some of the valid times at which there is forecast + # output may not have a corresponding file under the raw base directory + # for that cycle. This is because another cycle that overlaps this cycle + # has already obtained the grib2 CCPA file for that valid time and placed + # it in its processed location; as a result, the retrieveal of that grib2 + # file for this cycle is skipped. + # + # * To obtain a more intuitive temporal arrangement of the data in the + # processed CCPA directory structure than the temporal arrangement used + # in the archives and raw directories, we process the raw files such + # that the data in the processed directory structure is shifted forward + # in time 6 hours relative to the data in the archives and raw directories. + # This results in a processed base directory that, like the raw base + # directory, also contains daily subdirectories of the form YYYYMMDD, + # but each such subdirectory may only contain CCPA data at valid hours + # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but + # may not contain data that is valid on the previous, next, or any other + # day). + # + # * For data between 20180718 and 20210504, the 01h accumulation data + # (which is the only accumulation we are retrieving) have incorrect + # metadata under the "00" directory in the archive files (meaning for + # hour 00 and hours 19-23, which are the ones in the "00" directory). + # Below, we use wgrib2 to make a correction for this when transferring + # (moving or copying) grib2 files from the raw daily directories to + # the processed daily directories. + # + # + # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES + # ---------------------------------------------------------- + # + # The daily archive file containing CCPA obs is named + # + # [PREFIX].YYYYMMDD.tar + # + # This file contains accumulation data for valid times starting at hour + # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day + # (YYYYMMDD). In particular, when untarred, the daily archive file + # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and + # 18 subdirectories contain grib2 files for accumulations valid at or + # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). 
+ # For example, the 06 directory contains data valid at: + # + # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; + # * YYYYMMDD[03, 06] for 03h accumulations; + # * YYYYMMDD[06] for 06h accumulations. + # + # The valid times for the data in the 12 and 18 subdirectories are + # analogous. However, the 00 subdirectory is different in that it + # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE + # this time, i.e. the data for valid times other than YYYYMMDD00 are on + # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at + # (note the DD-1, meaning one day prior): + # + # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; + # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; + # * YYYYMMDD00 for 06h accumulations. + # + #----------------------------------------------------------------------- + # - # Create the necessary raw (sub)directories on disk. Note that we need - # to create a subdirectory for 1 day + the current valid date because - # that is needed to get around a metadata error in the CCPA files on HPSS - # (in particular, one hour CCPA files have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504). - if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd} - fi - if [[ ! -d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} - fi - - valid_time=${vyyyymmdd}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd}" - if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then + # Set parameters for retrieving CCPA data using retrieve_data.py. + # Definitions: + # + # valid_time: + # The valid time in the name of the archive (tar) file from which data + # will be pulled. Due to the way the data is arranged in the CCPA archive + # files (as described above), for valid hours 19 to 23 of the current day, + # this must be set to the corresponding valid time on the NEXT day. + # + # ccpa_basedir_raw: + # Raw base directory that will contain the raw daily subdirectory in which + # the retrieved CCPA grib2 file will be placed. Note that this must be + # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) + # to avoid get_obs_ccpa workflow tasks for other cycles writing to the + # same directories/files. Note also that this doesn't have to depend on + # the current valid hour (0-18 vs. 19-23), but for clarity and ease of + # debugging, here we do make it valid-hour-dependent. + # + # ccpa_day_dir_raw: + # Raw daily subdirectory under the raw base directory. This is dependent + # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) + # in order to maintain the same data timing arrangement in the raw daily + # directories as in the archive files. + # + if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then + valid_time=${vyyyymmdd}${vhh} + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" + elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then valid_time=${vyyyymmdd_p1}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd_p1}" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" fi + mkdir -p ${ccpa_day_dir_raw} - # The retrieve_data.py script below uses the current working directory as - # the location into which to extract the contents of the HPSS archive (tar) - # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. 
ones - # for different cycles), they will be extracting files into the same (current) - # directory. That causes errors in the workflow. To avoid this, change - # location to the raw directory. This will avoid such errors because the - # raw directory has a cycle-specific name. - cd ${ccpa_raw} + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_ccpa tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the CCPA tar files into the directory it was called from, + # which is the working directory of this script right before retrieve_data.py + # is called. + cd ${ccpa_basedir_raw} # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file # corresponding to the current valid time (valid_time). @@ -255,7 +374,7 @@ echo "ihh = ${ihh}" --cycle_date ${valid_time} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${output_path} \ + --output_path ${ccpa_day_dir_raw} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -266,38 +385,41 @@ echo "ihh = ${ihh}" ${cmd} " - # Move CCPA file to its final location. + # Create the processed CCPA grib2 files. This usually consists of just + # moving or copying the raw file to its processed location, but for valid + # times between 20180718 and 20210504, it involves using wgrib2 to correct + # an error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. # # Since this script is part of a workflow, other tasks (for other cycles) # that call this script may have extracted and placed the current file - # in its final location between the time we checked for its existence - # above above (and didn't find it) and now. This can happen because - # there can be overlap between the verification times for the current - # cycle and those of other cycles. For this reason, check again for the - # existence of the file in its final location. If it's already been - # created by another task, don't bother to move it from its raw location - # to its final location. + # in its processed location between the time we checked for its existence + # above (and didn't find it) and now. This can happen because there can + # be overlap between the verification times for the current cycle and + # those of other cycles. For this reason, check again for the existence + # of the file in its processed location. If it has already been created + # by another task, don't bother to create it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle." - echo "NOT moving file from its temporary (raw) location to its final location." + echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." + echo "NOT moving or copying file from its raw location to its processed location." else # Full path to the CCPA file that was pulled and extracted above and # placed in the raw directory. - ccpa_fp_raw="${output_path}/${ccpa_fn}" + ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. 
#mv_or_cp="mv" mv_or_cp="cp" if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s else From 85c3d58a0c855ea347a2350c62b0eae88ac38bee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:53:26 -0600 Subject: [PATCH 010/208] Minor changes to ccpa section. --- scripts/exregional_get_verif_obs.sh | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 957770e5f1..1e49d1f45c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -138,6 +138,11 @@ fcst_length=$((10#${fcst_length})) current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do + +echo +echo "HELLO GGGGGGGG" +echo "current_fcst = ${current_fcst}" + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") @@ -191,10 +196,8 @@ echo "ihh = ${ihh}" # Make sure these directories exist. mkdir -p ${ccpa_day_dir_proc} - # Name of the grib2 file to extract from the archive (tar) file. Note - # that this only contains the valid hour; the valid year, month, and day - # are specified in the name of the directory within the archive in which - # the file is located. + # Name of the grib2 file to extract from the archive (tar) file as well + # as the name of the processed grib2 file. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" # Full path to the location of the processed CCPA grib2 file for the @@ -391,14 +394,14 @@ echo "ihh = ${ihh}" # an error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. # - # Since this script is part of a workflow, other tasks (for other cycles) - # that call this script may have extracted and placed the current file - # in its processed location between the time we checked for its existence - # above (and didn't find it) and now. This can happen because there can - # be overlap between the verification times for the current cycle and - # those of other cycles. For this reason, check again for the existence - # of the file in its processed location. If it has already been created - # by another task, don't bother to create it. + # Since this script is part of a workflow, another get_obs_ccpa task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_ccpa + # task, don't bother to recreate it. 
if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" From b7c6f00d7b75f0534fd1e2789e90d09c787c4309 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:56:25 -0600 Subject: [PATCH 011/208] Changes for MRMS. --- scripts/exregional_get_verif_obs.sh | 213 +++++++++++++++++++--------- 1 file changed, 143 insertions(+), 70 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 1e49d1f45c..3ae8405e36 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -442,96 +442,169 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "MRMS" ]]; then - # Top-level MRMS directory + # Base directory in which the daily subdirectories containing the MRMS + # grib2 files for REFC (composite reflectivity) and REFC (echo top) will + # appear after this script is done, and the daily such subdirectory for + # the current valid time (year, month, and day). We refer to these as + # the "processed" base and daily subdirectories because they contain the + # final files after all processing by this script is complete. + mrms_basedir_proc=${OBS_DIR} + mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" + + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + for field in ${VAR[@]}; do + + # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + if [ "${field}" = "REFC" ]; then + file_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + file_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - # Reorganized MRMS location - mrms_proc=${OBS_DIR} +# Name of the MRMS grib2 file for the current field and valid time that +# will appear in the processed daily subdirectory after this script finishes. + mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + +# Full path to the processed MRMS grib2 file for the current field and +# valid time. + mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" +# Check if the processed MRMS grib2 file for the current field and valid +# time already exists on disk. If so, skip and go to the next valid time. +# If not, pull it. + if [[ -f "${mrms_fp_proc}" ]]; then - if [[ -d "${mrms_day_dir}" ]]; then + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - else + echo "${OBTYPE} file does not exist on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. - for field in ${VAR[@]}; do - - # raw MRMS data from HPSS - #mrms_raw=${OBS_DIR}/raw - #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" - mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # Raw base directory that will contain the raw daily subdirectory in which + # the gzipped MRMS grib2 retrieved from archive file will be placed. 
Note + # that the name of this directory depends on (contains) the valid year, + # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order + # to avoid having get_obs_mrms tasks from other cycles clobbering the + # output from this one. It is also possible to make this directory name + # depend instead on the cycle, but that turns out to cause an inefficiency + # in that get_obs_mrms tasks for different cycles will not be able to + # detect that another cycle has already retrieved the data for the current + # valid day from an archive and will unnecessarily repeat the retrieval. + #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" + # Raw daily subdirectory under the raw base directory. + mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" + + +# Check if the raw daily directory already exists on disk. If so, it +# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. If so, skip the retrieval process. If not, +# proceed to retrieve all the files and place them in the raw daily +# directory. + if [[ -d "${mrms_day_dir_raw}" ]]; then + +# Change the following comments. + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + mkdir -p ${mrms_day_dir_raw} + valid_time=${vyyyymmdd}${vhh} + + cd ${mrms_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 +# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type MRMS_obs \ + --output_path ${mrms_day_dir_raw} \ + --summary_file ${logfile}" + + echo "CALLING: ${cmd}" + + $cmd || print_err_msg_exit "\ + Could not retrieve MRMS data from HPSS + + The following command exited with a non-zero exit status: + ${cmd} +" - echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" - echo " mrms_fp = \"${mrms_fp}\"" - echo "Will attempt to retrieve from remote locations." +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${mrms_day_dir_raw}/pull_completed.txt - # Create directories if necessary. - if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_raw}/${vyyyymmdd} - fi - if [[ ! 
-d "${mrms_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_proc}/${vyyyymmdd} fi - valid_time=${vyyyymmdd}${vhh} - output_path="${mrms_raw}/${vyyyymmdd}" +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks complettion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." + sleep 5s + done - cd ${mrms_raw} - # Pull MRMS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${output_path} \ - --summary_file ${logfile}" +# Since this script is part of a workflow, another get_obs_mrms task (i.e. +# for another cycle) may have extracted and placed the current file in its +# processed location between the time we checked for its existence above +# (and didn't find it) and now. This can happen because there can be +# overlap between the verification times for the current cycle and those +# of other cycles. For this reason, check again for the existence of the +# processed file. If it has already been created by another get_obs_mrms +# task, don't bother to recreate it. + if [[ -f "${mrms_fp_proc}" ]]; then - echo "CALLING: ${cmd}" + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS + else - The following command exited with a non-zero exit status: - ${cmd} -" +# Search the raw daily directory for the current valid day to find the +# gizipped MRMS grib2 file whose time stamp (in the file name) is closest +# to the current valid day and hour. Then unzip that file and copy it +# to the processed daily directory, in the process renaming it to replace +# the minutes and hours in the file name with "0000". + valid_time=${vyyyymmdd}${vhh} + python ${USHdir}/mrms_pull_topofhour.py \ + --valid_time ${valid_time} \ + --outdir ${mrms_basedir_proc} \ + --source ${mrms_basedir_raw} \ + --product ${file_base_name} - hour=0 - while [[ ${hour} -le 23 ]]; do - HH=$(printf "%02d" $hour) - echo "hour=${hour}" - python ${USHdir}/mrms_pull_topofhour.py --valid_time ${vyyyymmdd}${HH} --outdir ${mrms_proc} --source ${mrms_raw} --product ${field_base_name} - hour=$((${hour} + 1)) # hourly increment - done + fi - done + fi - fi + done # #----------------------------------------------------------------------- # From 2bc8ed1c65bd61c52b490d838dd49afb4d11c95b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 16:04:45 -0600 Subject: [PATCH 012/208] Clean up comments in the MRMS section. 
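Before the diff below, a recap of the least intuitive point in the preceding CCPA changes: because the daily CCPA archives run from hour 19 of the previous day through hour 18 of the named day, 01h accumulations valid at hours 19-23 must be pulled from the NEXT day's archive and are staged under the next day's raw directory. A stand-alone illustration of that mapping (not part of any patch; it assumes GNU date and uses an arbitrary example valid time):

vyyyymmdd="20220601"; vhh="21"
# Strip any leading zero so the hour is not read as an octal number.
vhh_noZero=$((10#${vhh}))
if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then
  # Valid hours 19-23 live under the archive/raw directory of the next day.
  archive_day=$(date -d "${vyyyymmdd} 1 day" +%Y%m%d)
else
  archive_day=${vyyyymmdd}
fi
echo "ccpa.t${vhh}z.01h.hrap.conus.gb2 valid at ${vyyyymmdd}${vhh} comes from archive day ${archive_day}"
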
--- scripts/exregional_get_verif_obs.sh | 130 +++++++++++++++------------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 3ae8405e36..254b5166a3 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -444,17 +444,17 @@ echo "ihh = ${ihh}" # Base directory in which the daily subdirectories containing the MRMS # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # appear after this script is done, and the daily such subdirectory for - # the current valid time (year, month, and day). We refer to these as - # the "processed" base and daily subdirectories because they contain the - # final files after all processing by this script is complete. + # be located after this script is done, and the daily such subdirectory + # for the current valid time (year, month, and day). We refer to these + # as the "processed" base and daily subdirectories because they contain + # the final files after all processing by this script is complete. mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -470,17 +470,20 @@ echo "ihh = ${ihh}" " fi -# Name of the MRMS grib2 file for the current field and valid time that -# will appear in the processed daily subdirectory after this script finishes. + # Name of the MRMS grib2 file for the current field and valid time that + # will appear in the processed daily subdirectory after this script finishes. + # This is the name of the processed file. Note that this is generally + # not the name of the gzipped grib2 files that may be retrieved below + # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" -# Full path to the processed MRMS grib2 file for the current field and -# valid time. + # Full path to the processed MRMS grib2 file for the current field and + # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" -# Check if the processed MRMS grib2 file for the current field and valid -# time already exists on disk. If so, skip and go to the next valid time. -# If not, pull it. + # Check if the processed MRMS grib2 file for the current field and valid + # time already exists on disk. If so, skip this valid time and go to the + # next one. If not, pull it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -493,50 +496,57 @@ echo "ihh = ${ihh}" echo " mrms_fp_proc = \"${mrms_fp_proc}\"" echo "Will attempt to retrieve from remote locations." - # Raw base directory that will contain the raw daily subdirectory in which - # the gzipped MRMS grib2 retrieved from archive file will be placed. Note - # that the name of this directory depends on (contains) the valid year, - # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order - # to avoid having get_obs_mrms tasks from other cycles clobbering the - # output from this one. 
It is also possible to make this directory name - # depend instead on the cycle, but that turns out to cause an inefficiency - # in that get_obs_mrms tasks for different cycles will not be able to - # detect that another cycle has already retrieved the data for the current - # valid day from an archive and will unnecessarily repeat the retrieval. - #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}" + # Base directory that will contain the daily subdirectories in which the + # gzipped MRMS grib2 files retrieved from archive files will be placed, + # and the daily subdirectory for the current valid year, month, and day. + # We refer to these as the "raw" MRMS base and daily directories because + # they contain files as they are found in the archives before any processing + # by this script. + # + # Note that the name of the raw base directory depends on (contains) the + # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh) + # in order to avoid having get_obs_mrms tasks from other cycles clobbering + # the output from this one. It is also possible to make the name of this + # directory name depend instead on the cycle, but that turns out to cause + # an inefficiency in that get_obs_mrms tasks for different cycles will + # not be able to detect that another cycle has already retrieved the data + # for the current valid day will unnecessarily repeat the retrieval. mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" - - # Raw daily subdirectory under the raw base directory. mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" -# Check if the raw daily directory already exists on disk. If so, it -# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP -# and for all times (hours, minutes, and seconds) in the current valid -# day -- have already been or are in the process of being retrieved from -# the archive (tar) files. If so, skip the retrieval process. If not, -# proceed to retrieve all the files and place them in the raw daily -# directory. + # Check if the raw daily directory already exists on disk. If so, it + # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP + # and for all times (hours, minutes, and seconds) in the current valid + # day -- have already been or are in the process of being retrieved from + # the archive (tar) files. If so, skip the retrieval process. If not, + # proceed to retrieve all the files and place them in the raw daily + # directory. if [[ -d "${mrms_day_dir_raw}" ]]; then -# Change the following comments. echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." else mkdir -p ${mrms_day_dir_raw} valid_time=${vyyyymmdd}${vhh} + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_mrms tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the MRMS tar files into the directory it was called from, + # which is the working directory of this script right before retrieve_data.py + # is called. 
cd ${mrms_basedir_raw} -# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 -# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, -# and seconds) in the current valid day -- and place them in the raw daily -# directory. Note that this will pull both the REFC and RETOP files in -# one call. + # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 + # files -- i.e. for both REFC and RETOP and for all times (hours, minutes, + # and seconds) in the current valid day -- and place them in the raw daily + # directory. Note that this will pull both the REFC and RETOP files in + # one call. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -557,29 +567,29 @@ echo "ihh = ${ihh}" ${cmd} " -# Create a flag file that can be used to confirm the completion of the -# retrieval of all files for the current valid day. + # Create a flag file that can be used to confirm the completion of the + # retrieval of all files for the current valid day. touch ${mrms_day_dir_raw}/pull_completed.txt fi -# Make sure the retrieval process for the current day (which may have -# been executed above for this cycle or by another cycle) has completed -# by checking for the existence of the flag file that marks complettion. -# If not, keep checking until the flag file shows up. + # Make sure the retrieval process for the current day (which may have + # been executed above for this cycle or by another cycle) has completed + # by checking for the existence of the flag file that marks completion. + # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." sleep 5s done -# Since this script is part of a workflow, another get_obs_mrms task (i.e. -# for another cycle) may have extracted and placed the current file in its -# processed location between the time we checked for its existence above -# (and didn't find it) and now. This can happen because there can be -# overlap between the verification times for the current cycle and those -# of other cycles. For this reason, check again for the existence of the -# processed file. If it has already been created by another get_obs_mrms -# task, don't bother to recreate it. + # Since this script is part of a workflow, another get_obs_mrms task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_mrms + # task, don't bother to recreate it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -588,11 +598,11 @@ echo "ihh = ${ihh}" else -# Search the raw daily directory for the current valid day to find the -# gizipped MRMS grib2 file whose time stamp (in the file name) is closest -# to the current valid day and hour. Then unzip that file and copy it -# to the processed daily directory, in the process renaming it to replace -# the minutes and hours in the file name with "0000". + # Search the raw daily directory for the current valid day to find the + # gizipped MRMS grib2 file whose time stamp (in the file name) is closest + # to the current valid day and hour. 
Then unzip that file and copy it + # to the processed daily directory, in the process renaming it to replace + # the minutes and hours in the file name with "0000". valid_time=${vyyyymmdd}${vhh} python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${valid_time} \ From 184534283613620a0d4d88cee26d5cd03b45dc99 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 18:29:44 -0600 Subject: [PATCH 013/208] Minor fixes to NDAS section. --- scripts/exregional_get_verif_obs.sh | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 254b5166a3..c31795441a 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -630,7 +630,7 @@ echo "ihh = ${ihh}" ndas_proc=${OBS_DIR} # Check if file exists on disk - ndas_file="$ndas_proc/prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" if [[ -f "${ndas_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${ndas_file}" @@ -643,7 +643,7 @@ echo "ihh = ${ihh}" # # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. - # This means that every six hours we have to obs files valid for the same time: + # This means that every six hours we have two obs files valid for the same time: # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even # though the earlier files are larger, this is because the time window is larger) @@ -657,18 +657,24 @@ echo "ihh = ${ihh}" continue fi + # Whether to move or copy extracted files from the raw directories to their + # final locations. + #mv_or_cp="mv" + mv_or_cp="cp" + echo "" echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then + if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \ + ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then echo "" echo "HELLO BBBBB" if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then echo "" echo "HELLO CCCCC" - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} + mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi # Pull NDAS data from HPSS @@ -680,7 +686,7 @@ echo "HELLO CCCCC" --cycle_date ${vyyyymmdd}${vhh} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ + --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -692,8 +698,8 @@ echo "HELLO CCCCC" ${cmd} " - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc + if [[ ! -d "${ndas_proc}" ]]; then + mkdir -p ${ndas_proc} fi # copy files from the previous 6 hours ("tm" means "time minus") @@ -702,7 +708,8 @@ echo "HELLO CCCCC" vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} + ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ + ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} done fi @@ -720,8 +727,8 @@ echo "HELLO CCCCC" vhh=${vhh_noZero} fi - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} + if [[ ! 
-d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then + mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi # Pull NDAS data from HPSS @@ -733,7 +740,7 @@ echo "HELLO CCCCC" --cycle_date ${vyyyymmdd}${vhh} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ + --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -745,8 +752,8 @@ echo "HELLO CCCCC" ${cmd} " - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc + if [[ ! -d "${ndas_proc}" ]]; then + mkdir -p ${ndas_proc} fi for tm in $(seq 1 6); do @@ -755,7 +762,8 @@ echo "HELLO CCCCC" vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} + ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ + ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} done fi From 8c38c19a54642506b4a6dca673a6aaabf667e066 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 10:21:58 -0600 Subject: [PATCH 014/208] Change names of raw directories for CCPA and MRMS to indicate whether they're per-cycle or per-day. --- scripts/exregional_get_verif_obs.sh | 77 ++++++++++++++--------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c31795441a..4427434b1c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -36,7 +36,7 @@ set -x # # CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # @@ -46,8 +46,8 @@ set -x # script. # # Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will +# +# 1. Accumulation is currently hardcoded to 01h. The verification will # use MET/pcp-combine to sum 01h files into desired accumulations. # # 2. There is a problem with the valid time in the metadata for files @@ -59,17 +59,17 @@ set -x # # MRMS (Multi-Radar Multi-Sensor) radar observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# +# # Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity # data and EchoTop_18_00.50_ for echo top data. If data is not available # at the top of the hour, you should rename the file closest in time to # your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. +# "ush/mrms_pull_topofhour.py" is provided for this purpose. # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
@@ -77,30 +77,30 @@ set -x # # NDAS (NAM Data Assimilation System) conventional observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# +# # Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is # either 00, 06, 12, or 18, and prevhour is the number of hours prior to # hh (00 through 05). If using custom staged data, you will have to # rename the files accordingly. -# +# # If data is retrieved from HPSS, it will be automatically staged by this # this script. # # # NOHRSC snow accumulation observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# +# # where AA is the 2-digit accumulation duration in hours: 06 or 24 # # METplus is configured to verify snowfall using 06- and 24-h accumulated @@ -143,7 +143,7 @@ echo echo "HELLO GGGGGGGG" echo "current_fcst = ${current_fcst}" - # Calculate valid date info using date utility + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) @@ -176,7 +176,7 @@ echo "ihh = ${ihh}" # observed accumulations at forecast hour 0 because there aren't yet # any accumulations in the forecast(s) to compare it to. if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) + current_fcst=$((current_fcst + 1)) continue fi @@ -235,7 +235,7 @@ echo "ihh = ${ihh}" # where YYYYMMDD is a given year, month, and day combination, and # [PREFIX] is a string that is not relevant to the discussion here # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values + # YYYYMMDD falls in, and the retrieve_data.py tries various values # until it finds one for which a tar file exists). Unintuitively, this # archive file contains accumulation data for valid times starting at # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current @@ -247,7 +247,7 @@ echo "ihh = ${ihh}" # # * We call retrieve_data.py in a temporary cycle-specific subdirectory # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA + # clobbering each other's output. We refer to this as the "raw" CCPA # base directory because it contains files as they are found in the # archives before any processing by this script. # @@ -260,9 +260,9 @@ echo "ihh = ${ihh}" # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base + # processed daily subdirectories under the processed (final) CCPA base # directory (ccpa_basedir_proc). 
- # + # # * For a given cycle, some of the valid times at which there is forecast # output may not have a corresponding file under the raw base directory # for that cycle. This is because another cycle that overlaps this cycle @@ -274,7 +274,7 @@ echo "ihh = ${ihh}" # processed CCPA directory structure than the temporal arrangement used # in the archives and raw directories, we process the raw files such # that the data in the processed directory structure is shifted forward - # in time 6 hours relative to the data in the archives and raw directories. + # in time 6 hours relative to the data in the archives and raw directories. # This results in a processed base directory that, like the raw base # directory, also contains daily subdirectories of the form YYYYMMDD, # but each such subdirectory may only contain CCPA data at valid hours @@ -293,7 +293,7 @@ echo "ihh = ${ihh}" # # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES # ---------------------------------------------------------- - # + # # The daily archive file containing CCPA obs is named # # [PREFIX].YYYYMMDD.tar @@ -345,16 +345,16 @@ echo "ihh = ${ihh}" # ccpa_day_dir_raw: # Raw daily subdirectory under the raw base directory. This is dependent # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) - # in order to maintain the same data timing arrangement in the raw daily + # in order to maintain the same data timing arrangement in the raw daily # directories as in the archive files. # if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then valid_time=${vyyyymmdd}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then valid_time=${vyyyymmdd_p1}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" fi mkdir -p ${ccpa_day_dir_raw} @@ -362,7 +362,7 @@ echo "ihh = ${ihh}" # Before calling retrieve_data.py, change location to the raw base # directory to avoid get_obs_ccpa tasks for other cycles from clobbering # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the CCPA tar files into the directory it was called from, + # extracts the CCPA tar files into the directory it was called from, # which is the working directory of this script right before retrieve_data.py # is called. cd ${ccpa_basedir_raw} @@ -402,7 +402,7 @@ echo "ihh = ${ihh}" # of other cycles. For this reason, check again for the existence of the # processed file. If it has already been created by another get_obs_ccpa # task, don't bother to recreate it. - if [[ -f "${ccpa_fp_proc}" ]]; then + if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" @@ -446,14 +446,14 @@ echo "ihh = ${ihh}" # grib2 files for REFC (composite reflectivity) and REFC (echo top) will # be located after this script is done, and the daily such subdirectory # for the current valid time (year, month, and day). We refer to these - # as the "processed" base and daily subdirectories because they contain + # as the "processed" base and daily subdirectories because they contain # the final files after all processing by this script is complete. 
mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" @@ -465,7 +465,7 @@ echo "ihh = ${ihh}" echo "Invalid field: ${field}" print_err_msg_exit "\ Invalid field specified: ${field} - + Valid options are 'REFC', 'RETOP'. " fi @@ -476,7 +476,7 @@ echo "ihh = ${ihh}" # not the name of the gzipped grib2 files that may be retrieved below # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - + # Full path to the processed MRMS grib2 file for the current field and # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" @@ -511,16 +511,15 @@ echo "ihh = ${ihh}" # an inefficiency in that get_obs_mrms tasks for different cycles will # not be able to detect that another cycle has already retrieved the data # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it + # Check if the raw daily directory already exists on disk. If so, it # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP # and for all times (hours, minutes, and seconds) in the current valid # day -- have already been or are in the process of being retrieved from # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily + # proceed to retrieve all the files and place them in the raw daily # directory. if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -537,7 +536,7 @@ echo "ihh = ${ihh}" # Before calling retrieve_data.py, change location to the raw base # directory to avoid get_obs_mrms tasks for other cycles from clobbering # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, + # extracts the MRMS tar files into the directory it was called from, # which is the working directory of this script right before retrieve_data.py # is called. cd ${mrms_basedir_raw} @@ -608,7 +607,7 @@ echo "ihh = ${ihh}" --valid_time ${valid_time} \ --outdir ${mrms_basedir_proc} \ --source ${mrms_basedir_raw} \ - --product ${file_base_name} + --product ${file_base_name} fi @@ -641,7 +640,7 @@ echo "ihh = ${ihh}" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, + # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. # This means that every six hours we have two obs files valid for the same time: # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr @@ -652,7 +651,7 @@ echo "ihh = ${ihh}" # pull more HPSS tarballs than necessary if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. + # If at forecast hour zero, skip to next hour. 
current_fcst=$((${current_fcst} + 1)) continue fi @@ -793,7 +792,7 @@ echo "HELLO CCCCC" # If 24-hour files should be available (at 00z and 12z) then look for both files # Otherwise just look for 6hr file if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then + if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then retrieve=1 echo "${OBTYPE} files do not exist on disk:" echo "${nohrsc06h_file}" @@ -848,7 +847,7 @@ echo "HELLO CCCCC" print_err_msg_exit "\ Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC " - fi # Increment to next forecast hour + fi # Increment to next forecast hour # Increment to next forecast hour echo "Finished fcst hr=${current_fcst}" From 7f531871c2909e9aaa4b300561e59f67d5e8bc55 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 11:14:42 -0600 Subject: [PATCH 015/208] Version with NDAS changes that seems to work. Still need lots of cleanup and comments. --- scripts/exregional_get_verif_obs.sh | 42 +++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4427434b1c..673e4edc76 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -622,14 +622,34 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then +# Fix these comments. + # Calculate valid date - 1 day; this is needed because some obs files + # are stored in the *previous* day's 00h directory + vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) + #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + +echo "" +echo "HELLO PPPPPPP" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vhh = ${vhh}" +echo "vhh_noZero = ${vhh_noZero}" +#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate_m1h = ${vdate_m1h}" + # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw # Reorganized NDAS location ndas_proc=${OBS_DIR} + # raw NDAS data from HPSS + #ndas_raw=${OBS_DIR}/raw + ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + # Check if file exists on disk - ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" if [[ -f "${ndas_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${ndas_file}" @@ -650,9 +670,14 @@ echo "ihh = ${ihh}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary - if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. - current_fcst=$((${current_fcst} + 1)) +# + +# This seems like a strange statement since the only way it can be true +# is if the forecast length is zero. + # If at forecast hour zero, skip to next hour. + #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then + if [[ ${current_fcst} -eq 0 ]]; then + current_fcst=$((current_fcst + 1)) continue fi @@ -670,12 +695,16 @@ echo "vhh_noZero = ${vhh_noZero}" echo "" echo "HELLO BBBBB" - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then + #valid_time=${vyyyymmdd}${vhh} + #output_path="${ndas_raw}/${vyyyymmdd}" + + if [[ ! 
-d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then echo "" echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ @@ -704,6 +733,7 @@ echo "HELLO CCCCC" # copy files from the previous 6 hours ("tm" means "time minus") # The tm06 files contain more/better observations than tm00 for the equivalent time for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') @@ -715,6 +745,7 @@ echo "HELLO CCCCC" # If at last forecast hour, make sure we're getting the last observations if [[ ${current_fcst} -eq ${fcst_length} ]]; then + echo "Retrieving NDAS obs for final forecast hour" vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) if [[ ${vhh_noZero} -eq 24 ]]; then @@ -730,6 +761,7 @@ echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ From 7926705a1c7a92f01958ed939d899118d0004d0c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 22:02:17 -0600 Subject: [PATCH 016/208] Second set of NDAS changes so that there are no repeat pulls of NDAS files from HPSS (and works with multiple cycles). --- scripts/exregional_get_verif_obs.sh | 297 +++++++++++++++++----------- 1 file changed, 185 insertions(+), 112 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 673e4edc76..033dd3c0fb 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -133,6 +133,16 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') + +if [[ ${OBTYPE} == "NDAS" ]]; then + vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) + vhh_last=$(echo ${vdate_last} | cut -c9-10) + hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + fcst_length_rounded_up=$(( fcst_length + hours_to_add )) +# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) + fcst_length=${fcst_length_rounded_up} +fi + # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) @@ -145,19 +155,19 @@ echo "current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory - vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) + vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) + vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo echo "HELLO HHHHHHHH" echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo "ihh = ${ihh}" #exit @@ -353,9 +363,9 @@ echo "ihh = ${ihh}" ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} 
-le 23 ]]; then - valid_time=${vyyyymmdd_p1}${vhh} + valid_time=${vyyyymmdd_p1d}${vhh} ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" fi mkdir -p ${ccpa_day_dir_raw} @@ -622,40 +632,48 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then + # Fix these comments. # Calculate valid date - 1 day; this is needed because some obs files # are stored in the *previous* day's 00h directory vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) + vhh_p1h_noZero=$((10#${vhh_p1h})) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) + echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" #echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate = ${vdate}" echo "vdate_m1h = ${vdate_m1h}" - - # raw NDAS data from HPSS - ndas_raw=${OBS_DIR}/raw +echo "vdate_p1h = ${vdate_m1h}" # Reorganized NDAS location - ndas_proc=${OBS_DIR} + ndas_basedir_proc=${OBS_DIR} + ndas_day_dir_proc="${ndas_basedir_proc}" # raw NDAS data from HPSS #ndas_raw=${OBS_DIR}/raw - ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" # Check if file exists on disk - #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" - if [[ -f "${ndas_file}" ]]; then + #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" else echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" echo "Will attempt to retrieve from remote locations" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr @@ -670,16 +688,14 @@ echo "vdate_m1h = ${vdate_m1h}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary -# - # This seems like a strange statement since the only way it can be true # is if the forecast length is zero. # If at forecast hour zero, skip to next hour. #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi + # if [[ ${current_fcst} -eq 0 ]]; then + # current_fcst=$((current_fcst + 1)) + # continue + # fi # Whether to move or copy extracted files from the raw directories to their # final locations. 
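
(Aside, not part of the patch: the hunk below keys the retrieval off the
valid time plus one hour so that the archive cycle hour lands on 00, 06,
12, or 18, and only the tm06..tm01 members of that archive are copied.
A small worked example of the "tm" (time minus) mapping follows, using an
arbitrary example valid time and assuming GNU date:)

    vyyyymmdd="20240522"; vhh="12"   # example valid time: 2024-05-22 12z
    tm=$(( 6 - (10#${vhh} % 6) ))    # always 1..6, so tm00 is never used
    arch_cyc=$(date -d \
      "${vyyyymmdd:0:4}-${vyyyymmdd:4:2}-${vyyyymmdd:6:2} ${vhh}:00:00 ${tm} hours" \
      +%Y%m%d%H)
    printf "nam.t%sz.prepbufr.tm%02d.nr supplies prepbufr.ndas.%s\n" \
      "${arch_cyc:8:2}" "${tm}" "${vyyyymmdd}${vhh}"
    # -> nam.t18z.prepbufr.tm06.nr supplies prepbufr.ndas.2024052212
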
@@ -689,116 +705,173 @@ echo "vdate_m1h = ${vdate_m1h}" echo "" echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" +echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \ - ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then echo "" echo "HELLO BBBBB" - #valid_time=${vyyyymmdd}${vhh} - #output_path="${ndas_raw}/${vyyyymmdd}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}" + #mkdir -p ${ndas_day_dir_raw} - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then -echo "" -echo "HELLO CCCCC" - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - if [[ ! -d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} - fi + # Check if the raw daily directory already exists on disk. If so, it + # means +#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. +# If so, skip the retrieval process. If not, + # proceed to retrieve all the files and place them in the raw daily + # directory. + if [[ -d "${ndas_day_dir_raw}" ]]; then - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# Fix up these messages. + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " ndas_day_dir_proc = \"${ndas_day_dir_proc}\"" + echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} - done + else - fi + mkdir -p ${ndas_day_dir_raw} + valid_time=${vyyyymmdd_p1h}${vhh_p1h} + +# Before calling retrieve_data.py, change location to the raw base +# directory to avoid get_obs_ndas tasks for other cycles from clobbering +# the output from this call to retrieve_data.py. Note that retrieve_data.py +# extracts the NDAS tar files into the directory it was called from, +# which is the working directory of this script right before retrieve_data.py +# is called. + cd ${ndas_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2 +# files -- i.e. 
for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \ + --data_stores hpss \ + --data_type NDAS_obs \ + --output_path ${ndas_day_dir_raw} \ + --summary_file ${logfile}" - # If at last forecast hour, make sure we're getting the last observations - if [[ ${current_fcst} -eq ${fcst_length} ]]; then - - echo "Retrieving NDAS obs for final forecast hour" - vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) - if [[ ${vhh_noZero} -eq 24 ]]; then - vyyyymmdd=${vyyyymmdd_p1} - vhh=00 - elif [[ ${vhh_noZero} -eq 6 ]]; then - vhh=06 - else - vhh=${vhh_noZero} - fi + echo "CALLING: ${cmd}" - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi + $cmd || print_err_msg_exit "\ + Could not retrieve NDAS data from HPSS - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} + The following command exited with a non-zero exit status: + ${cmd} " +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${ndas_day_dir_raw}/pull_completed.txt - if [[ ! -d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} fi - for tm in $(seq 1 6); do - last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) - unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks completion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..." + sleep 5s done + if [[ -f "${ndas_fp_proc}" ]]; then + + echo "${OBTYPE} file exists on disk:" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." 
+ + else + + #mkdir -p ${ndas_basedir_proc} + + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + # copy files from the previous 6 hours ("tm" means "time minus") + # The tm06 files contain more/better observations than tm00 for the equivalent time + for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do + vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) + if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then + tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm} + fi + done + + fi + fi + # If at last forecast hour, make sure we're getting the last observations +# if [[ ${current_fcst} -eq ${fcst_length} ]]; then +# +# echo "Retrieving NDAS obs for final forecast hour" +# vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) +# if [[ ${vhh_noZero} -eq 24 ]]; then +# vyyyymmdd=${vyyyymmdd_p1d} +# vhh=00 +# elif [[ ${vhh_noZero} -eq 6 ]]; then +# vhh=06 +# else +# vhh=${vhh_noZero} +# fi +# +# if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then +# mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} +# fi +# +# cd ${ndas_raw} +# # Pull NDAS data from HPSS +# cmd=" +# python3 -u ${USHdir}/retrieve_data.py \ +# --debug \ +# --file_set obs \ +# --config ${PARMdir}/data_locations.yml \ +# --cycle_date ${vyyyymmdd}${vhh} \ +# --data_stores hpss \ +# --data_type NDAS_obs \ +# --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ +# --summary_file ${logfile}" +# +# echo "CALLING: ${cmd}" +# +# $cmd || print_err_msg_exit "\ +# Could not retrieve NDAS data from HPSS +# +# The following command exited with a non-zero exit status: +# ${cmd} +#" +# +# if [[ ! -d "${ndas_basedir_proc}" ]]; then +# mkdir -p ${ndas_basedir_proc} +# fi +# +# for tm in $(seq 1 6); do +# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) +# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") +# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) +# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# +# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ +# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# done +# +# fi + fi # #----------------------------------------------------------------------- From f8c3ec67d41aca784b102498931f0eb1e6eda3d1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 16:32:12 -0600 Subject: [PATCH 017/208] Clean up NDAS section in get_obs_... ex-script. --- scripts/exregional_get_verif_obs.sh | 240 +++++++++++----------------- 1 file changed, 94 insertions(+), 146 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 033dd3c0fb..93f17bfa60 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -583,7 +583,7 @@ echo "ihh = ${ihh}" fi # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or by another cycle) has completed + # been executed above for this cycle or for another cycle) has completed # by checking for the existence of the flag file that marks completion. # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do @@ -633,48 +633,53 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then -# Fix these comments. 
- # Calculate valid date - 1 day; this is needed because some obs files - # are stored in the *previous* day's 00h directory - vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) - #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) - - vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + # Calculate valid date plus 1 hour. This is needed because we need to + # check whether this date corresponds to one of the valid hours-of-day + # 00, 06, 12, and 18 on which the NDAS archives are provided. + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) vhh_p1h_noZero=$((10#${vhh_p1h})) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" -#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" echo "vdate = ${vdate}" -echo "vdate_m1h = ${vdate_m1h}" -echo "vdate_p1h = ${vdate_m1h}" +echo "vdate_p1h = ${vdate_p1h}" - # Reorganized NDAS location + # Base directory in which the hourly NDAS prepbufr files will be located. + # We refer to this as the "processed" base directory because it contains + # the final files after all processing by this script is complete. ndas_basedir_proc=${OBS_DIR} - ndas_day_dir_proc="${ndas_basedir_proc}" - - # raw NDAS data from HPSS - #ndas_raw=${OBS_DIR}/raw - #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" - # Check if file exists on disk - #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + # Name of the NDAS prepbufr file for the current valid time that will + # appear in the processed daily subdirectory after this script finishes. + # This is the name of the processed file. Note that this is not the + # same as the name of the raw file, i.e. the file extracted from the + # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + + # Full path to the processed NDAS prepbufr file for the current field and + # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + + # Check if the processed NDAS prepbufr file for the current valid time + # already exists on disk. If so, skip this valid time and go to the next + # one. if [[ -f "${ndas_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ndas_fp_proc}" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else + echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_fp_proc}" - echo "Will attempt to retrieve from remote locations" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... 
, nam.tHHz.prepbufr.tm06.nr # @@ -685,18 +690,6 @@ echo "vdate_p1h = ${vdate_m1h}" # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even # though the earlier files are larger, this is because the time window is larger) - # The current logic of this script will likely stage more files than you need, but will never - # pull more HPSS tarballs than necessary - -# This seems like a strange statement since the only way it can be true -# is if the forecast length is zero. - # If at forecast hour zero, skip to next hour. - #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # if [[ ${current_fcst} -eq 0 ]]; then - # current_fcst=$((current_fcst + 1)) - # continue - # fi - # Whether to move or copy extracted files from the raw directories to their # final locations. #mv_or_cp="mv" @@ -707,60 +700,59 @@ echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" + # Due to the way NDAS archives are organized, we can only retrieve the + # archive (tar) file containing data for the current valid hour (and the + # 5 hours preceeding it) if the hour-of-day corresponding to the current + # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then -echo "" -echo "HELLO BBBBB" - #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" - #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}" + # Base directory that will contain the 6-hourly subdirectories in which + # the NDAS prepbufr files retrieved from archive files will be placed, + # and the 6-hourly subdirectory for the current valid time plus 1 hour. + # We refer to these as the "raw" NDAS base and 6-hourly directories + # because they contain files as they are found in the archives before + # any processing by this script. ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}" - #mkdir -p ${ndas_day_dir_raw} - - - - # Check if the raw daily directory already exists on disk. If so, it - # means -#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP -# and for all times (hours, minutes, and seconds) in the current valid -# day -- have already been or are in the process of being retrieved from -# the archive (tar) files. -# If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily - # directory. + ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}" + + # Check if the raw 6-hourly directory already exists on disk. If so, it + # means the NDAS prepbufr files for the current valid hour and the 5 hours + # preceeding it have already been or are in the process of being retrieved + # from the archive (tar) files. If so, skip the retrieval process. If + # not, proceed to retrieve the archive file, extract the prepbufr files + # from it, and place them in the raw daily directory. if [[ -d "${ndas_day_dir_raw}" ]]; then -# Fix up these messages. - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " ndas_day_dir_proc = \"${ndas_day_dir_proc}\"" - echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." 
+ print_info_msg " +${OBTYPE} raw directory for day ${vdate_p1h} exists on disk: + ndas_day_dir_raw = \"${ndas_day_dir_raw}\" +This means NDAS files for the current valid time (${vyyyymmdd}) and the +5 hours preceeding it have been or are being retrieved by a get_obs_ndas +workflow task for another cycle. Thus, we will NOT attempt to retrieve +NDAS data for the current valid time from remote locations." else mkdir -p ${ndas_day_dir_raw} - valid_time=${vyyyymmdd_p1h}${vhh_p1h} - -# Before calling retrieve_data.py, change location to the raw base -# directory to avoid get_obs_ndas tasks for other cycles from clobbering -# the output from this call to retrieve_data.py. Note that retrieve_data.py -# extracts the NDAS tar files into the directory it was called from, -# which is the working directory of this script right before retrieve_data.py -# is called. + + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_ndas tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the NDAS prepbufr files the archive into the directory it was + # called from, which is the working directory of this script right before + # retrieve_data.py is called. cd ${ndas_basedir_raw} -# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2 -# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, -# and seconds) in the current valid day -- and place them in the raw daily -# directory. Note that this will pull both the REFC and RETOP files in -# one call. + # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files + # for the current valid hour and the 5 hours preceeding it and place them + # in the raw 6-hourly directory. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \ + --cycle_date ${vdate_p1h} \ --data_stores hpss \ --data_type NDAS_obs \ --output_path ${ndas_day_dir_raw} \ @@ -774,21 +766,32 @@ echo "HELLO BBBBB" The following command exited with a non-zero exit status: ${cmd} " -# Create a flag file that can be used to confirm the completion of the -# retrieval of all files for the current valid day. + + # Create a flag file that can be used to confirm the completion of the + # retrieval of all files for the 6-hour interval ending in vdate_p1h. touch ${ndas_day_dir_raw}/pull_completed.txt fi -# Make sure the retrieval process for the current day (which may have -# been executed above for this cycle or by another cycle) has completed -# by checking for the existence of the flag file that marks completion. -# If not, keep checking until the flag file shows up. + # Make sure the retrieval process for the 6-hour interval ending in + # vdate_p1h (which may have been executed above for this cycle or for + # another cycle) has completed by checking for the existence of the flag + # file that marks completion. If not, keep checking until the flag file + # shows up. while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..." + echo "Waiting for completion of the NDAS obs retrieval process for the" + echo "6-hour interval ending on ${vdate_p1h} ..." sleep 5s done + # Since this script is part of a workflow, another get_obs_ndas task (i.e. 
+ # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_ndas + # task, don't bother to recreate it. if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -797,18 +800,19 @@ echo "HELLO BBBBB" else - #mkdir -p ${ndas_basedir_proc} - - unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do + # Create the processed NDAS prepbufr files for the current valid hour as + # well as the preceeding 5 hours (or fewer if they're outside the time + # interval of the forecast) by copying or moving (and in the process + # renaming) them from the raw 6-hourly directory. In the following loop, + # "tm" means "time minus". Note that the tm06 files contain more/better + # observations than tm00 for the equivalent time. + for tm in $(seq 6 -1 1); do # for tm in $(seq --format="%02g" 6 -1 1); do - vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) - if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then + vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) + if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm} + ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm} fi done @@ -816,62 +820,6 @@ echo "HELLO BBBBB" fi - # If at last forecast hour, make sure we're getting the last observations -# if [[ ${current_fcst} -eq ${fcst_length} ]]; then -# -# echo "Retrieving NDAS obs for final forecast hour" -# vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) -# if [[ ${vhh_noZero} -eq 24 ]]; then -# vyyyymmdd=${vyyyymmdd_p1d} -# vhh=00 -# elif [[ ${vhh_noZero} -eq 6 ]]; then -# vhh=06 -# else -# vhh=${vhh_noZero} -# fi -# -# if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then -# mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} -# fi -# -# cd ${ndas_raw} -# # Pull NDAS data from HPSS -# cmd=" -# python3 -u ${USHdir}/retrieve_data.py \ -# --debug \ -# --file_set obs \ -# --config ${PARMdir}/data_locations.yml \ -# --cycle_date ${vyyyymmdd}${vhh} \ -# --data_stores hpss \ -# --data_type NDAS_obs \ -# --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ -# --summary_file ${logfile}" -# -# echo "CALLING: ${cmd}" -# -# $cmd || print_err_msg_exit "\ -# Could not retrieve NDAS data from HPSS -# -# The following command exited with a non-zero exit status: -# ${cmd} -#" -# -# if [[ ! 
-d "${ndas_basedir_proc}" ]]; then -# mkdir -p ${ndas_basedir_proc} -# fi -# -# for tm in $(seq 1 6); do -# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) -# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") -# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) -# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') -# -# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ -# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} -# done -# -# fi - fi # #----------------------------------------------------------------------- From bc276fe188aa516d3f365f0b9fa5648da2cbc0ed Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 19:26:53 -0600 Subject: [PATCH 018/208] Add debugging statement to clarify the current working directory where cleanup is happening. --- ush/retrieve_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 5acf9d5ce9..5b4320ccb6 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -51,6 +51,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path unavailable = {} expand_source_paths = [] logging.debug(f"Cleaning up local paths: {source_paths}") + logging.debug(f"Looking for these local paths under directory: {os.getcwd()}") for p in source_paths: expand_source_paths.extend(glob.glob(p.lstrip("/"))) From fe50a5dca742ecb248bcd78a265c7a79a39ddf95 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:32:52 -0600 Subject: [PATCH 019/208] Add code to cause the script to wait until all the (processed) obs files, that are expected to be created once the task is finished actually get created. This is needed because it is possible that for some forecast hours for which there is overlap between cycles, the files are being retrieved and processed by the get_obs_... task for another cycle. --- scripts/exregional_get_verif_obs.sh | 50 ++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 93f17bfa60..b615f05ffa 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -146,6 +146,7 @@ fi # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) +processed_fp_list=() current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do @@ -216,6 +217,9 @@ echo "ihh = ${ihh}" # hour-of-day in the name of the file. ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${ccpa_fp_proc}) + # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. If so, skip and go to the next valid # time. If not, pull it. @@ -491,6 +495,9 @@ echo "ihh = ${ihh}" # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${mrms_fp_proc}) + # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the # next one. If not, pull it. @@ -666,6 +673,9 @@ echo "vdate_p1h = ${vdate_p1h}" # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + # Store the full path to the processed file in a list for later use. 
+ processed_fp_list+=(${ndas_fp_proc}) + # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next # one. @@ -907,11 +917,41 @@ NDAS data for the current valid time from remote locations." current_fcst=$((${current_fcst} + 1)) done - - -# Clean up raw, unprocessed observation files -#rm -rf ${OBS_DIR}/raw - +# +#----------------------------------------------------------------------- +# +# At this point, the processed data files for all output forecast hours +# for this cycle are either being created (by a get_obs_... task for +# another cycle) or have already been created (either by this get_obs_... +# task or one for another cycle). In case they are still being created, +# make sure they have in fact been created before exiting this script. +# If we don't do this, it is possible for this get_obs_... task to complete +# successfully but still have processed obs files for some forecast hours +# not yet created, which is undesirable. +# +#----------------------------------------------------------------------- +# +num_proc_files=${#processed_fp_list[@]} +for (( i=0; i<${num_proc_files}; i++ )); do + obs_fp="${processed_fp_list[$i]}" + while [[ ! -f "${obs_fp}" ]]; do + echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):" + echo " obs_fp = \"${obs_fp}\"" + sleep 5s + done +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +remove_raw="TRUE" +#remove_raw="FALSE" +if [ "${remove_raw}" = "TRUE" ]; then + rm -rf ${OBS_DIR}/raw_* +fi # #----------------------------------------------------------------------- # From dc4971dedfed12f45e1dd13411300ac3fe1ae53a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:45:51 -0600 Subject: [PATCH 020/208] Fix the workflow task dependencies and ex-script for the GenEnsProd and EnsembleStat tasks such that GenEnsProd does not depend on the completion of get_obs_... tasks (because it doesn't need observations) but only forecast output while EnsembleStat does. --- parm/wflow/verify_ens.yaml | 47 ++-- ...onal_run_met_genensprod_or_ensemblestat.sh | 64 +++--- ush/set_vx_fhr_list.sh | 206 +++++++++++++++--- 3 files changed, 231 insertions(+), 86 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 18b23a1eb0..4be4c5b47e 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -37,16 +37,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. 
- taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - metataskdep_pcpcombine_fcst: - attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metataskdep_pcpcombine_fcst: + attrs: + metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -54,9 +47,13 @@ metatask_GenEnsProd_EnsembleStat_CCPA: METPLUSTOOLNAME: 'ENSEMBLESTAT' FCST_THRESH: 'none' dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GenEnsProd_EnsembleStat_NOHRSC: var: @@ -75,9 +72,6 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'all' dependency: and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. metataskdep_pcpcombine_fcst: attrs: metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems @@ -149,15 +143,8 @@ metatask_GenEnsProd_EnsembleStat_NDAS: FCST_THRESH: 'all' walltime: 02:30:00 dependency: - and: - # The Pb2nc task (which is run only for obs) must be complete because - # this GenEnsProd task checks to see the forecast hours for which obs - # are available before processing the forecast for those hours. - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs - metataskdep_check_post_output: - <<: *check_post_output + metataskdep_check_post_output: + <<: *check_post_output task_run_MET_EnsembleStat_vx_#VAR#: <<: *task_GenEnsProd_NDAS envars: @@ -165,9 +152,13 @@ metatask_GenEnsProd_EnsembleStat_NDAS: METPLUSTOOLNAME: 'ENSEMBLESTAT' walltime: 01:00:00 dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_GridStat_CCPA_ensmeanprob_all_accums: var: diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 93caeaa7f2..5003047f4f 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -200,30 +200,40 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. -# -# Note that strictly speaking, this does not need to be done if the MET/ -# METplus tool being called is GenEnsProd (because this tool only operates -# on forecasts), but we run the check anyway in this case in order to -# keep the code here simpler and because the output of GenEnsProd for -# forecast hours with missing observations will not be used anyway in -# downstream verification tasks. 
-# -#----------------------------------------------------------------------- -# -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +# Generate the list of forecast hours for which to run the specified +# METplus tool. +# +# If running the GenEnsProd tool, we set this to the list of forecast +# output times without filtering for the existence of observation files +# corresponding to those times. This is because GenEnsProd operates +# only on forecasts; it does not need observations. +# +# On the other hand, if running the EnsembleStat tool, we set the list of +# forecast hours to a set of times that takes into consideration whether +# or not observations exist. We do this by starting with the full list +# of forecast times for which there is forecast output and then removing +# from that list any times for which there is no corresponding observations. +# +#----------------------------------------------------------------------- +# +if [ "${MetplusToolName}" = "GenEnsProd" ]; then + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + outvarname_fhr_list_no_missing="FHR_LIST" +elif [ "${MetplusToolName}" = "EnsembleStat" ]; then + set_vx_fhr_list \ + cdate="${CDATE}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + base_dir="${OBS_INPUT_DIR}" \ + fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + check_accum_contrib_files="FALSE" \ + num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + outvarname_fhr_list="FHR_LIST" +fi # #----------------------------------------------------------------------- # @@ -290,7 +300,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -# Load the yaml-like file containing the configuration for ensemble +# Load the yaml-like file containing the configuration for ensemble # verification. # #----------------------------------------------------------------------- @@ -368,7 +378,7 @@ settings="\ # # Verification configuration dictionary. # -'vx_config_dict': +'vx_config_dict': ${vx_config_dict:-} " @@ -380,7 +390,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh index 5cefc78365..8101e927e5 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_vx_fhr_list.sh @@ -1,14 +1,24 @@ # #----------------------------------------------------------------------- # -# This file defines a function that generates a list of forecast hours -# such that for each hour there exist a corresponding obs file. It does -# this by first generating a generic sequence of forecast hours and then -# removing from that sequence any hour for which there is no obs file. +# This file defines functions used to generate sets of forecast hours for +# which verification will be performed. 
# #----------------------------------------------------------------------- # -function set_vx_fhr_list() { + +function set_vx_fhr_list_no_missing() { +# +#----------------------------------------------------------------------- +# +# This function sets the forecast hours for which verification will be +# performed under the assumption that that the data file (which may be +# a forecast output file or an observation file) for each hour is available +# (i.e. that there are no missing files). +# +#----------------------------------------------------------------------- +# + # #----------------------------------------------------------------------- # @@ -48,15 +58,10 @@ function set_vx_fhr_list() { #----------------------------------------------------------------------- # local valid_args=( \ - "cdate" \ "fcst_len_hrs" \ "field" \ "accum_hh" \ - "base_dir" \ - "fn_template" \ - "check_accum_contrib_files" \ - "num_missing_files_max" \ - "outvarname_fhr_list" \ + "outvarname_fhr_list_no_missing" \ ) process_args valid_args "$@" # @@ -76,27 +81,15 @@ function set_vx_fhr_list() { # #----------------------------------------------------------------------- # - local crnt_tmpl \ - crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_int \ + local fhr_array \ fhr_list \ + fhr_int \ fhr_min \ - fhr_max \ - fn \ - fp \ - i \ - num_fcst_hrs \ - num_missing_files \ - regex_search_tmpl \ - remainder \ - skip_this_fhr + fhr_max # #----------------------------------------------------------------------- # -# Create array containing set of forecast hours for which we will check -# for the existence of corresponding observation or forecast file. +# Create the array of forecast hours. # #----------------------------------------------------------------------- # @@ -140,6 +133,157 @@ this field (field): fhr_max="${fcst_len_hrs}" fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) + + # Express the forecast hour array as a (scalar) string containing a comma + # (and space) separated list of the elements of fhr_array. + fhr_list=$( printf "%s, " "${fhr_array[@]}" ) + fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + + print_info_msg "$VERBOSE" "\ +Initial (i.e. before filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: + fhr_list = \"${fhr_list}\" +" +# +#----------------------------------------------------------------------- +# +# Set output variables. +# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then + printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + + + +# +#----------------------------------------------------------------------- +# +# This function generates a list of forecast hours such that for each +# such hour, there exists a corresponding data file with a name of the +# form specified by the template fn_template. Depending on fn_template, +# this file may contain forecast or observation data. This function +# generates this forecast hour list by first generating a set of hours +# under the assumption that there is a corresponding data file for each +# hour and then removing from that list any hour for which there is no +# data file. 
+# +#----------------------------------------------------------------------- +# +function set_vx_fhr_list() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; set -u +x; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. +# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "cdate" \ + "fcst_len_hrs" \ + "field" \ + "accum_hh" \ + "base_dir" \ + "fn_template" \ + "check_accum_contrib_files" \ + "num_missing_files_max" \ + "outvarname_fhr_list" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args valid_args +# +#----------------------------------------------------------------------- +# +# Declare local variables. +# +#----------------------------------------------------------------------- +# + local crnt_tmpl \ + crnt_tmpl_esc \ + fhr \ + fhr_array \ + fhr_list \ + fn \ + fp \ + i \ + num_fcst_hrs \ + num_missing_files \ + regex_search_tmpl \ + remainder \ + skip_this_fhr +# +#----------------------------------------------------------------------- +# +# For the specified field, generate the set of forecast hours at which +# verification will be performed under the assumption that for each such +# hour, the corresponding forecast and/or observation files exists. Thus, +# this set of forecast hours is an initial guess for the hours at which +# vx will be performed. +# +#----------------------------------------------------------------------- +# + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${fcst_len_hrs}" \ + field="${field}" \ + accum_hh="${accum_hh}" \ + outvarname_fhr_list_no_missing="fhr_list_no_missing" + + # For convenience, save the scalar variable fhr_list_no_missing to a bash + # array. + fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" ) + fhr_array=( ${fhr_array} ) + print_info_msg "$VERBOSE" "\ Initial (i.e. 
before filtering for missing files) set of forecast hours is: @@ -174,7 +318,7 @@ is: skip_this_fhr="FALSE" for (( j=0; j<${num_back_hrs}; j++ )); do # -# Use the provided template to set the name of/relative path to the file +# Use the provided template to set the name of/relative path to the file # Note that the while-loop below is over all METplus time string templates # of the form {...} in the template fn_template; it continues until all # such templates have been evaluated to actual time strings. @@ -195,7 +339,7 @@ is: # # Replace METplus time templates in fn with actual times. Note that # when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in +# closing and opening curly braces, etc) in the METplus template in # order for the sed command below to work properly. # crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ @@ -253,8 +397,8 @@ METplus configuration file. # fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours is -(written as a single string): +Final (i.e. after filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: fhr_list = \"${fhr_list}\" " # From 13aba39e140f116b9a553229bc975247e2282ec0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:54:28 -0600 Subject: [PATCH 021/208] Bug fixes after running WE2E vx suite. --- parm/wflow/verify_ens.yaml | 10 +++++++--- scripts/exregional_get_verif_obs.sh | 29 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 4be4c5b47e..9f1079b505 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -248,6 +248,10 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_LEVEL: 'all' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b615f05ffa..72be333b82 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -134,10 +134,11 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') +vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then - vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) vhh_last=$(echo ${vdate_last} | cut -c9-10) - hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) fcst_length=${fcst_length_rounded_up} @@ -218,7 +219,9 @@ echo "ihh = ${ihh}" ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ccpa_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${ccpa_fp_proc}) + fi # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. 
If so, skip and go to the next valid @@ -496,7 +499,9 @@ echo "ihh = ${ihh}" mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${mrms_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${mrms_fp_proc}) + fi # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the @@ -674,7 +679,13 @@ echo "vdate_p1h = ${vdate_p1h}" ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ndas_fp_proc}) +echo +echo "LLLLLLLLLLLLL" + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then +echo "MMMMMMMMMMMMM" +echo "processed_fp_list = |${processed_fp_list[@]}" + processed_fp_list+=(${ndas_fp_proc}) + fi # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next @@ -931,7 +942,11 @@ done # #----------------------------------------------------------------------- # +echo +echo "KKKKKKKKKKKK" +echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} +echo "num_proc_files = ${num_proc_files}" for (( i=0; i<${num_proc_files}; i++ )); do obs_fp="${processed_fp_list[$i]}" while [[ ! -f "${obs_fp}" ]]; do @@ -947,8 +962,8 @@ done # #----------------------------------------------------------------------- # -remove_raw="TRUE" -#remove_raw="FALSE" +#remove_raw="TRUE" +remove_raw="FALSE" if [ "${remove_raw}" = "TRUE" ]; then rm -rf ${OBS_DIR}/raw_* fi From 860f62e3aee3dd71d3eb23765e9fd846b61b5444 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:55:20 -0600 Subject: [PATCH 022/208] Bugfix to dependencies of ensemble vx tasks that come after GenEnsProd due to changes to dependencies of GenEnsProd tasks in previous commit(s). --- parm/wflow/verify_ens.yaml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 9f1079b505..4d01281b6d 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -180,9 +180,13 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GridStat_NOHRSC_ensmeanprob_all_accums: var: @@ -204,9 +208,13 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h + and: + taskdep: + attrs: + task: get_obs_nohrsc + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h metatask_GridStat_MRMS_ensprob: var: @@ -224,9 +232,13 @@ metatask_GridStat_MRMS_ensprob: FCST_LEVEL: 'L0' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_get_obs_mrms: + attrs: + task: get_obs_mrms + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_PointStat_NDAS_ensmeanprob: var: From e54ec16d6e7f12cc095e053d00388081dc7ffe60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 26 Jul 2024 00:11:17 -0600 Subject: [PATCH 023/208] Bug fixes to get all WE2E vx tests to succeed. 
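Among other fixes, the last forecast hour is now mapped to the next
6-hourly NDAS archive time (00, 06, 12, or 18 UTC) before obs are pulled.
As a rough sketch of that round-up arithmetic (illustration only; the
names vhh and hours_to_archive mirror those used in the ex-script, and
the example value 14 is arbitrary):

    # Round a valid hour-of-day up to the next 6-hourly archive hour.
    # E.g. vhh=14 gives hours_to_archive=4, i.e. the 18z archive, whose
    # tm01-tm06 prepbufr files cover hours 17 down through 12.
    vhh=14
    hours_to_archive=$(( 6 - (10#$vhh % 6) ))
    echo "archive hour: $(( (10#$vhh + hours_to_archive) % 24 ))z"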
--- scripts/exregional_get_verif_obs.sh | 73 +++++++++++++++++++---------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 72be333b82..f3a52710c5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -124,7 +124,7 @@ idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} echo -echo "HELLO GGGGGGGG" +echo "HELLO AAAAAAAAAAA" iyyyymmddhh=${PDY}${cyc} echo "iyyyymmddhh = ${iyyyymmddhh}" @@ -134,14 +134,17 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') +echo +echo "BYE 00000000" vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then +echo "BYE 111111111" vhh_last=$(echo ${vdate_last} | cut -c9-10) #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) - fcst_length=${fcst_length_rounded_up} +# fcst_length=${fcst_length_rounded_up} fi # Make sure fcst_length isn't octal (leading zero) @@ -152,14 +155,17 @@ current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do echo -echo "HELLO GGGGGGGG" +echo "HELLO BBBBBBBBBBB" echo "current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) +echo +echo "BYE 222222222" +echo "vhh = ${vhh}" # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory @@ -167,11 +173,10 @@ echo "current_fcst = ${current_fcst}" vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo -echo "HELLO HHHHHHHH" +echo "HELLO CCCCCCCCCC" echo "vyyyymmdd = ${vyyyymmdd}" echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo "ihh = ${ihh}" -#exit #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) @@ -655,7 +660,8 @@ echo "ihh = ${ihh}" vhh_p1h_noZero=$((10#${vhh_p1h})) echo "" -echo "HELLO PPPPPPP" +echo "HELLO DDDDDDDDDDD" +echo "vdate = ${vdate}" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" @@ -672,7 +678,7 @@ echo "vdate_p1h = ${vdate_p1h}" # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fn="prepbufr.ndas.${vdate}" # Full path to the processed NDAS prepbufr file for the current field and # valid time. @@ -680,9 +686,9 @@ echo "vdate_p1h = ${vdate_p1h}" # Store the full path to the processed file in a list for later use. 
echo -echo "LLLLLLLLLLLLL" - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then -echo "MMMMMMMMMMMMM" +echo "EEEEEEEEEEEEEE" + if [ ${vdate} -le ${vdate_last} ]; then +echo "FFFFFFFFFFFFFF" echo "processed_fp_list = |${processed_fp_list[@]}" processed_fp_list+=(${ndas_fp_proc}) fi @@ -717,7 +723,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" mv_or_cp="cp" echo "" -echo "HELLO AAAAA" +echo "HELLO GGGGGGGGGGGGG" echo "vhh_noZero = ${vhh_noZero}" echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" @@ -726,7 +732,22 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # 5 hours preceeding it) if the hour-of-day corresponding to the current # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ + ${current_fcst} -eq ${fcst_length} ]]; then + + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + unix_vdate_archive="${unix_vdate_p1h}" + vdate_archive="${vdate_p1h}" + vyyyymmdd_archive="${vyyyymmdd_p1h}" + vhh_archive=${vhh_p1h} + elif [[ ${current_fcst} -eq ${fcst_length} ]]; then + hours_to_archive=$(( 6 - (vhh % 6) )) + unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") + vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) + vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) + vhh_archive=$(echo ${vdate_archive} | cut -c9-10) + fi # Base directory that will contain the 6-hourly subdirectories in which # the NDAS prepbufr files retrieved from archive files will be placed, @@ -734,8 +755,8 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # We refer to these as the "raw" NDAS base and 6-hourly directories # because they contain files as they are found in the archives before # any processing by this script. - ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" # Check if the raw 6-hourly directory already exists on disk. If so, it # means the NDAS prepbufr files for the current valid hour and the 5 hours @@ -746,9 +767,9 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " -${OBTYPE} raw directory for day ${vdate_p1h} exists on disk: +${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vyyyymmdd}) and the +This means NDAS files for the current valid time (${vdate}) and the 5 hours preceeding it have been or are being retrieved by a get_obs_ndas workflow task for another cycle. Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." @@ -773,7 +794,7 @@ NDAS data for the current valid time from remote locations." --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_p1h} \ + --cycle_date ${vdate_archive} \ --data_stores hpss \ --data_type NDAS_obs \ --output_path ${ndas_day_dir_raw} \ @@ -789,19 +810,19 @@ NDAS data for the current valid time from remote locations." " # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_p1h. 
+ # retrieval of all files for the 6-hour interval ending in vdate_archive. touch ${ndas_day_dir_raw}/pull_completed.txt fi # Make sure the retrieval process for the 6-hour interval ending in - # vdate_p1h (which may have been executed above for this cycle or for + # vdate_archive (which may have been executed above for this cycle or for # another cycle) has completed by checking for the existence of the flag # file that marks completion. If not, keep checking until the flag file # shows up. while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_p1h} ..." + echo "6-hour interval ending on ${vdate_archive} ..." sleep 5s done @@ -829,11 +850,11 @@ NDAS data for the current valid time from remote locations." # observations than tm00 for the equivalent time. for tm in $(seq 6 -1 1); do # for tm in $(seq --format="%02g" 6 -1 1); do - vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) - if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then + vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) + if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm} + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} fi done @@ -943,7 +964,7 @@ done #----------------------------------------------------------------------- # echo -echo "KKKKKKKKKKKK" +echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} echo "num_proc_files = ${num_proc_files}" From 8e8a1c10defb814f153a5df88acff5ddfda098e9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:36:19 -0600 Subject: [PATCH 024/208] Increase default wallclock time for get_obs_ccpa tasks since they're tending to time out for 48-hour forecasts. --- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index da43336a0d..c99bd4b4e6 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -33,7 +33,7 @@ task_get_obs_ccpa: native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" - walltime: 00:45:00 + walltime: 02:00:00 task_get_obs_nohrsc: <<: *default_task_verify_pre From a34d99339e67cba2bd9abf474dd1543e77e1433f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:39:34 -0600 Subject: [PATCH 025/208] For each cycle except the last one, in the PcpCombine_obs tasks make sure PcpCombine operates only on those hours unique to the cycle, i.e. for those times starting from the initial time of the cycle to just before the initial time of the next cycle. For the PcpCombine_obs task for the last cycle, allow it to operate on all hours of that cycle's forecast. This ensures that the PcpCombine tasks for the various cycles do not clobber each other's output. 
Accordingly, change the dependencies of downstream tasks that depend on PcpCombine obs output to make sure they include all PcpCombine_obs tasks that cover the forecast period of the that downstream task's cycle. --- parm/wflow/verify_det.yaml | 39 +++++++++++++++++++++--- parm/wflow/verify_ens.yaml | 36 +++++++++++++++++++--- scripts/exregional_run_met_pcpcombine.sh | 21 ++++++++++++- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 3acfa3e836..f69429bbe4 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,12 +47,41 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_pcpcombine_obs: + # The following will include dependencies on the PcpCombine_obs task for + # the current cycle as well as those from other cycles that process CCPA + # obs at valid times that are part of the current cycle's forecast. This + # dependence is necessary because each PcpCombine_obs task except the + # last one processes obs at valid times starting with the initial time + # of the current cycle's forecast and ending with the last output time + # of this forecast that is before the initial time of the next cycle's + # forecast. It will also include a dependency on the PcpCombine_fcst + # task for the current cycle. + taskdep_pcpcombine_obs_fcst: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - taskdep_pcpcombine_fcst: - attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} + {%- set num_cycl_dep = num_cycl_dep %} + {%- for n in range(0, num_cycl_dep) %} + {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} + {%- if n == 0 %} + {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %} + {{- " \n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -182,8 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + <<: &taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 6e64d102e6..20ae1a9794 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -212,9 +212,28 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi +# If processing obs, then for all cylces except the last one, calculate +# a "forecast length" that will hours up to but not including the initial +# (zeroth) hour of the next cycle. 
For the last cycle, take the "forecast +# length" of the obs to be the same as that of the forecast for the cycle. +# This ensures that the PcpCombine_obs tasks for different cycles do not +# overwrite or clobber output from another cycle (because with this +# approach, the valid times on which the current PcpCombine_obs task is +# operating is distinct from the ones for the PcpCombine_obs tasks for +# every other cycle). +fcst_len_hrs="${FCST_LEN_HRS}" +if [ "${FCST_OR_OBS}" = "OBS" ]; then + yyyymmddhhmn="${PDY}${cyc}00" + if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ + [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then + output_incr_hrs="1" + fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) + fi +fi + set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ + fcst_len_hrs="${fcst_len_hrs}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 5550a41a1b31e104db3c39f02f95aa3edb8361ae Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 10:16:43 -0600 Subject: [PATCH 026/208] Bug fix in yaml. --- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 995f362926..2d62b803b8 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -211,7 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - <<: &taskdep_pcpcombine_obs + <<: *taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h From c76ed1afdc3b9e18c59c36cf8567588f3e25fa1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:50:16 -0600 Subject: [PATCH 027/208] Fix still-existing problem of file clobbering with get_obs_mrms and possibly also get_obs_ndas by putting in sleep commands. --- scripts/exregional_get_verif_obs.sh | 36 +++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index f3a52710c5..a2759f7ef5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -548,10 +548,21 @@ echo "ihh = ${ihh}" # the archive (tar) files. If so, skip the retrieval process. If not, # proceed to retrieve all the files and place them in the raw daily # directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${mrms_day_dir_raw}" ]]; then - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." @@ -650,7 +661,7 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then - # Calculate valid date plus 1 hour. This is needed because we need to + # Calculate valid date plus 1 hour. 
This is needed because we need to # check whether this date corresponds to one of the valid hours-of-day # 00, 06, 12, and 18 on which the NDAS archives are provided. unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") @@ -675,7 +686,7 @@ echo "vdate_p1h = ${vdate_p1h}" # Name of the NDAS prepbufr file for the current valid time that will # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is not the + # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vdate}" @@ -695,7 +706,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next - # one. + # one. if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -764,6 +775,17 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # from the archive (tar) files. If so, skip the retrieval process. If # not, proceed to retrieve the archive file, extract the prepbufr files # from it, and place them in the raw daily directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " @@ -949,6 +971,7 @@ NDAS data for the current valid time from remote locations." current_fcst=$((${current_fcst} + 1)) done +echo "SSSSSSSSSSSSSSSS" # #----------------------------------------------------------------------- # @@ -956,14 +979,13 @@ done # for this cycle are either being created (by a get_obs_... task for # another cycle) or have already been created (either by this get_obs_... # task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. +# make sure they have in fact been created before exiting this script. # If we don't do this, it is possible for this get_obs_... task to complete # successfully but still have processed obs files for some forecast hours # not yet created, which is undesirable. # #----------------------------------------------------------------------- # -echo echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} From 3f1dea1ebd2f4755bf268a78984c5125ec3476af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:51:57 -0600 Subject: [PATCH 028/208] Improvements to jinja2 code to put in dependencies from other cycles. 
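The simplified jinja2 still generates one PcpCombine_obs dependency for
every cycle whose obs-processing window overlaps the current cycle's
forecast.  A rough shell sketch of what the loop in the diff below
expands to (illustration only; FCST_LEN_HRS and INCR_CYCL_FREQ stand in
for the workflow settings of the same names, and the example values 48
and 24 are arbitrary):

    # The number of overlapping cycles is ceil(FCST_LEN_HRS/INCR_CYCL_FREQ);
    # the n-th generated dependency carries a cycle offset of
    # n*INCR_CYCL_FREQ hours.
    FCST_LEN_HRS=48; INCR_CYCL_FREQ=24
    num_cycl_dep=$(( (FCST_LEN_HRS + INCR_CYCL_FREQ - 1) / INCR_CYCL_FREQ ))
    for (( n=0; n<num_cycl_dep; n++ )); do
      printf 'cycle_offset=%02d:00:00\n' $(( n*INCR_CYCL_FREQ ))
    done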
--- parm/wflow/verify_det.yaml | 27 ++++++++++----------------- parm/wflow/verify_ens.yaml | 19 ++++++------------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index f69429bbe4..35358c9b67 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -54,9 +54,8 @@ metatask_GridStat_CCPA_all_accums_all_mems: # last one processes obs at valid times starting with the initial time # of the current cycle's forecast and ending with the last output time # of this forecast that is before the initial time of the next cycle's - # forecast. It will also include a dependency on the PcpCombine_fcst - # task for the current cycle. - taskdep_pcpcombine_obs_fcst: + # forecast. + taskdep_pcpcombine_obs: attrs: task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} {%- set num_cycl_dep = num_cycl_dep %} @@ -64,24 +63,18 @@ metatask_GridStat_CCPA_all_accums_all_mems: {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} {%- if n == 0 %} {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} {%- endfor %} - {{- " \n" }} + {{- " \n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} - {%- endfor %}' + {%- endfor %} + {{- " \n" }} + {{- " Date: Tue, 30 Jul 2024 17:47:56 -0600 Subject: [PATCH 029/208] Bug fix. --- scripts/exregional_get_verif_obs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a2759f7ef5..314273ba93 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -556,7 +556,7 @@ echo "ihh = ${ihh}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." sleep "${sleep_duration_secs}s" if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -783,7 +783,7 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." sleep "${sleep_duration_secs}s" if [[ -d "${ndas_day_dir_raw}" ]]; then From f9af954a305debedfd4305f62e8b10902dbb95e5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:39:29 -0600 Subject: [PATCH 030/208] Minor typo fix. 
--- jobs/JREGIONAL_GET_VERIF_OBS | 1 - 1 file changed, 1 deletion(-) diff --git a/jobs/JREGIONAL_GET_VERIF_OBS b/jobs/JREGIONAL_GET_VERIF_OBS index 3820a739db..65377ddde2 100755 --- a/jobs/JREGIONAL_GET_VERIF_OBS +++ b/jobs/JREGIONAL_GET_VERIF_OBS @@ -55,7 +55,6 @@ This is the J-job script for the task that checks, pulls, and stages observation data for verification purposes. ========================================================================" -# # #----------------------------------------------------------------------- # From f81cd1cab8922930b2d908bb6c141af8cdf10318 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:49:21 -0600 Subject: [PATCH 031/208] Add workflow configuration options for whether or not to remove raw obs files. --- parm/wflow/verify_pre.yaml | 4 ++++ ush/config_defaults.yaml | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c99bd4b4e6..2357c6bc5e 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,6 +29,7 @@ task_get_obs_ccpa: ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_CCPA }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -42,6 +43,7 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -56,6 +58,7 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' VAR: 'REFC RETOP' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -69,6 +72,7 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index c9c0fc7cb8..ceccd71277 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -324,6 +324,24 @@ platform: # #----------------------------------------------------------------------- # + # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # Boolean flag specifying whether to remove the "raw" observation + # directories after pulling the specified type of obs (CCPA, MRMS, + # NDAS, or NOHRSC). The raw directories are the ones in which the + # observation files are placed immediately after pulling them from + # a data store (e.g. 
NOAA's HPSS) but before performing any processing + # on them (e.g. renaming the files or reorganizing their directory + # structure). + # + #----------------------------------------------------------------------- + # + REMOVE_RAW_OBS_DIRS_CCPA: true + REMOVE_RAW_OBS_DIRS_MRMS: true + REMOVE_RAW_OBS_DIRS_NDAS: true + REMOVE_RAW_OBS_DIRS_NOHRSC: true + # + #----------------------------------------------------------------------- + # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, From 01e87b8b2f22e21e55c1131c1d11040860a3c8ce Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 16 Aug 2024 11:41:50 -0600 Subject: [PATCH 032/208] Make the names of the deterministic and ensemble vx configuration files user-specifiable. --- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 4 +--- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 4 +--- .../exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 4 +--- .../exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 +--- ush/config_defaults.yaml | 7 +++++++ 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 5003047f4f..5034369851 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -305,9 +305,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 91c5a7896b..e54dd7b553 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -292,9 +292,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="det" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_DET_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 6e4a4ff33f..f08c002d5f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -250,9 +250,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. 
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 924d321ec3..5952ed3785 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -249,9 +249,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index ceccd71277..e564444b49 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2555,6 +2555,13 @@ verification: # be run. # NUM_MISSING_FCST_FILES_MAX: 0 + # + # Names of configuration files for deterministic and ensemble vx that + # specify the field groups, field names, levels, and (if applicable) + # thresholds for which to run verification. + # + VX_CONFIG_DET_FN: 'vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' #---------------------------- # CPL_AQM config parameters From 21f7b691af429623c51d001e6de91085bec9672f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 26 Aug 2024 10:45:09 -0600 Subject: [PATCH 033/208] Add new files. --- ush/bash_utils/ceil.sh | 122 +++++++++++ ush/get_obs_ccpa.sh | 454 +++++++++++++++++++++++++++++++++++++++++ ush/get_obs_mrms.sh | 260 +++++++++++++++++++++++ ush/get_obs_ndas.sh | 305 +++++++++++++++++++++++++++ 4 files changed, 1141 insertions(+) create mode 100644 ush/bash_utils/ceil.sh create mode 100755 ush/get_obs_ccpa.sh create mode 100755 ush/get_obs_mrms.sh create mode 100755 ush/get_obs_ndas.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh new file mode 100644 index 0000000000..dc8a21c90d --- /dev/null +++ b/ush/bash_utils/ceil.sh @@ -0,0 +1,122 @@ +# +#----------------------------------------------------------------------- +# +# This function returns the ceiling of the quotient of two numbers. The +# ceiling of a number is the number rounded up to the nearest integer. +# +#----------------------------------------------------------------------- +# +function ceil() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. 
+# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Check number of arguments. +# +#----------------------------------------------------------------------- +# + if [ "$#" -ne 2 ]; then + + print_err_msg_exit " +Incorrect number of arguments specified: + + Function name: \"${func_name}\" + Number of arguments specified: $# + +Usage: + + ${func_name} numer denom + +where denom is a nonnegative integer and denom is a positive integer. +" + + fi +# +#----------------------------------------------------------------------- +# +# Make sure arguments are of the right form. +# +#----------------------------------------------------------------------- +# + local numer="$1" + local denom="$2" + + if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then + print_err_msg_exit " +The first argument to the \"${func_name}\" function (numer) must be a nonnegative +integer but isn't: + numer = ${numer} +" + fi + + if [[ "${denom}" -eq 0 ]]; then + print_err_msg_exit " +The second argument to the \"${func_name}\" function (denom) cannot be zero: + denom = ${denom} +" + fi + + if ! [[ "${denom}" =~ ^[0-9]+$ ]]; then + print_err_msg_exit " +The second argument to the \"${func_name}\" function (denom) must be a positive +integer but isn't: + denom = ${denom} +" + fi +# +#----------------------------------------------------------------------- +# +# Let ceil(a,b) denote the ceiling of the quotient of a and b. It can be +# shown that for two positive integers a and b, we have: +# +# ceil(a,b) = floor((a+b-1)/b) +# +# where floor(a,b) is the integer obtained by rounding the quotient of +# a and b (i.e. a/b) down to the nearest integer. Since in bash a +# division returns only the integer part of the result, it is effectively +# the floor function. Thus the following. +# +#----------------------------------------------------------------------- +# + result=$(( (numer+denom-1)/denom )) + print_info_msg "${result}" +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/func- +# tion. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh new file mode 100755 index 0000000000..5cbf6638c6 --- /dev/null +++ b/ush/get_obs_ccpa.sh @@ -0,0 +1,454 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. 
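+#
+# Quick illustration of the ceil helper added above in ush/bash_utils/ceil.sh
+# (assumed to be available here through the sourcing of the utility functions):
+# it is used further below to round forecast-output hours up to the 6-hourly
+# archive boundaries. The numbers in this sketch are hypothetical:
+#
+#   arcv_hr_incr=6
+#   hr_first=7
+#   ceil ${hr_first} ${arcv_hr_incr}                                # prints 2
+#   echo $(( $(ceil ${hr_first} ${arcv_hr_incr}) * arcv_hr_incr ))  # prints 12
+#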
+# +# +# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 +# +# If data is retrieved from HPSS, it will be automatically staged by this +# script. +# +# Notes about the data and how it's used for verification: +# +# 1. Accumulation is currently hardcoded to 01h. The verification will +# use MET/pcp-combine to sum 01h files into desired accumulations. +# +# 2. There is a problem with the valid time in the metadata for files +# valid from 19 - 00 UTC (or files under the '00' directory). This is +# accounted for in this script for data retrieved from HPSS, but if you +# have manually staged data on disk you should be sure this is accounted +# for. See in-line comments below for details. +# +#----------------------------------------------------------------------- +# + +# +#----------------------------------------------------------------------- +# +# Below, we will use the retrieve_data.py script to retrieve the CCPA +# grib2 file from a data store (e.g. HPSS). Before doing so, note the +# following: +# +# * The daily archive (tar) file containing CCPA obs has a name of the +# form +# +# [PREFIX].YYYYMMDD.tar +# +# where YYYYMMDD is a given year, month, and day combination, and +# [PREFIX] is a string that is not relevant to the discussion here +# (the value it can take on depends on which of several time periods +# YYYYMMDD falls in, and the retrieve_data.py tries various values +# until it finds one for which a tar file exists). Unintuitively, this +# archive file contains accumulation data for valid times starting at +# hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current +# day (YYYYMMDD). In other words, the valid times of the contents of +# this archive file are shifted back by 6 hours relative to the time +# string appearing in the name of the file. See section "DETAILS..." +# for a detailed description of the directory structure in the CCPA +# archive files. +# +# * We call retrieve_data.py in a temporary cycle-specific subdirectory +# in order to prevent get_obs_ccpa tasks for different cycles from +# clobbering each other's output. We refer to this as the "raw" CCPA +# base directory because it contains files as they are found in the +# archives before any processing by this script. +# +# * In each (cycle-specific) raw base directory, the data is arranged in +# daily subdirectories with the same timing as in the archive (tar) +# files (which are described in the section "DETAILS..." below). In +# particular, each daily subdirectory has the form YYYYMDD, and it may +# contain CCPA grib2 files for accumulations valid at hour 19 of the +# previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). +# (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the +# daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer +# to these as raw daily (sub)directories to distinguish them from the +# processed daily subdirectories under the processed (final) CCPA base +# directory (basedir_proc). +# +# * For a given cycle, some of the valid times at which there is forecast +# output may not have a corresponding file under the raw base directory +# for that cycle. 
This is because another cycle that overlaps this cycle +# has already obtained the grib2 CCPA file for that valid time and placed +# it in its processed location; as a result, the retrieveal of that grib2 +# file for this cycle is skipped. +# +# * To obtain a more intuitive temporal arrangement of the data in the +# processed CCPA directory structure than the temporal arrangement used +# in the archives and raw directories, we process the raw files such +# that the data in the processed directory structure is shifted forward +# in time 6 hours relative to the data in the archives and raw directories. +# This results in a processed base directory that, like the raw base +# directory, also contains daily subdirectories of the form YYYYMMDD, +# but each such subdirectory may only contain CCPA data at valid hours +# within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but +# may not contain data that is valid on the previous, next, or any other +# day). +# +# * For data between 20180718 and 20210504, the 01h accumulation data +# (which is the only accumulation we are retrieving) have incorrect +# metadata under the "00" directory in the archive files (meaning for +# hour 00 and hours 19-23, which are the ones in the "00" directory). +# Below, we use wgrib2 to make a correction for this when transferring +# (moving or copying) grib2 files from the raw daily directories to +# the processed daily directories. +# +# +# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES +# ---------------------------------------------------------- +# +# The daily archive file containing CCPA obs is named +# +# [PREFIX].YYYYMMDD.tar +# +# This file contains accumulation data for valid times starting at hour +# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day +# (YYYYMMDD). In particular, when untarred, the daily archive file +# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and +# 18 subdirectories contain grib2 files for accumulations valid at or +# below the hour-of-day given by the subdirectory name (and on YYYYMMDD). +# For example, the 06 directory contains data valid at: +# +# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; +# * YYYYMMDD[03, 06] for 03h accumulations; +# * YYYYMMDD[06] for 06h accumulations. +# +# The valid times for the data in the 12 and 18 subdirectories are +# analogous. However, the 00 subdirectory is different in that it +# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE +# this time, i.e. the data for valid times other than YYYYMMDD00 are on +# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at +# (note the DD-1, meaning one day prior): +# +# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; +# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; +# * YYYYMMDD00 for 06h accumulations. +# +#----------------------------------------------------------------------- +# + +# CCPA accumulation period to consider. Here, we only retrieve data for +# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained +# by other tasks in the workflow that add up these hourly values. +accum="01" + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the CCPA +# grib2 files will appear after this script is done. 
We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# APCP (accumulated precipitation) output for the current day. We start +# constructing this by extracting from the full list of all forecast APCP +# output times (i.e. from all cycles) all elements that contain the current +# task's day (in the form YYYYMMDD). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +# If the 0th hour of the current day is in this list (and if it is, it +# will be the first element), remove it because for APCP, that time is +# considered part of the previous day (because it represents precipitation +# that occurred during the last hour of the previous day). +if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + output_times_crnt_day=(${output_times_crnt_day[@]:1}) +fi +# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is +# one of the output times in the list of all APCP output times, we include +# it in the list for the current day because for APCP, that time is +# considered part of the current day (because it represents precipitation +# that occured during the last hour of the current day). +yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) +if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +fi + +# If there are no forecast APCP output times on the day of the current +# task, exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast APCP output times fall in the current day (including +the 0th hour of the next day). Thus, there is no need to retrieve any +obs files." + exit +fi + +# Obs files will be obtained by extracting them from the relevant 6-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "6 12 18 24". This +# will be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "6 12 18 24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. + +# Sequence interval must be 6 hours because the archives are 6-hourly. +arcv_hr_incr=6 + +# Initial guess for starting archive hour. This is set to the hour +# corresponding to the first forecast output time of the day. +hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) +arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + +# Ending archive hour. 
This is set to the hour corresponding to the last +# forecast output time of the day. +hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +if [[ ${hr_last} -eq 0 ]]; then + arcv_hr_end=24 +else + arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr}) + arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}/${yyyymmdd}" + fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) + arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + print_info_msg " +File does not exists on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day)) +if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. In this case, set the sequence +# of hours corresponding to the archives from which files will be retrieved. +else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) + arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files by looping over archives corresponding to +the following hours (since 00 of this day): + arcv_hrs = ${arcv_hrs_str} +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. Thus, loop over the relevant archives that contain obs for +# the day given by yyyymmdd_task and retrieve files as needed. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# CCPA grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. 
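+# For example (purely illustrative paths), with OBS_DIR=/data/obs/ccpa and
+# yyyymmdd_task=20240529, raw files pulled from the archives land under
+#   /data/obs/ccpa/20240529/raw/<yyyymmddhh_arcv>/
+# while the corresponding processed files end up as
+#   /data/obs/ccpa/<valid yyyymmdd>/ccpa.t<hh>z.01h.hrap.conus.gb2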
+basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. + yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the CCPA grib2 files retrieved from the + # current 6-hourly archive file. We refer to this as the "raw" quarter- + # daily directory because it will contain the files as they are in the + # archive before any processing by this script. + qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + + # Check whether any of the forecast APCP output times for the day associated + # with this task fall in the time interval spanned by the current archive. + # If so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) + yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + do_retrieve="FALSE" + nout=${#output_times_crnt_day[@]} + for (( i=0; i<${nout}; i++ )); do + output_time=${output_times_crnt_day[i]} + if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ ${do_retrieve} == "TRUE" ]]; then + + # Make sure the raw quarter-daily directory exists because it is used + # below as the output directory of the retrieve_data.py script (so if + # this directory doesn't already exist, that script will fail). Creating + # this directory also ensures that the raw base directory (basedir_raw) + # exists before we change location to it below. + mkdir -p ${qrtrday_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ccpa tasks (i.e. those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ccpa tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull CCPA data from HPSS. This will get all 6 obs files in the current + # archive and place them in the raw quarter-daily directory. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${qrtrday_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed CCPA grib2 files. This usually consists of just + # moving or copying the raw files to their processed location, but for + # times between 20180718 and 20210504 and hours-of-day 19 through the + # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 + # to correct an error in the metadata of the raw file and writing the + # corrected data to a new grib2 file in the processed location. 
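+    # As a concrete illustration (file names are placeholders), for one of the
+    # affected valid times the correction below amounts to a call of the form
+    #   wgrib2 <raw_dir>/ccpa.t20z.01h.hrap.conus.gb2 -set_date -24hr \
+    #          -grib <proc_dir>/ccpa.t20z.01h.hrap.conus.gb2 -s
+    # which rewrites the record with its date metadata adjusted by -24 hours;
+    # for unaffected times the raw file is simply moved or copied.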
+ for hrs_ago in $(seq 5 -1 0); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}/${yyyymmdd}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + fp_proc="${day_dir_proc}/${fn_proc}" + hh_noZero=$((10#${hh})) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. After the data is + # pulled, reorganize into correct yyyymmdd structure. + if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ + [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then + wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s + else + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + fi + done + + else + + print_info_msg " +None of the current day's forecast APCP output times fall in the range +spanned by the current 6-hourly archive file. The bounds of the current +archive are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times for APCP are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh new file mode 100755 index 0000000000..23896bbf38 --- /dev/null +++ b/ush/get_obs_mrms.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. +# +# +# MRMS (Multi-Radar Multi-Sensor) radar observations +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, +# +# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity +# data and EchoTop_18_00.50_ for echo top data. 
If data is not available +# at the top of the hour, you should rename the file closest in time to +# your hour(s) of interest to the above naming format. A script +# "ush/mrms_pull_topofhour.py" is provided for this purpose. +# +# If data is retrieved from HPSS, it will automatically staged by this +# this script. +# +#----------------------------------------------------------------------- +# + +# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an +# environment variable created in the ROCOTO XML. It is a scalar variable +# because there doesn't seem to be a way to pass a bash array from the +# XML to the task's script. +mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) + +# Loop over the fields (REFC and RETOP) and set the file base name +# corresponding to each. +fields_in_filenames=() +levels_in_filenames=() +for field in ${mrms_fields[@]}; do + # Set field-dependent parameters needed in forming grib2 file names. + if [ "${field}" = "REFC" ]; then + fields_in_filenames+=("MergedReflectivityQCComposite") + levels_in_filenames+=("00.50") + elif [ "${field}" = "RETOP" ]; then + fields_in_filenames+=("EchoTop") + levels_in_filenames+=("18_00.50") + else + print_err_msg_exit "\ +Invalid field specified: + field = \"${field}\" +Valid options are 'REFC', 'RETOP'." + fi +done + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the MRMS +# grib2 files will appear after this script is done. We refer to this +# as the "processed" base directory because it contains the files after +# all processing by this script is complete. +basedir_proc=${OBS_DIR} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# output for the current day. We extract this list from the full list of +# all forecast output times (i.e. from all cycles). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) + +# If there are no forecast output times on the day of the current task, +# exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast output times fall in the current day. Thus, there +is no need to retrieve any obs files." + exit +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# files that already exist on disk. 
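+# (For reference, the processed file names checked below follow the pattern
+#   ${OBS_DIR}/<yyyymmdd>/MergedReflectivityQCComposite_00.50_<yyyymmdd>-<hh>0000.grib2
+# for REFC and
+#   ${OBS_DIR}/<yyyymmdd>/EchoTop_18_00.50_<yyyymmdd>-<hh>0000.grib2
+# for RETOP, where the dates shown in any examples here are placeholders for
+# the valid day and hour.)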
+num_existing_files=0 +num_mrms_fields=${#mrms_fields[@]} +for (( i=0; i<${num_mrms_fields}; i++ )); do + for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}/${yyyymmdd}" + fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + break + fi + done +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day*num_mrms_fields)) +if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. +else + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files. +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# MRMS grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" + +# Time associated with the archive. MRMS data have daily archives that +# have the hour-of-day set to "00". +yyyymmddhh_arcv="${yyyymmdd_task}00" + +# Directory that will contain the MRMS grib2 files retrieved from the +# current 6-hourly archive file. We refer to this as the "raw" quarter- +# daily directory because it will contain the files as they are in the +# archive before any processing by this script. +day_dir_raw="${basedir_raw}/${yyyymmdd_task}" + +# Make sure the raw quarter-daily directory exists because it is used +# below as the output directory of the retrieve_data.py script (so if +# this directory doesn't already exist, that script will fail). Creating +# this directory also ensures that the raw base directory (basedir_raw) +# exists before we change location to it below. +mkdir -p ${day_dir_raw} + +# The retrieve_data.py script first extracts the contents of the archive +# file into the directory it was called from and then moves them to the +# specified output location (via the --output_path option). In order to +# avoid other get_obs_ndas tasks (i.e. 
those associated with other days) +# from interfering with (clobbering) these files (because extracted files +# from different get_obs_ndas tasks to have the same names or relative +# paths), we change location to the base raw directory so that files with +# same names are extracted into different directories. +cd ${basedir_raw} + +# Pull MRMS data from HPSS. This will get all 7 obs files in the current +# archive and place them in the raw quarter-daily directory, although we +# will make use of only 6 of these (we will not use the tm00 file). +cmd=" +python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type MRMS_obs \ + --output_path ${day_dir_raw} \ + --summary_file retrieve_data.log" + +print_info_msg "CALLING: ${cmd}" +$cmd || print_err_msg_exit "Could not retrieve obs from HPSS." +# +#----------------------------------------------------------------------- +# +# Loop over the 24 hour period starting with the zeroth hour of the day +# associated with this task and ending with the 23rd hour. +# +#----------------------------------------------------------------------- +# + +# Loop through all hours of the day associated with the task. For each +# hour, find the gzipped grib2 file in the raw daily directory that is +# closest in time to this hour. Then gunzip the file and copy it (in the +# process renaming it) to the processed location. +for hr in $(seq 0 1 23); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + for (( i=0; i<${num_mrms_fields}; i++ )); do + python ${USHdir}/mrms_pull_topofhour.py \ + --valid_time ${yyyymmddhh} \ + --outdir ${basedir_proc} \ + --source ${basedir_raw} \ + --product ${fields_in_filenames[$i]} + done + fi +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh new file mode 100755 index 0000000000..d98f390c8b --- /dev/null +++ b/ush/get_obs_ndas.sh @@ -0,0 +1,305 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. 
+# +# +# NDAS (NAM Data Assimilation System) conventional observations +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} +# +# Note that data retrieved from HPSS and other sources may be in a +# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +# either 00, 06, 12, or 18, and prevhour is the number of hours prior to +# hh (00 through 05). If using custom staged data, you will have to +# rename the files accordingly. +# +# If data is retrieved from HPSS, it will be automatically staged by this +# this script. +# +#----------------------------------------------------------------------- +# + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the NDAS +# prepbufr files will appear after this script is done. We refer to this +# as the "processed" base directory because it contains the files after +# all processing by this script is complete. +basedir_proc=${OBS_DIR} + +# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a +# scalar string containing all relevant forecast output times (each) in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to task's script. To have an array-valued variable to +# work with, here, we create the new variable output_times_all that is +# the array-valued counterpart of OUTPUT_TIMES_ALL. +output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# output for the current day. We extract this list from the full list of +# all forecast output times (i.e. from all cycles). +output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) + +# If there are no forecast output times on the day of the current task, +# exit the script. +num_output_times_crnt_day=${#output_times_crnt_day[@]} +if [[ ${num_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast output times fall in the current day. Thus, there +is no need to retrieve any obs files." + exit +fi + +# Obs files will be obtained by extracting them from the relevant 6-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "6 12 18 24". This +# will be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "6 12 18 24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. + +# Sequence interval must be 6 hours because the archives are 6-hourly. +arcv_hr_incr=6 + +# Initial guess for starting archive hour. This is set to the hour +# corresponding to the first forecast output time of the day. +hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) + +# Ending archive hour. This is set to the hour corresponding to the last +# forecast output time of the day. 
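+# (Worked example with hypothetical times: if the day's first output time is
+# at 01Z and its last at 23Z, then arcv_hr_start = (1/6 + 1)*6 = 6 and
+# arcv_hr_end = (23/6 + 1)*6 = 24, so the 06Z, 12Z, and 18Z archives of this
+# day plus the 00Z archive of the next day are considered.)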
+hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${output_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}" + fn_proc="prepbufr.ndas.${yyyymmddhh}" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) + print_info_msg " +File does not exists on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of files needed, then there is no need to retrieve any files. +num_needed_files=$((num_output_times_crnt_day)) +if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit +# Otherwise, will need to retrieve files. In this case, set the sequence +# of hours corresponding to the archives from which files will be retrieved. +else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) + arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed is: + num_needed_files = ${num_needed_files} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files by looping over archives corresponding to +the following hours (since 00 of this day): + arcv_hrs = ${arcv_hrs_str} +" +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. Thus, loop over the relevant archives that contain obs for +# the day given by yyyymmdd_task and retrieve files as needed. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# NDAS prepbufr files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. 
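+  # (E.g., with yyyymmdd_task=20240529 and arcv_hr=24, yyyymmddhh_arcv below
+  # evaluates to 2024053000, i.e. the 00Z archive of the following day; the
+  # date is only illustrative.)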
+ yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the NDAS prepbufr files retrieved from the + # current 6-hourly archive file. We refer to this as the "raw" quarter- + # daily directory because it will contain the files as they are in the + # archive before any processing by this script. + qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + + # Check whether any of the forecast output times for the day associated + # with this task fall in the time interval spanned by the current archive. + # If so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) + yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) + do_retrieve="FALSE" + nout=${#output_times_crnt_day[@]} + for (( i=0; i<${nout}; i++ )); do + output_time=${output_times_crnt_day[i]} + if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ ${do_retrieve} == "TRUE" ]]; then + + # Make sure the raw quarter-daily directory exists because it is used + # below as the output directory of the retrieve_data.py script (so if + # this directory doesn't already exist, that script will fail). Creating + # this directory also ensures that the raw base directory (basedir_raw) + # exists before we change location to it below. + mkdir -p ${qrtrday_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ndas tasks (i.e. those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ndas tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull NDAS data from HPSS. This will get all 7 obs files in the current + # archive and place them in the raw quarter-daily directory, although we + # will make use of only 6 of these (we will not use the tm00 file). + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type NDAS_obs \ + --output_path ${qrtrday_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed NDAS prepbufr files. This consists of simply + # copying or moving (and in the process renaming) them from the raw + # quarter-daily directory to the processed directory. Note that the + # tm06 files contain more/better observations than tm00 for the + # equivalent time, so we use those. 
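+    # (Renaming example with a placeholder date: from an archive valid at 12Z,
+    # nam.t12z.prepbufr.tm06.nr is valid at 06Z and is staged as
+    # prepbufr.ndas.<yyyymmdd>06, while nam.t12z.prepbufr.tm01.nr is valid at
+    # 11Z and becomes prepbufr.ndas.<yyyymmdd>11.)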
+ for hrs_ago in $(seq --format="%02g" 6 -1 1); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="prepbufr.ndas.${yyyymmddhh}" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + else + + print_info_msg " +None of the current day's forecast output times fall in the range spanned +by the current 6-hourly archive file. The bounds of the current archive +are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi From e14b1b8e4485666594894fe9501da7f29e0a4df7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 16:52:46 -0600 Subject: [PATCH 034/208] Bug fixes to get_obs_... tasks. --- ush/get_obs_ccpa.sh | 15 ++++++++++----- ush/get_obs_mrms.sh | 11 ++++++++--- ush/get_obs_ndas.sh | 11 ++++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 5cbf6638c6..ef1d55eb05 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -185,12 +185,16 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). -if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then +if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then output_times_crnt_day=(${output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is @@ -208,9 +212,10 @@ fi num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast APCP output times fall in the current day (including -the 0th hour of the next day). Thus, there is no need to retrieve any -obs files." 
+None of the forecast APCP output times fall within the day (including the +0th hour of the next day) associated with the current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 23896bbf38..92fc24fa56 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -95,15 +95,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index d98f390c8b..441de7b31d 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -69,15 +69,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi From 609b4e99efbc06463329cb3d8348219c7843fc5c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:02:09 -0600 Subject: [PATCH 035/208] Change paths to archive files to make retrieve_data.py work with new get_obs_...sh scripts. 
--- parm/data_locations.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index dd3b5ddd17..a3712a1972 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -305,10 +305,7 @@ CCPA_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_internal_dir: - - "./00" - - "./06" - - "./12" - - "./18" + - "./{hh}" archive_file_names: - "com2_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" - "gpfs_dell1_nco_ops_com_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" @@ -316,7 +313,7 @@ CCPA_obs: - "com_ccpa_v4.2_ccpa.{yyyy}{mm}{dd}.tar" file_names: obs: - - "ccpa.t{hh}z.01h.hrap.conus.gb2" + - "ccpa.t*z.01h.hrap.conus.gb2" MRMS_obs: hpss: From ed6b6771aa105cd9df5f1cc89acef02934e79dd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:21:19 -0600 Subject: [PATCH 036/208] Move most of the code for getting obs files out of the ex-script exregional_get_verif_obs.sh since those are now in the ush/get_obs_[ccpa|mrms|ndas|nohrsc].sh scripts. --- scripts/exregional_get_verif_obs.sh | 912 +--------------------------- 1 file changed, 19 insertions(+), 893 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 314273ba93..a055fc0bc4 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -18,7 +18,6 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} #----------------------------------------------------------------------- # { save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 -set -x # #----------------------------------------------------------------------- # @@ -108,908 +107,35 @@ set -x # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
- -#----------------------------------------------------------------------- -# Create and enter top-level obs directory (so temporary data from HPSS won't collide with other tasks) -mkdir -p ${OBS_DIR} -cd ${OBS_DIR} - -# Set log file for retrieving obs -logfile=retrieve_data.log - -# PDY and cyc are defined in rocoto XML...they are the yyyymmdd and hh for initial forecast hour respectively -iyyyy=$(echo ${PDY} | cut -c1-4) -imm=$(echo ${PDY} | cut -c5-6) -idd=$(echo ${PDY} | cut -c7-8) -ihh=${cyc} - -echo -echo "HELLO AAAAAAAAAAA" -iyyyymmddhh=${PDY}${cyc} -echo "iyyyymmddhh = ${iyyyymmddhh}" - -# Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format -unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" - -# This awk expression gets the last item of the list $FHR -fcst_length=$(echo ${FHR} | awk '{ print $NF }') - -echo -echo "BYE 00000000" -vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) -if [[ ${OBTYPE} == "NDAS" ]]; then -echo "BYE 111111111" - vhh_last=$(echo ${vdate_last} | cut -c9-10) - #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) - hours_to_add=$(( 6 - (vhh_last % 6) )) - fcst_length_rounded_up=$(( fcst_length + hours_to_add )) -# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) -# fcst_length=${fcst_length_rounded_up} -fi - -# Make sure fcst_length isn't octal (leading zero) -fcst_length=$((10#${fcst_length})) - -processed_fp_list=() -current_fcst=0 -while [[ ${current_fcst} -le ${fcst_length} ]]; do - -echo -echo "HELLO BBBBBBBBBBB" -echo "current_fcst = ${current_fcst}" - - # Calculate valid date info using date utility - vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") - vyyyymmdd=$(echo ${vdate} | cut -c1-8) - vhh=$(echo ${vdate} | cut -c9-10) -echo -echo "BYE 222222222" -echo "vhh = ${vhh}" - - # Calculate valid date + 1 day; this is needed because some obs files - # are stored in the *next* day's 00h directory - vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) - -echo -echo "HELLO CCCCCCCCCC" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" -echo "ihh = ${ihh}" - - #remove leading zero again, this time keep original - vhh_noZero=$((10#${vhh})) -# -#----------------------------------------------------------------------- -# -# Retrieve CCPA observations. -# -#----------------------------------------------------------------------- -# - if [[ ${OBTYPE} == "CCPA" ]]; then - - # CCPA is accumulation observations. We do not need to retrieve any - # observed accumulations at forecast hour 0 because there aren't yet - # any accumulations in the forecast(s) to compare it to. - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi - - # CCPA accumulation period to consider. Here, we only retrieve data for - # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) - # are obtained elsewhere in the workflow by adding up these 01h accumulations. - accum=01 - - # Base directory in which the daily subdirectories containing the CCPA - # grib2 files will appear after this script is done, and the daily such - # subdirectory for the current valid time (year, month, and day). 
We - # refer to these as the "processed" base and daily subdirectories because - # they contain the final files after all processing by this script is - # complete. - ccpa_basedir_proc=${OBS_DIR} - ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" - # Make sure these directories exist. - mkdir -p ${ccpa_day_dir_proc} - - # Name of the grib2 file to extract from the archive (tar) file as well - # as the name of the processed grib2 file. - ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - - # Full path to the location of the processed CCPA grib2 file for the - # current valid time. Note that this path includes the valid date (year, - # month, and day) information in the name of a subdirectory and the valid - # hour-of-day in the name of the file. - ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${ccpa_fp_proc}) - fi - - # Check if the CCPA grib2 file for the current valid time already exists - # at its procedded location on disk. If so, skip and go to the next valid - # time. If not, pull it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # - #----------------------------------------------------------------------- - # - # Below, we will use the retrieve_data.py script to retrieve the CCPA - # grib2 file from a data store (e.g. HPSS). Before doing so, note the - # following: - # - # * The daily archive (tar) file containing CCPA obs has a name of the - # form - # - # [PREFIX].YYYYMMDD.tar - # - # where YYYYMMDD is a given year, month, and day combination, and - # [PREFIX] is a string that is not relevant to the discussion here - # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values - # until it finds one for which a tar file exists). Unintuitively, this - # archive file contains accumulation data for valid times starting at - # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current - # day (YYYYMMDD). In other words, the valid times of the contents of - # this archive file are shifted back by 6 hours relative to the time - # string appearing in the name of the file. See section "DETAILS..." - # for a detailed description of the directory structure in the CCPA - # archive files. - # - # * We call retrieve_data.py in a temporary cycle-specific subdirectory - # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA - # base directory because it contains files as they are found in the - # archives before any processing by this script. - # - # * In each (cycle-specific) raw base directory, the data is arranged in - # daily subdirectories with the same timing as in the archive (tar) - # files (which are described in the section "DETAILS..." below). In - # particular, each daily subdirectory has the form YYYYMDD, and it may - # contain CCPA grib2 files for accumulations valid at hour 19 of the - # previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). 
- # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the - # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer - # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base - # directory (ccpa_basedir_proc). - # - # * For a given cycle, some of the valid times at which there is forecast - # output may not have a corresponding file under the raw base directory - # for that cycle. This is because another cycle that overlaps this cycle - # has already obtained the grib2 CCPA file for that valid time and placed - # it in its processed location; as a result, the retrieveal of that grib2 - # file for this cycle is skipped. - # - # * To obtain a more intuitive temporal arrangement of the data in the - # processed CCPA directory structure than the temporal arrangement used - # in the archives and raw directories, we process the raw files such - # that the data in the processed directory structure is shifted forward - # in time 6 hours relative to the data in the archives and raw directories. - # This results in a processed base directory that, like the raw base - # directory, also contains daily subdirectories of the form YYYYMMDD, - # but each such subdirectory may only contain CCPA data at valid hours - # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but - # may not contain data that is valid on the previous, next, or any other - # day). - # - # * For data between 20180718 and 20210504, the 01h accumulation data - # (which is the only accumulation we are retrieving) have incorrect - # metadata under the "00" directory in the archive files (meaning for - # hour 00 and hours 19-23, which are the ones in the "00" directory). - # Below, we use wgrib2 to make a correction for this when transferring - # (moving or copying) grib2 files from the raw daily directories to - # the processed daily directories. - # - # - # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES - # ---------------------------------------------------------- - # - # The daily archive file containing CCPA obs is named - # - # [PREFIX].YYYYMMDD.tar - # - # This file contains accumulation data for valid times starting at hour - # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day - # (YYYYMMDD). In particular, when untarred, the daily archive file - # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and - # 18 subdirectories contain grib2 files for accumulations valid at or - # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). - # For example, the 06 directory contains data valid at: - # - # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; - # * YYYYMMDD[03, 06] for 03h accumulations; - # * YYYYMMDD[06] for 06h accumulations. - # - # The valid times for the data in the 12 and 18 subdirectories are - # analogous. However, the 00 subdirectory is different in that it - # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE - # this time, i.e. the data for valid times other than YYYYMMDD00 are on - # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at - # (note the DD-1, meaning one day prior): - # - # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; - # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; - # * YYYYMMDD00 for 06h accumulations. 
- # - #----------------------------------------------------------------------- - # - - # Set parameters for retrieving CCPA data using retrieve_data.py. - # Definitions: - # - # valid_time: - # The valid time in the name of the archive (tar) file from which data - # will be pulled. Due to the way the data is arranged in the CCPA archive - # files (as described above), for valid hours 19 to 23 of the current day, - # this must be set to the corresponding valid time on the NEXT day. - # - # ccpa_basedir_raw: - # Raw base directory that will contain the raw daily subdirectory in which - # the retrieved CCPA grib2 file will be placed. Note that this must be - # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) - # to avoid get_obs_ccpa workflow tasks for other cycles writing to the - # same directories/files. Note also that this doesn't have to depend on - # the current valid hour (0-18 vs. 19-23), but for clarity and ease of - # debugging, here we do make it valid-hour-dependent. - # - # ccpa_day_dir_raw: - # Raw daily subdirectory under the raw base directory. This is dependent - # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) - # in order to maintain the same data timing arrangement in the raw daily - # directories as in the archive files. - # - if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then - valid_time=${vyyyymmdd}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - valid_time=${vyyyymmdd_p1d}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" - fi - mkdir -p ${ccpa_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ccpa tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the CCPA tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${ccpa_basedir_raw} - - # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file - # corresponding to the current valid time (valid_time). - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${ccpa_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS. - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw file to its processed location, but for valid - # times between 20180718 and 20210504, it involves using wgrib2 to correct - # an error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. - # - # Since this script is part of a workflow, another get_obs_ccpa task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. 
For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ccpa - # task, don't bother to recreate it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." - echo "NOT moving or copying file from its raw location to its processed location." - - else - - # Full path to the CCPA file that was pulled and extracted above and - # placed in the raw directory. - ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - - #mv_or_cp="mv" - mv_or_cp="cp" - if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s - else - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - fi - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve MRMS observations. -# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "MRMS" ]]; then - - # Base directory in which the daily subdirectories containing the MRMS - # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # be located after this script is done, and the daily such subdirectory - # for the current valid time (year, month, and day). We refer to these - # as the "processed" base and daily subdirectories because they contain - # the final files after all processing by this script is complete. - mrms_basedir_proc=${OBS_DIR} - mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - - # Loop over the fields (REFC and RETOP). - for field in ${VAR[@]}; do - - # Set field-dependent parameters needed in forming grib2 file names. - if [ "${field}" = "REFC" ]; then - file_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - file_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - - # Name of the MRMS grib2 file for the current field and valid time that - # will appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is generally - # not the name of the gzipped grib2 files that may be retrieved below - # from archive files using the retrieve_data.py script. - mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - - # Full path to the processed MRMS grib2 file for the current field and - # valid time. - mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${mrms_fp_proc}) - fi - - # Check if the processed MRMS grib2 file for the current field and valid - # time already exists on disk. If so, skip this valid time and go to the - # next one. If not, pull it. 
- if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - - # Base directory that will contain the daily subdirectories in which the - # gzipped MRMS grib2 files retrieved from archive files will be placed, - # and the daily subdirectory for the current valid year, month, and day. - # We refer to these as the "raw" MRMS base and daily directories because - # they contain files as they are found in the archives before any processing - # by this script. - # - # Note that the name of the raw base directory depends on (contains) the - # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh) - # in order to avoid having get_obs_mrms tasks from other cycles clobbering - # the output from this one. It is also possible to make the name of this - # directory name depend instead on the cycle, but that turns out to cause - # an inefficiency in that get_obs_mrms tasks for different cycles will - # not be able to detect that another cycle has already retrieved the data - # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" - mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it - # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP - # and for all times (hours, minutes, and seconds) in the current valid - # day -- have already been or are in the process of being retrieved from - # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily - # directory. - # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${mrms_day_dir_raw}" ]]; then - - echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" - echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." - - else - - mkdir -p ${mrms_day_dir_raw} - valid_time=${vyyyymmdd}${vhh} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_mrms tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${mrms_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 - # files -- i.e. 
for both REFC and RETOP and for all times (hours, minutes, - # and seconds) in the current valid day -- and place them in the raw daily - # directory. Note that this will pull both the REFC and RETOP files in - # one call. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${mrms_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the current valid day. - touch ${mrms_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or for another cycle) has completed - # by checking for the existence of the flag file that marks completion. - # If not, keep checking until the flag file shows up. - while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_mrms task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_mrms - # task, don't bother to recreate it. - if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - # Search the raw daily directory for the current valid day to find the - # gizipped MRMS grib2 file whose time stamp (in the file name) is closest - # to the current valid day and hour. Then unzip that file and copy it - # to the processed daily directory, in the process renaming it to replace - # the minutes and hours in the file name with "0000". - valid_time=${vyyyymmdd}${vhh} - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${valid_time} \ - --outdir ${mrms_basedir_proc} \ - --source ${mrms_basedir_raw} \ - --product ${file_base_name} - - fi - - fi - - done -# -#----------------------------------------------------------------------- -# -# Retrieve NDAS observations. -# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "NDAS" ]]; then - - # Calculate valid date plus 1 hour. This is needed because we need to - # check whether this date corresponds to one of the valid hours-of-day - # 00, 06, 12, and 18 on which the NDAS archives are provided. 
- unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") - vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) - vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) - vhh_p1h_noZero=$((10#${vhh_p1h})) - -echo "" -echo "HELLO DDDDDDDDDDD" -echo "vdate = ${vdate}" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vhh = ${vhh}" -echo "vhh_noZero = ${vhh_noZero}" -echo "vdate = ${vdate}" -echo "vdate_p1h = ${vdate_p1h}" - - # Base directory in which the hourly NDAS prepbufr files will be located. - # We refer to this as the "processed" base directory because it contains - # the final files after all processing by this script is complete. - ndas_basedir_proc=${OBS_DIR} - - # Name of the NDAS prepbufr file for the current valid time that will - # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is not the - # same as the name of the raw file, i.e. the file extracted from the - # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vdate}" - - # Full path to the processed NDAS prepbufr file for the current field and - # valid time. - ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" - - # Store the full path to the processed file in a list for later use. -echo -echo "EEEEEEEEEEEEEE" - if [ ${vdate} -le ${vdate_last} ]; then -echo "FFFFFFFFFFFFFF" -echo "processed_fp_list = |${processed_fp_list[@]}" - processed_fp_list+=(${ndas_fp_proc}) - fi - - # Check if the processed NDAS prepbufr file for the current valid time - # already exists on disk. If so, skip this valid time and go to the next - # one. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: - # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr - # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, - # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. - # This means that every six hours we have two obs files valid for the same time: - # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr - # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even - # though the earlier files are larger, this is because the time window is larger) - - # Whether to move or copy extracted files from the raw directories to their - # final locations. - #mv_or_cp="mv" - mv_or_cp="cp" - -echo "" -echo "HELLO GGGGGGGGGGGGG" -echo "vhh_noZero = ${vhh_noZero}" -echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - - # Due to the way NDAS archives are organized, we can only retrieve the - # archive (tar) file containing data for the current valid hour (and the - # 5 hours preceeding it) if the hour-of-day corresponding to the current - # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. 
- if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ - ${current_fcst} -eq ${fcst_length} ]]; then - - if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then - unix_vdate_archive="${unix_vdate_p1h}" - vdate_archive="${vdate_p1h}" - vyyyymmdd_archive="${vyyyymmdd_p1h}" - vhh_archive=${vhh_p1h} - elif [[ ${current_fcst} -eq ${fcst_length} ]]; then - hours_to_archive=$(( 6 - (vhh % 6) )) - unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") - vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) - vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) - vhh_archive=$(echo ${vdate_archive} | cut -c9-10) - fi - - # Base directory that will contain the 6-hourly subdirectories in which - # the NDAS prepbufr files retrieved from archive files will be placed, - # and the 6-hourly subdirectory for the current valid time plus 1 hour. - # We refer to these as the "raw" NDAS base and 6-hourly directories - # because they contain files as they are found in the archives before - # any processing by this script. - ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" - - # Check if the raw 6-hourly directory already exists on disk. If so, it - # means the NDAS prepbufr files for the current valid hour and the 5 hours - # preceeding it have already been or are in the process of being retrieved - # from the archive (tar) files. If so, skip the retrieval process. If - # not, proceed to retrieve the archive file, extract the prepbufr files - # from it, and place them in the raw daily directory. - # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${ndas_day_dir_raw}" ]]; then - - print_info_msg " -${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: - ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vdate}) and the -5 hours preceeding it have been or are being retrieved by a get_obs_ndas -workflow task for another cycle. Thus, we will NOT attempt to retrieve -NDAS data for the current valid time from remote locations." - - else - - mkdir -p ${ndas_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ndas tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the NDAS prepbufr files the archive into the directory it was - # called from, which is the working directory of this script right before - # retrieve_data.py is called. - cd ${ndas_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files - # for the current valid hour and the 5 hours preceeding it and place them - # in the raw 6-hourly directory. 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_archive} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_archive. - touch ${ndas_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the 6-hour interval ending in - # vdate_archive (which may have been executed above for this cycle or for - # another cycle) has completed by checking for the existence of the flag - # file that marks completion. If not, keep checking until the flag file - # shows up. - while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_archive} ..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_ndas task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ndas - # task, don't bother to recreate it. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - # Create the processed NDAS prepbufr files for the current valid hour as - # well as the preceeding 5 hours (or fewer if they're outside the time - # interval of the forecast) by copying or moving (and in the process - # renaming) them from the raw 6-hourly directory. In the following loop, - # "tm" means "time minus". Note that the tm06 files contain more/better - # observations than tm00 for the equivalent time. - for tm in $(seq 6 -1 1); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) - if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} - fi - done - - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve NOHRSC observations. 
# #----------------------------------------------------------------------- # - elif [[ ${OBTYPE} == "NOHRSC" ]]; then - - #NOHRSC is accumulation observations, so none to retrieve for hour zero - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) - continue - fi - - # Reorganized NOHRSC location (no need for raw data dir) - nohrsc_proc=${OBS_DIR} - - nohrsc06h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_06h_${vyyyymmdd}${vhh}_grid184.grb2" - nohrsc24h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_24h_${vyyyymmdd}${vhh}_grid184.grb2" - retrieve=0 - # If 24-hour files should be available (at 00z and 12z) then look for both files - # Otherwise just look for 6hr file - if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then - retrieve=1 - echo "${OBTYPE} files do not exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} files exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - fi - elif (( ${current_fcst} % 6 == 0 )) ; then - if [[ ! -f "${nohrsc06h_file}" ]]; then - retrieve=1 - echo "${OBTYPE} file does not exist on disk:" - echo "${nohrsc06h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} file exists on disk:" - echo "${nohrsc06h_file}" - fi - fi - if [ $retrieve == 1 ]; then - if [[ ! -d "$nohrsc_proc/${vyyyymmdd}" ]]; then - mkdir -p $nohrsc_proc/${vyyyymmdd} - fi - - # Pull NOHRSC data from HPSS; script will retrieve all files so only call once - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path $nohrsc_proc/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NOHRSC data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} +if [[ ${OBTYPE} == "CCPA" ]]; then + $USHdir/get_obs_ccpa.sh +elif [[ ${OBTYPE} == "MRMS" ]]; then + $USHdir/get_obs_mrms.sh +elif [[ ${OBTYPE} == "NDAS" ]]; then + $USHdir/get_obs_ndas.sh +elif [[ ${OBTYPE} == "NOHRSC" ]]; then + $USHdir/get_obs_nohrsc.sh +else + print_err_msg_exit "\ +Invalid OBTYPE specified for script: + OBTYPE = \"${OBTYPE}\" +Valid options are CCPA, MRMS, NDAS, and NOHRSC. " - # 6-hour forecast needs to be renamed - mv $nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_6h_${vyyyymmdd}${vhh}_grid184.grb2 ${nohrsc06h_file} - fi - - else - print_err_msg_exit "\ - Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC - " - fi # Increment to next forecast hour - - # Increment to next forecast hour - echo "Finished fcst hr=${current_fcst}" - current_fcst=$((${current_fcst} + 1)) - -done -echo "SSSSSSSSSSSSSSSS" -# -#----------------------------------------------------------------------- -# -# At this point, the processed data files for all output forecast hours -# for this cycle are either being created (by a get_obs_... task for -# another cycle) or have already been created (either by this get_obs_... -# task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. -# If we don't do this, it is possible for this get_obs_... 
task to complete
-# successfully but still have processed obs files for some forecast hours
-# not yet created, which is undesirable.
-#
-#-----------------------------------------------------------------------
-#
-echo "HHHHHHHHHHHHHHHH"
-echo "processed_fp_list = |${processed_fp_list[@]}"
-num_proc_files=${#processed_fp_list[@]}
-echo "num_proc_files = ${num_proc_files}"
-for (( i=0; i<${num_proc_files}; i++ )); do
- obs_fp="${processed_fp_list[$i]}"
- while [[ ! -f "${obs_fp}" ]]; do
- echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):"
- echo " obs_fp = \"${obs_fp}\""
- sleep 5s
- done
-done
#
#-----------------------------------------------------------------------
#
-# Clean up raw directories.
+# Create flag file that indicates completion of task. This is needed by
+# the workflow.
#
#-----------------------------------------------------------------------
#
-#remove_raw="TRUE"
-remove_raw="FALSE"
-if [ "${remove_raw}" = "TRUE" ]; then
- rm -rf ${OBS_DIR}/raw_*
-fi
+obtype=$(echo_lowercase ${OBTYPE})
+mkdir -p ${WFLOW_FLAG_FILES_DIR}
+touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt"
#
#-----------------------------------------------------------------------
#
From 2ee3a46d525bb25eda4137f36df91d516fe790d5 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 17:49:09 -0600
Subject: [PATCH 037/208] Add ceil.sh to the set of scripts/functions that are sourced by source_util_funcs.sh.
---
 ush/source_util_funcs.sh | 9 +++++++++
 1 file changed, 9 insertions(+)
diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh
index 7fe3025d6a..3884793fea 100644
--- a/ush/source_util_funcs.sh
+++ b/ush/source_util_funcs.sh
@@ -96,6 +96,15 @@ function source_util_funcs() {
#
#-----------------------------------------------------------------------
#
+# Source the file containing the function that returns the ceiling of
+# the quotient of two positive integers.
+#
+#-----------------------------------------------------------------------
+#
+ . ${bashutils_dir}/ceil.sh
+#
+#-----------------------------------------------------------------------
+#
# Source the file containing the functions that will echo given strings
# as uppercase or lowercase
#
From d60f6511d4b221fc98edd8473374c4cb449cd58e Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 17:55:54 -0600
Subject: [PATCH 038/208] Changes to reflect the fact that the output of PcpCombine_obs tasks now goes into a cycle-based subdirectory (under the experiment directory), as opposed to an obs-day-based subdirectory.
--- ...onal_run_met_genensprod_or_ensemblestat.sh | 2 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 9 +++--- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 28 ++++--------------- 4 files changed, 13 insertions(+), 28 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 5034369851..24f12be786 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -131,7 +131,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e54dd7b553..f833ee534c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -132,6 +132,7 @@ time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # vx_fcst_input_basedir=$( eval echo "${VX_FCST_INPUT_BASEDIR}" ) vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) + ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then @@ -162,15 +163,15 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") @@ -198,7 +199,7 @@ fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_INPUT_FN_TEMPLATE} ) -OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" +OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}" STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 5952ed3785..23bafe91fd 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - 
OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 20ae1a9794..ee999d1de3 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -163,6 +163,9 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then slash_ensmem_subdir_or_null="" fi fi +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + slash_cdate_or_null="/${CDATE}" + slash_ensmem_subdir_or_null="/obs" fi OBS_INPUT_DIR="" @@ -175,7 +178,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_fcst" OUTPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -185,7 +188,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -212,28 +215,9 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi -# If processing obs, then for all cylces except the last one, calculate -# a "forecast length" that will hours up to but not including the initial -# (zeroth) hour of the next cycle. For the last cycle, take the "forecast -# length" of the obs to be the same as that of the forecast for the cycle. -# This ensures that the PcpCombine_obs tasks for different cycles do not -# overwrite or clobber output from another cycle (because with this -# approach, the valid times on which the current PcpCombine_obs task is -# operating is distinct from the ones for the PcpCombine_obs tasks for -# every other cycle). -fcst_len_hrs="${FCST_LEN_HRS}" -if [ "${FCST_OR_OBS}" = "OBS" ]; then - yyyymmddhhmn="${PDY}${cyc}00" - if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ - [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then - output_incr_hrs="1" - fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) - fi -fi - set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${fcst_len_hrs}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 009666f994ddb61c81f95988dd02574850248a53 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:59:08 -0600 Subject: [PATCH 039/208] Simplify names of some variables; add variable for where workflow task flag files should be placed; fix template for output files from pcpcombine_obs tasks. 
--- ush/config_defaults.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e564444b49..9ac5bc39f8 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -335,10 +335,10 @@ platform: # #----------------------------------------------------------------------- # - REMOVE_RAW_OBS_DIRS_CCPA: true - REMOVE_RAW_OBS_DIRS_MRMS: true - REMOVE_RAW_OBS_DIRS_NDAS: true - REMOVE_RAW_OBS_DIRS_NOHRSC: true + REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_MRMS: true + REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_NOHRSC: true # #----------------------------------------------------------------------- # @@ -547,12 +547,17 @@ workflow: # default will point to: # # EXPTDIR: "${EXPT_BASEDIR}/${EXPT_SUBDIR}" + # + # WFLOW_FLAG_FILES_DIR: + # Directory in which flag files marking completion of various workflow + # tasks can be placed. #----------------------------------------------------------------------- # EXPT_BASEDIR: '' # This will be set in setup.py prior to extend_yaml() being called EXPT_SUBDIR: 'experiment' EXEC_SUBDIR: "exec" EXPTDIR: '{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}' + WFLOW_FLAG_FILES_DIR: '{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}' # #----------------------------------------------------------------------- # @@ -1726,7 +1731,7 @@ task_run_fcst: #----------------------------------------------------------------------- # # KMP_AFFINITY_*: - # From Intel: "The Intel® runtime library has the ability to bind OpenMP + # From Intel: "The Intel runtime library has the ability to bind OpenMP # threads to physical processing units. The interface is controlled using # the KMP_AFFINITY environment variable. Depending on the system (machine) # topology, application, and operating system, thread affinity can have a @@ -2464,7 +2469,7 @@ verification: # METplus Pb2nc tool on NDAS observations. (These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From 1622c0e45493ad5403501c32c1109ce2d259bc04 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:01:59 -0600 Subject: [PATCH 040/208] Add file accidentally left out of commit two hashes ago. 
--- scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
index f08c002d5f..e769a194fc 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
@@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then
 case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in
 "APCP"*)
- OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs"
+ OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs"
 OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}"
 ;;
 "ASNOW"*)
From 9a26289c08d854cecd64b45f4e97207d1b4ca7b0 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 18:05:21 -0600
Subject: [PATCH 041/208] Modifications to exregional_run_met_pb2nc_obs.sh so that the corresponding task is day-based (i.e. it is run for each day for which obs are needed). The script now also creates a flag file indicating it completed successfully (needed for the workflow to work properly).
---
 scripts/exregional_run_met_pb2nc_obs.sh | 105 +++++++++++++++++++++---
 1 file changed, 94 insertions(+), 11 deletions(-)
diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh
index 985cd33c7f..c5f007f7c0 100755
--- a/scripts/exregional_run_met_pb2nc_obs.sh
+++ b/scripts/exregional_run_met_pb2nc_obs.sh
@@ -71,6 +71,43 @@ to convert NDAS prep buffer observation files to NetCDF format.
#
#-----------------------------------------------------------------------
#
+#
+#
+#-----------------------------------------------------------------------
+#
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO XML.
+yyyymmdd_task=${PDY}
+
+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to a task's script. To have an array-valued variable to
+# work with, here, we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL.
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))
+
+# List of times (each of the form YYYYMMDDHH) for which there is forecast
+# output for the current day. We extract this list from the full list of
+# all forecast output times (i.e. from all cycles).
+output_times_crnt_day=()
+if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then
+ output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") )
+fi
+
+num_output_times_crnt_day=${#output_times_crnt_day[@]}
+if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
+ print_info_msg "
+None of the forecast output times fall within the day associated with the
+current task (yyyymmdd_task):
+ yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files."
+ exit
+fi
+#
+#-----------------------------------------------------------------------
+#
# Get the cycle date and time in YYYYMMDDHH format.
#
#-----------------------------------------------------------------------
@@ -123,16 +160,52 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs"
#
#-----------------------------------------------------------------------
#
-set_vx_fhr_list \
- cdate="${CDATE}" \
- fcst_len_hrs="${FCST_LEN_HRS}" \
- field="$VAR" \
- accum_hh="${ACCUM_HH}" \
- base_dir="${OBS_INPUT_DIR}" \
- fn_template="${OBS_INPUT_FN_TEMPLATE}" \
- check_accum_contrib_files="FALSE" \
- num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \
- outvarname_fhr_list="FHR_LIST"
+FHR_LIST=""
+num_missing_files=0
+for yyyymmddhh in ${output_times_crnt_day[@]}; do
+ yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
+ hh=$(echo ${yyyymmddhh} | cut -c9-10)
+ fn="prepbufr.ndas.${yyyymmddhh}"
+ fp="${OBS_INPUT_DIR}/${fn}"
+ if [[ -f "${fp}" ]]; then
+ print_info_msg "
+Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh):
+ yyyymmddhh = \"${yyyymmddhh}\"
+ fp = \"${fp}\"
+"
+ hh_noZero=$((10#${hh}))
+ #FHR_LIST+=("${yyyymmddhh}")
+ FHR_LIST="${FHR_LIST},${hh_noZero}"
+ else
+ num_missing_files=$((num_missing_files+1))
+ print_info_msg "
+${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does
+not exist on disk:
+ yyyymmddhh = \"${yyyymmddhh}\"
+ fp = \"${fp}\"
+Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}.
+"
+ fi
+done
+
+# If the number of missing files is greater than the maximum allowed
+# (specified by NUM_MISSING_OBS_FILES_MAX), print out an error message and
+# exit.
+if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then
+ print_err_msg_exit "\
+The number of missing ${OBTYPE} obs files (num_missing_files) is greater
+than the maximum allowed number (NUM_MISSING_OBS_FILES_MAX):
+ num_missing_files = ${num_missing_files}
+ NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}"
+fi
+
+# Remove leading comma from FHR_LIST.
+FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" )
+print_info_msg "$VERBOSE" "\
+Final (i.e. after filtering for missing files) set of forecast hours
+(saved in a scalar string variable) is:
+ FHR_LIST = \"${FHR_LIST}\"
+"
#
#-----------------------------------------------------------------------
#
@@ -282,7 +355,7 @@ uw template render \
 -o ${metplus_config_fp} \
 --verbose \
 --values-file "${tmpfile}" \
- --search-path "/"
+ --search-path "/"
 err=$?
 rm $tmpfile
@@ -315,6 +388,16 @@ METplus configuration file used is:
#
#-----------------------------------------------------------------------
#
+# Create flag file that indicates completion of task. This is needed by
+# the workflow.
+#
+#-----------------------------------------------------------------------
+#
+mkdir -p ${WFLOW_FLAG_FILES_DIR}
+touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt"
+#
+#-----------------------------------------------------------------------
+#
# Print message indicating successful completion of script.
#
#-----------------------------------------------------------------------
From e8a6f7dcfb4f81105cd9df5f1cc89acef02934e79dd7 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 18:15:36 -0600
Subject: [PATCH 042/208] Move sections of WE2E yaml files around to be consistent with the order in config_defaults.yaml.
--- .../config.MET_ensemble_verification_only_vx.yaml | 6 +++--- ...config.MET_ensemble_verification_only_vx_time_lag.yaml | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml index 812e805645..80b2e3099f 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml @@ -8,9 +8,6 @@ metadata: user: RUN_ENVIR: community -nco: - NET_default: rrfs - workflow: PREDEF_GRID_NAME: RRFS_CONUS_25km DATE_FIRST_CYCL: '2019061500' @@ -18,6 +15,9 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: rrfs + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index f7d82cb8cd..d0edccca01 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -16,18 +16,20 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: 'RRFSE_CONUS' + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' -nco: - NET_default: 'RRFSE_CONUS' - global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' +# If the following is commented out, then the obs files staged on each +# platform will be (found and) used. platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' From a3c8c4d34676e8410a0bedc27c2a8b201f4faf5a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:17:00 -0600 Subject: [PATCH 043/208] Changes to workflow yaml files for vx portion of the workflow in order to have cycledefs for obs-day-based tasks that include only the obs days for which obs are actually needed but not on any days (inbetween the first and last obs days over all cycles) for which obs are not needed. --- parm/wflow/verify_det.yaml | 64 +++++++++++++++++++-------------- parm/wflow/verify_ens.yaml | 74 +++++++++++++++++++++----------------- parm/wflow/verify_pre.yaml | 60 ++++++++++++++++++++----------- 3 files changed, 118 insertions(+), 80 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 35358c9b67..89f614b4e8 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,31 +47,9 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 @@ -163,9 +157,25 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any PointStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 17086c6bc6..0893248863 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,31 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -175,9 +169,25 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any EnsembleStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -254,9 +264,8 @@ metatask_GridStat_MRMS_ensprob: FCST_THRESH: 'all' dependency: and: - taskdep_get_obs_mrms: - attrs: - task: get_obs_mrms + datadep_all_get_obs_mrms_complete: + <<: *all_get_obs_mrms_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -282,9 +291,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs + datadep_all_pb2nc_obs_complete: + <<: *all_pb2nc_obs_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 2357c6bc5e..13815a9752 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -23,14 +23,19 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre + attrs: + cycledefs: cycledef_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_CCPA }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: &output_times_all_cumul + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 
'platform.REMOVE_RAW_OBS_CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -43,8 +48,8 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_cumul + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -57,9 +62,12 @@ task_get_obs_mrms: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' - VAR: 'REFC RETOP' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + MRMS_FIELDS: 'REFC RETOP' + OUTPUT_TIMES_ALL: &output_times_all_inst + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -72,8 +80,8 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_inst + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -82,7 +90,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: @@ -92,6 +100,7 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' + OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,14 +136,25 @@ metatask_PcpCombine_obs: and: datadep: text: "&CCPA_OBS_DIR;" - or: - not: - taskvalid: - attrs: - task: get_obs_ccpa - taskdep: - attrs: - task: get_obs_ccpa + datadep_all_get_obs_ccpa_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_ccpa tasks + # are complete are all present before launching any PcpCombine task. 
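All of these dependency blocks follow the same flag-file pattern: there is one "..._complete.txt" flag file per obs day, and the dependency is met only when every one of those files is present. A minimal sketch of the paths such a check runs over, with made-up values for the flag-file directory and the obs days (the real ones come from workflow.WFLOW_FLAG_FILES_DIR and workflow.OBS_DAYS_ALL_CYCLES_CUMUL), is:

# Made-up example inputs; the Jinja template below gets the real values
# from the workflow configuration.
wflow_flag_files_dir = "/path/to/exptdir/wflow_flag_files"
obs_days_all_cycles_cumul = ["20240429", "20240430"]

# One completion flag file per obs day; the dependency is satisfied only
# when a check on each of these files passes.
flag_files = [
    f"{wflow_flag_files_dir}/get_obs_ccpa_{yyyymmdd}_complete.txt"
    for yyyymmdd in obs_days_all_cycles_cumul
]
print("\n".join(flag_files))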
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' metatask_check_post_output_all_mems: var: From c774e40cc4d9f3f48fee2ef49a01a6bae0b47e21 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 12:29:06 -0600 Subject: [PATCH 044/208] Add code to be able to generate forecast output times and obs days, both for instantaneous and cumulative fields, and pass this info to the ROCOTO xml generation system. --- ush/set_cycle_dates.py | 268 +++++++++++++++++++++++++++++++++++++++-- ush/setup.py | 44 ++++++- 2 files changed, 298 insertions(+), 14 deletions(-) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_dates.py index 0c63a87e49..c3969d8ef3 100644 --- a/ush/set_cycle_dates.py +++ b/ush/set_cycle_dates.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 from datetime import datetime, timedelta, date - +from pprint import pprint from python_utils import print_input_args, print_err_msg_exit - -def set_cycle_dates(date_start, date_end, incr_cycl_freq): +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): """This file defines a function that, given the start and end dates as date time objects, and a cycling frequency, returns an array of cycle date-hours whose elements have the form YYYYMMDDHH. Here, @@ -13,22 +12,267 @@ def set_cycle_dates(date_start, date_end, incr_cycl_freq): two-digit day of the month, and HH is a two-digit hour of the day. Args: - date_start: start date, datetime object - date_end: end date, datetime object - incr_cycl_freq: cycle frequency increment in hours, an int + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. + Returns: - A list of dates in a format YYYYMMDDHH + A list of strings containing cycle starting times in the format + 'YYYYMMDDHH' """ print_input_args(locals()) - freq_delta = timedelta(hours=incr_cycl_freq) - # iterate over cycles all_cdates = [] - cdate = date_start - while cdate <= date_end: + cdate = start_time_first_cycl + while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += freq_delta + cdate += cycl_intvl return all_cdates + + +def set_fcst_output_times_and_obs_days_all_cycles( + start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl): + """Given the starting time of the first cycle of an SRW App experiment, the + starting time of the last cycle, the interval between cycle start times, + the forecast length, and the forecast output interval, this function + returns two pairs of lists: the first of each pair is a list of strings + of forecast output times over all cycles (each element of the form + 'YYYYMMDDHH'), and the second is a list of days over all cycles on which + observations are needed to perform verification (each element of the form + 'YYYYMMDD'). 
The first pair of lists is for instantaneous output fields + (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. + APCP or accumulated precipitation). + + Args: + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. + + fcst_len: + The length of each forecast; a timedelta object. + + fcst_output_intvl: + Time interval between forecast output times; a timedelta object. + + Returns: + output_times_all_cycles_inst: + List of forecast output times over all cycles of instantaneous fields. + Each element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_inst: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of instantaneous fields. Each + element is a string of the form 'YYYYMMDD'. + + output_times_all_cycles_cumul: + List of forecast output times over all cycles of cumulative fields. Each + element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_cumul: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of cumulative fields. Each element + is a string of the form 'YYYYMMDD'. + + """ + + # Get the list containing the starting times of the cycles. Each element + # of the list is a string of the form 'YYYYMMDDHH'. + cycle_start_times_str \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl) + + # Convert cycle_start_times_str to a list of datetime objects. + cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] + + # Get the number of forecast output times per cycle/forecast. + num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + + # Initialize sets that will contain the various forecast output and obs + # day information. + output_times_all_cycles_inst = set() + obs_days_all_cycles_inst = set() + output_times_all_cycles_cumul = set() + obs_days_all_cycles_cumul = set() + + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + # Create a list of forecast output times of instantaneous fields for the + # current cycle. + output_times_crnt_cycle_inst \ + = [start_time_crnt_cycle + i*fcst_output_intvl + for i in range(0,num_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle + # in the set of all such output times over all cycles. + output_times_all_cycles_inst \ + = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + + # Create a list of instantaneous field obs days (i.e. days on which + # observations of instantaneous fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. + tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + obs_days_crnt_cycl_inst = sorted(set(tmp)) + # Include the obs days for instantaneous fields for the current cycle + # in the set of all such obs days over all cycles. + obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) + + # Create a list of forecast output times of cumulative fields for the + # current cycle. 
This is simply the list of forecast output times for + # instantaneous fields but with the first time dropped (because nothing + # has yet accumulated at the starting time of the cycle). + output_times_crnt_cycle_cumul = output_times_crnt_cycle_inst + output_times_crnt_cycle_cumul.pop(0) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + output_times_all_cycles_cumul \ + = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul) + + # Create a list of cumulative field obs days (i.e. days on which + # observations of cumulative fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. Note, however, that before dropping the hour-of-day from + # the list of forecast output times, we remove the last forecast output + # time if it happens to be the 0th hour of a day. This is because in + # the scripts/tasks that get observations of cumulative fields, the + # zeroth hour of a day is considered part of the previous day (because + # it represents accumulation that occurred on the previous day). + tmp = output_times_crnt_cycle_cumul + last_output_time_cumul = output_times_crnt_cycle_cumul[-1] + if last_output_time_cumul.hour == 0: + tmp.pop() + tmp = [datetime_obj.date() for datetime_obj in tmp] + obs_days_crnt_cycl_cumul = sorted(set(tmp)) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul) + + # Convert the set of output times of instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDDHH'. + output_times_all_cycles_inst = sorted(output_times_all_cycles_inst) + output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H") + for i in range(len(output_times_all_cycles_inst))] + + # Convert the set of obs days for instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst) + obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d") + for i in range(len(obs_days_all_cycles_inst))] + + # Convert the set of output times of cumulative fields over all cycles to + # a sorted list of strings of the form 'YYYYMMDDHH'. + output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul) + output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H") + for i in range(len(output_times_all_cycles_cumul))] + + # Convert the set of obs days for cumulative fields over all cycles to a + # sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul) + obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") + for i in range(len(obs_days_all_cycles_cumul))] + + return output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + output_times_all_cycles_cumul, obs_days_all_cycles_cumul + + +def set_cycledefs_for_obs_days(obs_days_all_cycles): + """Given a list of days on which obs are needed, this function generates a + list of ROCOTO-style cycledef strings that together span the days (over + all cycles of an SRW App experiment) on which obs are needed. 
The input + list of days must be increasing in time, but the days do not have to be + consecutive, i.e. there may be gaps between days that are greater than + one day. + + Each cycledef string in the output list represents a set of consecutive + days in the input list (when used inside a <cycledef> tag in a ROCOTO + XML). Thus, when the cycledef strings in the output list are all + assigned to the same cycledef group in a ROCOTO XML, that group will + represent all the days on which observations are needed. + + Args: + obs_days_all_cycles: + A list of strings of the form 'YYYYMMDD', with each string representing + a day on which observations are needed. Note that the list must be + sorted, i.e. the days must be increasing in time, but there may be + gaps between days. + + Returns: + cycledef_all_obs_days: + A list of strings, with each string being a ROCOTO-style cycledef of + the form + + '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' + + where {yyyymmdd_start} is the starting day of the first cycle in the + cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note + that the minutes and hours in these cycledef strings are always set to + '00'). Thus, one of the elements of the output list may be as follows: + + '202404290000 202405010000 24:00:00' + """ + + # To enable arithmetic with dates, convert the input string list of observation + # days (i.e. days on which observations are needed) over all cycles to a + # list of datetime objects. + tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles] + + # Initialize the variable that in the loop below contains the date of + # the previous day. This is just the first element of the list of + # datetime objects constructed above. Then use it to initialize the + # list (contin_obs_day_lists) that will contain lists of consecutive + # observation days. Thus, after its construction is complete, each + # element of contin_obs_day_lists will itself be a list containing + # datetime objects that are 24 hours apart. + day_prev = tmp[0] + contin_obs_day_lists = list() + contin_obs_day_lists.append([day_prev]) + + # Remove the first element of the list of obs days since it has already + # been used to initialize contin_obs_day_lists. + tmp.pop(0) + + # Loop over the remaining list of obs days and construct the list of + # lists of consecutive obs days. + one_day = timedelta(days=1) + for day_crnt in tmp: + # If the current obs day comes 24 hours after the previous obs day, i.e. + # if it is the next day of the previous obs day, append it to the last + # existing list in contin_obs_day_lists. + if day_crnt == day_prev + one_day: + contin_obs_day_lists[-1].append(day_crnt) + # If the current obs day is NOT the next day of the previous obs day, + # append a new element to contin_obs_day_lists and initialize it as a + # list containing a single element -- the current obs day. + else: + contin_obs_day_lists.append([day_crnt]) + # Update the value of the previous day in preparation for the next + # iteration of the loop. + day_prev = day_crnt + + # Use the list of lists of consecutive obs days to construct a list of + # ROCOTO-style cycledef strings that each represent a set of consecutive + # obs days when included in a <cycledef> tag in a ROCOTO XML. Each + # string in this new list corresponds to a series of consecutive days on + # which observations are needed (where by "consecutive" we mean no days + # are skipped), and there is at least a one day gap between each such + # series. These cycledefs together represent all the days (i.e.
over all + # cycles of the experiment) on which observations are needed. + cycledef_all_obs_days = list() + for contin_obs_day_list in contin_obs_day_lists: + cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M') + cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M') + cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + + return cycledef_all_obs_days diff --git a/ush/setup.py b/ush/setup.py index 0511653fa2..b6f4256fc4 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -10,6 +10,7 @@ from textwrap import dedent import yaml +from pprint import pprint from python_utils import ( log_info, @@ -37,7 +38,9 @@ load_xml_file, ) -from set_cycle_dates import set_cycle_dates +from set_cycle_dates import \ + set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ + set_cycledefs_for_obs_days from set_predef_grid_params import set_predef_grid_params from set_gridparams_ESGgrid import set_gridparams_ESGgrid from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid @@ -754,6 +757,43 @@ def get_location(xcs, fmt, expt_cfg): date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + # Set the forecast output interval. Ideally, this should be obtained + # from the SRW App's configuration file, but such a variable doesn't + # yet exist in that file. + fcst_output_intvl_hrs = 1 + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0) + fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0) + fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0) + + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification) over all + # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for + # cumulative ones (e.g. APCP). + output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + output_times_all_cycles_cumul, obs_days_all_cycles_cumul \ + = set_fcst_output_times_and_obs_days_all_cycles( \ + date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl) + + # Add the lists generated above to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. + workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst + workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst + workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul + workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul + + # Generate lists of ROCOTO cycledef strings corresponding to the obs days + # for instantaneous fields and those for cumulative ones. + cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst) + cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul) + # Save the lists of cycledefs in the dictionary containing values needed + # to construct the ROCOTO XML.
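As a concrete illustration of the grouping that set_cycledefs_for_obs_days performs (a standalone sketch, separate from the patch; the obs days below are made up), runs of consecutive obs days collapse into one cycledef each, so a gap between forecasts yields a second cycledef rather than cycles on days where no obs are needed:

from datetime import datetime, timedelta

# Hypothetical obs days, e.g. from two 48-hour forecasts started several
# days apart; note the missing 20240502.
obs_days_all_cycles = ["20240429", "20240430", "20240501", "20240503", "20240504"]

# Group consecutive days, then format each group as a ROCOTO-style
# cycledef string "YYYYMMDDHHMM YYYYMMDDHHMM 24:00:00", mirroring the
# logic of set_cycledefs_for_obs_days above.
days = [datetime.strptime(d, "%Y%m%d") for d in obs_days_all_cycles]
groups = [[days[0]]]
for day in days[1:]:
    if day == groups[-1][-1] + timedelta(days=1):
        groups[-1].append(day)
    else:
        groups.append([day])
cycledefs = [f"{g[0]:%Y%m%d%H%M} {g[-1]:%Y%m%d%H%M} 24:00:00" for g in groups]
print(cycledefs)
# ['202404290000 202405010000 24:00:00', '202405030000 202405040000 24:00:00']

Assigned to the same cycledef group (cycledef_obs_days_inst or cycledef_obs_days_cumul), these strings activate the obs-day tasks only on the days that are actually needed.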
+ rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst + rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul + # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") @@ -765,7 +805,7 @@ def get_location(xcs, fmt, expt_cfg): num_cycles = len(set_cycle_dates( date_first_cycl, date_last_cycl, - incr_cycl_freq)) + cycl_incr)) if num_cycles != len(fcst_len_cycl): logger.error(f""" The number of entries in FCST_LEN_CYCL does From 7bb8b1fefdc0b96c20e93c1e8c3551e89bc8c05d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 14:06:02 -0600 Subject: [PATCH 045/208] Fix bug in the way the start time of the second cycle is calculated. --- parm/wflow/default_workflow.yaml | 8 +++++++- ush/setup.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index c79415b3be..d7198cf524 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -51,7 +51,13 @@ rocoto: forecast: - !startstopfreq ['{{workflow.DATE_FIRST_CYCL}}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] cycled_from_second: - - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %}{{ [workflow.DATE_FIRST_CYCL[0:8], "{:02d}".format(workflow.INCR_CYCL_FREQ)]|join }}{%- else %}{{workflow.DATE_FIRST_CYCL}}{%- endif %}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] + - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %} + {{- workflow.DATE_SECOND_CYCL }} + {%- else %} + {{- workflow.DATE_FIRST_CYCL }} + {%- endif %}', + '{{ workflow.DATE_LAST_CYCL }}', + '{{ workflow.INCR_CYCL_FREQ }}'] log: !cycstr '&LOGDIR;/FV3LAM_wflow.{% if user.RUN_ENVIR == "nco" %}{{ workflow.WORKFLOW_ID + "." }}{% endif %}log' tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/test.yaml"]|include }}' diff --git a/ush/setup.py b/ush/setup.py index b6f4256fc4..4b56294aaa 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -794,6 +794,19 @@ def get_location(xcs, fmt, expt_cfg): rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. + # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. 
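A small sketch of the date arithmetic being moved into Python here, using the first cycle and cycling interval from one of the 7-hourly WE2E tests added later in this series: the Jinja expression removed above pasted the cycling interval onto the first cycle's date as the hour, which is only correct when the first cycle starts at 00z and the interval is less than 24 hours.

import datetime

# Example values (cf. the cycintvl_07hr WE2E test later in this series).
date_first_cycl = datetime.datetime.strptime("2024042902", "%Y%m%d%H")
incr_cycl_freq = 7  # hours between cycle start times

# Old template logic: first cycle's date with the interval pasted in as
# the hour -- wrong here because the first cycle starts at 02z.
old_value = date_first_cycl.strftime("%Y%m%d") + f"{incr_cycl_freq:02d}"

# New logic: real timedelta arithmetic, then format back to a string.
date_second_cycl = date_first_cycl + datetime.timedelta(hours=incr_cycl_freq)
new_value = date_second_cycl.strftime("%Y%m%d%H")

print(old_value)  # 2024042907
print(new_value)  # 2024042909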
+ date_second_cycl = date_first_cycl + cycl_intvl + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ef6aafe6c1e2f02407bf417b4faa9a4f559d1962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 14:07:24 -0600 Subject: [PATCH 046/208] Rename script for clarity. --- ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} | 0 ush/setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} (100%) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_and_obs_timeinfo.py similarity index 100% rename from ush/set_cycle_dates.py rename to ush/set_cycle_and_obs_timeinfo.py diff --git a/ush/setup.py b/ush/setup.py index 4b56294aaa..9ecc5e402b 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -38,7 +38,7 @@ load_xml_file, ) -from set_cycle_dates import \ +from set_cycle_and_obs_timeinfo import \ set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ set_cycledefs_for_obs_days from set_predef_grid_params import set_predef_grid_params From 99246f2e5d08d6e837f0043eeb8d2f418072ff2c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:05 -0600 Subject: [PATCH 047/208] Allow use of dots in WE2E test names. --- tests/WE2E/run_WE2E_tests.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 5d4bd81105..d3c2cb98ab 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -141,7 +141,6 @@ def run_we2e_tests(homedir, args) -> None: pretty_list = "\n".join(str(x) for x in tests_to_run) logging.info(f'Will run {len(tests_to_run)} tests:\n{pretty_list}') - config_default_file = os.path.join(ushdir,'config_defaults.yaml') logging.debug(f"Loading config defaults file {config_default_file}") config_defaults = load_config_file(config_default_file) @@ -159,7 +158,13 @@ def run_we2e_tests(homedir, args) -> None: # test-specific options, then write resulting complete config.yaml starttime = datetime.now() starttime_string = starttime.strftime("%Y%m%d%H%M%S") - test_name = os.path.basename(test).split('.')[1] + test_fn = os.path.basename(test) + # Set the test name to all characters between the initial "config." and + # the final ".yaml" in the file name. This will allow any characters to + # be used as part of the test name, in particular a ".". + prefix = 'config.' + suffix = '.yaml' + test_name = test_fn[test_fn.find(prefix)+len(prefix):test_fn.rfind(suffix)] logging.debug(f"For test {test_name}, constructing config.yaml") test_cfg = load_config_file(test) From f1d2c29261a8a9978757ae9a46edcdeda7e04dee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:57 -0600 Subject: [PATCH 048/208] Remove extraneous "proc" subdirectory in default obs directories. 
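The effect of the new test-name slicing in run_WE2E_tests.py (the dots-in-test-names change just above) can be seen with one of the dotted test names introduced later in this series; the directory portion of the path below is illustrative only:

import os

test = ("/path/to/tests/WE2E/test_configs/verification/"
        "config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml")
test_fn = os.path.basename(test)

# Old behavior: only the text between the first two dots survives.
old_name = test_fn.split('.')[1]   # 'get_obs_hpss'

# New behavior: everything between the leading "config." and the
# trailing ".yaml", so dots inside the test name are preserved.
prefix, suffix = 'config.', '.yaml'
new_name = test_fn[test_fn.find(prefix) + len(prefix):test_fn.rfind(suffix)]
print(old_name)
print(new_name)  # 'get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr'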
--- ush/config_defaults.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 9ac5bc39f8..6486388cad 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -317,10 +317,10 @@ platform: # #----------------------------------------------------------------------- # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # #----------------------------------------------------------------------- # From 5945b027f9bf65c30945d56a2b2237be036b72d2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:56:08 -0600 Subject: [PATCH 049/208] Comment out "set -x". --- ush/get_obs_ccpa.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index ef1d55eb05..0f3dd248ea 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -11,7 +11,7 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} set -u -set -x +#set -x # #----------------------------------------------------------------------- # From b20f2c9c617c0ff26567ec45b97df194e3ef9a29 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 17:54:26 -0600 Subject: [PATCH 050/208] Add WE2E test configuration files for getting obs, doing vx pre-processing, and deterministic vx. --- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 65 +++++++++++++++++++ ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 65 +++++++++++++++++++ ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 64 ++++++++++++++++++ ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 65 +++++++++++++++++++ 7 files changed, 448 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml new file mode 100644 index 
0000000000..8b840a8ea8 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), overlapping forecasts in a day. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..18558e0d95 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), NON-overlapping forecasts in a day with multi-hour + (but < 24hr) gaps between the end of one forecast and the start of the + next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..5d6929cd4a --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which the first obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml new file mode 100644 index 0000000000..7ec2264509 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which forecasts do not include 00z. It + is the simplest case of obtaining CCPA and NDAS obs because it avoids + testing the special treatment needed at 00z for these obs types. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml new file mode 100644 index 0000000000..29427201e7 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -0,0 +1,64 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) overlapping forecasts. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..508d14c7fa --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which the last obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml new file mode 100644 index 0000000000..c838e8581d --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) NON-overlapping forecasts with multi-day gaps between + the end of one forecast and the start of the next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From e3b1f6fdf41831aa628c3dcf046f90c4b6e9db67 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 18:02:11 -0600 Subject: [PATCH 051/208] Rename files. --- ...multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} | 0 ...multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml 
b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml From 2725832d80b1c6f72642c5b594022a1dd8d378cd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 13:34:21 -0600 Subject: [PATCH 052/208] Bug fixes after merging in develop. --- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a237246eb7..6c1b8ff2b5 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -26,7 +26,7 @@ task_get_obs_ccpa: attrs: cycledefs: cycledef_obs_days_cumul maxtries: '1' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' @@ -75,7 +75,7 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' From c38e9816ac1a7890f481dd6cc8904a52ff1bb962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 15:47:06 -0600 Subject: [PATCH 053/208] Bug fixes to use new yaml version of var_defns.sh. --- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs_ccpa.sh | 4 +++- ush/get_obs_mrms.sh | 4 +++- ush/get_obs_ndas.sh | 4 +++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b971ab3615..fbd751718f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco ; do +for sect in user workflow nco ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 0f3dd248ea..b17dda97e0 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 92fc24fa56..a7a47d3f98 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 441de7b31d..f4cddfe19a 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x From d8cb3a06e17afe2a42fae78703b2197edc9b168e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 18:12:11 -0600 Subject: [PATCH 054/208] Bug fix to remove crontab line once experiment is complete (regardless of whether it succeeded or failed). --- ush/get_crontab_contents.py | 2 +- ush/launch_FV3LAM_wflow.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 6b0548141c..fbdf80dae9 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -162,7 +162,7 @@ def delete_crontab_line(called_from_cron, machine, crontab_line, debug): crontab_contents = crontab_contents.replace(crontab_line + "\n", "") crontab_contents = crontab_contents.replace(crontab_line, "") else: - print(f"\nWARNING: line not found in crontab, nothing to remove:\n {crontab_line}\n") + print(f"\nWARNING: line not found in crontab, nothing to remove:\n{crontab_line}\n") run_command(f"""echo '{crontab_contents}' | {crontab_cmd}""") diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh index 7c26511f4f..7a4a16e4b5 100644 --- a/ush/launch_FV3LAM_wflow.sh +++ b/ush/launch_FV3LAM_wflow.sh @@ -353,9 +353,9 @@ script for this experiment: # Remove CRONTAB_LINE from cron table # if [ "${called_from_cron}" = "TRUE" ]; then - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -c -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -c -d else - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -d fi fi # From f7b77fd8a383b49dd6895a603af2b829e67b404b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 23:27:05 -0600 Subject: [PATCH 055/208] Use SRW's standard way of doing boolean comparisons in bash. --- ush/get_obs_ccpa.sh | 6 +++--- ush/get_obs_mrms.sh | 4 ++-- ush/get_obs_ndas.sh | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index b17dda97e0..21f85e2ff4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -325,7 +325,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -368,7 +368,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -454,7 +454,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." 
diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a7a47d3f98..d82ba842c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -175,7 +175,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -260,7 +260,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index f4cddfe19a..b65e3173f1 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -185,7 +185,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -228,7 +228,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -305,7 +305,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." From 6425b822ea8b26642d48c07ab6750c8b9d1c8b78 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:21:13 -0600 Subject: [PATCH 056/208] Make script more compact. --- scripts/exregional_get_verif_obs.sh | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index fbd751718f..4e981b3958 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -135,21 +135,17 @@ done # #----------------------------------------------------------------------- # -if [[ ${OBTYPE} == "CCPA" ]]; then - $USHdir/get_obs_ccpa.sh -elif [[ ${OBTYPE} == "MRMS" ]]; then - $USHdir/get_obs_mrms.sh -elif [[ ${OBTYPE} == "NDAS" ]]; then - $USHdir/get_obs_ndas.sh -elif [[ ${OBTYPE} == "NOHRSC" ]]; then - $USHdir/get_obs_nohrsc.sh -else +valid_obtypes=("CCPA" "MRMS" "NDAS" "NOHRSC") +if [[ ! ${valid_obtypes[@]} =~ ${OBTYPE} ]]; then print_err_msg_exit "\ -Invalid OBTYPE specified for script: +Invalid observation type (OBTYPE) specified for script: OBTYPE = \"${OBTYPE}\" -Valid options are CCPA, MRMS, NDAS, and NOHRSC. 
+Valid observation types are: + $(printf "\"%s\" " ${valid_obtypes[@]}) " fi +script_bn="get_obs_$(echo_lowercase ${OBTYPE})" +$USHdir/${script_bn}.sh # #----------------------------------------------------------------------- # @@ -158,9 +154,8 @@ fi # #----------------------------------------------------------------------- # -obtype=$(echo_lowercase ${OBTYPE}) mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # From 24c8c5912e3b6416bfff3623c55445fd87a30fa8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:25:32 -0600 Subject: [PATCH 057/208] Bug fixes. --- parm/wflow/verify_pre.yaml | 8 ++++---- ush/get_obs_mrms.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6c1b8ff2b5..c6e33e4565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -35,7 +35,7 @@ task_get_obs_ccpa: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_CCPA' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -49,7 +49,7 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' OUTPUT_TIMES_ALL: *output_times_all_cumul - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -67,7 +67,7 @@ task_get_obs_mrms: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -81,7 +81,7 @@ task_get_obs_ndas: OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' OUTPUT_TIMES_ALL: *output_times_all_inst - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d82ba842c0..377ffb25c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -262,6 +262,6 @@ done # if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." 
- rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ + rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." fi From 2622f8b47809449ff797a80f5cdb883b406bd2ee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 10:27:39 -0600 Subject: [PATCH 058/208] Get REMOVE_RAW_OBS... variables directly from var_defns.yaml file instead of setting them as environment variables in the rocoto xml. This makes it possible to change their settings by changing the values in var_defns.yaml and rerunning the get_obs_... tasks. --- parm/wflow/verify_pre.yaml | 4 ---- ush/get_obs_ccpa.sh | 15 +++++++++------ ush/get_obs_mrms.sh | 15 +++++++++------ ush/get_obs_ndas.sh | 15 +++++++++------ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c6e33e4565..4a9e750c56 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -35,7 +35,6 @@ task_get_obs_ccpa: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} {%- endfor %}' - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -49,7 +48,6 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' OUTPUT_TIMES_ALL: *output_times_all_cumul - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -67,7 +65,6 @@ task_get_obs_mrms: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} {%- endfor %}' - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -81,7 +78,6 @@ task_get_obs_ndas: OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' OUTPUT_TIMES_ALL: *output_times_all_inst - REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 21f85e2ff4..6d217c43c2 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -322,10 +322,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them.
+remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -450,12 +453,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 377ffb25c0..4fbe503b25 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -172,10 +172,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -256,12 +259,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b65e3173f1..23e0496e35 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -182,10 +182,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -301,12 +304,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. 
+# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi From 2a379983419e636149656ac8d84f1fe10250a55e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 11 Sep 2024 16:48:05 -0600 Subject: [PATCH 059/208] Minor fixes to comments and output messages. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 15 ++++++++------- ush/set_cycle_and_obs_timeinfo.py | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 572f7c68c4..c3e19c8935 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -92,7 +92,8 @@ function eval_METplus_timestr_tmpl() { # if [ -z "${METplus_timestr_tmpl}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot be empty: +The specified METplus time string template (METplus_timestr_tmpl) cannot +be empty: METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" fi @@ -100,21 +101,21 @@ The specified METplus time string template (METplus_timestr_tmpl) cannot be empt if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain exactly 10, -12, or 14 integers (but contains $len): +The specified initial time string (init_time) must contain 10, 12, or 14 +digits (but contains $len): init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of only -integers and cannot be empty: +The specified initial time string (init_time) must consist of digits only +and cannot be empty: init_time = \"${init_time}\"" fi if ! [[ $fhr =~ ^[0-9]+$ ]]; then print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of only integers and -cannot be empty: +The specified forecast hour (fhr) must consist of digits only and cannot +be empty: fhr = \"${fhr}\"" fi # diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index c3969d8ef3..36635b643e 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,7 +49,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( observations are needed to perform verification (each element of the form 'YYYYMMDD'). The first pair of lists is for instantaneous output fields (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. - APCP or accumulated precipitation). + APCP or accumulated precipitation). The accumulation period for the latter + is the forecast output interval. Args: start_time_first_cycl: From 5160d3ccedf78f36c849846f01d3961500ae4a4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:54:06 -0600 Subject: [PATCH 060/208] Fix typos. 
--- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 6d217c43c2..54537b9e97 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -174,7 +174,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 4fbe503b25..d4e32a4c54 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -86,7 +86,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 23e0496e35..b69d4cb1bc 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -60,7 +60,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to From f910f6fcb98ec8e0fe09d7eaeda036dfbf581c3b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:56:54 -0600 Subject: [PATCH 061/208] Fix more typos. --- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 54537b9e97..1eee37339c 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -177,7 +177,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d4e32a4c54..6d84141eab 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -89,7 +89,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. 
To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b69d4cb1bc..595ecc2688 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -63,7 +63,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) From aa7188b7897c8497be2817bb0e056358e932c9bc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 13 Sep 2024 16:53:30 -0600 Subject: [PATCH 062/208] Fix up comments. --- ush/get_obs_ccpa.sh | 10 +++++----- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 1eee37339c..79ce882da3 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -235,15 +235,15 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then @@ -255,7 +255,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6d84141eab..6aac0159b3 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -116,7 +116,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 595ecc2688..2954552412 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -102,21 +102,21 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. 
This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) From 3c794f258b1e827ec0b484ab030f14220b20d6a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 13:41:42 -0600 Subject: [PATCH 063/208] Add new function and clean up existing. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 232 ++++++++++++++++---- 1 file changed, 194 insertions(+), 38 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index c3e19c8935..ae4a1c7ebf 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -1,8 +1,9 @@ # #----------------------------------------------------------------------- # -# This file defines a function that evaluates a METplus time-string -# template. +# This function evaluates a METplus time-string template, i.e. a string +# (e.g. a file name template) containing one or more METplus time- +# formatting strings. # #----------------------------------------------------------------------- # @@ -49,7 +50,7 @@ function eval_METplus_timestr_tmpl() { "init_time" \ "fhr" \ "METplus_timestr_tmpl" \ - "outvarname_formatted_time" \ + "outvarname_evaluated_timestr" \ ) process_args valid_args "$@" # @@ -69,16 +70,171 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - local fmt \ - formatted_time \ + local crnt_timefmt \ + crnt_timefmt_esc \ + evaluated_timestr \ + regex_search_tmpl \ + the_time \ + tmpl_remainder +# +#----------------------------------------------------------------------- +# +# Loop over all METplus time-formatting strings in the given METplus +# template and evaluate each using the given initial time (init_time) and +# forecast hour (fhr). +# +# Note that the while-loop below is over all METplus time-formatting +# strings of the form {...} in the template METplus_timestr_tmpl; it +# continues until all such time-formatting strings have been evaluated +# to actual times. +# +#----------------------------------------------------------------------- +# +# Regular expression used by the sed utility below to pick out the next +# METplus time-formatting string in the given METplus time-string template. +# + regex_search_tmpl="(.*)(\{.*\})(.*)" +# +# Initialize while-loop variables. 
+# + evaluated_timestr="${METplus_timestr_tmpl}" + + crnt_timefmt=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + while [ ! -z "${crnt_timefmt}" ]; do + + eval_single_METplus_timefmt \ + init_time="${init_time}" \ + fhr="${fhr}" \ + METplus_timefmt="${crnt_timefmt}" \ + outvarname_evaluated_timefmt="the_time" +# +# Replace the next METplus time string in evaluated_timestr with an actual +# time. +# +# Note that when using sed, we need to escape various characters (question +# mark, closing and opening curly braces, etc) in the METplus template in +# order for the sed command below to work properly. +# + crnt_timefmt_esc=$( echo "${crnt_timefmt}" | \ + $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) + evaluated_timestr=$( echo "${evaluated_timestr}" | \ + $SED -n -r "s|(.*)(${crnt_timefmt_esc})(.*)|\1${the_time}\3|p" ) +# +# Set up values for the next iteration of the while-loop. +# + crnt_timefmt=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + done +# +#----------------------------------------------------------------------- +# +# Set output variables. +# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_evaluated_timestr}" ]; then + printf -v ${outvarname_evaluated_timestr} "%s" "${evaluated_timestr}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + +# +#----------------------------------------------------------------------- +# +# This function uses the specified initial forecast time and forecast +# hour to evaluate a single METplus time-formatting string and return +# the corresponding time. +# +#----------------------------------------------------------------------- +# +function eval_single_METplus_timefmt() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. +# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. 
Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "init_time" \ + "fhr" \ + "METplus_timefmt" \ + "outvarname_evaluated_timefmt" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args "valid_args" +# +#----------------------------------------------------------------------- +# +# Declare local variables. +# +#----------------------------------------------------------------------- +# + local evaluated_timefmt \ + fmt \ hh_init \ init_time_str \ lead_hrs \ len \ - mn_init \ - METplus_time_fmt \ + METplus_time_codes \ METplus_time_shift \ METplus_time_type \ + mn_init \ regex_search \ ss_init \ valid_time_str \ @@ -90,25 +246,25 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - if [ -z "${METplus_timestr_tmpl}" ]; then + if [ -z "${METplus_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot -be empty: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +The specified METplus time-formatting string (METplus_timefmt) cannot be +empty: + METplus_timefmt = \"${METplus_timefmt}\"" fi len=${#init_time} if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain 10, 12, or 14 -digits (but contains $len): +The specified initial time (init_time) must contain 10, 12, or 14 digits +but instead contains $len: init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of digits only -and cannot be empty: +The specified initial time (init_time) must consist of digits only and +cannot be empty: init_time = \"${init_time}\"" fi @@ -150,11 +306,11 @@ be empty: # regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" METplus_time_type=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_fmt=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\4/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\1/p" ) + METplus_time_codes=$( \ + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\4/p" ) METplus_time_shift=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\7/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\7/p" ) # #----------------------------------------------------------------------- # @@ -163,9 +319,9 @@ be empty: # #----------------------------------------------------------------------- # - case "${METplus_time_fmt}" in + case "${METplus_time_codes}" in "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_fmt}" + fmt="${METplus_time_codes}" ;; "%H") # @@ -178,7 +334,7 @@ be empty: if [ "${METplus_time_type}" = "lead" ]; then fmt="%02.0f" else - fmt="${METplus_time_fmt}" + fmt="${METplus_time_codes}" fi ;; "%HHH") @@ -194,10 +350,10 @@ be empty: ;; *) print_err_msg_exit "\ 
-Unsupported METplus time format: - METplus_time_fmt = \"${METplus_time_fmt}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +Unsupported METplus time codes: + METplus_time_codes = \"${METplus_time_codes}\" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac # @@ -213,10 +369,10 @@ METplus time string template passed to this function is: # case "${METplus_time_type}" in "init") - formatted_time=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "valid") - formatted_time=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "lead") lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ @@ -240,23 +396,23 @@ The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR # # Get the lead in the proper format. # - formatted_time=$( printf "${fmt}" "${lead_hrs}" ) + evaluated_timefmt=$( printf "${fmt}" "${lead_hrs}" ) ;; *) print_err_msg_exit "\ Unsupported METplus time type: METplus_time_type = \"${METplus_time_type}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac - if [ -z "${formatted_time}" ]; then + if [ -z "${evaluated_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) could -not be evaluated for the given initial time (init_time) and forecast -hour (fhr): - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\" +The specified METplus time-formatting string (METplus_timefmt) could not +be evaluated for the given initial time (init_time) and forecast hour +(fhr): + METplus_timefmt = \"${METplus_timefmt}\" init_time = \"${init_time}\" fhr = \"${fhr}\"" fi @@ -267,8 +423,8 @@ hour (fhr): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_formatted_time}" ]; then - printf -v ${outvarname_formatted_time} "%s" "${formatted_time}" + if [ ! -z "${outvarname_evaluated_timefmt}" ]; then + printf -v ${outvarname_evaluated_timefmt} "%s" "${evaluated_timefmt}" fi # #----------------------------------------------------------------------- From ef1cbbb6f97de5ea03709ff89a895f1908aa8325 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:12:31 -0600 Subject: [PATCH 064/208] Change variable names and remove unneeded variable. --- ush/get_obs_ccpa.sh | 47 ++++++++++++++++++++++----------------------- ush/get_obs_mrms.sh | 26 ++++++++++++------------- ush/get_obs_ndas.sh | 37 +++++++++++++++++------------------ 3 files changed, 54 insertions(+), 56 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 79ce882da3..d3ad4c49ce 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -173,31 +173,31 @@ yyyymmdd_task=${PDY} # processing by this script is complete. 
basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # APCP (accumulated precipitation) output for the current day. We start # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). -if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - output_times_crnt_day=(${output_times_crnt_day[@]:1}) +if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is # one of the output times in the list of all APCP output times, we include @@ -205,14 +205,14 @@ fi # considered part of the current day (because it represents precipitation # that occured during the last hour of the current day). yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) fi # If there are no forecast APCP output times on the day of the current # task, exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast APCP output times fall within the day (including the 0th hour of the next day) associated with the current task (yyyymmdd_task): @@ -237,14 +237,14 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. 
This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,7 +257,7 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -283,7 +283,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -361,9 +361,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -416,7 +415,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -445,7 +444,7 @@ archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times for APCP are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6aac0159b3..af3b8ca942 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -85,27 +85,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -120,7 +120,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${output_times_crnt_day[@]}; do + for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +139,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +246,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 2954552412..5562647340 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -59,27 +59,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -104,13 +104,13 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +118,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +143,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. 
-num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -221,9 +221,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -276,7 +275,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -296,7 +295,7 @@ are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi From 2d751130084dce588aef9def18f2f487d1dadbc9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:13:24 -0600 Subject: [PATCH 065/208] Fix location of staged forecast input files. 
--- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 8b840a8ea8..e386d84e3d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 18558e0d95..9694f9845f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 5d6929cd4a..067187a216 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 7ec2264509..75d706ba1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 29427201e7..a55c7c1e0b 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -58,7 +58,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - 
VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 508d14c7fa..e5f8fc8d1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c838e8581d..6445ebca53 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From 830a635c2a0c9fcadf605b7d8d52f4b447c58652 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 17 Sep 2024 12:58:42 -0600 Subject: [PATCH 066/208] Add check in setup.py to make sure that the accumulation periods of accumlated vx fields are less than or equal to the forecast length. This check is also made in the workflow yaml files, but if all the accumulation periods happen to be greater than the forecast length, then the field must be completely removed from verification (i.e. its vx tasks must be removed from the workflow), and that can only be done via this check in setup.py; it can't be done in the workflow task yaml files. --- ush/setup.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index d60f8a9154..d6e9e5c2d0 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -546,6 +546,31 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # For vx fields that are accumulated, remove those accumulation hours + # that are longer than the forecast length. If that leaves the array + # of accumulation hours for that field empty, then remove the field + # from the list of fields to be verified. + # + # ----------------------------------------------------------------------- + # + # Get the vx fields specified in the experiment configuration. + vx_fields_config = expt_config["verification"]["VX_FIELDS"] + + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fields_accum = ["APCP", "ASNOW"] + for field in vx_fields_accum: + if field in vx_fields_config: + accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"]) + accum_periods = expt_config["verification"][accum_periods_array_name] + accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)] + expt_config["verification"][accum_periods_array_name] = accum_periods + if not accum_periods: + vx_fields_config.remove(field) + + expt_config["verification"]["VX_FIELDS"] = vx_fields_config + # + # ----------------------------------------------------------------------- + # # Remove all verification [meta]tasks for which no fields are specified. # # ----------------------------------------------------------------------- @@ -562,10 +587,10 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", @@ -578,9 +603,6 @@ def remove_tag(tasks, tag): "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] - # Get the vx fields specified in the experiment configuration. - vx_fields_config = expt_config["verification"]["VX_FIELDS"] - # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. 
if not vx_fields_config: @@ -602,7 +624,6 @@ def remove_tag(tasks, tag): are specified for verification.""" )) rocoto_config['tasks'].pop(metatask) - # # ----------------------------------------------------------------------- # From 407c51b9a183983001f5d33ed25a05372361fe3e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:25:06 -0600 Subject: [PATCH 067/208] Fix typo. --- parm/metplus/PcpCombine.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 3cee69df1d..c2807e6380 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -98,7 +98,7 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # For accumulation variables (which is the only type of variable that we # run PcpCombine on), we add the accumulation period to the variable name # because this is how METplus normally sets names. This is because, -# epending on the settings in the METplus configuration file, it is +# depending on the settings in the METplus configuration file, it is # possible for a single NetCDF output file to contain output for multiple # accumulations, so even though the "level" attribute of each accumulation # variable in the output file will contain the level (e.g. "A1" or "A3"), From e3de6e1386955d36df0117e42cd376b1e829bcc6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:32:02 -0600 Subject: [PATCH 068/208] Fix typo. --- ush/get_obs_ndas.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 5562647340..8b0c87b3eb 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -133,7 +133,7 @@ File already exists on disk: hr=$((10#${hh})) arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" From fc7e257ebc88750bc0d0a1d20c979506f49fc179 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:22:44 -0600 Subject: [PATCH 069/208] Changes to allow NOHRSC obs and ASNOW to be accumulated by PcpCombine and verified in GridStat. --- parm/data_locations.yml | 2 +- parm/metplus/GridStat_or_PointStat.conf | 11 ++------ parm/metplus/PcpCombine.conf | 34 ++++++++++++++++--------- parm/metplus/vx_config_det.yaml | 1 + parm/metplus/vx_config_ens.yaml | 1 + 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index a3712a1972..5a30e48774 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -371,6 +371,6 @@ NOHRSC_obs: - "dcom_{yyyy}{mm}{dd}.tar" file_names: obs: - - "sfav2_CONUS_*h_{yyyy}{mm}{dd}{hh}_grid184.grb2" + - "sfav2_CONUS_6h_{yyyy}{mm}{dd}*_grid184.grb2" archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 39d34eb24f..7bd0039ab5 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -93,7 +93,7 @@ METPLUS_CONF = {{ '{' ~ METPLUS_TOOL_NAME ~ '_OUTPUT_DIR}' }}/metplus_final.{{me {%- if (METPLUS_TOOL_NAME == 'GRID_STAT') %} - {%- if (input_field_group == 'APCP') %} + {%- if input_field_group in ['APCP', 'ASNOW'] %} #{{METPLUS_TOOL_NAME}}_INTERP_FIELD = BOTH #{{METPLUS_TOOL_NAME}}_INTERP_VLD_THRESH = 1.0 @@ -539,15 +539,8 @@ PcpCombine tool. 
In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index c2807e6380..5bdd09c761 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -79,17 +79,26 @@ FCST_PCP_COMBINE_RUN = False # {{FCST_OR_OBS}}_PCP_COMBINE_METHOD = ADD -{%- if (FCST_OR_OBS == 'FCST') and (input_field_group == 'ASNOW') %} +{%- if (FCST_OR_OBS == 'FCST') %} + {%- if (input_field_group == 'ASNOW') %} # # Specify name of variable for Snowfall Accumulation. -# NOTE: Currently TSNOWP is used which is a constant-density estimate of snowfall accumulation. -# In future RRFS development, a GSL product with variable-density snowfall accumulation -# is planned for UPP. When that is included and turned on in post, this variable may be changed -# to ASNOW. # -FCST_PCP_COMBINE_INPUT_NAMES = TSNOWP - -FCST_PCP_COMBINE_INPUT_LEVELS = A01 +# NOTE: +# For forecasts, currently TSNOWP is used which is a constant-density +# estimate of snowfall accumulation. In future RRFS development, a GSL +# product with variable-density snowfall accumulation is planned for UPP. +# When that is included and turned on in post, this variable may be +# changed to ASNOW. +# +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = TSNOWP +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} +{%- elif (FCST_OR_OBS == 'OBS') %} + {%- if (input_field_group == 'ASNOW') %} +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = ASNOW +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} {%- endif %} # # Specify how to name the array in the NetCDF file that PcpCombine @@ -110,18 +119,19 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # the output NetCDF file). # {%- if (input_field_group in ['APCP', 'ASNOW']) %} -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{output_accum_hh}} {%- else %} {{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}} {%- endif %} # # Accumulation interval available in the input data. # -{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. # -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{output_accum_hh}} # # If the output NetCDF file already exists, specify whether or not to # skip the call to PcpCombine. @@ -177,7 +187,7 @@ FCST_PCP_COMBINE_CONSTANT_INIT = True # # Name to identify observation data in output. # -OBTYPE = CCPA +OBTYPE = {{obtype}} {%- endif %} # # Specify file type of input data. 
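For reference, once the Jinja variables above are filled in, the observation
side of the generated PcpCombine configuration for a 24-hour ASNOW accumulation
built from 6-hourly NOHRSC files would look roughly like the following
(illustrative rendering only; the exact values of input_accum_hh,
output_accum_hh, and obtype are supplied by the workflow at run time):

    OBS_PCP_COMBINE_INPUT_NAMES = ASNOW
    OBS_PCP_COMBINE_INPUT_LEVELS = A06
    OBS_PCP_COMBINE_INPUT_ACCUMS = 06
    OBS_PCP_COMBINE_OUTPUT_NAME = ASNOW_24
    OBS_PCP_COMBINE_OUTPUT_ACCUM = 24
    OBTYPE = NOHRSC
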
diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml index 8ea3fd5e13..c20e192dcb 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_config_det.yaml @@ -50,6 +50,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_config_ens.yaml index 5f55254a4c..2608490565 100644 --- a/parm/metplus/vx_config_ens.yaml +++ b/parm/metplus/vx_config_ens.yaml @@ -14,6 +14,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] From 75325447c87aa12ccb95f8bd35ccab220e78add6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:28:34 -0600 Subject: [PATCH 070/208] Changes to get_obs/vx WE2E tests to get them working with staged forecast output at new location. --- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 21 ++++++++----------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- 7 files changed, 63 insertions(+), 84 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index e386d84e3d..41428a7939 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 7 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043006' - FCST_LEN_HRS: 9 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 9694f9845f..2fae0d6388 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 11 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043022' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 067187a216..e150234a47 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042900' - DATE_LAST_CYCL: '2024043000' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 75d706ba1f..2e180e2714 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024043012' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index a55c7c1e0b..37c3eceb24 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,16 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024050212' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -45,6 +35,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -58,7 +56,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index e5f8fc8d1f..d8eb349433 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042921' - DATE_LAST_CYCL: '2024043021' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 6445ebca53..514dbed8d3 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 96 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024051112' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From 877bf05efad49fb5c15f1c568d72d024c0408e1b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:39:42 -0600 Subject: [PATCH 071/208] Add WE2E test to get NOHRSC obs and do vx on 6-hour and 24-hour snowfall accumulation. --- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml new file mode 100644 index 0000000000..6069ce8212 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, NOHRSC, MRMS, and NDAS + observations from HPSS for a single cycle with a relatively long forecast + (36 hours) cycle and then perform deterministic verification, including + first performing vx preprocessing with METplus tools such as PcpCombine + and Pb2Nc. + + The staged forecast data are from the SRW itself. + + This test uses a winter case to ensure that ASNOW is verified correctly + for both 6-hour and 24-hour accumulations. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +workflow: + PREEXISTING_DIR_METHOD: rename + # This is required in the experiment generation step, although it shouldn't + # since a forecast is not being run. + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2023021700' + DATE_LAST_CYCL: '2023021700' + FCST_LEN_HRS: 36 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' + +verification: + VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' + VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 4495813d591ea7e536e073f8fe23f68a2ed2562d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:41:36 -0600 Subject: [PATCH 072/208] Add script to get NOHRSC obs from HPSS. --- ush/get_obs_nohrsc.sh | 475 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100755 ush/get_obs_nohrsc.sh diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh new file mode 100755 index 0000000000..910cf3c35a --- /dev/null +++ b/ush/get_obs_nohrsc.sh @@ -0,0 +1,475 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +for sect in user platform verification ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. +# +# NOHRSC snow accumulation observations +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 +# +# where AA is the 2-digit accumulation duration in hours: 06 or 24 +# +# METplus is configured to verify snowfall using 06- and 24-h accumulated +# snowfall from 6- and 12-hourly NOHRSC files, respectively. +# +# If data is retrieved from HPSS, it will automatically staged by this +# this script. 
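+#
+# As a concrete illustration of the layout above (dates chosen only as an
+# example), the 6-hour accumulation file valid at 12z on 17 Feb 2023 would
+# be expected at:
+#
+#   {NOHRSC_OBS_DIR}/20230217/sfav2_CONUS_06h_2023021712_grid184.grb2
+#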
+#----------------------------------------------------------------------- +# + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Generate a list of forecast output times for the current day. Note +# that if the 0th hour of the next day (i.e. the day after the one +# associated with this task) is one of the forecast output times, we +# include it in the list for the current day because the accumulation +# associated with that hour occurred during the current day. +# +#----------------------------------------------------------------------- +# + +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to the task's script. To have an array-valued variable to +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# ASNOW (accumulated snow) output for the current day. We start constructing +# this by extracting from the full list of all forecast ASNOW output times +# (i.e. from all cycles) all elements that contain the current task's day +# (in the form YYYYMMDD). +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi +# If the 0th hour of the current day is in this list (and if it is, it +# will be the first element), remove it because for ASNOW, that time is +# considered part of the previous day (because it represents snowfall +# that occurred during the last hour of the previous day). +if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) +fi +# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is +# one of the output times in the list of all ASNOW output times, we +# include it in the list for the current day because for ASNOW, that time +# is considered part of the current day (because it represents snowfall +# that occured during the last hour of the current day). +yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +fi + +# If there are no forecast ASNOW output times on the day of the current +# task, exit the script. 
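+# (As an illustration with made-up settings: for yyyymmdd_task=20230217 and
+# hourly forecast output out to at least 24 hours from a cycle starting at
+# 2023021700, the list built above would contain 2023021701 through
+# 2023021723 plus 2023021800, with 2023021700 excluded because that hour's
+# snowfall accumulated during the previous day.)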
+num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast ASNOW output times fall within the day (including the +0th hour of the next day) associated with the current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." + exit +fi +# +#----------------------------------------------------------------------- +# +# Generate a list of all the times at which obs are available for the +# current day, possibly including hour 00 of the next day. +# +#----------------------------------------------------------------------- +# + +# The time interval (in hours) at which the obs are available on HPSS +# must be evenly divisible into 24. Otherwise, different days would +# have obs available at different hours. Make sure this is the case. +remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} + mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Construct the array of times during the current day (and possibly +# during hour 00 of the next day) at which obs are available on HPSS. +# Each element of this array is of the form "YYYYMMDDHH". +num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS)) +obs_avail_times_crnt_day=() +# Note: Start at i=1 because the output for hour 00 of the current day is +# considered part of the previous day (because it represents accumulation +# that occurred during the previous day). +for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do + hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS)) + obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) ) +done +# +#----------------------------------------------------------------------- +# +# Generate a list of all the times at which to retrieve obs. This is +# obtained from the intersection of the list of times at which there is +# forecast output and the list of times at which there are obs available. +# Note that if the forecast output is more frequent than the data is +# available, then the forecast values must be accumulated together to +# get values at the times at which the obs are available. This is done +# in another workflow task using the METplus tool PcpCombine. +# +#----------------------------------------------------------------------- +# +obs_retrieve_times_crnt_day=() +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then + obs_retrieve_times_crnt_day+=(${yyyymmddhh}) + fi +done +# +#----------------------------------------------------------------------- +# +# +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times=\( \${${array_name}[@]} \) +echo +echo "QQQQQQQQQQQQQQQQQQQ" +#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|" +echo "obs_retrieve_times =" +echo "|${obs_retrieve_times[@]}|" + +# For testing. 
+#obs_retrieve_times+=('abcd') +#obs_retrieve_times[4]='abcd' + +err_msg=" +The two methods of obtaining the array of obs retrieve times don't match: + obs_retrieve_times_crnt_day = + (${obs_retrieve_times_crnt_day[@]}) + obs_retrieve_times = + (${obs_retrieve_times[@]})" + +n1=${#obs_retrieve_times_crnt_day[@]} +n2=${#obs_retrieve_times[@]} +if [ ${n1} -ne ${n2} ]; then + print_err_msg_exit "${err_msg}" +fi + +for (( i=0; i<${n1}; i++ )); do + elem1=${obs_retrieve_times_crnt_day[$i]} + elem2=${obs_retrieve_times[$i]} + if [ ${elem1} != ${elem2} ]; then + print_err_msg_exit "${err_msg}" + fi +done + +obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) + +echo +echo "RRRRRRRRRRRRRRRRR" +#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" +echo "obs_retrieve_times_crnt_day =" +echo "|${obs_retrieve_times_crnt_day[@]}|" + +#exit 1 +# +#----------------------------------------------------------------------- +# +# Obs files will be obtained by extracting them from the relevant 24-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "0 24". This will +# be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "0 24", e.g. just "0" or just "24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. +# +#----------------------------------------------------------------------- +# + +# Sequence interval must be 24 hours because the archives are 24-hourly. +arcv_hr_incr=24 + +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(( hr_first/arcv_hr_incr )) +arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + +# Ending archive hour. This is set to the archive hour containing obs at +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +if [[ ${hr_last} -eq 0 ]]; then + arcv_hr_end=24 +else + arcv_hr_end=$(( hr_last/arcv_hr_incr )) + arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# obs files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}" + fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(( hr/arcv_hr_incr )) + arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of obs files needed, then there is no need to retrieve any files. 
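+# (Continuing the illustrative 6-hourly case, i.e. NOHRSC_OBS_AVAIL_INTVL_HRS=6
+# and forecast output covering the whole day: the obs retrieval times would be
+# 06z, 12z, 18z, and 00z of the next day, so four files are needed. If all
+# four are already on disk, the script exits without touching HPSS; otherwise
+# the archive-hour bounds computed above bracket the missing times, e.g.
+# hr_first=6 gives arcv_hr_start=0 and the next-day 00z time gives
+# arcv_hr_end=24, so the retrieval loop below would visit the daily archives
+# for hours 0 and 24.)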
+num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + + print_info_msg " +All obs files needed for the current day (yyyymmdd_task) already exist +on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any files." + exit + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. +else + + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) + arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" + print_info_msg " +At least some obs files needed needed for the current day (yyyymmdd_task) +do not exist on disk: + yyyymmdd_task = \"${yyyymmdd_task}\" +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} +The number of obs files that already exist on disk is: + num_existing_files = ${num_existing_files} +Will retrieve remaining files by looping over archives corresponding to +the following hours (since 00 of this day): + arcv_hrs = ${arcv_hrs_str} +" + +fi +# +#----------------------------------------------------------------------- +# +# At this point, at least some obs files for the current day need to be +# retrieved. The NOHRSC data on HPSS are archived by day, with the +# archive for a given day containing 6-hour as well as 24-hour grib2 +# files. The four 6-hour files are for accumulated snowfall at 00z +# (which represents accumulation over the last 6 hours of the previous +# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which +# represents accumulation over all 24 hours of the previous day) and 12z +# (which represents accumulation over the last 12 hours of the previous +# day plus the first 12 hours of the current day). +# +# Here, we will only obtain the 6-hour files. In other workflow tasks, +# the values in these 6-hour files will be added as necessary to obtain +# accumulations over longer periods (e.g. 24 hours). Since the four +# 6-hour files are in one archive and are relatively small (on the order +# of kilobytes), we get them all with a single call to the retrieve_data.py +# script. +# +#----------------------------------------------------------------------- +# + +# Whether to move or copy files from raw to processed directories. +#mv_or_cp="mv" +mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}" +# If the raw directories and files are to be removed at the end of this +# script, no need to copy the files since the raw directories are going +# to be removed anyway. +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# NOHRSC grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. 
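  # (For example, with a hypothetical yyyymmdd_task of 20240529, arcv_hr values
  # of 0 and 24 give yyyymmddhh_arcv values of 2024052900 and 2024053000, i.e.
  # the daily archives valid at 00z of the task day and at 00z of the next day.)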
+ yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the grib2 files retrieved from the current + # archive file. We refer to this as the "raw" archive directory because + # it will contain the files as they are in the archive before any processing + # by this script. + arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H) + hrs=$((arcv_hr_incr - 1)) + arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) + do_retrieve="FALSE" + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the times in the current day (or hour 00 of the next day) at which +obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly +archive file. The bounds of the data in the current archive file are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else + + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ccpa tasks (i.e. those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ccpa tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type NOHRSC_obs \ + --output_path ${arcv_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed NOHRSC grib2 files. This consists of simply copying + # or moving them from the raw daily directory to the processed directory. 
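  # (Sketch with hypothetical dates: for an archive with yyyymmdd_arcv of
  # 20240529 and hh_arcv of 00, the loop below visits 2024052900, 2024052906,
  # 2024052912 and 2024052918; a file such as
  # sfav2_CONUS_6h_2024052906_grid184.grb2 is then moved or copied into the
  # processed directory only if its time is one of the day's obs retrieval
  # times.)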
+ for hrs in $(seq 0 6 18); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw obs directories. +# +#----------------------------------------------------------------------- +# +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw obs directories." +fi From 80f2a1258c4e8053e9806af41f6d6548aea718c1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:44:50 -0600 Subject: [PATCH 073/208] Add new variables specifying the time intervals at which different types of obs are available on NOAA HPSS. Use these new variables in file name templates. --- ush/config_defaults.yaml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index bbddf30874..687463de5f 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2423,6 +2423,13 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # # Templates for CCPA, MRMS, and NDAS observation files. # # OBS_CCPA_APCP_FN_TEMPLATE: @@ -2448,8 +2455,10 @@ verification: # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate # NetCDF versions of these files. # - OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2' + OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' @@ -2467,7 +2476,10 @@ verification: # METplus Pb2nc tool on NDAS observations. (These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) 
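  # (Illustrative note: with the default CCPA_OBS_AVAIL_INTVL_HRS of 1, the
  # templating expression '"%02d" % CCPA_OBS_AVAIL_INTVL_HRS' renders to "01",
  # so the CCPA PcpCombine output template below evaluates to the same
  # "ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc" name that was
  # previously hard-wired; only the accumulation sub-string now follows the
  # availability interval.)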
# - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From a0a938c0667ddd2c3cdf76ae1768470e43532020 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:48:52 -0600 Subject: [PATCH 074/208] Rearrange the settings for the forecast file templates in config_default.sh so that they're more readable. --- ush/config_defaults.yaml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 687463de5f..56bd15b814 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2547,9 +2547,25 @@ verification: # both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) # - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}' - FCST_FN_TEMPLATE: '${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc' + FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %}' + FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' # # For verification tasks that need observational data, this specifies # the maximum number of observation files that may be missing. If more From 692255598ff94ef260720c9ffbf21684ce5677d9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:51:31 -0600 Subject: [PATCH 075/208] Allow verification of 12 and 18-hourly accumulated snowfall since these are now obtained by adding 6-hourly in the obs (as opposed to before, where we just got the 6 or 24 hourly obs without the option to add the 6-hourly to get 12 and 18). 
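As an illustrative sketch (dates are hypothetical): an 18-hour snowfall
accumulation valid at 2024052918 would be assembled by the PcpCombine obs
task by summing the three 6-hourly NOHRSC files

    sfav2_CONUS_6h_2024052906_grid184.grb2
    sfav2_CONUS_6h_2024052912_grid184.grb2
    sfav2_CONUS_6h_2024052918_grid184.grb2

which is what makes 12 and 18 legitimate additions to
valid_vals_VX_ASNOW_ACCUMS_HRS in the change below.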
--- ush/valid_param_vals.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 3530b51ae9..1ff5405ffb 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -78,4 +78,4 @@ valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] -valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] +valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From cae50b5d8a3ef9d477f6a771eee29cf04dcd0b50 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:31:19 -0600 Subject: [PATCH 076/208] In Pb2NC tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. --- parm/metplus/Pb2nc_obs.conf | 2 +- scripts/exregional_run_met_pcpcombine.sh | 26 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/parm/metplus/Pb2nc_obs.conf b/parm/metplus/Pb2nc_obs.conf index 729bf2ba06..24d469602f 100644 --- a/parm/metplus/Pb2nc_obs.conf +++ b/parm/metplus/Pb2nc_obs.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..9495031722 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,11 +190,13 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -213,22 +215,27 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi +input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) +vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_fhr_list \ - cdate="${CDATE}" \ +set_vx_hrs_list \ + yyyymmddhh_init="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ + field_is_cumul="TRUE" \ + check_subintvl_files="TRUE" \ + subintvl_accum_hrs="${subintvl_accum_hrs}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ 
num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_hrs_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -358,7 +365,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From fffdbd3739e8c5ac542e6fea63e5d8b53f5d907e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:39:24 -0600 Subject: [PATCH 077/208] In the get_obs tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. Remove the arrays that specify the combined forecast output times for all cycles. --- parm/wflow/verify_det.yaml | 4 + parm/wflow/verify_pre.yaml | 8 +- ush/get_obs_ccpa.sh | 215 ++++++++++++++++++------------------- ush/get_obs_mrms.sh | 47 ++++---- ush/get_obs_ndas.sh | 102 ++++++++++-------- ush/get_obs_nohrsc.sh | 177 ++++-------------------------- 6 files changed, 204 insertions(+), 349 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 4f4d4672ce..c4f420f10c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -42,6 +42,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: OBTYPE: 'CCPA' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -75,6 +76,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: OBTYPE: 'NOHRSC' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -105,6 +107,7 @@ metatask_GridStat_MRMS_all_mems: OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.MRMS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'L0' FCST_THRESH: 'all' walltime: 02:00:00 @@ -152,6 +155,7 @@ metatask_PointStat_NDAS_all_mems: ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'all' FCST_THRESH: 'all' walltime: 01:00:00 diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 4a9e750c56..80b70f68d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -47,7 +47,6 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - OUTPUT_TIMES_ALL: *output_times_all_cumul native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -61,10 +60,6 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' - OUTPUT_TIMES_ALL: &output_times_all_inst - '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} - {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} - 
{%- endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -77,7 +72,6 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - OUTPUT_TIMES_ALL: *output_times_all_inst queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -96,7 +90,6 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' - OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,6 +120,7 @@ metatask_PcpCombine_obs: FCST_OR_OBS: OBS OBTYPE: CCPA OBS_DIR: '&CCPA_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' METPLUSTOOLNAME: 'PCPCOMBINE' dependency: and: diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index d3ad4c49ce..aabb55e5a4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -158,93 +158,71 @@ set -u #----------------------------------------------------------------------- # -# CCPA accumulation period to consider. Here, we only retrieve data for -# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained -# by other tasks in the workflow that add up these hourly values. -accum="01" +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} + mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Accumulation period to use when getting obs files. This is simply (a +# properly formatted version of) the obs availability interval. +accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the CCPA -# grib2 files will appear after this script is done. We refer to this as +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as # the "processed" base directory because it contains the files after all # processing by this script is complete. basedir_proc=${OBS_DIR} - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. 
To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# APCP (accumulated precipitation) output for the current day. We start -# constructing this by extracting from the full list of all forecast APCP -# output times (i.e. from all cycles) all elements that contain the current -# task's day (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for APCP, that time is -# considered part of the previous day (because it represents precipitation -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all APCP output times, we include -# it in the list for the current day because for APCP, that time is -# considered part of the current day (because it represents precipitation -# that occured during the last hour of the current day). -yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast APCP output times on the day of the current -# task, exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast APCP output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". 
# # To generate this sequence, we first set its starting and ending values # as well as the interval. +# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,11 +235,11 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -273,7 +251,7 @@ File already exists on disk: arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" @@ -282,32 +260,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
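# (Worked example with hypothetical times: if the day's retrieval times run
# from 2024052901 through 2024053000 and none of the files are on disk yet,
# then arcv_hr_start=6 and arcv_hr_end=24, and the seq command below yields
# the full sequence "6 12 18 24"; if the files through 11z were already on
# disk, the scan above would have bumped arcv_hr_start to 12 and the sequence
# would shrink to "12 18 24".)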
else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -348,36 +333,48 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the CCPA grib2 files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the grib2 files retrieved from the current + # archive file. We refer to this as the "raw" archive directory because + # it will contain the files as they are in the archive before any processing + # by this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast APCP output times for the day associated - # with this task fall in the time interval spanned by the current archive. - # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive. - yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + hrs_ago=$((arcv_hr_incr - 1)) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the times in the current day (or hour 00 of the next day) at which +obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly +archive file. 
The bounds of the data in the current archive file are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. - mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the @@ -389,8 +386,8 @@ arcv_hr = ${arcv_hr}" # same names are extracted into different directories. cd ${basedir_raw} - # Pull CCPA data from HPSS. This will get all 6 obs files in the current - # archive and place them in the raw quarter-daily directory. + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -399,7 +396,7 @@ arcv_hr = ${arcv_hr}" --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -415,9 +412,12 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" @@ -435,17 +435,6 @@ arcv_hr = ${arcv_hr}" fi done - else - - print_info_msg " -None of the current day's forecast APCP output times fall in the range -spanned by the current 6-hourly archive file. The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times for APCP are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index af3b8ca942..a0d0590667 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -84,31 +84,24 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi - -# If there are no forecast output times on the day of the current task, -# exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." exit @@ -120,7 +113,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +132,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +239,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. 
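# (Sketch with hypothetical values: for a task day of 20240529, the loop below
# visits 2024052900 through 2024052923; for each hour that is also an obs
# retrieval time, mrms_pull_topofhour.py is invoked once per requested MRMS
# field, i.e. once for REFC (MergedReflectivityQCComposite) and once for
# RETOP (EchoTop), so a full day of both fields amounts to 48 invocations.)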
for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 8b0c87b3eb..7ab6fc652b 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} + mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} @@ -58,31 +70,28 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) + + + -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If there are no forecast output times on the day of the current task, -# exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. 
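# (For orientation, a hypothetical walk-through of the retrieval logic that
# follows: NDAS prepbufr obs live in 6-hourly nam archives, and an obs time of
# 2024052902 would be pulled from the t06z archive, whose contents span 00z
# through 05z of that day, where it is stored as nam.t06z.prepbufr.tm04.nr.)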
+num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." exit @@ -91,10 +100,10 @@ fi # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". # # To generate this sequence, we first set its starting and ending values # as well as the interval. @@ -103,14 +112,14 @@ fi arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# hour containing obs at the first observation retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last observation retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +127,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +152,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) +num_needed_files=$((num_obs_retrieve_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -214,17 +223,18 @@ arcv_hr = ${arcv_hr}" # archive before any processing by this script. qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast output times for the day associated - # with this task fall in the time interval spanned by the current archive. - # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the observation retrieval times for the day + # associated with this task fall in the time interval spanned by the + # current archive. 
If so, set the flag (do_retrieve) to retrieve the + # files in the current # archive. yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" break fi @@ -275,7 +285,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -289,13 +299,13 @@ arcv_hr = ${arcv_hr}" else print_info_msg " -None of the current day's forecast output times fall in the range spanned -by the current 6-hourly archive file. The bounds of the current archive -are: +None of the current day's observation retrieval times fall in the range +spanned by the current 6-hourly archive file. The bounds of the current +archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" +The observation retrieval times are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index 910cf3c35a..c71266ed07 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -46,86 +46,9 @@ set -u #----------------------------------------------------------------------- # -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} -# -#----------------------------------------------------------------------- -# -# Generate a list of forecast output times for the current day. Note -# that if the 0th hour of the next day (i.e. the day after the one -# associated with this task) is one of the forecast output times, we -# include it in the list for the current day because the accumulation -# associated with that hour occurred during the current day. -# -#----------------------------------------------------------------------- -# - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# ASNOW (accumulated snow) output for the current day. We start constructing -# this by extracting from the full list of all forecast ASNOW output times -# (i.e. from all cycles) all elements that contain the current task's day -# (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for ASNOW, that time is -# considered part of the previous day (because it represents snowfall -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all ASNOW output times, we -# include it in the list for the current day because for ASNOW, that time -# is considered part of the current day (because it represents snowfall -# that occured during the last hour of the current day). -yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast ASNOW output times on the day of the current -# task, exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast ASNOW output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi -# -#----------------------------------------------------------------------- -# -# Generate a list of all the times at which obs are available for the -# current day, possibly including hour 00 of the next day. -# -#----------------------------------------------------------------------- -# - # The time interval (in hours) at which the obs are available on HPSS -# must be evenly divisible into 24. Otherwise, different days would -# have obs available at different hours. Make sure this is the case. +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) if [ ${remainder} -ne 0 ]; then print_err_msg_exit "\ @@ -135,93 +58,36 @@ into 24 but doesn't: mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" fi -# Construct the array of times during the current day (and possibly -# during hour 00 of the next day) at which obs are available on HPSS. -# Each element of this array is of the form "YYYYMMDDHH". 
-num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS)) -obs_avail_times_crnt_day=() -# Note: Start at i=1 because the output for hour 00 of the current day is -# considered part of the previous day (because it represents accumulation -# that occurred during the previous day). -for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do - hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS)) - obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) ) -done -# -#----------------------------------------------------------------------- -# -# Generate a list of all the times at which to retrieve obs. This is -# obtained from the intersection of the list of times at which there is -# forecast output and the list of times at which there are obs available. -# Note that if the forecast output is more frequent than the data is -# available, then the forecast values must be accumulated together to -# get values at the times at which the obs are available. This is done -# in another workflow task using the METplus tool PcpCombine. -# -#----------------------------------------------------------------------- -# -obs_retrieve_times_crnt_day=() -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do - if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then - obs_retrieve_times_crnt_day+=(${yyyymmddhh}) - fi -done +# Accumulation period to use when getting obs files. This is simply (a +# properly formatted version of) the obs availability interval. +accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} # #----------------------------------------------------------------------- # -# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". # #----------------------------------------------------------------------- # array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times=\( \${${array_name}[@]} \) -echo -echo "QQQQQQQQQQQQQQQQQQQ" -#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|" -echo "obs_retrieve_times =" -echo "|${obs_retrieve_times[@]}|" - -# For testing. 
-#obs_retrieve_times+=('abcd') -#obs_retrieve_times[4]='abcd' - -err_msg=" -The two methods of obtaining the array of obs retrieve times don't match: - obs_retrieve_times_crnt_day = - (${obs_retrieve_times_crnt_day[@]}) - obs_retrieve_times = - (${obs_retrieve_times[@]})" - -n1=${#obs_retrieve_times_crnt_day[@]} -n2=${#obs_retrieve_times[@]} -if [ ${n1} -ne ${n2} ]; then - print_err_msg_exit "${err_msg}" -fi - -for (( i=0; i<${n1}; i++ )); do - elem1=${obs_retrieve_times_crnt_day[$i]} - elem2=${obs_retrieve_times[$i]} - if [ ${elem1} != ${elem2} ]; then - print_err_msg_exit "${err_msg}" - fi -done - -obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) - -echo -echo "RRRRRRRRRRRRRRRRR" -#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" -echo "obs_retrieve_times_crnt_day =" -echo "|${obs_retrieve_times_crnt_day[@]}|" - -#exit 1 +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) # #----------------------------------------------------------------------- # # Obs files will be obtained by extracting them from the relevant 24-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the forecast output times include all hours of the +# be the case if the observation retrieval times include all hours of the # task's day and if none of the obs files for this day already exist on # disk. In other cases, the sequence we loop over will be a subset of # "0 24", e.g. just "0" or just "24". @@ -261,7 +127,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -447,12 +313,11 @@ The times at which obs need to be retrieved are: # or otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" - #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" ${mv_or_cp} ${fp_raw} ${fp_proc} fi From 31a529c569825da84278afe240c06407e98f4278 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:50:44 -0600 Subject: [PATCH 078/208] Add file (exregional_run_met_pb2nc_obs.sh) that should have been part of commit hash cae50b5d8, and remove file (exregional_run_met_pcpcombine.sh) that should NOT have been. --- scripts/exregional_run_met_pb2nc_obs.sh | 81 ++++++++++-------------- scripts/exregional_run_met_pcpcombine.sh | 26 +++----- 2 files changed, 41 insertions(+), 66 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 63d530f370..494ce74a3d 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -74,40 +74,22 @@ to convert NDAS prep buffer observation files to NetCDF format. 
# #----------------------------------------------------------------------- # -# +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. # #----------------------------------------------------------------------- # -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} - -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi - -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files." - exit -fi +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) # #----------------------------------------------------------------------- # @@ -159,31 +141,32 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. +# Set the array of lead hours (relative to the date associated with this +# task) for which to run the MET/METplus tool. 
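# As a worked example of what the loop below produces (the retrieval times
# are hypothetical): given obs retrieval times 2024042900, 2024042906,
# 2024042912 and 2024042918, with all four prepbufr files present on disk,
# the hour-of-day of each time is appended and the leading comma stripped,
# yielding
#   LEADHR_LIST="0,6,12,18"
# Any time whose prepbufr file is missing is dropped from the list (subject
# to the num_missing_files_max limit enforced further below).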
# #----------------------------------------------------------------------- # -FHR_LIST="" +LEADHR_LIST="" num_missing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) fn="prepbufr.ndas.${yyyymmddhh}" fp="${OBS_INPUT_DIR}/${fn}" if [[ -f "${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time +(yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " hh_noZero=$((10#${hh})) - #FHR_LIST+=("${yyyymmddhh}") - FHR_LIST="${FHR_LIST},${hh_noZero}" + LEADHR_LIST="${LEADHR_LIST},${hh_noZero}" else num_missing_files=$((num_missing_files+1)) print_info_msg " -${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does -not exist on disk: +${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh) +does not exist on disk: yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}. @@ -202,12 +185,12 @@ than the maximum allowed number (num_missing_files_max): num_missing_files_max = ${num_missing_files_max}" fi -# Remove leading comma from FHR_LIST. -FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" ) +# Remove leading comma from LEADHR_LIST. +LEADHR_LIST=$( echo "${LEADHR_LIST}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours +Final (i.e. after filtering for missing obs files) set of lead hours (saved in a scalar string variable) is: - FHR_LIST = \"${FHR_LIST}\" + LEADHR_LIST = \"${LEADHR_LIST}\" " # #----------------------------------------------------------------------- @@ -242,15 +225,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + LEADHR_LIST = [${LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -314,10 +297,10 @@ settings="\ 'METPLUS_TOOL_NAME': '${METPLUS_TOOL_NAME}' 'metplus_verbosity_level': '${METPLUS_VERBOSITY_LEVEL}' # -# Date and forecast hour information. +# Date and lead hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'leadhr_list': '${LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 9495031722..3d4d0cb9fb 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,13 +190,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) - OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) - OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -215,27 +213,22 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" - subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" - subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi -input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) -vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_hrs_list \ - yyyymmddhh_init="${CDATE}" \ +set_vx_fhr_list \ + cdate="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ - field_is_cumul="TRUE" \ - check_subintvl_files="TRUE" \ - subintvl_accum_hrs="${subintvl_accum_hrs}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ + check_accum_contrib_files="TRUE" \ num_missing_files_max="${num_missing_files_max}" \ - outvarname_hrs_list="FHR_LIST" + outvarname_fhr_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -365,8 +358,7 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'input_accum_hh': '${input_accum_hh}' - 'output_accum_hh': '${ACCUM_HH:-}' + 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From f77d31484dd7eab232250ec8dd7f4b172bb67e60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:56:40 -0600 Subject: [PATCH 079/208] Commit change that should have been part of commit hash fffdbd3. 
--- parm/wflow/verify_pre.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 80b70f68d3..80831f6f29 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -31,10 +31,6 @@ task_get_obs_ccpa: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - OUTPUT_TIMES_ALL: &output_times_all_cumul - '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} - {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} - {%- endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" From 99a87f9442cd3a57a8f92a1d70beb54b8d93b37b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 12:51:30 -0600 Subject: [PATCH 080/208] Refactor the way the vx lead hours (i.e. the hours for which vx will be run) are calculated (and corresponding files checked for), including renaming of the file set_vx_fhr_list.sh to set_leadhrs.sh. --- parm/metplus/EnsembleStat.conf | 2 +- parm/metplus/GenEnsProd.conf | 2 +- parm/metplus/GridStat_ensmean.conf | 2 +- parm/metplus/GridStat_ensprob.conf | 2 +- parm/metplus/GridStat_or_PointStat.conf | 2 +- parm/metplus/PcpCombine.conf | 2 +- parm/metplus/PointStat_ensmean.conf | 2 +- parm/metplus/PointStat_ensprob.conf | 2 +- scripts/exregional_check_post_output.sh | 31 +- ...onal_run_met_genensprod_or_ensemblestat.sh | 49 +-- ...gional_run_met_gridstat_or_pointstat_vx.sh | 53 +-- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 51 +-- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 47 +-- scripts/exregional_run_met_pb2nc_obs.sh | 1 - scripts/exregional_run_met_pcpcombine.sh | 86 +++-- ush/{set_vx_fhr_list.sh => set_leadhrs.sh} | 301 ++++++------------ 16 files changed, 301 insertions(+), 334 deletions(-) rename ush/{set_vx_fhr_list.sh => set_leadhrs.sh} (50%) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 2caeda1521..3759d5d8a1 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 6c47cedb0d..17005ecd1a 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 6bbc20e3f8..0cfaa707bf 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop 
through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index a43b8ed340..6c34eb6ba0 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 7bd0039ab5..865f1c8d14 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 5bdd09c761..de99871bed 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -35,7 +35,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index b16a481dbd..8637a7501d 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 84b9f3954d..885ba121be 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 320311cc94..433aba1e4e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -56,7 +56,7 @@ done # #----------------------------------------------------------------------- # -. $USHdir/set_vx_fhr_list.sh +. 
$USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -104,38 +104,33 @@ user-staged. #----------------------------------------------------------------------- # i="0" -if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then +if [[ $(boolify "${DO_ENSEMBLE}") == "TRUE" ]]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) fi time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # #----------------------------------------------------------------------- # -# Get the list of forecast hours for which there is a post-processed -# output file. Note that: -# -# 1) CDATE (in YYYYMMDDHH format) is already available via the call to -# the job_preamble.sh script in the j-job of this ex-script. -# 2) VAR is set to "APCP" and ACCUM_HH is set to "01" because we assume -# the output files are hourly, so these settings will result in the -# function set_vx_fhr_list checking for existence of hourly post output -# files. +# Check to ensure that all the expected post-processed forecast output +# files are present on disk. This is done by the set_leadhrs function +# below. Note that CDATE (in YYYYMMDDHH format) is already available via +# the call to the job_preamble.sh script in the j-job of this ex-script. # #----------------------------------------------------------------------- # ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="0" \ + lhr_max="${FCST_LEN_HRS}" \ + lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="FHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 583178d3ad..73d98754b4 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. 
$USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -220,23 +220,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + if [ "${MetplusToolName}" = "GenEnsProd" ]; then - set_vx_fhr_list_no_missing \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - outvarname_fhr_list_no_missing="FHR_LIST" + set_leadhrs_no_missing \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ + outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" elif [ "${MetplusToolName}" = "EnsembleStat" ]; then - set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" fi # #----------------------------------------------------------------------- @@ -271,15 +282,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -342,7 +353,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index dd3fcd495b..263d22053f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -95,10 +95,12 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" +# Note that ACCUM_HH will not be defined for the REFC, RETOP, ADPSFC, and +# ADPUPA field groups. 
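# Worked example of the OBTYPE case block that now sets the verification
# cadence in these vx ex-scripts (one instance appears a bit further below;
# the numbers here are illustrative).  Assuming ACCUM_HH=06, FCST_LEN_HRS=24
# and FCST_OUTPUT_INTVL_HRS=1:
#   CCPA/NOHRSC (accumulated fields):  vx_intvl=6, vx_hr_start=6
#                                      -> candidate lead hours 6, 12, 18, 24
#   REFC/RETOP/ADPSFC/ADPUPA:          vx_intvl=1, vx_hr_start=0
#                                      -> candidate lead hours 0, 1, ..., 24
# i.e. an accumulated field is first verified only once a full accumulation
# period is available, while instantaneous fields start at lead hour 0.  Hours
# whose obs files turn out to be missing are then filtered out by set_leadhrs.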
set_vx_params \ obtype="${OBTYPE}" \ field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ @@ -173,8 +175,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; @@ -209,23 +211,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -259,15 +272,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +343,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5fdafb20d1..5ad0560f28 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" @@ -157,23 +157,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -216,15 +227,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -287,7 +298,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 201e67ccf1..9a8c35d1cb 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -156,23 +156,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -215,15 +226,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -286,7 +297,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 494ce74a3d..fbf3ec1689 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -21,7 +21,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. 
$USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..97d156aa62 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -190,45 +190,76 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data (if combining -# observed APCP) or forecast data (if combining forecast APCP). +# Set the array of lead hours for which to run the MET/METplus tool. +# +#----------------------------------------------------------------------- +# +vx_intvl="$((10#${ACCUM_HH}))" +set_leadhrs_no_missing \ + lhr_min="${vx_intvl}" \ + lhr_max="${FCST_LEN_HRS}" \ + lhr_intvl="${vx_intvl}" \ + outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" +# +#----------------------------------------------------------------------- +# +# Check for the presence of files (either from observations or forecasts) +# needed to create required accumulation given by ACCUM_HH. # #----------------------------------------------------------------------- # if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl="${OBS_AVAIL_INTVL_HRS}" fi +num_missing_files_max="0" +input_accum_hh=$(printf "%02d" ${subintvl}) +# +# Convert the list of hours at which the PcpCombine tool will be run to +# an array. This represents the hours at which each accumulation period +# ends. Then use it to check the presence of all files requied to build +# the required accumulations from the sub-accumulations. +# +subintvl_end_hrs=($( echo ${VX_LEADHR_LIST} | $SED "s/,//g" )) +for hr_end in ${subintvl_end_hrs[@]}; do + hr_start=$((hr_end - vx_intvl + subintvl)) + print_info_msg " +Checking for the presence of files that will contribute to the ${vx_intvl}-hour +accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... 
+" + set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${hr_start}" \ + lhr_max="${hr_end}" \ + lhr_intvl="${subintvl}" \ + base_dir="${base_dir}" \ + fn_template="${fn_template}" \ + num_missing_files_max="${num_missing_files_max}" \ + outvarname_lhrs_list="tmp" +done -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" +print_info_msg " +${MetplusToolName} will be run for the following lead hours (relative to ${CDATE}): + VX_LEADHR_LIST = ${VX_LEADHR_LIST} +" # #----------------------------------------------------------------------- # @@ -262,15 +293,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +361,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -358,7 +389,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' diff --git a/ush/set_vx_fhr_list.sh b/ush/set_leadhrs.sh similarity index 50% rename from ush/set_vx_fhr_list.sh rename to ush/set_leadhrs.sh index 8101e927e5..aa3b4b338f 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_leadhrs.sh @@ -1,20 +1,21 @@ # #----------------------------------------------------------------------- # -# This file defines functions used to generate sets of forecast hours for +# This file defines functions used to generate sets of lead hours for # which verification will be performed. # #----------------------------------------------------------------------- # -function set_vx_fhr_list_no_missing() { +function set_leadhrs_no_missing() { # #----------------------------------------------------------------------- # -# This function sets the forecast hours for which verification will be -# performed under the assumption that that the data file (which may be -# a forecast output file or an observation file) for each hour is available -# (i.e. that there are no missing files). +# This function sets the lead hours (relative to some unspecified initial/ +# reference time) for which verification will be performed under the +# assumption that the data file (which may be a forecast output file or +# an observation file) for each hour is available (i.e. it assumes that +# there are no missing files). 
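# A minimal usage sketch for this function (the argument names are the real
# ones defined below; the numeric values are illustrative).  The caller names
# the output variable, which the function fills with a comma-separated string
# via printf -v:
#
#   set_leadhrs_no_missing \
#     lhr_min="6" \
#     lhr_max="24" \
#     lhr_intvl="6" \
#     outvarname_lhrs_list_no_missing="VX_LEADHR_LIST"
#   echo "${VX_LEADHR_LIST}"    # -> 6, 12, 18, 24
#
# In the vx ex-scripts above, that string is what ultimately reaches the
# METplus configuration files as LEAD_SEQ (via the vx_leadhr_list setting).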
# #----------------------------------------------------------------------- # @@ -58,10 +59,10 @@ function set_vx_fhr_list_no_missing() { #----------------------------------------------------------------------- # local valid_args=( \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ - "outvarname_fhr_list_no_missing" \ + "lhr_min" \ + "lhr_max" \ + "lhr_intvl" \ + "outvarname_lhrs_list_no_missing" \ ) process_args valid_args "$@" # @@ -81,69 +82,21 @@ function set_vx_fhr_list_no_missing() { # #----------------------------------------------------------------------- # - local fhr_array \ - fhr_list \ - fhr_int \ - fhr_min \ - fhr_max -# -#----------------------------------------------------------------------- -# -# Create the array of forecast hours. -# -#----------------------------------------------------------------------- -# - case "${field}" in - "APCP") - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - ;; - "ASNOW") - if [ "${accum_hh}" = "24" ]; then - fhr_min="24" - fhr_int="12" - else - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - fi - ;; - "REFC") - fhr_min="00" - fhr_int="01" - ;; - "RETOP") - fhr_min="00" - fhr_int="01" - ;; - "ADPSFC") - fhr_min="00" - fhr_int="01" - ;; - "ADPUPA") - fhr_min="00" - fhr_int="06" - ;; - *) - print_err_msg_exit "\ -A method for setting verification parameters has not been specified for -this field (field): - field = \"${field}\"" - ;; - esac - fhr_max="${fcst_len_hrs}" - - fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) - - # Express the forecast hour array as a (scalar) string containing a comma - # (and space) separated list of the elements of fhr_array. - fhr_list=$( printf "%s, " "${fhr_array[@]}" ) - fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + local lhrs_array \ + lhrs_list +# +#----------------------------------------------------------------------- +# +# Create the array of lead hours. +# +#----------------------------------------------------------------------- +# + lhrs_array=($( seq ${lhr_min} ${lhr_intvl} ${lhr_max} )) - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" -" + # Express the array of lead hours as a (scalar) string containing a comma + # (and space) separated list of the elements of lhrs_array. + lhrs_list=$( printf "%s, " "${lhrs_array[@]}" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/, $//g" ) # #----------------------------------------------------------------------- # @@ -151,8 +104,8 @@ Initial (i.e. before filtering for missing files) set of forecast hours # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then - printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list_no_missing}" ]; then + printf -v ${outvarname_lhrs_list_no_missing} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- @@ -166,22 +119,18 @@ Initial (i.e. before filtering for missing files) set of forecast hours } - # #----------------------------------------------------------------------- # -# This function generates a list of forecast hours such that for each -# such hour, there exists a corresponding data file with a name of the -# form specified by the template fn_template. Depending on fn_template, -# this file may contain forecast or observation data. 
This function -# generates this forecast hour list by first generating a set of hours -# under the assumption that there is a corresponding data file for each -# hour and then removing from that list any hour for which there is no -# data file. +# This function generates a list of lead hours (relative to an initial or +# reference time yyyymmddhh_init) such that for each such hour, there +# exists a corresponding data file with a name of the form specified by +# the template fn_template. Depending on fn_template, this file may +# contain forecast or observation data. # #----------------------------------------------------------------------- # -function set_vx_fhr_list() { +function set_leadhrs() { # #----------------------------------------------------------------------- # @@ -221,15 +170,14 @@ function set_vx_fhr_list() { #----------------------------------------------------------------------- # local valid_args=( \ - "cdate" \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ + "yyyymmddhh_init" \ + "lhr_min" \ + "lhr_max" \ + "lhr_intvl" \ "base_dir" \ "fn_template" \ - "check_accum_contrib_files" \ "num_missing_files_max" \ - "outvarname_fhr_list" \ + "outvarname_lhrs_list" \ ) process_args valid_args "$@" # @@ -251,155 +199,102 @@ function set_vx_fhr_list() { # local crnt_tmpl \ crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_list \ fn \ fp \ i \ - num_fcst_hrs \ + lhr \ + lhrs_array \ + lhrs_list \ + num_hrs \ num_missing_files \ - regex_search_tmpl \ remainder \ - skip_this_fhr + skip_this_hour # #----------------------------------------------------------------------- # -# For the specified field, generate the set of forecast hours at which +# For the specified field, generate the set of lead hours at which # verification will be performed under the assumption that for each such -# hour, the corresponding forecast and/or observation files exists. Thus, -# this set of forecast hours is an initial guess for the hours at which -# vx will be performed. +# hour, the corresponding or observation file exists. Thus, this set is +# an initial guess for the lead hours at which vx will be performed. # #----------------------------------------------------------------------- # - set_vx_fhr_list_no_missing \ - fcst_len_hrs="${fcst_len_hrs}" \ - field="${field}" \ - accum_hh="${accum_hh}" \ - outvarname_fhr_list_no_missing="fhr_list_no_missing" + set_leadhrs_no_missing \ + lhr_min="${lhr_min}" \ + lhr_max="${lhr_max}" \ + lhr_intvl="${lhr_intvl}" \ + outvarname_lhrs_list_no_missing="lhrs_list_no_missing" - # For convenience, save the scalar variable fhr_list_no_missing to a bash - # array. - fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" ) - fhr_array=( ${fhr_array} ) + # For convenience, save the scalar variable lhrs_list_no_missing to a + # bash array. + lhrs_array=($( printf "%s" "${lhrs_list_no_missing}" | $SED "s/,//g" )) print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -is: - fhr_array = ( $( printf "\"%s\" " "${fhr_array[@]}" )) +Initial (i.e. before filtering for missing files) set of lead hours +(relative to ${yyyymmddhh_init}) is: + lhrs_array = ( $( printf "\"%s\" " "${lhrs_array[@]}" )) " # #----------------------------------------------------------------------- # -# Loop through all forecast hours. For each one for which a corresponding -# file exists, add the forecast hour to fhr_list. fhr_list will be a -# scalar containing a comma-separated list of forecast hours for which -# corresponding files exist. 
Also, use the variable num_missing_files -# to keep track of the number of files that are missing. +# Loop through the array of lead hours generated above and construct the +# variable lhrs_list that will be scalar (string) containing a comma- +# separated list of hours for which corresponding forecast or observation +# files have been confirmed to exist. Also, use the variable +# num_missing_files to keep track of the number of files that are missing. # #----------------------------------------------------------------------- # - fhr_list="" + lhrs_list="" num_missing_files="0" - num_fcst_hrs=${#fhr_array[@]} - for (( i=0; i<${num_fcst_hrs}; i++ )); do - - fhr_orig="${fhr_array[$i]}" + num_hrs=${#lhrs_array[@]} + for (( i=0; i<${num_hrs}; i++ )); do - if [ "${check_accum_contrib_files}" = "TRUE" ]; then - fhr=$(( ${fhr_orig} - ${accum_hh} + 1 )) - num_back_hrs=${accum_hh} - else - fhr=${fhr_orig} - num_back_hrs=1 - fi - - skip_this_fhr="FALSE" - for (( j=0; j<${num_back_hrs}; j++ )); do -# -# Use the provided template to set the name of/relative path to the file -# Note that the while-loop below is over all METplus time string templates -# of the form {...} in the template fn_template; it continues until all -# such templates have been evaluated to actual time strings. -# - fn="${fn_template}" - regex_search_tmpl="(.*)(\{.*\})(.*)" - crnt_tmpl=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - while [ ! -z "${crnt_tmpl}" ]; do - - eval_METplus_timestr_tmpl \ - init_time="$cdate" \ - fhr="$fhr" \ - METplus_timestr_tmpl="${crnt_tmpl}" \ - outvarname_formatted_time="actual_value" -# -# Replace METplus time templates in fn with actual times. Note that -# when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. -# - crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - fn=$( echo "${fn}" | \ - $SED -n -r "s|(.*)(${crnt_tmpl_esc})(.*)|\1${actual_value}\3|p" ) -# -# Set up values for the next iteration of the while-loop. -# - crnt_tmpl=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done + lhr="${lhrs_array[$i]}" + skip_this_hour="FALSE" +# +# Evaluate the METplus file name template containing METplus timestrings +# for the specified yyyymmddhh_init and current hour (lhr) to obtain the +# name of the current file (including possibly a relative directory). +# + eval_METplus_timestr_tmpl \ + init_time="${yyyymmddhh_init}" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${fn_template}" \ + outvarname_evaluated_timestr="fn" # # Get the full path to the file and check if it exists. 
# - fp="${base_dir}/${fn}" - - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate): - fhr = \"$fhr\" - cdate = \"$cdate\" + fp="${base_dir}/${fn}" + if [ -f "${fp}" ]; then + print_info_msg "\ +Found file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}): fp = \"${fp}\" " - else - skip_this_fhr="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate) is missing: - fhr = \"$fhr\" - cdate = \"$cdate\" + else + skip_this_hour="TRUE" + num_missing_files=$(( ${num_missing_files} + 1 )) + print_info_msg "\ +The file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}) is MISSING: fp = \"${fp}\" -Excluding the current forecast hour from the list of hours passed to the -METplus configuration file. +Excluding this hour from the list of lead hours to return. " - break - fi - - fhr=$(( $fhr + 1 )) - - done + break + fi - if [ "${skip_this_fhr}" != "TRUE" ]; then - fhr_list="${fhr_list},${fhr_orig}" + if [[ ! $(boolify "${skip_this_hour}") == "TRUE" ]]; then + lhrs_list="${lhrs_list},${lhr}" fi done # -# Remove leading comma from fhr_list. +# Remove leading comma from lhrs_list. # - fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" +Final (i.e. after filtering for missing files) set of lead hours relative +to ${yyyymmddhh_init} (saved in a scalar string variable) is: + lhrs_list = \"${lhrs_list}\" " # #----------------------------------------------------------------------- @@ -424,8 +319,8 @@ maximum allowed number (num_missing_files_max): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list}" ]; then - printf -v ${outvarname_fhr_list} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list}" ]; then + printf -v ${outvarname_lhrs_list} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- From da81dbb0680c7973a662a8dc51c520fbd182dfb0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 14:46:19 -0600 Subject: [PATCH 081/208] Modify variable names for clarity. --- ush/set_cycle_and_obs_timeinfo.py | 64 +++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 36635b643e..9f9fbe9820 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -34,7 +34,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += cycl_intvl + cdate += cycl_intvl return all_cdates @@ -69,7 +69,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( Time interval between forecast output times; a timedelta object. Returns: - output_times_all_cycles_inst: + fcst_output_times_all_cycles_inst: List of forecast output times over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -78,7 +78,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( perform verification) over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDD'. 
- output_times_all_cycles_cumul: + fcst_output_times_all_cycles_cumul: List of forecast output times over all cycles of cumulative fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -98,34 +98,34 @@ def set_fcst_output_times_and_obs_days_all_cycles( cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] # Get the number of forecast output times per cycle/forecast. - num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) # Initialize sets that will contain the various forecast output and obs # day information. - output_times_all_cycles_inst = set() + fcst_output_times_all_cycles_inst = set() obs_days_all_cycles_inst = set() - output_times_all_cycles_cumul = set() + fcst_output_times_all_cycles_cumul = set() obs_days_all_cycles_cumul = set() for i, start_time_crnt_cycle in enumerate(cycle_start_times): # Create a list of forecast output times of instantaneous fields for the # current cycle. - output_times_crnt_cycle_inst \ + fcst_output_times_crnt_cycle_inst \ = [start_time_crnt_cycle + i*fcst_output_intvl - for i in range(0,num_output_times_per_cycle)] - # Include the output times of instantaneous fields for the current cycle + for i in range(0,num_fcst_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle # in the set of all such output times over all cycles. - output_times_all_cycles_inst \ - = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + fcst_output_times_all_cycles_inst \ + = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst) # Create a list of instantaneous field obs days (i.e. days on which # observations of instantaneous fields are needed for verification) for # the current cycle. We do this by dropping the hour-of-day from each # element of the list of forecast output times and keeping only unique # elements. - tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + tmp = [datetime_obj.date() for datetime_obj in fcst_output_times_crnt_cycle_inst] obs_days_crnt_cycl_inst = sorted(set(tmp)) - # Include the obs days for instantaneous fields for the current cycle + # Include the obs days for instantaneous fields for the current cycle # in the set of all such obs days over all cycles. obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) @@ -133,12 +133,12 @@ def set_fcst_output_times_and_obs_days_all_cycles( # current cycle. This is simply the list of forecast output times for # instantaneous fields but with the first time dropped (because nothing # has yet accumulated at the starting time of the cycle). - output_times_crnt_cycle_cumul = output_times_crnt_cycle_inst - output_times_crnt_cycle_cumul.pop(0) + fcst_output_times_crnt_cycle_cumul = fcst_output_times_crnt_cycle_inst + fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - output_times_all_cycles_cumul \ - = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles_cumul \ + = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. 
days on which # observations of cumulative fields are needed for verification) for @@ -150,8 +150,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( # the scripts/tasks that get observations of cumulative fields, the # zeroth hour of a day is considered part of the previous day (because # it represents accumulation that occurred on the previous day). - tmp = output_times_crnt_cycle_cumul - last_output_time_cumul = output_times_crnt_cycle_cumul[-1] + tmp = fcst_output_times_crnt_cycle_cumul + last_output_time_cumul = fcst_output_times_crnt_cycle_cumul[-1] if last_output_time_cumul.hour == 0: tmp.pop() tmp = [datetime_obj.date() for datetime_obj in tmp] @@ -162,9 +162,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - output_times_all_cycles_inst = sorted(output_times_all_cycles_inst) - output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_inst))] + fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) + fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_inst))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. @@ -174,9 +174,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. - output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul) - output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_cumul))] + fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) + fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_cumul))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. @@ -184,8 +184,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") for i in range(len(obs_days_all_cycles_cumul))] - return output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul def set_cycledefs_for_obs_days(obs_days_all_cycles): @@ -195,17 +195,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): list of days must be increasing in time, but the days do not have to be consecutive, i.e. there may be gaps between days that are greater than one day. - + Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO - XML). Thus, when the cycledef strings in the output string are all + XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will represent all the days on which observations are needed. Args: obs_days_all_cycles: A list of strings of the form 'YYYYMMDD', with each string representing - a day on which observations are needed. 
Note that the list must be + a day on which observations are needed. Note that the list must be sorted, i.e. the days must be increasing in time, but there may be gaps between days. @@ -218,7 +218,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): where {yyyymmdd_start} is the starting day of the first cycle in the cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note - that the minutes and hours in these cycledef stirngs are always set to + that the minutes and hours in these cycledef stirngs are always set to '00'). Thus, one of the elements of the output list may be as follows: '202404290000 202405010000 24:00:00' @@ -229,7 +229,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # list of datetime objects. tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles] - # Initialize the variable that in the loop below contains the date of + # Initialize the variable that in the loop below contains the date of # the previous day. This is just the first element of the list of # datetime objects constructed above. Then use it to initialize the # list (contin_obs_day_lists) that will contain lists of consecutive From fb3e7f42fa992c47618bf6fc9865d90b1827762d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 16:36:15 -0600 Subject: [PATCH 082/208] Check for the form of accum_hh (accumulation) only if it's going to be used. --- ush/set_vx_params.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 9b67e36d22..267cd6902f 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -3,8 +3,9 @@ # # This file defines a function that sets various parameters needed when # performing verification. The way these parameters are set depends on -# the field being verified and, if the field is accumulated precipitation, -# the accumulation period (both of which are inputs to this function). +# the field being verified and, if the field is cumulative (e.g. +# accumulated precipitation or snowfall), the accumulation period +# (both of which are inputs to this function). # # As of 20220928, the verification tasks in the SRW App workflow use the # MET/METplus software (MET = Model Evaluation Tools) developed at the @@ -91,10 +92,14 @@ function set_vx_params() { # #----------------------------------------------------------------------- # - if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then - print_err_msg_exit "\ -The accumulation (accum_hh) must be a 2-digit integer: + if [ "${obtype}" = "CCPA" ] || [ "${obtype}" = "NOHRSC" ]; then + if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then + print_err_msg_exit "\ +For the given observation type (obtype), the accumulation (accum_hh) must +be a 2-digit integer: + obtype = \"${obtype}\" accum_hh = \"${accum_hh}\"" + fi fi # #----------------------------------------------------------------------- From 8f043fc749421191f029f95a284c9d32a156e9c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 16:37:46 -0600 Subject: [PATCH 083/208] Changes that allow checks on the verification parameters and generation of new vx config parameters to allow the workflow to have obs-day-based as well as cycle-based tasks. 
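For reference, a minimal standalone sketch of the day-grouping logic that the new set_rocoto_cycledefs_for_obs_days() function below implements: runs of consecutive obs days are collapsed into ROCOTO cycledef strings of the form '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00', and a new cycledef is started whenever days are not consecutive. This is a simplified illustration rather than the committed function; the helper name group_obs_days_into_cycledefs and the example dates are made up.

from datetime import datetime, timedelta

def group_obs_days_into_cycledefs(obs_days):
    """Collapse a sorted list of 'YYYYMMDD' obs days into one ROCOTO cycledef
    string per run of consecutive days (illustrative sketch only)."""
    days = [datetime.strptime(d, "%Y%m%d") for d in obs_days]
    one_day = timedelta(days=1)
    runs = [[days[0]]]
    for day in days[1:]:
        if day == runs[-1][-1] + one_day:
            runs[-1].append(day)    # still consecutive: extend the current run
        else:
            runs.append([day])      # days not consecutive: start a new run
    return [f"{run[0]:%Y%m%d%H%M} {run[-1]:%Y%m%d%H%M} 24:00:00" for run in runs]

# Example with a gap between obs days (dates are illustrative):
print(group_obs_days_into_cycledefs(["20240429", "20240430", "20240502"]))
# ['202404290000 202404300000 24:00:00', '202405020000 202405020000 24:00:00']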
--- ush/set_cycle_and_obs_timeinfo.py | 644 +++++++++++++++++++++++++----- ush/setup.py | 207 ++++++---- 2 files changed, 673 insertions(+), 178 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9f9fbe9820..8f45e60f2a 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -2,14 +2,20 @@ from datetime import datetime, timedelta, date from pprint import pprint +from textwrap import dedent from python_utils import print_input_args, print_err_msg_exit +import logging -def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): - """This file defines a function that, given the start and end dates - as date time objects, and a cycling frequency, returns an array of - cycle date-hours whose elements have the form YYYYMMDDHH. Here, - YYYY is a four-digit year, MM is a two- digit month, DD is a - two-digit day of the month, and HH is a two-digit hour of the day. + +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='string'): + """ + This file defines a function that returns a list containing the starting + times of all the cycles in the experiment. + + If return_type is set to "string" (the default value), the returned list + contains strings in the format 'YYYYMMDDHH'. If it is set to "datetime", + the returned list contains a set of datetime objects. Args: start_time_first_cycl: @@ -21,38 +27,85 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): cycl_intvl: Time interval between cycle starting times; a timedelta object. + return_type: + String that specifies the type of the returned list. + Returns: - A list of strings containing cycle starting times in the format - 'YYYYMMDDHH' + all_cdates: + Either a list of strings in the format 'YYYYMMDDHH' or a list of datetime + objects containing the cycle starting times. """ print_input_args(locals()) + + valid_values = ['string', 'datetime'] + if return_type not in valid_values: + raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") + msg = dedent(f""" + Invalid value for optional argument "return_type": + return_type = {return_type} + Valid values are: + valid_values = {valid_values} + """) + raise Exception(msg) # iterate over cycles all_cdates = [] cdate = start_time_first_cycl while cdate <= start_time_last_cycl: - cyc = datetime.strftime(cdate, "%Y%m%d%H") - all_cdates.append(cyc) + all_cdates.append(cdate) cdate += cycl_intvl + + if return_type == "string": + all_cdates = [datetime.strftime(cdate, "%Y%m%d%H") for cdate in all_cdates] + return all_cdates -def set_fcst_output_times_and_obs_days_all_cycles( - start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl): - """Given the starting time of the first cycle of an SRW App experiment, the - starting time of the last cycle, the interval between cycle start times, - the forecast length, and the forecast output interval, this function - returns two pairs of lists: the first of each pair is a list of strings - of forecast output times over all cycles (each element of the form - 'YYYYMMDDHH'), and the second is a list of days over all cycles on which - observations are needed to perform verification (each element of the form - 'YYYYMMDD'). The first pair of lists is for instantaneous output fields - (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. - APCP or accumulated precipitation). The accumulation period for the latter - is the forecast output interval. 
+def check_temporal_consistency_cumul_fields( + vx_config, + start_time_first_cycl, start_time_last_cycl, cycl_intvl, + fcst_len, fcst_output_intvl): + """ + This function reads in a subset of the parameters in the verification + configuration dictionary and ensures that certain temporal constraints on + these parameters are satisfied. It then returns an updated version of + the verification configuration dictionary that satisfies these constranints. + + The constraints are on the accumulation intervals associated with the + cumulative forecast fields and corresponding observation type pairs that + are to be verified. The constraints on each such accumulation interval + are as follows: + + 1) The accumulation interval is less than or equal to the forecast length + (since otherwise, the forecast field cannot be accumulated over that + interval). + + 2) The obs availability interval evenly divides the accumulation interval. + This ensures that the obs can be added together to obtain accumulated + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added + to obtain 24-hour observed snowfall accumulations. + + 3) The forecast output interval evenly divides the accumulation interval. + This ensures that the forecast output can be added together to obtain + accumulated values of the forecast field, e.g. if the forecast output + interval is 3 hours, the resulting 3-hourly APCP outputs from the + forecast can be added to obtain 6-hourly forecast APCP. + + 4) The hour-of-day at which the accumulated forecast values will be + available are a subset of the ones at which the accumulated obs + values are available. This ensures that the accumulated fields + from the obs and forecast are valid at the same times and thus can + be compared in the verification. + + If for a given field-accumulation combination any of these constraints + is violated, that accumulation is removed from the list of accumulations + to verify for that field. Args: + vx_config: + The verification configuration dictionary. + start_time_first_cycl: Starting time of first cycle; a datetime object. @@ -69,43 +122,295 @@ def set_fcst_output_times_and_obs_days_all_cycles( Time interval between forecast output times; a timedelta object. Returns: - fcst_output_times_all_cycles_inst: - List of forecast output times over all cycles of instantaneous fields. - Each element is a string of the form 'YYYYMMDDHH'. + vx_config: + An updated version of the verification configuration dictionary. + + fcst_obs_matched_times_all_cycles_cumul: + Dictionary containing the times (in YYYYMMDDHH string format) at + which various field/accumlation combinations are output and at + which the corresponding obs type is also available. + """ + # Set dictionary containing all cumulative fields (i.e. whether or not + # they are to be verified). The keys are the observation types and the + # values are the field names in the forecasts. + vx_cumul_fields_all = {"CCPA": "APCP", "NOHRSC": "ASNOW"} + + # Convert from datetime.timedelta objects to integers. + one_hour = timedelta(hours=1) + fcst_len_hrs = int(fcst_len/one_hour) + fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) + + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. 
+ cycle_start_times \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='datetime') + + # Initialize one of the variables that will be returned to an empty + # dictionary. + fcst_obs_matched_times_all_cycles_cumul = dict() + + for obtype, field_fcst in vx_cumul_fields_all.items(): + + # If the current cumulative field is not in the list of fields to be + # verified, just skip to the next field. + if field_fcst not in vx_config["VX_FIELDS"]: + continue + + # Initialize a sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul.update({field_fcst: {}}) + + # + # Get the availability interval of the current observation type from the + # verification configuration dictionary and use it to calculate the hours- + # of-day at which the obs will be available. + # + # Get the obs availability interval. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + # Ensure that the obs availability interval evenly divides into 24. + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + raise Exception(msg) + # Assume that the obs are available at hour 0 of the day regardless + # of obs type. + obs_avail_hr_start = 0 + obs_avail_hr_end = obs_avail_hr_start + 24 + # Construct list of obs availability hours-of-day. + obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] + obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] + # + # Get the array of accumulation intervals for the current cumulative field. + # Then loop over them to ensure that the constraints listed above are + # satisfied. If for a given accumulation one or more of the constraints + # is not satisfied, remove that accumulation from the list of accumulations + # for the current field. + # + accum_intvls_array_name = "".join(["VX_", field_fcst, "_ACCUMS_HRS"]) + accum_intvls_hrs = vx_config[accum_intvls_array_name] + # + # Loop through the accumulation intervals and check the temporal constraints + # listed above. + # + for accum_hrs in accum_intvls_hrs.copy(): + + accum_hh = f"{accum_hrs:02d}" + # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] + # + # Check that accumulation inervals are shorter than the forecast length. + # + if accum_hrs > fcst_len_hrs: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) and corresponding observation type (obtype) is greater + than the forecast length (fcst_len_hrs): + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} + fcst_len_hrs = {fcst_len_hrs} + Thus, this forecast field cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Check that accumulation inervals are evenly divisible by the observation + # availability interval. 
+ # + if accum_hrs in accum_intvls_hrs: + rem_obs = accum_hrs % obs_avail_intvl_hrs + if rem_obs != 0: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) and corresponding observation type (obtype) is not + evenly divisible by the observation type's availability interval + (obs_avail_intvl_hrs): + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + accum_hrs % obs_avail_intvl_hrs = {rem_obs} + Thus, this observation type cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Check that accumulation inervals are evenly divisible by the forecast + # output interval. + # + if accum_hrs in accum_intvls_hrs: + rem_fcst = accum_hrs % fcst_output_intvl_hrs + if rem_fcst != 0: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) and corresponding observation type (obtype) is not + evenly divisible by the forecast output interval (fcst_output_intvl): + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} hr + fcst_output_intvl_hrs = {forecast_output_intvl} hr + accum_hrs % fcst_output_intvl_hrs = {rem_fcst} + Thus, this forecast field cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Check that the hours-of-day at which the current cumulative field will + # be output are a subset of the hours-of-day at which the corresponding + # obs type is output. + # + if accum_hrs in accum_intvls_hrs: + + # Initialize sets that will contain the forecast output times of the + # current cumulative field over all cycles. + fcst_output_times_all_cycles = set() + + # Calculate the forecast output times of the current cumulative field + # for the current cycle and include them in the the set of such times + # over all cycles. + accum = timedelta(hours=accum_hrs) + num_fcst_output_times_per_cycle = int(fcst_len/accum) + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + fcst_output_times_crnt_cycle \ + = [start_time_crnt_cycle + (i+1)*accum + for i in range(0, num_fcst_output_times_per_cycle)] + fcst_output_times_all_cycles \ + = fcst_output_times_all_cycles | set(fcst_output_times_crnt_cycle) + + # Get all the hours-of-day at which the current cumulative field will be + # output by the forecast. + fcst_output_times_all_cycles = sorted(fcst_output_times_all_cycles) + fcst_output_times_all_cycles_str \ + = [datetime.strftime(dt_object, "%Y%m%d%H") + for dt_object in fcst_output_times_all_cycles] + fcst_output_hrs_of_day_str = [yyyymmddhh[8:10] for yyyymmddhh in fcst_output_times_all_cycles_str] + fcst_output_hrs_of_day_str.sort() + + # Check that all the forecast output hours-of-day are a subset of the obs + # availability hours-of-day. If not, remove the current accumulation + # interval from the list of intervals to verify. 
+ if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str): + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative forecast + field (field_fcst) is such that the forecast will output the field on at + least one of hour-of-day on which the corresponding observation type is + not available: + field_fcst = {field_fcst} + obtype = {obtype} + accum_hrs = {accum_hrs} hr + The forecast output hours-of-day for this field/accumulation interval + combination are: + fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str} + The hours-of-day at which the obs are available are: + obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str} + Thus, at least some of the forecast output cannot be verified. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + else: + fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = fcst_output_times_all_cycles_str + # + # Update the value in the experiment configuration dictionary of the list + # of accumulation intervals to verify for this cumulative field (since + # some accumulation intervals may have been removed after the checks above). + # + vx_config[accum_intvls_array_name] = accum_intvls_hrs + # + # If the updated list of accumulations for the current cumulative field + # is empty, remove the field from the list of fields to verify in the + # verification configuration dictionary. + # + if not accum_intvls_hrs: + vx_config["VX_FIELDS"].remove(field_fcst) + msg = dedent(f""" + The list of accumulation intervals (accum_intvls_hrs) for the current + cumulative field to verify (field_fcst) is empty: + field_fcst = {field_fcst} + accum_intvls_hrs = {accum_intvls_hrs} + Removing this field from the list of fields to verify. The updated list + is: + {vx_config["VX_FIELDS"]} + """) + logging.info(msg) + + return vx_config, fcst_obs_matched_times_all_cycles_cumul + + +def set_fcst_output_times_and_obs_days_all_cycles( + start_time_first_cycl, start_time_last_cycl, cycl_intvl, + fcst_len, fcst_output_intvl): + """ + This function returns forecast output times and observation days (i.e. + days on which obs are needed because there is forecast output on those + days) for both instantaneous (e.g. REFC, RETOP, T2m) and cumulative (e.g. + APCP) fields that need to be verified. Note that for cumulative fields, + the only accumulation interval considered is the forecast output interval. + Accumulation intervals larger than this are considered elsewhere (and + accumulation interval smaller than this are obviously not allowed). + + Args: + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. - obs_days_all_cycles_inst: - List of observation days (i.e. days on which observations are needed to - perform verification) over all cycles of instantaneous fields. Each - element is a string of the form 'YYYYMMDD'. + fcst_len: + The length of each forecast; a timedelta object. - fcst_output_times_all_cycles_cumul: - List of forecast output times over all cycles of cumulative fields. Each - element is a string of the form 'YYYYMMDDHH'. + fcst_output_intvl: + Time interval between forecast output times; a timedelta object. - obs_days_all_cycles_cumul: - List of observation days (i.e. 
days on which observations are needed to - perform verification) over all cycles of cumulative fields. Each element - is a string of the form 'YYYYMMDD'. + Returns: + fcst_output_times_all_cycles: + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + obs_days_all_cycles: + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. """ # Get the list containing the starting times of the cycles. Each element - # of the list is a string of the form 'YYYYMMDDHH'. - cycle_start_times_str \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl) - - # Convert cycle_start_times_str to a list of datetime objects. - cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] + # of the list will be a datetime object. + cycle_start_times \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='datetime') # Get the number of forecast output times per cycle/forecast. num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) - # Initialize sets that will contain the various forecast output and obs - # day information. - fcst_output_times_all_cycles_inst = set() - obs_days_all_cycles_inst = set() - fcst_output_times_all_cycles_cumul = set() - obs_days_all_cycles_cumul = set() + # Initialize dictionaries that will contain the various forecast output + # time and obs day information. Note that we initialize the contents of + # these dictionaries as sets because that better suites the data manipulation + # we will need to do, but these sets will later be converted to lists. + fcst_output_times_all_cycles = dict() + fcst_output_times_all_cycles['inst'] = set() + fcst_output_times_all_cycles['cumul'] = set() + obs_days_all_cycles = dict() + obs_days_all_cycles['inst'] = set() + obs_days_all_cycles['cumul'] = set() for i, start_time_crnt_cycle in enumerate(cycle_start_times): # Create a list of forecast output times of instantaneous fields for the @@ -115,8 +420,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( for i in range(0,num_fcst_output_times_per_cycle)] # Include the output times of instantaneous fields for the current cycle # in the set of all such output times over all cycles. - fcst_output_times_all_cycles_inst \ - = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst) + fcst_output_times_all_cycles['inst'] \ + = fcst_output_times_all_cycles['inst'] | set(fcst_output_times_crnt_cycle_inst) # Create a list of instantaneous field obs days (i.e. days on which # observations of instantaneous fields are needed for verification) for @@ -127,7 +432,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_crnt_cycl_inst = sorted(set(tmp)) # Include the obs days for instantaneous fields for the current cycle # in the set of all such obs days over all cycles. - obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) + obs_days_all_cycles['inst'] = obs_days_all_cycles['inst'] | set(obs_days_crnt_cycl_inst) # Create a list of forecast output times of cumulative fields for the # current cycle. 
This is simply the list of forecast output times for @@ -137,8 +442,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - fcst_output_times_all_cycles_cumul \ - = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles['cumul'] \ + = fcst_output_times_all_cycles['cumul'] | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. days on which # observations of cumulative fields are needed for verification) for @@ -158,49 +463,57 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_crnt_cycl_cumul = sorted(set(tmp)) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul) + obs_days_all_cycles['cumul'] = obs_days_all_cycles['cumul'] | set(obs_days_crnt_cycl_cumul) # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) - fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_inst))] + fcst_output_times_all_cycles['inst'] = sorted(fcst_output_times_all_cycles['inst']) + fcst_output_times_all_cycles['inst'] \ + = [datetime.strftime(fcst_output_times_all_cycles['inst'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['inst']))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. - obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst) - obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_inst))] + obs_days_all_cycles['inst'] = sorted(obs_days_all_cycles['inst']) + obs_days_all_cycles['inst'] \ + = [datetime.strftime(obs_days_all_cycles['inst'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['inst']))] # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) - fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_cumul))] + fcst_output_times_all_cycles['cumul'] = sorted(fcst_output_times_all_cycles['cumul']) + fcst_output_times_all_cycles['cumul'] \ + = [datetime.strftime(fcst_output_times_all_cycles['cumul'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['cumul']))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. 
- obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul) - obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_cumul))] + obs_days_all_cycles['cumul'] = sorted(obs_days_all_cycles['cumul']) + obs_days_all_cycles['cumul'] \ + = [datetime.strftime(obs_days_all_cycles['cumul'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['cumul']))] - return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles, obs_days_all_cycles -def set_cycledefs_for_obs_days(obs_days_all_cycles): - """Given a list of days on which obs are needed, this function generates a - list of ROCOTO-style cycledef strings that together span the days (over - all cycles of an SRW App experiment) on which obs are needed. The input - list of days must be increasing in time, but the days do not have to be - consecutive, i.e. there may be gaps between days that are greater than - one day. +def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): + """ + Given a list of days on which observations are needed (because there is + forecast output on those days), this function generates a list of ROCOTO- + style cycledef strings that together span the days (over all cycles of an + SRW App experiment) on which obs are needed. The input list of days must + be increasing in time, but the days do not have to be consecutive, i.e. + there may be gaps between days that are greater than one day. Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will - represent all the days on which observations are needed. + represent all the days on which observations are needed. This allows + the ROCOTO workflow to define a single set of non-consecutive days on + which obs are needed and define tasks (e.g. get_obs) only for those + days, thereby avoiding the redundant creation of these tasks for any + in-between days on which obs are not needed. Args: obs_days_all_cycles: @@ -210,16 +523,16 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): gaps between days. Returns: - cycledef_all_obs_days: + cycledefs_all_obs_days: A list of strings, with each string being a ROCOTO-style cycledef of the form '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' where {yyyymmdd_start} is the starting day of the first cycle in the - cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note + cycledef and {yyyymmdd_end} is the starting day of the last cycle (note that the minutes and hours in these cycledef stirngs are always set to - '00'). Thus, one of the elements of the output list may be as follows: + '00'). For example, an element of the output list may be: '202404290000 202405010000 24:00:00' """ @@ -232,16 +545,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # Initialize the variable that in the loop below contains the date of # the previous day. This is just the first element of the list of # datetime objects constructed above. Then use it to initialize the - # list (contin_obs_day_lists) that will contain lists of consecutive + # list (consec_obs_days_lists) that will contain lists of consecutive # observation days. 
Thus, after its construction is complete, each - # element of contin_obs_day_lists will itself be a list containing - # datetime objects that are 24 hours apart. + # element of consec_obs_days_lists will itself be a list containing + # datetime objects that represent consecutive days (i.e. are guaranteed + # to be 24 hours apart). day_prev = tmp[0] - contin_obs_day_lists = list() - contin_obs_day_lists.append([day_prev]) + consec_obs_days_lists = list() + consec_obs_days_lists.append([day_prev]) # Remove the first element of the list of obs days since it has already - # been used initiliaze contin_obs_day_lists. + # been used initiliaze consec_obs_days_lists. tmp.pop(0) # Loop over the remaining list of obs days and construct the list of @@ -250,14 +564,14 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): for day_crnt in tmp: # If the current obs day comes 24 hours after the previous obs day, i.e. # if it is the next day of the previous obs day, append it to the last - # existing list in contin_obs_day_lists. + # existing list in consec_obs_days_lists. if day_crnt == day_prev + one_day: - contin_obs_day_lists[-1].append(day_crnt) + consec_obs_days_lists[-1].append(day_crnt) # If the current obs day is NOT the next day of the previous obs day, - # append a new element to contin_obs_day_lists and initialize it as a + # append a new element to consec_obs_days_lists and initialize it as a # list containing a single element -- the current obs day. else: - contin_obs_day_lists.append([day_crnt]) + consec_obs_days_lists.append([day_crnt]) # Update the value of the previous day in preparation for the next # iteration of the loop. day_prev = day_crnt @@ -267,13 +581,149 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # obs days when included in a tag in a ROCOTO XML. Each # string in this new list corresponds to a series of consecutive days on # which observations are needed (where by "consecutive" we mean no days - # are skipped), and there is at least a one day gap between each such + # are skipped), and there is at least a one-day gap between each such # series. These cycledefs together represent all the days (i.e. over all # cycles of the experiment) on which observations are needed. - cycledef_all_obs_days = list() - for contin_obs_day_list in contin_obs_day_lists: - cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M') - cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M') - cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + cycledefs_all_obs_days = list() + for consec_obs_days_list in consec_obs_days_lists: + cycledef_start = consec_obs_days_list[0].strftime('%Y%m%d%H%M') + cycledef_end = consec_obs_days_list[-1].strftime('%Y%m%d%H%M') + cycledefs_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + + return cycledefs_all_obs_days + + +def get_obs_retrieve_times_by_day( + vx_config, fcst_output_times_all_cycles, obs_days_all_cycles): + """ + This function generates dictionary of dictionaries that, for each + combination of obs type needed and each obs day, contains a string list + of the times at which that type of observation is needed on that day. + The elements of each list are formatted as 'YYYYMMDDHH'. + + Args: + vx_config: + The verification configuration dictionary. + + fcst_output_times_all_cycles: + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. 
+ Each element of these lists is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles: + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. + + Returns: + obs_retrieve_times_by_day: + Dictionary of dictionaries containing times at which each type of obs is + needed on each obs day. + """ + # Convert string contents of input dictionaries to datetime objects. + for time_type in ['cumul', 'inst']: + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] + + # Get list of forecast fields to be verified. + vx_fields = vx_config['VX_FIELDS'] + + # Define dictionary containing information about all fields that may + # possibly be verified. This information includes their temporal + # characteristics (cumulative vs. instantaneous) and the mapping between + # the observation type and the forecast field. + vx_field_info = {'cumul': [{'obtype': 'CCPA', 'fcst_fields': ['APCP']}, + {'obtype': 'NOHRSC', 'fcst_fields': ['ASNOW']}], + 'inst': [{'obtype': 'MRMS', 'fcst_fields': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] + } + + # Keep only those items in the dictionary above that have forecast fields + # that appear in the list of forecast fields to be verified. + for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): + for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): + obtype = obtypes_to_fcst_fields_dict['obtype'] + fcst_fields = obtypes_to_fcst_fields_dict['fcst_fields'] + fcst_fields = [field for field in fcst_fields if field in vx_fields] + obtypes_to_fcst_fields_dict['fcst_fields'] = fcst_fields + if not fcst_fields: obtypes_to_fcst_fields_dict_list.remove(obtypes_to_fcst_fields_dict) + if not obtypes_to_fcst_fields_dict_list: vx_field_info.pop(obs_time_type) + + # Create dictionary containing the temporal characteristics as keys and + # a string list of obs types to verify as the values. + obs_time_type_to_obtypes_dict = dict() + for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.items(): + obtype_list = [the_dict['obtype'] for the_dict in obtypes_to_fcst_fields_dict_list] + obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list + + # Initialize the return variable. + obs_retrieve_times_by_day = dict() + + # Define timedelta object representing a single day. + one_day = timedelta(days=1) - return cycledef_all_obs_days + # Loop over all obs types to be verified (by looping over the temporal + # type and the specific obs under that type). For each obs type, loop + # over each obs day and find the times within that that at which the obs + # need to be retrieved. 
+ for obs_time_type, obtypes in obs_time_type_to_obtypes_dict.items(): + + fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type] + obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] + + for obtype in obtypes: + + obs_retrieve_times_by_day[obtype] = dict() + + # Get the availability interval for the current observation type from the + # verification configuration dictionary. Then make sure it divides evenly + # into 24. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + raise Exception(msg) + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) + + # Loop over all obs days over all cycles (for the current obs type). For + # each such day, get the list forecast output times and the list of obs + # availability times. Finally, set the times (on that day) that obs need + # to be retrieved to the intersection of these two lists. + for obs_day in obs_days_all_cycles_crnt_ttype: + + next_day = obs_day + one_day + if obs_time_type == "cumul": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day] + elif obs_time_type == "inst": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day] + fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day] + + if obs_time_type == "cumul": + obs_avail_times_crnt_day \ + = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + elif obs_time_type == "inst": + obs_avail_times_crnt_day \ + = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day] + + obs_retrieve_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day)) + obs_retrieve_times_crnt_day.sort() + + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day + + return obs_retrieve_times_by_day diff --git a/ush/setup.py b/ush/setup.py index d6e9e5c2d0..8aaec0ef90 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -42,7 +42,9 @@ from set_cycle_and_obs_timeinfo import \ set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ - set_cycledefs_for_obs_days + set_rocoto_cycledefs_for_obs_days, \ + check_temporal_consistency_cumul_fields, \ + get_obs_retrieve_times_by_day from set_predef_grid_params import set_predef_grid_params from set_gridparams_ESGgrid import set_gridparams_ESGgrid from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid @@ -546,32 +548,106 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - # For vx fields that are accumulated, remove those accumulation hours - # that are longer than the forecast length. If that leaves the array - # of accumulation hours for that field empty, then remove the field - # from the list of fields to be verified. 
+ # Set some variables needed for running checks on and creating new + # (derived) configuration variables for the verification. # # ----------------------------------------------------------------------- # - # Get the vx fields specified in the experiment configuration. - vx_fields_config = expt_config["verification"]["VX_FIELDS"] - + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - vx_fields_accum = ["APCP", "ASNOW"] - for field in vx_fields_accum: - if field in vx_fields_config: - accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"]) - accum_periods = expt_config["verification"][accum_periods_array_name] - accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)] - expt_config["verification"][accum_periods_array_name] = accum_periods - if not accum_periods: - vx_fields_config.remove(field) - - expt_config["verification"]["VX_FIELDS"] = vx_fields_config + + # Set the forecast output interval. Ideally, this should be obtained + # from the SRW App's configuration file, but such a variable doesn't + # yet exist in that file. + fcst_output_intvl_hrs = 1 + workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs) + # + # ----------------------------------------------------------------------- + # + # Ensure that the configuration parameters associated with cumulative + # fields (e.g. APCP) in the verification section of the experiment + # dicitonary are temporally consistent, e.g. that accumulation intervals + # are less than or equal to the forecast length. Update the verification + # section of the dictionary to remove inconsistencies. + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, + date_first_cycl, date_last_cycl, cycl_intvl_dt, + fcst_len_dt, fcst_output_intvl_dt) + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification because + # there is forecast output on those days) over all cycles, both for + # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones + # (e.g. APCP). Then add these lists to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. 
+ # + # ----------------------------------------------------------------------- + # + fcst_output_times_all_cycles, obs_days_all_cycles, \ + = set_fcst_output_times_and_obs_days_all_cycles( + date_first_cycl, date_last_cycl, cycl_intvl_dt, + fcst_len_dt, fcst_output_intvl_dt) + + workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] + workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] + # + # ----------------------------------------------------------------------- + # + # Generate lists of ROCOTO cycledef strings corresonding to the obs days + # for instantaneous fields and those for cumulative ones. Then save the + # lists of cycledefs in the dictionary containing values needed to + # construct the ROCOTO XML. # # ----------------------------------------------------------------------- # - # Remove all verification [meta]tasks for which no fields are specified. + cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) + cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) + + rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst + rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul + # + # ----------------------------------------------------------------------- + # + # Generate dictionary of dictionaries that, for each combination of obs + # type needed and obs day, contains a string list of the times at which + # that type of observation is needed on that day. The elements of each + # list are formatted as 'YYYYMMDDHH'. This information is used by the + # day-based get_obs tasks in the workflow to get obs only at those times + # at which they are needed (as opposed to for the whole day). + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + obs_retrieve_times_by_day \ + = get_obs_retrieve_times_by_day( + vx_config, fcst_output_times_all_cycles, obs_days_all_cycles) + + for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): + for obs_day, obs_retrieve_times in obs_days_dict.items(): + array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) + vx_config[array_name] = obs_retrieve_times + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Remove all verification (meta)tasks for which no fields are specified. 
# # ----------------------------------------------------------------------- # @@ -579,7 +655,8 @@ def remove_tag(tasks, tag): vx_metatasks_all = {} vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["metatask_PcpCombine_obs", + vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_CCPA_all_accums", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -587,33 +664,38 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_NOHRSC_all_accums", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", + vx_metatasks_all["MRMS"] = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", "metatask_GenEnsProd_EnsembleStat_MRMS", "metatask_GridStat_MRMS_ensprob"] vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_run_MET_Pb2nc_obs", + vx_metatasks_all["NDAS"] = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", "metatask_PointStat_NDAS_all_mems", "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - if not vx_fields_config: + vx_config = expt_config["verification"] + vx_fields = vx_config["VX_FIELDS"] + if not vx_fields: metatask = "metatask_check_post_output_all_mems" rocoto_config['tasks'].pop(metatask) # If for a given obstype no fields are specified, remove all vx metatasks # for that obstype. for obstype in vx_fields_all: - vx_fields_obstype = [field for field in vx_fields_config if field in vx_fields_all[obstype]] - if not vx_fields_obstype: + vx_fields_by_obstype = [field for field in vx_fields if field in vx_fields_all[obstype]] + if not vx_fields_by_obstype: for metatask in vx_metatasks_all[obstype]: if metatask in rocoto_config['tasks']: logging.info(dedent( @@ -627,6 +709,24 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. + # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. 
+ # + # ----------------------------------------------------------------------- + # + date_second_cycl = date_first_cycl + cycl_intvl_dt + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # + # ----------------------------------------------------------------------- + # # ICS and LBCS settings and validation # # ----------------------------------------------------------------------- @@ -775,61 +875,6 @@ def get_location(xcs, fmt, expt_cfg): run_envir = expt_config["user"].get("RUN_ENVIR", "") - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") - date_last_cycl = workflow_config.get("DATE_LAST_CYCL") - incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - - # Set the forecast output interval. Ideally, this should be obtained - # from the SRW App's configuration file, but such a variable doesn't - # yet exist in that file. - fcst_output_intvl_hrs = 1 - - # To enable arithmetic with dates and times, convert various time - # intervals from integer to datetime.timedelta objects. - cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0) - fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0) - fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0) - - # Generate a list of forecast output times and a list of obs days (i.e. - # days on which observations are needed to perform verification) over all - # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for - # cumulative ones (e.g. APCP). - output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul \ - = set_fcst_output_times_and_obs_days_all_cycles( \ - date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl) - - # Add the list generated above to the dictionary containing workflow - # configuration variables. These will be needed in generating the ROCOTO - # XML. - workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst - workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst - workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul - workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul - - # Generate lists of ROCOTO cycledef strings corresonding to the obs days - # for instantaneous fields and those for cumulative ones. - cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst) - cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul) - # Save the lists of cycledefs in the dictionary containing values needed - # to construct the ROCOTO XML. - rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst - rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul - - # The "cycled_from_second" cycledef in the default workflow configuration - # file (default_workflow.yaml) requires the starting date of the second - # cycle. That is difficult to calculate in the yaml file itself because - # currently, there are no utilities to perform arithmetic with dates. - # Thus, we calculate it here and save it as a variable in the workflow - # configuration dictionary. 
Note that correct functioning of the default - # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all - # be strings, not datetime objects. We perform those conversions here. - date_second_cycl = date_first_cycl + cycl_intvl - workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") - workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") - workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") - # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ab1332d6b6a4d310de55ea8592727d5a94e672ff Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:40:19 -0600 Subject: [PATCH 084/208] Bug fixes. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 41428a7939..418e47e95e 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 2fae0d6388..913d5093bb 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,4 +59,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index e150234a47..a859a03ac8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index d8eb349433..563b8852a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From d50b4a0ce7fe8eff7d3341b506b5b360636cd4db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:47:24 -0600 Subject: [PATCH 085/208] Change name of cycledefs for obs days as was done in other files (python scripts) in previous commits. 
--- parm/wflow/verify_pre.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 80831f6f29..3ce65da55d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -24,7 +24,7 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_cumul + cycledefs: cycledefs_obs_days_cumul maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: @@ -76,7 +76,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: From 5b5e71f22e7290e1695c9cb9f382487a22ad908a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:49:38 -0600 Subject: [PATCH 086/208] Increase walltime for PcpCombine_fcst tasks since some WE2E tests are running out of time. --- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 3ce65da55d..102eb9dafa 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -246,7 +246,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: var: @@ -274,4 +274,4 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 From 2abd9df2df01426924de5641cf0b1cd7281035ee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:53:02 -0600 Subject: [PATCH 087/208] Remove unneeded environment variables from some tasks. 
--- parm/wflow/verify_det.yaml | 2 -- parm/wflow/verify_pre.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index c4f420f10c..47be6bb42b 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -102,7 +102,6 @@ metatask_GridStat_MRMS_all_mems: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' VAR: '#VAR#' - ACCUM_HH: '01' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" @@ -152,7 +151,6 @@ metatask_PointStat_NDAS_all_mems: VAR: '#VAR#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' - ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 102eb9dafa..6561954d8f 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -154,7 +154,6 @@ metatask_check_post_output_all_mems: envars: <<: *default_vars VAR: APCP - ACCUM_HH: '01' ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast From 97f0a9c43a4e6a743c5a53d1d32aab352a7f2c90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:58:52 -0600 Subject: [PATCH 088/208] Bug fix for PcpCombine path. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 7 +++++-- scripts/exregional_run_met_pcpcombine.sh | 9 +++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 263d22053f..aca2795018 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -144,6 +144,7 @@ ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -160,8 +161,10 @@ else # if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then slash_ensmem_subdir_or_null="/${ensmem_name}" + slash_obs_or_null="/obs" else slash_ensmem_subdir_or_null="" + slash_obs_or_null="" fi fi @@ -169,13 +172,13 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 97d156aa62..89d375b7c9 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -147,6 +147,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then 
slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -169,7 +170,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then fi elif [ "${FCST_OR_OBS}" = "OBS" ]; then slash_cdate_or_null="/${CDATE}" - slash_ensmem_subdir_or_null="/obs" + if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then + slash_obs_or_null="/obs" + else + slash_obs_or_null="" + fi fi OBS_INPUT_DIR="" @@ -193,7 +198,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) From 1c20ad4f40962cd35f8f13af3654ed01b80a5649 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 28 Sep 2024 08:36:22 -0600 Subject: [PATCH 089/208] Modify metatask and task names for clarity; fix paths; add pcpcombine task for NOHRSC; fix task dependencies. --- ...C_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} | 2 +- parm/wflow/verify_det.yaml | 12 ++--- parm/wflow/verify_ens.yaml | 27 +++++----- parm/wflow/verify_pre.yaml | 50 +++++++++++++++++-- ...h => exregional_run_met_pb2nc_obs_ndas.sh} | 9 ++-- scripts/exregional_run_met_pcpcombine.sh | 17 +++---- ush/setup.py | 4 +- 7 files changed, 78 insertions(+), 43 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} (98%) rename scripts/{exregional_run_met_pb2nc_obs.sh => exregional_run_met_pb2nc_obs_ndas.sh} (98%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS index 89c9bb73f4..a6ed90a1a3 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 47be6bb42b..a08fe69e3e 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -50,7 +50,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# @@ -82,9 +82,9 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_get_obs_nohrsc: + taskdep_pcpcombine_obs: attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# @@ -159,7 +159,7 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: + datadep_all_pb2nc_obs_ndas_complete: attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -170,11 +170,11 @@ metatask_PointStat_NDAS_all_mems: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 046849e126..f92aef4c60 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,9 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' dependency: and: - taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -83,9 +83,9 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'none' dependency: and: - taskdep: + taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -169,7 +169,7 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: &all_pb2nc_obs_ndas_complete attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -180,11 +180,11 @@ metatask_GenEnsProd_EnsembleStat_NDAS: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ 
"_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' @@ -213,8 +213,8 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep_pcpcombine_obs: - <<: *taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: + <<: *taskdep_pcpcombine_obs_ccpa taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -240,9 +240,8 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep: - attrs: - task: get_obs_nohrsc + taskdep_pcpcombine_obs_nohrsc: + <<: *taskdep_pcpcombine_obs_nohrsc taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -291,8 +290,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - datadep_all_pb2nc_obs_complete: - <<: *all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: + <<: *all_pb2nc_obs_ndas_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6561954d8f..220b029412 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -73,12 +73,12 @@ task_get_obs_ndas: partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' walltime: 02:00:00 -task_run_MET_Pb2nc_obs: +task_run_MET_Pb2nc_obs_NDAS: <<: *default_task_verify_pre attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' envars: <<: *default_vars VAR: ADPSFC @@ -100,10 +100,10 @@ task_run_MET_Pb2nc_obs: attrs: task: get_obs_ndas -metatask_PcpCombine_obs: +metatask_PcpCombine_obs_APCP_all_accums_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h: + task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -142,6 +142,48 @@ metatask_PcpCombine_obs: {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' +metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: + var: + ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' + task_run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + envars: + <<: *default_vars + VAR: ASNOW + ACCUM_HH: '#ACCUM_HH#' + FCST_OR_OBS: OBS + OBTYPE: NOHRSC + OBS_DIR: '&NOHRSC_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' + METPLUSTOOLNAME: 'PCPCOMBINE' + dependency: + and: + datadep: + text: "&NOHRSC_OBS_DIR;" + datadep_all_get_obs_nohrsc_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_nohrsc tasks + # are complete are all present before launching any PcpCombine task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' + metatask_check_post_output_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh similarity index 98% rename from scripts/exregional_run_met_pb2nc_obs.sh rename to scripts/exregional_run_met_pb2nc_obs_ndas.sh index fbf3ec1689..01e0362cc3 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -154,8 +154,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do fp="${OBS_INPUT_DIR}/${fn}" if [[ -f "${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to observation retrieval time -(yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " @@ -263,8 +262,8 @@ metplus_config_tmpl_fn="${MetplusToolName}_obs" # information, but we still include that info in the file name so that # the behavior in the two modes is as similar as possible. # -metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}" -metplus_log_fn="${metplus_config_fn}" +metplus_config_fn="${metplus_config_tmpl_fn}_NDAS_${CDATE}" +metplus_log_fn="${metplus_config_fn}_NDAS" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -379,7 +378,7 @@ METplus configuration file used is: #----------------------------------------------------------------------- # mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_ndas_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 89d375b7c9..43da23ca2e 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -320,18 +320,13 @@ fi # First, set the base file names. # metplus_config_tmpl_fn="${MetplusToolName}" -metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${ENSMEM_INDX:+_${ensmem_name}}" -metplus_log_fn="${metplus_config_fn}_$CDATE" -# -# If operating on observation files, append the cycle date to the name -# of the configuration file because in this case, the output files from -# METplus are not placed under cycle directories (so another method is -# necessary to associate the configuration file with the cycle for which -# it is used). 
-# -if [ "${FCST_OR_OBS}" = "OBS" ]; then - metplus_config_fn="${metplus_log_fn}" +if [ "${FCST_OR_OBS}" = "FCST" ]; then + suffix="${ENSMEM_INDX:+_${ensmem_name}}" +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + suffix="_${OBTYPE}" fi +metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${suffix}" +metplus_log_fn="${metplus_config_fn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # diff --git a/ush/setup.py b/ush/setup.py index 8aaec0ef90..81e82cc9e6 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -656,7 +656,7 @@ def remove_tag(tasks, tag): vx_fields_all["CCPA"] = ["APCP"] vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_CCPA_all_accums", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -664,7 +664,7 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_NOHRSC_all_accums", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", From 057ba700fb72c75a45d3b921ecef5cb5c72179a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 29 Sep 2024 17:57:34 -0600 Subject: [PATCH 090/208] Make adjustments to ASNOW settings to account for the fact that for ASNOW obs, it's the netcdf files that come out of PcpCombine_obs tasks that are used for verification. --- parm/metplus/EnsembleStat.conf | 9 +-------- parm/metplus/GridStat_ensmean.conf | 9 +-------- parm/metplus/GridStat_ensprob.conf | 9 +-------- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 4 ++-- ...xregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 ++-- 5 files changed, 7 insertions(+), 28 deletions(-) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 3759d5d8a1..ce38b2d209 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -516,15 +516,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 0cfaa707bf..21d23ac4eb 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -400,15 +400,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. 
- -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index 6c34eb6ba0..abde89ef4b 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -354,15 +354,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 73d98754b4..9e1d0bd390 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -140,8 +140,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_DIR="${vx_output_basedir}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; "REFC") diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 9a8c35d1cb..6a8da3166d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" From abf2014db39ae292d29264c9e6d592708893e8e7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:45:26 -0600 Subject: [PATCH 091/208] Clarify informational message. 
--- ush/mrms_pull_topofhour.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 310c5d97f9..32f511c393 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -43,7 +43,7 @@ def main(): valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling {args.valid_time} MRMS data") + print(f"Pulling MRMS product {args.product} for valid time: {args.valid_time}") # Set up working directory From 8937a8c472987d4f2f010d1ca39d5c436d29ab2a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:46:00 -0600 Subject: [PATCH 092/208] Comment out debugging lines to reduce clutter. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index ae4a1c7ebf..a4421958ee 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -62,7 +62,7 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # @@ -217,7 +217,7 @@ function eval_single_METplus_timefmt() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # From 841e141853d7cd9d19c7b79d5f306ae28ceb99f4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:31:43 -0600 Subject: [PATCH 093/208] Ensure that the observation file name templates specified in the SRW App configuration file are used when running the Pb2nc task (as opposed to hard-coding file names). --- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 01e0362cc3..77cdb1221d 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -79,6 +79,11 @@ to convert NDAS prep buffer observation files to NetCDF format. #----------------------------------------------------------------------- # yyyymmdd_task=${PDY} + +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -150,8 +155,17 @@ num_missing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - fn="prepbufr.ndas.${yyyymmddhh}" - fp="${OBS_INPUT_DIR}/${fn}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. 
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp" + if [[ -f "${fp}" ]]; then print_info_msg " Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): From 2e6299ff3d2d8258b0318cd38059eba40358a8f1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:44:03 -0600 Subject: [PATCH 094/208] Add argument to mrms_pull_topofhour.py that specifies whether a subdirectory having the name of the valid day (of the form "YYYYMMDD") should be assumed to exist under the specified source directory, and whether such a subdirectory should be created under the specified output directory. Previously, such a subdirectory was always assumed to exist/created; now, it is an option. --- ush/mrms_pull_topofhour.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 32f511c393..3e5b5ddb6e 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -20,6 +20,8 @@ def main(): help='Name of MRMS product') parser.add_argument('-l', '--level', type=str, help='MRMS product level', choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') args = parser.parse_args() @@ -47,14 +49,18 @@ def main(): # Set up working directory - dest_dir = os.path.join(args.outdir, valid_str) + valid_str_or_empty = '' + if args.add_vdate_subdir: + valid_str_or_empty = valid_str + + dest_dir = os.path.join(args.outdir, valid_str_or_empty) if not os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS product if args.debug: print(f"Valid date: {valid_str}") - search_path = f"{args.source}/{valid_str}/{args.product}*.gz" + search_path = os.path.join(args.source, valid_str_or_empty, args.product + "*.gz") file_list = [f for f in glob.glob(search_path)] if args.debug: print(f"Files found: \n{file_list}") @@ -78,7 +84,7 @@ def main(): if difference.total_seconds() <= 900: filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str, filename1) + origfile = os.path.join(args.source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) if args.debug: From 8eed4a267f4c619146166d9564ca71d6273ef9d6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:51:53 -0600 Subject: [PATCH 095/208] When retrieving files from different data stores (e.g. NOAA's HPSS), make sure that the get_obs tasks place the files (and name them) according to the file name templates specified for each obs type in the SRW App configuration file. Also, remove the variable basedir_proc since it is redundant. 
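
For reference, the get_obs scripts now all build the processed path of
each obs file from the corresponding file name template in the SRW App
configuration file instead of hard-coding it. A minimal sketch of that
pattern (shown here for the CCPA case; it assumes it runs inside a
get_obs ex-script, where DATE_UTIL, OBS_DIR, the OBS_*_FN_TEMPLATE
variables, and the sourced bash utility eval_METplus_timestr_tmpl are
all available):

    # Hours elapsed since 00Z of the obs day for the current retrieval
    # time (sec_since_ref_task holds the epoch seconds at 00Z of that day).
    sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s)
    lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 ))

    # Evaluate the METplus-style file name template from the configuration
    # (here OBS_CCPA_APCP_FN_TEMPLATE) into the concrete processed path.
    eval_METplus_timestr_tmpl \
      init_time="${yyyymmdd_task}00" \
      fhr="${lhr}" \
      METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \
      outvarname_evaluated_timestr="fp_proc"

    # Create the parent directory of the processed file before moving or
    # copying the raw file into it.
    mkdir -p $( dirname "${fp_proc}" )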
--- ush/get_obs_ccpa.sh | 114 +++++++++++++-------- ush/get_obs_mrms.sh | 196 ++++++++++++++++++++++++------------ ush/get_obs_ndas.sh | 227 ++++++++++++++++++++++++------------------ ush/get_obs_nohrsc.sh | 104 ++++++++++++------- 4 files changed, 406 insertions(+), 235 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index aabb55e5a4..d3c486c607 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -167,7 +167,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -178,11 +178,10 @@ accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -231,16 +230,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -304,7 +313,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. 
#mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -317,11 +327,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# CCPA grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -333,10 +343,10 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the grib2 files retrieved from the current - # archive file. We refer to this as the "raw" archive directory because - # it will contain the files as they are in the archive before any processing - # by this script. + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" # Check whether any of the obs retrieval times for the day associated with @@ -378,12 +388,14 @@ The times at which obs need to be retrieved are: # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ccpa tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ccpa tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extracton is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} # Pull obs from HPSS. This will get all the obs files in the current @@ -402,36 +414,56 @@ The times at which obs need to be retrieved are: print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw files to their processed location, but for - # times between 20180718 and 20210504 and hours-of-day 19 through the - # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 - # to correct an error in the metadata of the raw file and writing the - # corrected data to a new grib2 file in the processed location. 
- for hrs_ago in $(seq 5 -1 0); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For CCPA obs, for most dates this consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. However, for dates between + # 20180718 and 20210504 and hours-of-day 19 through the end of the day + # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an + # error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. + for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed grib2 obs file from the raw one (by moving, copying, - # or otherwise) only if the time of the current file in the current archive + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_raw="${arcv_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}/${yyyymmdd}" - mkdir -p ${day_dir_proc} - fn_proc="${fn_raw}" - fp_proc="${day_dir_proc}/${fn_proc}" - hh_noZero=$((10#${hh})) + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure. + hh_noZero=$((10#${hh})) if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s else ${mv_or_cp} ${fp_raw} ${fp_proc} fi + fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a0d0590667..71eae52b9c 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. 
+remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} + 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" +fi + # Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an # environment variable created in the ROCOTO XML. It is a scalar variable # because there doesn't seem to be a way to pass a bash array from the @@ -59,14 +71,17 @@ mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) # corresponding to each. fields_in_filenames=() levels_in_filenames=() +obs_mrms_fp_templates=() for field in ${mrms_fields[@]}; do # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then fields_in_filenames+=("MergedReflectivityQCComposite") levels_in_filenames+=("00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") elif [ "${field}" = "RETOP" ]; then fields_in_filenames+=("EchoTop") levels_in_filenames+=("18_00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") else print_err_msg_exit "\ Invalid field specified: @@ -79,11 +94,10 @@ done # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the MRMS -# grib2 files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -95,45 +109,44 @@ basedir_proc=${OBS_DIR} array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# If there are no observation retrieval times on the day of the current -# task, exit the script. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. 
num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} -for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + for (( i=0; i<${num_mrms_fields}; i++ )); do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2" - fp_proc="${day_dir_proc}/${fn_proc}" + + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " File already exists on disk: fp_proc = \"${fp_proc}\"" else - break + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Will attempt to retrieve all obs files." + break 2 fi done done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) -if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: @@ -146,8 +159,9 @@ else At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files. @@ -162,7 +176,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -175,42 +190,52 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# MRMS grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" # Time associated with the archive. MRMS data have daily archives that # have the hour-of-day set to "00". 
yyyymmddhh_arcv="${yyyymmdd_task}00" -# Directory that will contain the MRMS grib2 files retrieved from the -# current 6-hourly archive file. We refer to this as the "raw" quarter- -# daily directory because it will contain the files as they are in the -# archive before any processing by this script. -day_dir_raw="${basedir_raw}/${yyyymmdd_task}" +# Directory that will contain the files retrieved from the current archive +# file. We refer to this as the "raw" archive directory because it will +# contain the files as they are in the archive before any processing by +# this script. +# +# Note: +# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory +# that depends on the archive date, e.g. +# +# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" +# +# but since for MRMS data there is only one archive per day, that directory +# is redundant, so simplicity we set arcv_dir_raw to just basedir_raw. +arcv_dir_raw="${basedir_raw}" -# Make sure the raw quarter-daily directory exists because it is used -# below as the output directory of the retrieve_data.py script (so if -# this directory doesn't already exist, that script will fail). Creating -# this directory also ensures that the raw base directory (basedir_raw) -# exists before we change location to it below. -mkdir -p ${day_dir_raw} +# Make sure the raw archive directory exists because it is used below as +# the output directory of the retrieve_data.py script (so if this directory +# doesn't already exist, that script will fail). Creating this directory +# also ensures that the raw base directory (basedir_raw) exists before we +# change location to it below. +mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). In order to -# avoid other get_obs_ndas tasks (i.e. those associated with other days) -# from interfering with (clobbering) these files (because extracted files -# from different get_obs_ndas tasks to have the same names or relative -# paths), we change location to the base raw directory so that files with -# same names are extracted into different directories. +# specified output location (via the --output_path option). Note that +# the relative paths of obs files within archives associted with different +# days may be the same. Thus, if files with the same archive-relative +# paths are being simultaneously extracted from multiple archive files +# (by multiple get_obs tasks), they will likely clobber each other if the +# extracton is being carried out into the same location on disk. To avoid +# this, we first change location to the raw base directory (whose name is +# obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} -# Pull MRMS data from HPSS. This will get all 7 obs files in the current -# archive and place them in the raw quarter-daily directory, although we -# will make use of only 6 of these (we will not use the tm00 file). +# Pull obs from HPSS. This will get all the obs files in the current +# archive and place them in the raw archive directory. 
cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -219,7 +244,7 @@ python3 -u ${USHdir}/retrieve_data.py \ --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path ${day_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -233,19 +258,68 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." #----------------------------------------------------------------------- # -# Loop through all hours of the day associated with the task. For each -# hour, find the gzipped grib2 file in the raw daily directory that is -# closest in time to this hour. Then gunzip the file and copy it (in the -# process renaming it) to the processed location. -for hr in $(seq 0 1 23); do +# Loop over the raw obs files extracted from the current archive and +# generate from them the processed obs files. +# +# For MRMS obs, the raw obs consist of gzipped grib2 files that are +# usually a few minutes apart in time. However, because forecast data +# is available at most every hour, the SRW App configuration parameter +# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. +# Below, we loop over the whole day using this 1-hourly interval. For +# each hour of the day, we call the script mrms_pull_topofhour.py to find +# the gzipped grib2 file in the raw archive directory that is closest in +# time to the hour and unzip it in a temporary directory. We then copy +# or move it to the processed directory, possibly renaming it in the +# process. +for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do + + # First, select from the set of raw files for the current day those that + # are nearest in time to the current hour. Unzip these in a temporary + # subdirectory under the raw base directory. + # + # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # a certain file naming convention. That convention must match the names + # of the files that the retrieve_data.py script called above ends up + # retrieving. The list of possibile templates for these names is given + # in parm/data_locations.yml, but which of those is actually used is not + # known until retrieve_data.py completes. Thus, that information needs + # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. + # For now, we hard-code the file name here. python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ - --outdir ${basedir_proc} \ --source ${basedir_raw} \ - --product ${fields_in_filenames[$i]} + --outdir ${basedir_raw}/topofhour \ + --product ${fields_in_filenames[$i]} \ + --no-add_vdate_subdir + + # Set the name of and the full path to the raw obs file created by the + # mrms_pull_topofhour.py script. This name is currently hard-coded to + # the output of that script. In the future, it should be set in a more + # general way (e.g. obtain from a settings file). 
+ fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2" + fp_raw="${basedir_raw}/topofhour/${fn_raw}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + + mv ${fp_raw} ${fp_proc} + done fi done diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 7ab6fc652b..45338714a2 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -58,18 +58,17 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the NDAS -# prepbufr files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -80,23 +79,9 @@ basedir_proc=${OBS_DIR} # array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - - - - - -# If there are no observation retrieval times on the day of the current -# task, exit the script. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This @@ -107,32 +92,45 @@ fi # # To generate this sequence, we first set its starting and ending values # as well as the interval. +# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first observation retrieval time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. 
This is set to the archive hour containing obs at -# the last observation retrieval time of the day. +# the last obs retrieval time of the day. hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -151,32 +149,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -188,7 +193,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -201,11 +207,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# NDAS prepbufr files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -217,51 +223,65 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the NDAS prepbufr files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the observation retrieval times for the day - # associated with this task fall in the time interval spanned by the - # current archive. If so, set the flag (do_retrieve) to retrieve the - # files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive. 
- yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the current day's observation retrieval times fall in the range +spanned by the current ${arcv_hr_incr}-hourly archive file. The bounds of the current +archive are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. - mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ndas tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ndas tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extracton is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} - # Pull NDAS data from HPSS. 
This will get all 7 obs files in the current - # archive and place them in the raw quarter-daily directory, although we - # will make use of only 6 of these (we will not use the tm00 file). + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. + # Note that for the specific case of NDAS obs, this will get all 7 obs + # files in the current archive, although we will make use of only 6 of + # these (we will not use the tm00 file). cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -270,43 +290,56 @@ arcv_hr = ${arcv_hr}" --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed NDAS prepbufr files. This consists of simply - # copying or moving (and in the process renaming) them from the raw - # quarter-daily directory to the processed directory. Note that the - # tm06 files contain more/better observations than tm00 for the - # equivalent time, so we use those. - for hrs_ago in $(seq --format="%02g" 6 -1 1); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For NDAS obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + # + # Note that the tm06 file in a given archive contain more/better observations + # than the tm00 file in the next archive (their valid times are equivalent), + # so we use the tm06 files. + for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}" - mkdir -p ${day_dir_proc} - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + fp_raw="${arcv_dir_raw}/${fn_raw}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. 
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi done - else - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current 6-hourly archive file. The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The observation retrieval times are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index c71266ed07..5c56f8a8df 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -55,7 +55,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -66,11 +66,10 @@ accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -119,16 +118,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). 
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -205,7 +214,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -218,11 +228,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# NOHRSC grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -234,10 +244,10 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the grib2 files retrieved from the current - # archive file. We refer to this as the "raw" archive directory because - # it will contain the files as they are in the archive before any processing - # by this script. + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" # Check whether any of the obs retrieval times for the day associated with @@ -279,12 +289,14 @@ The times at which obs need to be retrieved are: # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ccpa tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ccpa tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extracton is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} # Pull obs from HPSS. 
This will get all the obs files in the current @@ -303,23 +315,43 @@ The times at which obs need to be retrieved are: print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed NOHRSC grib2 files. This consists of simply copying - # or moving them from the raw daily directory to the processed directory. - for hrs in $(seq 0 6 18); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For NOHRSC obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed grib2 obs file from the raw one (by moving, copying, - # or otherwise) only if the time of the current file in the current archive + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_raw="${arcv_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}" - mkdir -p ${day_dir_proc} - fn_proc="${fn_raw}" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi done From 2357cd3d041ea005ad88f4451c9b963e11d4e243 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 3 Oct 2024 03:00:52 -0600 Subject: [PATCH 096/208] Fix bug introduced in previous set of changes. --- ush/get_obs_mrms.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 71eae52b9c..d13e374620 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -116,6 +116,7 @@ num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do for (( i=0; i<${num_mrms_fields}; i++ )); do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) @@ -139,13 +140,16 @@ File does not exist on disk: Will attempt to retrieve all obs files." break 2 fi + done done # If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. 
+# number of obs files needed (which is num_mrms_fields times the number
+# of obs retrieval times in the current day), then there is no need to
+# retrieve any files.
 num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]}
-if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then
+if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then
 
   print_info_msg "
 All obs files needed for the current day (yyyymmdd_task) already exist

From f36a86628ba8f2949fb3c83b6e5b4acd445eeca9 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 3 Oct 2024 03:02:15 -0600
Subject: [PATCH 097/208] Modify existing vx WE2E test so it is able to find the staged NOHRSC obs file.

---
 .../config.MET_ensemble_verification_winter_wx.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
index 85a515f293..1845255f54 100644
--- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
+++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
@@ -32,3 +32,6 @@ global:
   NUM_ENS_MEMBERS: 10
 verification:
   VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
+  OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+    {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}'
+

From 6ba25960f3239b8b51706366dc0a64561f2474b8 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Sun, 6 Oct 2024 12:27:46 -0600
Subject: [PATCH 098/208] Bug fix.

---
 scripts/exregional_run_met_pb2nc_obs_ndas.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
index 77cdb1221d..55244b7fdf 100755
--- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh
+++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
@@ -192,9 +192,9 @@ done
 if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then
   print_err_msg_exit "\
 The number of missing ${OBTYPE} obs files (num_missing_files) is greater
-than the maximum allowed number (num_missing_files_max):
+than the maximum allowed number (NUM_MISSING_OBS_FILES_MAX):
   num_missing_files = ${num_missing_files}
-  num_missing_files_max = ${num_missing_files_max}"
+  NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}"
 fi
 
 # Remove leading comma from LEADHR_LIST.

From e3a05bf92a7a4fc05e85662b7f5585b2eec74945 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Sun, 6 Oct 2024 12:31:24 -0600
Subject: [PATCH 099/208] Change config variable name for clarity.
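
Specifically, the vx configuration variables

  OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE
  OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT

are renamed to

  OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE
  OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT

throughout the vx ex-scripts and config_defaults.yaml (see the diff below),
since the NDAS prepbufr files these templates point to contain both ADP
surface (ADPSFC) and upper-air (ADPUPA) observations.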
--- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 6 +++--- ush/config_defaults.yaml | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 9e1d0bd390..67ae70c8b9 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -159,7 +159,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index aca2795018..e16b06cb46 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -200,7 +200,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5ad0560f28..adecb68bcd 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -144,7 +144,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 6a8da3166d..2c27a9a597 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -143,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 55244b7fdf..e93387ed0a 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -136,11 +136,11 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) 
OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) +OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- @@ -163,7 +163,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do eval_METplus_timestr_tmpl \ init_time="${yyyymmdd_task}00" \ fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ outvarname_evaluated_timestr="fp" if [[ -f "${fp}" ]]; then diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 56bd15b814..8a02964cc2 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2448,7 +2448,7 @@ verification: # OBS_MRMS_RETOP_FN_TEMPLATE: # File name template for MRMS echo top observations. # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: # File name template for NDAS surface and upper air observations. # This template is used by the workflow tasks that call the METplus Pb2nc # tool on NDAS obs to find the input observation files containing ADP @@ -2461,7 +2461,7 @@ verification: {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation @@ -2470,7 +2470,7 @@ verification: # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: # Template used to specify the names of the output NetCDF observation # files generated by the worfklow verification tasks that call the # METplus Pb2nc tool on NDAS observations. (These files will contain @@ -2480,7 +2480,7 @@ verification: {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: # String that specifies a descriptive name for the model being verified. From 84c54ba7ae450e0c9504ff11eeb944e24847cf1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 6 Oct 2024 16:37:40 -0600 Subject: [PATCH 100/208] Use new python script to get any of the allowed kinds of obs instead of the four separate shell scripts. 
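
A note on the new helper: get_obs.py adds a get_obs_arcv_hr() function that
maps an obs type, archive interval, and hour-of-day to the hour (since hour 0
of the task day) of the HPSS archive containing that obs file. The calls
below are an illustrative sketch only, with the return values worked out by
hand from the function definition in this patch (they are not additional code
in the repo):

    get_obs_arcv_hr('CCPA', 6, 7)   # -> 12 (hour 7 falls in the 06-12Z archive)
    get_obs_arcv_hr('CCPA', 6, 0)   # -> 24 (hour 0 is treated as hour 24 of the day)
    get_obs_arcv_hr('NDAS', 6, 17)  # -> 18 (the 18Z archive holds the tm06..tm01 files)

The ex-script now invokes the new module as shown in the
exregional_get_verif_obs.sh hunk below.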
--- scripts/exregional_get_verif_obs.sh | 102 +-- ush/get_obs.py | 885 +++++++++++++++++++++++++++ ush/get_obs_ccpa.sh | 484 --------------- ush/get_obs_mrms.sh | 341 ----------- ush/get_obs_ndas.sh | 357 ----------- ush/get_obs_nohrsc.sh | 372 ----------- ush/run_eval_METplus_timestr_tmpl.sh | 28 + 7 files changed, 926 insertions(+), 1643 deletions(-) create mode 100644 ush/get_obs.py delete mode 100755 ush/get_obs_ccpa.sh delete mode 100755 ush/get_obs_mrms.sh delete mode 100755 ush/get_obs_ndas.sh delete mode 100755 ush/get_obs_nohrsc.sh create mode 100755 ush/run_eval_METplus_timestr_tmpl.sh diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4e981b3958..158218889e 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -46,92 +46,8 @@ done # #----------------------------------------------------------------------- # -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. -# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. 
-# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will be automatically staged by this -# this script. -# -# -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 -# -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# Make sure the obs type is valid. Then call the python script get_obs.py +# to get the obs files. # #----------------------------------------------------------------------- # @@ -144,8 +60,15 @@ Valid observation types are: $(printf "\"%s\" " ${valid_obtypes[@]}) " fi -script_bn="get_obs_$(echo_lowercase ${OBTYPE})" -$USHdir/${script_bn}.sh + +script_bn="get_obs" +cmd="\ +python3 -u ${USHdir}/${script_bn}.py \ +--var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ +--obtype ${OBTYPE} \ +--obs_day ${PDY}" +print_info_msg "CALLING: ${cmd}" +${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." # #----------------------------------------------------------------------- # @@ -155,7 +78,8 @@ $USHdir/${script_bn}.sh #----------------------------------------------------------------------- # mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt" +file_bn="get_obs_$(echo_lowercase ${OBTYPE})" +touch "${WFLOW_FLAG_FILES_DIR}/${file_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/ush/get_obs.py b/ush/get_obs.py new file mode 100644 index 0000000000..f6e2fed265 --- /dev/null +++ b/ush/get_obs.py @@ -0,0 +1,885 @@ +#!/usr/bin/env python3 + +import os +import sys +import shutil +import argparse +import logging +from pathlib import Path +import datetime as dt +from textwrap import dedent +from pprint import pprint +from math import ceil, floor +import subprocess +from python_utils import ( + load_yaml_config, +) + + +def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): + """ + This file defines a function that, for the given observation type, obs + archive interval, and hour of day, returns the hour (counting from the + start of the day) corresponding to the archive file in which the obs file + for the given hour of day is included. + + Note that for cumulative fields (like CCPA and NOHRSC, as opposed to + instantaneous ones like MRMS and NDAS), the archive files corresponding + to hour 0 of the day represent accumulations over the previous day. Thus, + here, we never return an achive hour of 0 for cumulative fields. Instead, + if the specified hour-of-day is 0, we consider that to represent the 0th + hour of the NEXT day (i.e. 
the 24th hour of the current day) and set the + archive hour to 24. + + Args: + obtype: + The observation type. A string. + + arcv_intvl_hrs: + Time interval (in hours) between archive files. An integer. For example, + if the obs files are bundled into 6-hourly archives, then this will be + set to 6. This must be between 1 and 24 and must divide evenly into 24 + (this is checked for elsewhere). + + hod: + The hour of the day. An integer. This must be between 0 and 23. For + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + next day, i.e. as the 24th hour of the current day. + + Returns: + arcv_hr: + The hour since the start of day corresponding to the archive file containing + the obs file for the given hour of day. An integer. + """ + + valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas'] + if obtype not in valid_obtypes: + msg = dedent(f""" + The specified observation type is not supported: + obtype = {obtype} + Valid observation types are: + {valid_obtypes} + """) + logging.error(msg) + raise Exception(msg) + + if (hod < 0) or (hod > 23): + msg = dedent(f""" + The specified hour-of-day must be between 0 and 23, inclusive but isn't: + hod = {hod} + """) + logging.error(msg) + raise Exception(msg) + + obtype_upper = obtype.upper() + if obtype_upper in ['CCPA']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['NOHRSC']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['MRMS']: + arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs + elif obtype_upper in ['NDAS']: + arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs + + return arcv_hr + + +def get_obs(config, obtype, yyyymmdd_task): + """ +This script performs several important tasks for preparing data for +verification tasks. Depending on the value of the environment variable +OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +set. + +If data is not available on disk (in the location specified by +CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +the script attempts to retrieve the data from HPSS using the retrieve_data.py +script. Depending on the data set, there are a few strange quirks and/or +bugs in the way data is organized; see in-line comments for details. + + +CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 + +If data is retrieved from HPSS, it will be automatically staged by this +script. + +Notes about the data and how it's used for verification: + +1. Accumulation is currently hardcoded to 01h. The verification will +use MET/pcp-combine to sum 01h files into desired accumulations. + +2. There is a problem with the valid time in the metadata for files +valid from 19 - 00 UTC (or files under the '00' directory). This is +accounted for in this script for data retrieved from HPSS, but if you +have manually staged data on disk you should be sure this is accounted +for. See in-line comments below for details. 
+ + +MRMS (Multi-Radar Multi-Sensor) radar observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, + +Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity +data and EchoTop_18_00.50_ for echo top data. If data is not available +at the top of the hour, you should rename the file closest in time to +your hour(s) of interest to the above naming format. A script +"ush/mrms_pull_topofhour.py" is provided for this purpose. + +If data is retrieved from HPSS, it will automatically staged by this +this script. + + +NDAS (NAM Data Assimilation System) conventional observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} + +Note that data retrieved from HPSS and other sources may be in a +different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +either 00, 06, 12, or 18, and prevhour is the number of hours prior to +hh (00 through 05). If using custom staged data, you will have to +rename the files accordingly. + +If data is retrieved from HPSS, it will be automatically staged by this +this script. + + +NOHRSC snow accumulation observations +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 + +where AA is the 2-digit accumulation duration in hours: 06 or 24 + +METplus is configured to verify snowfall using 06- and 24-h accumulated +snowfall from 6- and 12-hourly NOHRSC files, respectively. + +If data is retrieved from HPSS, it will automatically staged by this +this script. + """ + + # Convert obtype to upper case to simplify code below. + obtype = obtype.upper() + + # For convenience, get the verification portion of the configuration + # dictionary. + vx_config = cfg['verification'] + + # Get the time interval (in hours) at which the obs are available. + key = obtype + '_OBS_AVAIL_INTVL_HRS' + obs_avail_intvl_hrs = vx_config[key] + + # The obs availability inerval must divide evenly into 24 hours. Otherwise, + # different days would have obs available at different hours-of-day. Make + # sure this is the case. + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder} + """) + raise Exception(msg) + + # For convenience, get obs availability interval as a datetime object. + obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) + + # Get the base directory for the observations. + key = obtype + '_OBS_DIR' + obs_dir = config['platform'][key] + + # Set the group of fields for each observation type. We assume there is + # a separate obs file type for each such field group in the observations. 
+ if obtype == 'CCPA': + field_groups_in_obs = ['APCP'] + elif obtype == 'NOHRSC': + field_groups_in_obs = ['ASNOW'] + elif obtype == 'MRMS': + field_groups_in_obs = ['REFC', 'RETOP'] + elif obtype == 'NDAS': + field_groups_in_obs = ['ADPSFCandADPUPA'] + num_field_groups = len(field_groups_in_obs) + + # For each field group in the observations, get the METplus file name + # template for the observation files. Then combine these with the base + # directory to get the METplus template for the full path to the processed + # obs files. + fp_proc_templates = [] + for fg in field_groups_in_obs: + key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' + fn_proc_template = vx_config[key] + fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) + # + #----------------------------------------------------------------------- + # + # Set variables that are only needed for some obs types. + # + #----------------------------------------------------------------------- + # + + # For cumulative obs, set the accumulation period to use when getting obs + # files. This is simply (a properly formatted version of) the obs + # availability interval. + accum_obs_formatted = None + if obtype == 'CCPA': + accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}' + elif obtype == 'NOHRSC': + accum_obs_formatted = f'{obs_avail_intvl_hrs:d}' + + # For MRMS obs, set field-dependent parameters needed in forming grib2 + # file names. + fields_in_filenames = [] + levels_in_filenames = [] + if obtype == 'MRMS': + for fg in field_groups_in_obs: + if fg == 'REFC': + fields_in_filenames.append('MergedReflectivityQCComposite') + levels_in_filenames.append('00.50') + elif fg == 'RETOP': + fields_in_filenames.append('EchoTop') + levels_in_filenames.append('18_00.50') + else: + msg = dedent(f""" + Invalid field specified for obs type: + obtype = {obtype} + field = {field} + """) + logging.error(msg) + raise Exception(msg) + + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. Set these starting + # and ending dates as datetime objects for later use. + yyyymmdd_bad_metadata_start_str = None + yyyymmdd_bad_metadata_end_str = None + yyyymmdd_bad_metadata_start = None + yyyymmdd_bad_metadata_end = None + if obtype == 'CCPA': + yyyymmdd_bad_metadata_start_str = '20180718' + yyyymmdd_bad_metadata_end_str = '20210504' + yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d') + yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d') + # + #----------------------------------------------------------------------- + # + # Get the list of all the times in the current day at which to retrieve + # obs. This is an array with elements having format "YYYYMMDDHH". + # + #----------------------------------------------------------------------- + # + yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d') + key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str + obs_retrieve_times_crnt_day_str = vx_config[key] + obs_retrieve_times_crnt_day \ + = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str] + # + #----------------------------------------------------------------------- + # + # Obs files will be obtained by extracting them from the relevant n-hourly + # archives, where n is the archive interval in hours (denoted below by the + # variable arcv_intvl_hrs). 
Thus, we must first obtain the sequence of + # hours (since hour 0 of the task day) corresponding to the archive files + # from which we must extract obs files. We refer to this as the sequence + # of archive hours. + # + # To generate this sequence, we first set the archive interval and then + # set the starting and ending archive hour values. + # + # For CCPA, the archive interval is 6 hours, i.e. the obs files are + # bundled into 6-hourly archives. This implies 4 archives per day. The + # archives are organized such that each one contains 6 files, so that the + # obs availability interval is + # + # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + # = 1 hr/file + # + # i.e. there is one obs file for each hour of the day containing the + # accumulation over that one hour. The archive corresponding to hour 0 + # of the current day contains 6 files representing accumulations during + # the 6 hours of the previous day. The archive corresponding to hour 6 + # of the current day corresponds to accumulations during the first 6 + # hours of the current day, and the archives corresponding to hours 12 + # and 18 of the current day correspond to accumulations during the 2nd + # and 3rd 6-hourly intervals of the current day. Thus, to obtain all the + # one-hour accumulations for the current day, we must extract all the obs + # files from the archives corresponding to hours 6, 12, and 18 of the + # current day and hour 0 of the next day. This corresponds to an archive + # hour sequence set below of [6, 12, 18, 24]. Thus, in the simplest case + # in which the observation retrieval times include all hours of the + # current task's day at which obs files are available and none of the obs + # files for this day already exist on disk, this sequence will be [6, 12, + # 18, 24]. In other cases, the sequence we loop over will be a subset of + # [6, 12, 18, 24]. + # + # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + # bundled into 24-hourly archives. This implies just 1 archive per day. + # The archives are organized such that each one contains 4 files, so that + # the obs availability interval is + # + # obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] + # = 6 hr/file + # + # i.e. there is one obs file for each 6-hour interval of the day containing + # the accumulation over those 6 hours. The 4 obs files within each archive + # correspond to hours 0, 6, 12, and 18 of the current day. The obs file + # for hour 0 contains accumulations during the last 6 hours of the previous + # day, while those for hours 6, 12, and 18 contain accumulations for the + # first, second, and third 6-hour chunks of the current day. Thus, to + # obtain all the 6-hour accumulations for the current day, we must extract + # from the archive for the current day the obs files for hours 6, 12, and + # 18 and from the archive for the next day the obs file for hour 0. This + # corresponds to an archive hour sequence set below of [0, 24]. Thus, in + # the simplest case in which the observation retrieval times include all + # hours of the current task's day at which obs files are available and + # none of the obs files for this day already exist on disk, this sequence + # will be [0, 24]. In other cases, the sequence we loop over will be a + # subset of [0, 24]. + # + # For NDAS, the archive interval is 6 hours, i.e. the obs files are + # bundled into 6-hourly archives. This implies 4 archives per day. The + # archives are organized such that each one contains 7 files (not say 6). 
+ # The archive associated with time yyyymmddhh_arcv contains the hourly + # files at + # + # yyyymmddhh_arcv - 6 hours + # yyyymmddhh_arcv - 5 hours + # ... + # yyyymmddhh_arcv - 2 hours + # yyyymmddhh_arcv - 1 hours + # yyyymmddhh_arcv - 0 hours + # + # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + # respectively. Thus, the tm06 file from the current archive, say the + # one associated with time yyyymmddhh_arcv, has the same valid time as + # the tm00 file from the previous archive, i.e. the one associated with + # time (yyyymmddhh_arcv - 6 hours). It turns out the tm06 file from the + # current archive contains more/better observations than the tm00 file + # from the previous archive. Thus, for a given archive time yyyymmddhh_arcv, + # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, + # effectively resulting in an 6 files per archive for NDAS obs. The obs + # availability interval is then + # + # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + # = 1 hr/file + # + # i.e. there is one obs file for each hour of the day containing values + # at that hour. The archive corresponding to hour 0 of the current day + # contains 6 files valid at hours 18 through 23 of the previous day. The + # archive corresponding to hour 6 of the current day contains 6 files + # valid at hours 0 through 5 of the current day, and the archives + # corresponding to hours 12 and 18 of the current day each contain 6 + # files valid at hours 6 through 11 and 12 through 17 of the current day. + # Thus, to obtain all the hourly values for the current day (from hour + # 0 to hour 23), we must extract the 6 obs files (excluding the tm00 + # ones) from the archives corresponding to hours 6, 12, and 18 of the + # current day and the archive corresponding to hour 0 of the next day. + # This corresponds to an archive hour sequence set below of [6, 12, 18, + # 24]. Thus, in the simplest case in which the observation retrieval + # times include all hours of the current task's day at which obs files + # are available and none of the obs files for this day already exist on + # disk, this sequence will be [6, 12, 18, 24]. In other cases, the + # sequence we loop over will be a subset of [6, 12, 18, 24]. + # + #----------------------------------------------------------------------- + # + if obtype == 'CCPA': + arcv_intvl_hrs = 6 + elif obtype == 'NOHRSC': + arcv_intvl_hrs = 24 + elif obtype == 'MRMS': + arcv_intvl_hrs = 24 + elif obtype == 'NDAS': + arcv_intvl_hrs = 6 + arcv_intvl = dt.timedelta(hours=arcv_intvl_hrs) + + # Number of obs files within each archive. + num_obs_files_per_arcv = int(arcv_intvl/obs_avail_intvl) + + # Initial guess for starting archive hour. This is set to the archive + # hour containing obs at the first obs retrieval time of the day. + hod_first = obs_retrieve_times_crnt_day[0].hour + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first) + + # Ending archive hour. This is set to the archive hour containing obs at + # the last obs retrieval time of the day. + hod_last = obs_retrieve_times_crnt_day[-1].hour + arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last) + + # Set other variables needed below when evaluating the METplus template for + # the full path to the processed observation files. + one_hour = dt.timedelta(hours=1) + ushdir = config['user']['USHdir'] + + # Check whether any obs files already exist on disk in their processed + # (i.e. final) locations. 
Here, by "processed" we mean after any renaming
  # and rearrangement of files that this script may do to the "raw" files,
  # i.e. the files as they are named and arranged within the archive (tar)
  # files on HPSS.  If any such files are found, adjust the starting archive
  # hour.  In the process, keep a count of the number of obs files that
  # already exist on disk.
  num_existing_files = 0
  do_break = False
  for yyyymmddhh in obs_retrieve_times_crnt_day:

    for fp_proc_templ in fp_proc_templates:
      # Set the full path to the final processed obs file (fp_proc).
      lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour)
      cmd = '; '.join(['export USHdir=' + ushdir,
                       'export yyyymmdd_task=' + yyyymmdd_task_str,
                       'export lhr=' + str(lhr),
                       'export METplus_timestr_tmpl=' + fp_proc_templ,
                       os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')])
      result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
      fp_proc = result.stdout.strip()

      # Check whether the file already exists.
      if os.path.isfile(fp_proc):
        num_existing_files += 1
        msg = dedent(f"""
          File already exists on disk:
            fp_proc = {fp_proc}
          """)
        logging.info(msg)
      else:
        hod = yyyymmddhh.hour
        arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod)
        msg = dedent(f"""
          File does not exist on disk:
            fp_proc = {fp_proc}
          Setting the hour (since hour 0 of the current task day) of the first
          archive to retrieve to:
            arcv_hr_start = {arcv_hr_start}
          """)
        logging.info(msg)
        do_break = True
        break

    if do_break: break

  # If the number of obs files that already exist on disk is equal to the
  # number of obs files needed, then there is no need to retrieve any files.
  num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day)
  num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups
  if num_existing_files == num_files_needed:

    msg = dedent(f"""
      All obs files needed for the current day (yyyymmdd_task) already exist
      on disk:
        yyyymmdd_task = {yyyymmdd_task}
      Thus, there is no need to retrieve any files.
      """)
    logging.info(msg)
    return True

  # If the number of obs files that already exist on disk is not equal to
  # the number of obs files needed, then we will need to retrieve files.
  # In this case, set the sequence of hours corresponding to the archives
  # from which files will be retrieved.
  else:

    arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)]
    msg = dedent(f"""
      At least some obs files needed for the current day (yyyymmdd_task)
      do not exist on disk:
        yyyymmdd_task = {yyyymmdd_task}
      The number of obs files needed for the current day is:
        num_files_needed = {num_files_needed}
      The number of obs files that already exist on disk is:
        num_existing_files = {num_existing_files}
      Will retrieve remaining files by looping over archives corresponding to
      the following hours (since hour 0 of the current day):
        arcv_hrs = {arcv_hrs}
      """)
    logging.info(msg)
  #
  #-----------------------------------------------------------------------
  #
  # At this point, at least some obs files for the current day need to be
  # retrieved.  Thus, loop over the relevant archives that contain obs for
  # the day given by yyyymmdd_task and retrieve files as needed.
  #
  # Note that the NOHRSC data on HPSS are archived by day, with the archive
  # for a given day containing 6-hour as well as 24-hour grib2 files.
As + # described above, the four 6-hour files are for accumulated snowfall at + # hour 0 of the current day (which represents accumulation over the last + # 6 hours of the previous day) as well as hours 6, 12, and 18, while the + # two 24-hour files are at hour 0 (which represents accumulation over all + # 24 hours of the previous day) and 12 (which represents accumulation over + # the last 12 hours of the previous day plus the first 12 hours of the + # current day). Here, we will only obtain the 6-hour files. In other + # workflow tasks, the values in these 6-hour files will be added as + # necessary to obtain accumulations over longer periods (e.g. 24 hours). + # Since the four 6-hour files are in one archive and are relatively small + # (on the order of kilobytes), we get them all with a single call to the + # retrieve_data.py script. + # + #----------------------------------------------------------------------- + # + + # Whether to move the files or copy them from their raw to their processed + # locations. + mv_or_cp = 'cp' + # Whether to remove raw observations after processed directories have + # been created from them. + key = 'REMOVE_RAW_OBS_' + obtype + remove_raw_obs = config['platform'][key] + # If the raw directories and files are to be removed at the end of this + # script, no need to copy the files since the raw directories are going + # to be removed anyway. + if remove_raw_obs: + mv_or_cp = 'mv' + + # Base directory that will contain the archive subdirectories in which + # the files extracted from each archive (tar) file will be placed. We + # refer to this as the "raw" base directory because it contains files + # as they are found in the archives before any processing by this script. + basedir_raw = os.path.join(obs_dir, 'raw_' + yyyymmdd_task_str) + + for arcv_hr in arcv_hrs: + + msg = dedent(f""" + Processing archive hour {arcv_hr} ... + """) + logging.info(msg) + + # Calculate the time information for the current archive. + yyyymmddhh_arcv = yyyymmdd_task + dt.timedelta(hours=arcv_hr) + yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') + yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') + + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + if obtype == 'CCPA': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + # For NOHRSC, the hour-of-day for the archive is irrelevant since there + # is only one archive per day, so don't include it in the raw archive + # directory's name. + elif obtype == 'NOHRSC': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str) + # Since for MRMS data there is only one archive per day, that directory + # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw. + elif obtype == 'MRMS': + arcv_dir_raw = basedir_raw + elif obtype == 'NDAS': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. 
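    #
    # Illustrative example (hypothetical date, not executed): with 6 obs files
    # per archive and a 1-hour obs availability interval, a CCPA archive at
    # yyyymmddhh_arcv = 2023061512 spans valid times 2023061507 through
    # 2023061512, while an NDAS archive at the same time spans 2023061506
    # through 2023061511 (the tm06 through tm01 files; the tm00 file is not
    # used).  A minimal standalone sketch of the same bounds that are computed
    # generically just below:
    #
    #   import datetime as dt
    #   arcv = dt.datetime(2023, 6, 15, 12)
    #   intvl = dt.timedelta(hours=1)
    #   ccpa_start, ccpa_end = arcv - 5*intvl, arcv           # 07Z through 12Z
    #   ndas_start, ndas_end = arcv - 6*intvl, arcv - intvl   # 06Z through 11Z
    #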
    if obtype == 'CCPA':
      arcv_contents_start = yyyymmddhh_arcv - (num_obs_files_per_arcv - 1)*obs_avail_intvl
      arcv_contents_end = yyyymmddhh_arcv
    elif obtype == 'NOHRSC':
      arcv_contents_start = yyyymmddhh_arcv
      arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl
    elif obtype == 'MRMS':
      arcv_contents_start = yyyymmddhh_arcv
      arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl
    elif obtype == 'NDAS':
      arcv_contents_start = yyyymmddhh_arcv - num_obs_files_per_arcv*obs_avail_intvl
      arcv_contents_end = yyyymmddhh_arcv - obs_avail_intvl

    do_retrieve = False
    for obs_retrieve_time in obs_retrieve_times_crnt_day:
      if (obs_retrieve_time >= arcv_contents_start) and \
         (obs_retrieve_time <= arcv_contents_end):
        do_retrieve = True
        break

    if not do_retrieve:
      msg = dedent(f"""
        None of the current day's observation retrieval times (possibly including
        hour 0 of the next day if considering a cumulative obs type) fall in the
        range spanned by the current {arcv_intvl_hrs}-hourly archive file.  The
        bounds of the data in the current archive are:
          arcv_contents_start = {arcv_contents_start}
          arcv_contents_end = {arcv_contents_end}
        The times at which obs need to be retrieved are:
          obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day}
        """)
      logging.info(msg)

    else:

      # Make sure the raw archive directory exists because it is used below as
      # the output directory of the retrieve_data.py script (so if this directory
      # doesn't already exist, that script will fail).  Creating this directory
      # also ensures that the raw base directory (basedir_raw) exists before we
      # change location to it below.
      Path(arcv_dir_raw).mkdir(parents=True, exist_ok=True)

      # The retrieve_data.py script first extracts the contents of the archive
      # file into the directory it was called from and then moves them to the
      # specified output location (via the --output_path option).  Note that
      # the relative paths of obs files within archives associated with different
      # days may be the same.  Thus, if files with the same archive-relative
      # paths are being simultaneously extracted from multiple archive files
      # (by multiple get_obs tasks), they will likely clobber each other if the
      # extraction is being carried out into the same location on disk.  To avoid
      # this, we first change location to the raw base directory (whose name is
      # obs-day dependent) and then call the retrieve_data.py script.
      os.chdir(basedir_raw)

      # Pull obs from HPSS.  This will get all the obs files in the current
      # archive and place them in the raw archive directory.
      #
      # Note that for the specific case of NDAS obs, this will get all 7 obs
      # files in the current archive, although we will make use of only 6 of
      # these (we will not use the tm00 file).
      parmdir = config['user']['PARMdir']
      cmd = ' '.join(['python3', \
                      '-u', os.path.join(ushdir, 'retrieve_data.py'), \
                      '--debug', \
                      '--file_set', 'obs', \
                      '--config', os.path.join(parmdir, 'data_locations.yml'), \
                      '--cycle_date', yyyymmddhh_arcv_str, \
                      '--data_stores', 'hpss', \
                      '--data_type', obtype + '_obs', \
                      '--output_path', arcv_dir_raw, \
                      '--summary_file', 'retrieve_data.log'])
      result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
      rc = result.returncode

      # Loop over the raw obs files extracted from the current archive and
      # generate from them the processed obs files.
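      #
      # Illustrative example (hypothetical date, not executed) of the raw-to-
      # processed mapping described in the per-type notes that follow: for NDAS
      # with yyyymmddhh_arcv = 2023061512, the obs valid at 2023061510 come from
      # the raw file nam.t12z.prepbufr.tm02.nr, and the processed path is found
      # by evaluating the METplus time-string template at lead hour 10 of the
      # task day.  A minimal standalone sketch of the tm-suffix arithmetic
      # assumed here:
      #
      #   import datetime as dt
      #   arcv = dt.datetime(2023, 6, 15, 12)
      #   valid = dt.datetime(2023, 6, 15, 10)
      #   hrs_ago = int((arcv - valid).total_seconds()//3600)       # -> 2
      #   fn_raw = f"nam.t{arcv:%H}z.prepbufr.tm{hrs_ago:02d}.nr"   # -> nam.t12z.prepbufr.tm02.nr
      #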
+ # + # For CCPA obs, for most dates this consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. However, for dates between + # 20180718 and 20210504 and hours-of-day 19 through the end of the day + # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an + # error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. + # + # For NOHRSC obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + # + # For NDAS obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. Note that the tm06 file in a given archive contain + # more/better observations than the tm00 file in the next archive (their + # valid times are equivalent), so we use the tm06 files. + if obtype == 'CCPA': + in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NOHRSC': + in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'MRMS': + in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NDAS': + in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + in_arcv_times.sort() + + for yyyymmddhh in in_arcv_times: + + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if yyyymmddhh in obs_retrieve_times_crnt_day: + + for i, fp_proc_templ in enumerate(fp_proc_templates): + + # For MRMS obs, first select from the set of raw files for the current day + # those that are nearest in time to the current hour. Unzip these in a + # temporary subdirectory under the raw base directory. + # + # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # a certain file naming convention. That convention must match the names + # of the files that the retrieve_data.py script called above ends up + # retrieving. The list of possibile templates for these names is given + # in parm/data_locations.yml, but which of those is actually used is not + # known until retrieve_data.py completes. Thus, that information needs + # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. + # For now, we hard-code the file name here. + if obtype == 'MRMS': + yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') + cmd = ' '.join(['python3', \ + '-u', os.path.join(ushdir, 'mrms_pull_topofhour.py'), \ + '--valid_time', yyyymmddhh_str, \ + '--source', basedir_raw, \ + '--outdir', os.path.join(basedir_raw, 'topofhour'), \ + '--product', fields_in_filenames[i], \ + '--no-add_vdate_subdir']) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + rc = result.returncode + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. 
+ # For now, we hard-code the file name here. + if obtype == 'CCPA': + hr = yyyymmddhh.hour + fn_raw = 'ccpa.t' + f'{hr:02d}' + 'z.' + accum_obs_formatted + 'h.hrap.conus.gb2' + elif obtype == 'NOHRSC': + yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') + fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2' + elif obtype == 'MRMS': + hr = yyyymmddhh.hour + fn_raw = fields_in_filenames[i] + '_' + levels_in_filenames[i] \ + + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2' + fn_raw = os.path.join('topofhour', fn_raw) + elif obtype == 'NDAS': + time_ago = yyyymmddhh_arcv - yyyymmddhh + hrs_ago = int(time_ago.seconds/3600) + hh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%H') + fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr' + fp_raw = os.path.join(arcv_dir_raw, fn_raw) + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + cmd = '; '.join(['export USHdir=' + ushdir, + 'export yyyymmdd_task=' + yyyymmdd_task_str, + 'export lhr=' + str(lhr), + 'export METplus_timestr_tmpl=' + fp_proc_templ, + os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + fp_proc = result.stdout.strip() + + # Make sure the directory in which the processed file will be created exists. + dir_proc = os.path.dirname(fp_proc) + Path(dir_proc).mkdir(parents=True, exist_ok=True) + + msg = dedent(f""" + Creating the processed obs file + {fp_proc} + from the raw file + {fp_raw} + ... + """) + logging.info(msg) + + yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. After the data is + # pulled, reorganize into correct yyyymmdd structure. + if (obtype == 'CCPA') and \ + ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \ + (((hr >= 19) and (hr <= 23)) or (hr == 0)): + cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s']) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + elif mv_or_cp == 'mv': + shutil.move(fp_raw, fp_proc) + elif mv_or_cp == 'cp': + shutil.copy(fp_raw, fp_proc) + # + #----------------------------------------------------------------------- + # + # Clean up raw obs directories. + # + #----------------------------------------------------------------------- + # + if remove_raw_obs: + msg = dedent(f""" + Removing raw obs directories ..." + """) + logging.info(msg) + shutil.rmtree(basedir_raw) + + return True + + + +def parse_args(argv): + """Parse command line arguments""" + parser = argparse.ArgumentParser( + description="Get observations." 
+ ) + + parser.add_argument( + "--obtype", + dest="obtype", + type=str, + required=True, + choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], + help="Cumulative observation type.", + ) + + parser.add_argument( + "--obs_day", + dest="obs_day", + type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), + required=True, + help="Date of observation day, in the form 'YYYMMDD'.", + ) + + parser.add_argument( + "--var_defns_path", + dest="var_defns_path", + type=str, + required=True, + help="Path to variable definitions file.", + ) + + choices_log_level = [pair for lvl in list(logging._nameToLevel.keys()) + for pair in (str.lower(lvl), str.upper(lvl))] + parser.add_argument( + "--log_level", + dest="log_level", + type=str, + required=False, + default='info', + choices=choices_log_level, + help=dedent(f""" + Logging level to use with the 'logging' module. + """)) + + parser.add_argument( + "--log_fp", + dest="log_fp", + type=str, + required=False, + default='', + help=dedent(f""" + Name of or path (absolute or relative) to log file. If not specified, + the output goes to screen. + """)) + + return parser.parse_args(argv) + + +if __name__ == "__main__": + args = parse_args(sys.argv[1:]) + + # Set up logging. + # If the name/path of a log file has been specified in the command line + # arguments, place the logging output in it (existing log files of the + # same name are overwritten). Otherwise, direct the output to the screen. + log_level = str.upper(args.log_level) + msg_format = "[%(levelname)s:%(name)s: %(filename)s, line %(lineno)s: %(funcName)s()] %(message)s" + if args.log_fp: + logging.basicConfig(level=log_level, format=msg_format, filename=args.log_fp, filemode='w') + else: + logging.basicConfig(level=log_level, format=msg_format) + + cfg = load_yaml_config(args.var_defns_path) + get_obs(cfg, args.obtype, args.obs_day) + + diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh deleted file mode 100755 index d3c486c607..0000000000 --- a/ush/get_obs_ccpa.sh +++ /dev/null @@ -1,484 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. 
-# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Below, we will use the retrieve_data.py script to retrieve the CCPA -# grib2 file from a data store (e.g. HPSS). Before doing so, note the -# following: -# -# * The daily archive (tar) file containing CCPA obs has a name of the -# form -# -# [PREFIX].YYYYMMDD.tar -# -# where YYYYMMDD is a given year, month, and day combination, and -# [PREFIX] is a string that is not relevant to the discussion here -# (the value it can take on depends on which of several time periods -# YYYYMMDD falls in, and the retrieve_data.py tries various values -# until it finds one for which a tar file exists). Unintuitively, this -# archive file contains accumulation data for valid times starting at -# hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current -# day (YYYYMMDD). In other words, the valid times of the contents of -# this archive file are shifted back by 6 hours relative to the time -# string appearing in the name of the file. See section "DETAILS..." -# for a detailed description of the directory structure in the CCPA -# archive files. -# -# * We call retrieve_data.py in a temporary cycle-specific subdirectory -# in order to prevent get_obs_ccpa tasks for different cycles from -# clobbering each other's output. We refer to this as the "raw" CCPA -# base directory because it contains files as they are found in the -# archives before any processing by this script. -# -# * In each (cycle-specific) raw base directory, the data is arranged in -# daily subdirectories with the same timing as in the archive (tar) -# files (which are described in the section "DETAILS..." below). In -# particular, each daily subdirectory has the form YYYYMDD, and it may -# contain CCPA grib2 files for accumulations valid at hour 19 of the -# previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). -# (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the -# daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer -# to these as raw daily (sub)directories to distinguish them from the -# processed daily subdirectories under the processed (final) CCPA base -# directory (basedir_proc). -# -# * For a given cycle, some of the valid times at which there is forecast -# output may not have a corresponding file under the raw base directory -# for that cycle. This is because another cycle that overlaps this cycle -# has already obtained the grib2 CCPA file for that valid time and placed -# it in its processed location; as a result, the retrieveal of that grib2 -# file for this cycle is skipped. 
-# -# * To obtain a more intuitive temporal arrangement of the data in the -# processed CCPA directory structure than the temporal arrangement used -# in the archives and raw directories, we process the raw files such -# that the data in the processed directory structure is shifted forward -# in time 6 hours relative to the data in the archives and raw directories. -# This results in a processed base directory that, like the raw base -# directory, also contains daily subdirectories of the form YYYYMMDD, -# but each such subdirectory may only contain CCPA data at valid hours -# within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but -# may not contain data that is valid on the previous, next, or any other -# day). -# -# * For data between 20180718 and 20210504, the 01h accumulation data -# (which is the only accumulation we are retrieving) have incorrect -# metadata under the "00" directory in the archive files (meaning for -# hour 00 and hours 19-23, which are the ones in the "00" directory). -# Below, we use wgrib2 to make a correction for this when transferring -# (moving or copying) grib2 files from the raw daily directories to -# the processed daily directories. -# -# -# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES -# ---------------------------------------------------------- -# -# The daily archive file containing CCPA obs is named -# -# [PREFIX].YYYYMMDD.tar -# -# This file contains accumulation data for valid times starting at hour -# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day -# (YYYYMMDD). In particular, when untarred, the daily archive file -# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and -# 18 subdirectories contain grib2 files for accumulations valid at or -# below the hour-of-day given by the subdirectory name (and on YYYYMMDD). -# For example, the 06 directory contains data valid at: -# -# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; -# * YYYYMMDD[03, 06] for 03h accumulations; -# * YYYYMMDD[06] for 06h accumulations. -# -# The valid times for the data in the 12 and 18 subdirectories are -# analogous. However, the 00 subdirectory is different in that it -# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE -# this time, i.e. the data for valid times other than YYYYMMDD00 are on -# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at -# (note the DD-1, meaning one day prior): -# -# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; -# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; -# * YYYYMMDD00 for 06h accumulations. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. 
-accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr}) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. 
-basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - hrs_ago=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For CCPA obs, for most dates this consists of simply copying or moving - # the files from the raw archive directory to the processed directory, - # possibly renaming them in the process. However, for dates between - # 20180718 and 20210504 and hours-of-day 19 through the end of the day - # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an - # error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. - for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - # CCPA files for 1-hour accumulation have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504. After the data is - # pulled, reorganize into correct yyyymmdd structure. - hh_noZero=$((10#${hh})) - if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ - [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then - wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s - else - ${mv_or_cp} ${fp_raw} ${fp_proc} - fi - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." 
-fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh deleted file mode 100755 index d13e374620..0000000000 --- a/ush/get_obs_mrms.sh +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} - 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an -# environment variable created in the ROCOTO XML. It is a scalar variable -# because there doesn't seem to be a way to pass a bash array from the -# XML to the task's script. -mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) - -# Loop over the fields (REFC and RETOP) and set the file base name -# corresponding to each. -fields_in_filenames=() -levels_in_filenames=() -obs_mrms_fp_templates=() -for field in ${mrms_fields[@]}; do - # Set field-dependent parameters needed in forming grib2 file names. 
- if [ "${field}" = "REFC" ]; then - fields_in_filenames+=("MergedReflectivityQCComposite") - levels_in_filenames+=("00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") - elif [ "${field}" = "RETOP" ]; then - fields_in_filenames+=("EchoTop") - levels_in_filenames+=("18_00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") - else - print_err_msg_exit "\ -Invalid field specified: - field = \"${field}\" -Valid options are 'REFC', 'RETOP'." - fi -done - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. -num_existing_files=0 -num_mrms_fields=${#mrms_fields[@]} -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - for (( i=0; i<${num_mrms_fields}; i++ )); do - - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Will attempt to retrieve all obs files." - break 2 - fi - - done -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed (which is num_mrms_fields times the number -# of obs retrieval times in the current day), then there is no need to -# retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit -# Otherwise, will need to retrieve files. -else - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files. 
-" -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -# Time associated with the archive. MRMS data have daily archives that -# have the hour-of-day set to "00". -yyyymmddhh_arcv="${yyyymmdd_task}00" - -# Directory that will contain the files retrieved from the current archive -# file. We refer to this as the "raw" archive directory because it will -# contain the files as they are in the archive before any processing by -# this script. -# -# Note: -# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory -# that depends on the archive date, e.g. -# -# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" -# -# but since for MRMS data there is only one archive per day, that directory -# is redundant, so simplicity we set arcv_dir_raw to just basedir_raw. -arcv_dir_raw="${basedir_raw}" - -# Make sure the raw archive directory exists because it is used below as -# the output directory of the retrieve_data.py script (so if this directory -# doesn't already exist, that script will fail). Creating this directory -# also ensures that the raw base directory (basedir_raw) exists before we -# change location to it below. -mkdir -p ${arcv_dir_raw} - -# The retrieve_data.py script first extracts the contents of the archive -# file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). Note that -# the relative paths of obs files within archives associted with different -# days may be the same. Thus, if files with the same archive-relative -# paths are being simultaneously extracted from multiple archive files -# (by multiple get_obs tasks), they will likely clobber each other if the -# extracton is being carried out into the same location on disk. To avoid -# this, we first change location to the raw base directory (whose name is -# obs-day dependent) and then call the retrieve_data.py script. -cd ${basedir_raw} - -# Pull obs from HPSS. This will get all the obs files in the current -# archive and place them in the raw archive directory. -cmd=" -python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - -print_info_msg "CALLING: ${cmd}" -$cmd || print_err_msg_exit "Could not retrieve obs from HPSS." 
-# -#----------------------------------------------------------------------- -# -# Loop over the 24 hour period starting with the zeroth hour of the day -# associated with this task and ending with the 23rd hour. -# -#----------------------------------------------------------------------- -# - -# Loop over the raw obs files extracted from the current archive and -# generate from them the processed obs files. -# -# For MRMS obs, the raw obs consist of gzipped grib2 files that are -# usually a few minutes apart in time. However, because forecast data -# is available at most every hour, the SRW App configuration parameter -# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. -# Below, we loop over the whole day using this 1-hourly interval. For -# each hour of the day, we call the script mrms_pull_topofhour.py to find -# the gzipped grib2 file in the raw archive directory that is closest in -# time to the hour and unzip it in a temporary directory. We then copy -# or move it to the processed directory, possibly renaming it in the -# process. -for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - for (( i=0; i<${num_mrms_fields}; i++ )); do - - # First, select from the set of raw files for the current day those that - # are nearest in time to the current hour. Unzip these in a temporary - # subdirectory under the raw base directory. - # - # Note that the script we call to do this (mrms_pull_topofhour.py) assumes - # a certain file naming convention. That convention must match the names - # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given - # in parm/data_locations.yml, but which of those is actually used is not - # known until retrieve_data.py completes. Thus, that information needs - # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. - # For now, we hard-code the file name here. - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${yyyymmddhh} \ - --source ${basedir_raw} \ - --outdir ${basedir_raw}/topofhour \ - --product ${fields_in_filenames[$i]} \ - --no-add_vdate_subdir - - # Set the name of and the full path to the raw obs file created by the - # mrms_pull_topofhour.py script. This name is currently hard-coded to - # the output of that script. In the future, it should be set in a more - # general way (e.g. obtain from a settings file). - fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2" - fp_raw="${basedir_raw}/topofhour/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - mv ${fp_raw} ${fp_proc} - - done - fi -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh deleted file mode 100755 index 45338714a2..0000000000 --- a/ush/get_obs_ndas.sh +++ /dev/null @@ -1,357 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will be automatically staged by this -# this script. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. 
-yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. 
-basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current ${arcv_hr_incr}-hourly archive file. The bounds of the current -archive are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - # Note that for the specific case of NDAS obs, this will get all 7 obs - # files in the current archive, although we will make use of only 6 of - # these (we will not use the tm00 file). 
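
The retrieval command that follows can also be expressed with Python's subprocess module, similar to how the consolidated get_obs.py later in this series runs it. The sketch below is illustrative only (the wrapper function is hypothetical); the retrieve_data.py flags are exactly those used in the script.

    import os
    import subprocess

    def pull_ndas_archive(ushdir, parmdir, yyyymmddhh_arcv, arcv_dir_raw, basedir_raw):
        cmd = [
            "python3", "-u", os.path.join(ushdir, "retrieve_data.py"),
            "--debug",
            "--file_set", "obs",
            "--config", os.path.join(parmdir, "data_locations.yml"),
            "--cycle_date", yyyymmddhh_arcv,
            "--data_stores", "hpss",
            "--data_type", "NDAS_obs",
            "--output_path", arcv_dir_raw,
            "--summary_file", "retrieve_data.log",
        ]
        # Run from the day-specific raw base directory so that get_obs tasks for
        # different days cannot clobber each other's extracted files.
        return subprocess.run(cmd, cwd=basedir_raw, check=True)
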
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NDAS obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. - # - # Note that the tm06 file in a given archive contain more/better observations - # than the tm00 file in the next archive (their valid times are equivalent), - # so we use the tm06 files. - for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh deleted file mode 100755 index 5c56f8a8df..0000000000 --- a/ush/get_obs_nohrsc.sh +++ /dev/null @@ -1,372 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. 
$USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 -# -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. -accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 24-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the observation retrieval times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. 
In other cases, the sequence we loop over will be a subset of -# "0 24", e.g. just "0" or just "24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 24 hours because the archives are 24-hourly. -arcv_hr_incr=24 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( hr_first/arcv_hr_incr )) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(( hr_last/arcv_hr_incr )) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( hr/arcv_hr_incr )) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. 
-else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. The NOHRSC data on HPSS are archived by day, with the -# archive for a given day containing 6-hour as well as 24-hour grib2 -# files. The four 6-hour files are for accumulated snowfall at 00z -# (which represents accumulation over the last 6 hours of the previous -# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which -# represents accumulation over all 24 hours of the previous day) and 12z -# (which represents accumulation over the last 12 hours of the previous -# day plus the first 12 hours of the current day). -# -# Here, we will only obtain the 6-hour files. In other workflow tasks, -# the values in these 6-hour files will be added as necessary to obtain -# accumulations over longer periods (e.g. 24 hours). Since the four -# 6-hour files are in one archive and are relatively small (on the order -# of kilobytes), we get them all with a single call to the retrieve_data.py -# script. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. 
If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H) - hrs=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NOHRSC obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. 
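
As with NDAS, the renaming step that follows is easier to see with a concrete sketch (illustrative only; it assumes the 6-hourly availability interval and the 2-digit accumulation tag described in the script header): each daily NOHRSC archive yields four 6-hour files named by valid time.

    import datetime as dt

    def nohrsc_raw_names_in_archive(yyyymmdd_arcv: str, avail_intvl_hrs: int = 6):
        """Raw 6-hour snowfall files expected in the daily archive for yyyymmdd_arcv."""
        day0 = dt.datetime.strptime(yyyymmdd_arcv, "%Y%m%d")
        return [
            f"sfav2_CONUS_{avail_intvl_hrs:02d}h_{day0 + dt.timedelta(hours=h):%Y%m%d%H}_grid184.grb2"
            for h in range(0, 24, avail_intvl_hrs)
        ]

    print(nohrsc_raw_names_in_archive("20230217"))
    # ['sfav2_CONUS_06h_2023021700_grid184.grb2', 'sfav2_CONUS_06h_2023021706_grid184.grb2',
    #  'sfav2_CONUS_06h_2023021712_grid184.grb2', 'sfav2_CONUS_06h_2023021718_grid184.grb2']
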
- for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh new file mode 100755 index 0000000000..b2df03c56c --- /dev/null +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" +#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" +#yyyymmdd_task="20230217" +#lhr="22" +#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" +set -u +. 
$USHdir/source_util_funcs.sh +eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ + outvarname_evaluated_timestr="fp_proc" +echo "${fp_proc}" + +# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ From 7d684057f7e73b75804549735fdd1fbf3830b5e7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 12:51:33 -0600 Subject: [PATCH 101/208] Clean up and clarify comments; calculate list of processed obs file paths only once and save for later use; other minor code improvements. --- ush/get_obs.py | 202 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 132 insertions(+), 70 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index f6e2fed265..84e49e6f40 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -19,8 +19,8 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ This file defines a function that, for the given observation type, obs - archive interval, and hour of day, returns the hour (counting from the - start of the day) corresponding to the archive file in which the obs file + archive interval, and hour of day, returns the hour (counting from hour + zero of the day) corresponding to the archive file in which the obs file for the given hour of day is included. Note that for cumulative fields (like CCPA and NOHRSC, as opposed to @@ -38,8 +38,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): arcv_intvl_hrs: Time interval (in hours) between archive files. An integer. For example, if the obs files are bundled into 6-hourly archives, then this will be - set to 6. This must be between 1 and 24 and must divide evenly into 24 - (this is checked for elsewhere). + set to 6. This must be between 1 and 24 and must divide evenly into 24. hod: The hour of the day. An integer. This must be between 0 and 23. For @@ -52,26 +51,45 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): the obs file for the given hour of day. An integer. """ - valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas'] - if obtype not in valid_obtypes: + valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obtype_upper = obtype.upper() + if obtype_upper not in valid_obtypes: msg = dedent(f""" - The specified observation type is not supported: - obtype = {obtype} + The specified observation type (after converting to upper case) is not + supported: + obtype_upper = {obtype_upper} Valid observation types are: - {valid_obtypes} + {valid_obtypes} """) logging.error(msg) raise Exception(msg) + # Ensure that the archive inerval divides evenly into 24 hours. + remainder = 24 % arcv_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The archive interval for obs of type {obtype} must divide evenly into 24 + but doesn't: + arcv_intvl_hrs = {arcv_intvl_hrs} + 24 % arcv_intvl_hrs = {remainder} + """) + logging.error(msg) + raise Exception(msg) + if (hod < 0) or (hod > 23): msg = dedent(f""" - The specified hour-of-day must be between 0 and 23, inclusive but isn't: - hod = {hod} + The specified hour-of-day must be between 0 and 23, inclusive, but isn't: + hod = {hod} """) logging.error(msg) raise Exception(msg) - obtype_upper = obtype.upper() + # Set the archive hour. This depends on the obs type because each obs + # type can organize its observation files into archives in a different + # way, e.g. 
a cumulative obs type may put the obs files for hours 1 + # through 6 of the day in the archive labeled with hour 6 while an + # instantaneous obs type may put the obs files for hours 0 through 5 of + # the day in the archive labeled with hour 6. if obtype_upper in ['CCPA']: if hod == 0: arcv_hr = 24 @@ -199,20 +217,48 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder} """) + logging.error(msg) raise Exception(msg) - # For convenience, get obs availability interval as a datetime object. + # For convenience, convert the obs availability interval to a datetime + # object. obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) # Get the base directory for the observations. key = obtype + '_OBS_DIR' obs_dir = config['platform'][key] - # Set the group of fields for each observation type. We assume there is - # a separate obs file type for each such field group in the observations. + # For each observation type, set the group of fields contained in those + # observation files that we need for verification. Each group of fields + # is one that is verified together in the workflow. We assume there is + # a separate set of obs files for each such field group in the observations, + # and in the code below we loop over these sets of files as necessary. + # There are several scenarios to consider: + # + # * An obs type consists of only one set of files containing only one + # field. + # This is the case for CCPA and NOHRSC obs. CCPA obs consist only one + # set of files that contain APCP data, and NOHRSC obs consist of only + # one set of files that contain ASNOW data. + # + # * An obs type consists of more than one set of files, with each file + # containing a different field. + # This is the case for MRMS obs. These consist of two sets of files. + # The first set contains REFC data, and the second contains RETOP data. + # + # * An obs type consists of only one set of files, but each file contains + # multiple groups of fields needed for verification. + # This is the case for NDAS obs. These consist of a single set of files, + # but each file contains both the ADPSFC fields (like 2-m temperature) + # and ADPUPA fields (like 500-mb temperature) that are verified separately + # in the workflow tasks and thus are considered separate field groups. + # + # Other obs type and field group scenarios are also possible, but we do + # not describe them since they are not applicable to any of the obs types + # considered here. if obtype == 'CCPA': field_groups_in_obs = ['APCP'] elif obtype == 'NOHRSC': @@ -225,8 +271,9 @@ def get_obs(config, obtype, yyyymmdd_task): # For each field group in the observations, get the METplus file name # template for the observation files. Then combine these with the base - # directory to get the METplus template for the full path to the processed - # obs files. + # directory to get the METplus template for the full path on disk to + # the processed obs files. If obs files do not already exist at these + # locations, they will be retrieved from HPSS and placed at these locations. 
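
The loop that follows builds the list of processed-path templates from these field groups. A compact sketch of the key construction, plus a hand expansion of the default CCPA template for one valid time, may help; the real expansion is performed by eval_METplus_timestr_tmpl via run_eval_METplus_timestr_tmpl.sh, and the literal template shown assumes the default 1-hour CCPA availability interval.

    import datetime as dt

    field_groups_in_obs = {
        'CCPA':   ['APCP'],
        'NOHRSC': ['ASNOW'],
        'MRMS':   ['REFC', 'RETOP'],
        'NDAS':   ['ADPSFCandADPUPA'],
    }

    obtype = 'CCPA'
    keys = ['OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' for fg in field_groups_in_obs[obtype]]
    # -> ['OBS_CCPA_APCP_FN_TEMPLATE']

    # Default CCPA template: {valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2
    valid = dt.datetime(2023, 2, 17, 22)
    print(f"{valid:%Y%m%d}/ccpa.t{valid:%H}z.01h.hrap.conus.gb2")
    # -> 20230217/ccpa.t22z.01h.hrap.conus.gb2
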
fp_proc_templates = [] for fg in field_groups_in_obs: key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' @@ -241,8 +288,8 @@ def get_obs(config, obtype, yyyymmdd_task): # # For cumulative obs, set the accumulation period to use when getting obs - # files. This is simply (a properly formatted version of) the obs - # availability interval. + # files. This is simply a properly formatted version of the obs availability + # interval. accum_obs_formatted = None if obtype == 'CCPA': accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}' @@ -264,8 +311,8 @@ def get_obs(config, obtype, yyyymmdd_task): else: msg = dedent(f""" Invalid field specified for obs type: - obtype = {obtype} - field = {field} + obtype = {obtype} + field = {field} """) logging.error(msg) raise Exception(msg) @@ -433,19 +480,23 @@ def get_obs(config, obtype, yyyymmdd_task): one_hour = dt.timedelta(hours=1) ushdir = config['user']['USHdir'] - # Check whether any obs files already exist on disk in their processed - # (i.e. final) locations. Here, by "processed" we mean after any renaming - # and rearrangement of files that this script may do to the "raw" files, - # i.e. the files as they are named and arranged within the archive (tar) - # files on HPSS. If so, adjust the starting archive hour. In the process, - # keep a count of the number of obs files that already exist on disk. - num_existing_files = 0 - do_break = False - for yyyymmddhh in obs_retrieve_times_crnt_day: - - for fp_proc_templ in fp_proc_templates: - # Set the full path to the final processed obs file (fp_proc). + # Create dictionary containing the paths to all the processed obs files + # that should exist once this script successfully completes. In this + # dictionary, the keys are the field groups, and the values are lists of + # paths. Here, by "paths to processed files" we mean the paths after any + # renaming and rearrangement of files that this script may do to the "raw" + # files, i.e. the files as they are named and arranged within the archive + # (tar) files on HPSS. + all_fp_proc_dict = {} + for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): + all_fp_proc_dict[fg] = [] + for yyyymmddhh in obs_retrieve_times_crnt_day: + # Set the lead hour, i.e. the number of hours from the beginning of the + # day at which the file is valid. lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + # Call a bash script to evaluate the template for the full path to the + # file containing METplus timestrings at the current time. This should + # be upgraded to a python script at some point. cmd = '; '.join(['export USHdir=' + ushdir, 'export yyyymmdd_task=' + yyyymmdd_task_str, 'export lhr=' + str(lhr), @@ -453,13 +504,22 @@ def get_obs(config, obtype, yyyymmdd_task): os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) fp_proc = result.stdout.strip() + all_fp_proc_dict[fg].append(fp_proc) - # Check whether file already exists. + # Check whether any obs files already exist on disk in their processed + # (i.e. final) locations. If so, adjust the starting archive hour. In + # the process, keep a count of the number of obs files that already exist + # on disk. + num_existing_files = 0 + do_break = False + for fg in field_groups_in_obs: + for yyyymmddhh, fp_proc in zip(obs_retrieve_times_crnt_day, all_fp_proc_dict[fg]): + # Check whether the processed file already exists. 
if os.path.isfile(fp_proc): num_existing_files += 1 msg = dedent(f""" File already exists on disk: - fp_proc = {fp_proc} + fp_proc = {fp_proc} """) logging.info(msg) else: @@ -467,15 +527,14 @@ def get_obs(config, obtype, yyyymmdd_task): arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) msg = dedent(f""" File does not exist on disk: - fp_proc = {fp_proc} + fp_proc = {fp_proc} Setting the hour (since hour 0 of the current task day) of the first archive to retrieve to: - arcv_hr_start = {arcv_hr_start} + arcv_hr_start = {arcv_hr_start} """) logging.info(msg) do_break = True break - if do_break: break # If the number of obs files that already exist on disk is equal to the @@ -487,7 +546,7 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" All obs files needed for the current day (yyyymmdd_task) already exist on disk: - yyyymmdd_task = {yyyymmdd_task} + yyyymmdd_task = {yyyymmdd_task} Thus, there is no need to retrieve any files. """) logging.info(msg) @@ -503,14 +562,14 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: - yyyymmdd_task = {yyyymmdd_task} + yyyymmdd_task = {yyyymmdd_task} The number of obs files needed for the current day is: - num_files_needed = {num_files_needed} + num_files_needed = {num_files_needed} The number of obs files that already exist on disk is: - num_existing_files = {num_existing_files} + num_existing_files = {num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since hour 0 of the current day): - arcv_hrs = {arcv_hrs} + arcv_hrs = {arcv_hrs} """) logging.info(msg) # @@ -617,10 +676,10 @@ def get_obs(config, obtype, yyyymmdd_task): hour 0 of the next day if considering a cumulative obs type) fall in the range spanned by the current {arcv_intvl_hrs}-hourly archive file. The bounds of the data in the current archive are: - arcv_contents_start = {arcv_contents_start} - arcv_contents_end = {arcv_contents_end} + arcv_contents_start = {arcv_contents_start} + arcv_contents_end = {arcv_contents_end} The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} """) logging.info(msg) @@ -665,6 +724,18 @@ def get_obs(config, obtype, yyyymmdd_task): result = subprocess.run(cmd, shell=True, capture_output=True, text=True) rc = result.returncode + # Get the list of times corresponding to the obs files in the current + # archive. This is a list of datetime objects. + if obtype == 'CCPA': + obs_times_in_arcv = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NOHRSC': + obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'MRMS': + obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + elif obtype == 'NDAS': + obs_times_in_arcv = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] + obs_times_in_arcv.sort() + # Loop over the raw obs files extracted from the current archive and # generate from them the processed obs files. # @@ -685,24 +756,21 @@ def get_obs(config, obtype, yyyymmdd_task): # them in the process. Note that the tm06 file in a given archive contain # more/better observations than the tm00 file in the next archive (their # valid times are equivalent), so we use the tm06 files. 
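
A worked example of the archive-content lists computed just above (illustrative only; the archive time, interval, and file count are passed in here, whereas in the function they come from the obs type settings): cumulative CCPA archives end at the archive time and count backwards, NOHRSC and MRMS archives start at the archive time and count forwards, and NDAS archives count backwards starting one interval before the archive time (the tm01 through tm06 files, with tm00 skipped).

    import datetime as dt

    def obs_times_in_archive(obtype, arcv, intvl, nfiles):
        if obtype == 'CCPA':
            times = [arcv - i * intvl for i in range(nfiles)]
        elif obtype in ('NOHRSC', 'MRMS'):
            times = [arcv + i * intvl for i in range(nfiles)]
        elif obtype == 'NDAS':
            times = [arcv - (i + 1) * intvl for i in range(nfiles)]
        return sorted(times)

    # NDAS: the archive labeled 2023-02-17 06z holds obs valid 00z through 05z.
    print(obs_times_in_archive('NDAS', dt.datetime(2023, 2, 17, 6), dt.timedelta(hours=1), 6))
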
- if obtype == 'CCPA': - in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'NOHRSC': - in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'MRMS': - in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - elif obtype == 'NDAS': - in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)] - in_arcv_times.sort() - - for yyyymmddhh in in_arcv_times: + for yyyymmddhh in obs_times_in_arcv: # Create the processed obs file from the raw one (by moving, copying, or # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. + # also exists in the list of obs retrieval times for the current day. We + # need to check this because it is possible that some of the obs retrieval + # times come before the range of times spanned by the current archive while + # the others come after, but none fall within that range. This can happen + # because the set of archive hours over which we are looping were constructed + # above without considering whether there are obs retrieve time gaps that + # make it unnecessary to retrieve some of the archives between the first + # and last ones that must be retrieved. if yyyymmddhh in obs_retrieve_times_crnt_day: - for i, fp_proc_templ in enumerate(fp_proc_templates): + for i, fg in enumerate(field_groups_in_obs): # For MRMS obs, first select from the set of raw files for the current day # those that are nearest in time to the current hour. Unzip these in a @@ -752,16 +820,10 @@ def get_obs(config, obtype, yyyymmdd_task): fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr' fp_raw = os.path.join(arcv_dir_raw, fn_raw) - # Set the full path to the final processed obs file (fp_proc) we want to + # Get the full path to the final processed obs file (fp_proc) we want to # create. - lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) - cmd = '; '.join(['export USHdir=' + ushdir, - 'export yyyymmdd_task=' + yyyymmdd_task_str, - 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, - os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - fp_proc = result.stdout.strip() + indx = obs_retrieve_times_crnt_day.index(yyyymmddhh) + fp_proc = all_fp_proc_dict[fg][indx] # Make sure the directory in which the processed file will be created exists. dir_proc = os.path.dirname(fp_proc) From 2b4c9569bcbb70656ed99edc0c16b4162ea61b6c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:22:52 -0600 Subject: [PATCH 102/208] Minor cleanup. --- scripts/exregional_get_verif_obs.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 158218889e..d1ee4116e8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -46,8 +46,8 @@ done # #----------------------------------------------------------------------- # -# Make sure the obs type is valid. Then call the python script get_obs.py -# to get the obs files. +# Make sure the obs type is valid. Then call a python script to check +# for the presence of obs files on disk and get them if needed. 
# #----------------------------------------------------------------------- # @@ -67,7 +67,8 @@ python3 -u ${USHdir}/${script_bn}.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" -print_info_msg "CALLING: ${cmd}" +print_info_msg " +CALLING: ${cmd}" ${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." # #----------------------------------------------------------------------- From a35f240400709838ebda05198891e2fde5e804cc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:32:47 -0600 Subject: [PATCH 103/208] Remove unneeded environment variables. --- parm/wflow/verify_pre.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 220b029412..567f045188 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,7 +29,6 @@ task_get_obs_ccpa: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -41,7 +40,6 @@ task_get_obs_nohrsc: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -53,7 +51,6 @@ task_get_obs_mrms: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' @@ -66,7 +63,6 @@ task_get_obs_ndas: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' From 9d7c0478b09e3637ad8e8967cb5ea4f0582030af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:38:18 -0600 Subject: [PATCH 104/208] Move the two sets of variables [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR and REMOVE_RAW_OBS_[CCPA|NOHRSC|MRMS|NDAS] in the default app configuration file from the "platform" section to the "verification" section so that they are closer to the METplus file name template variables OBS_[...]_FN_TEMPLATE that they are closely coupled with. 
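
The relocated comments in this patch repeat the warning that no two obs types may share an obs directory. A small validation along the following lines could enforce that warning; this is an illustration only (no such check is added by this series, and the function name is hypothetical).

    def check_obs_dirs_distinct(vx_config: dict) -> None:
        """Raise if two or more obs types point at the same obs directory."""
        dirs = {t: vx_config[t + '_OBS_DIR'] for t in ('CCPA', 'NOHRSC', 'MRMS', 'NDAS')}
        if len(set(dirs.values())) != len(dirs):
            raise ValueError(f"Two or more obs types share an obs directory: {dirs}")
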
--- parm/wflow/default_workflow.yaml | 8 +- tests/WE2E/run_WE2E_tests.py | 6 +- ush/config_defaults.yaml | 186 +++++++++++++++---------------- ush/get_obs.py | 4 +- 4 files changed, 99 insertions(+), 105 deletions(-) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index 39b66fc95c..4ffb6f288a 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -4,7 +4,7 @@ rocoto: entities: ACCOUNT: '{{ user.ACCOUNT }}' - CCPA_OBS_DIR: '{{ platform.CCPA_OBS_DIR }}' + CCPA_OBS_DIR: '{{ verification.CCPA_OBS_DIR }}' COLDSTART: '{{ workflow.COLDSTART }}' COMINgfs: '{{ platform.get("COMINgfs") }}' GLOBAL_VAR_DEFNS_FP: '{{ workflow.GLOBAL_VAR_DEFNS_FP }}' @@ -14,10 +14,10 @@ rocoto: LOAD_MODULES_RUN_TASK: '{{ workflow.LOAD_MODULES_RUN_TASK_FP }} {{ user.MACHINE }}' LOGEXT: ".log" NET: '{{ nco.NET_default }}' - MRMS_OBS_DIR: '{{ platform.MRMS_OBS_DIR }}' + MRMS_OBS_DIR: '{{ verification.MRMS_OBS_DIR }}' NCORES_PER_NODE: '{{ platform.NCORES_PER_NODE }}' - NDAS_OBS_DIR: '{{ platform.NDAS_OBS_DIR }}' - NOHRSC_OBS_DIR: '{{ platform.NOHRSC_OBS_DIR }}' + NDAS_OBS_DIR: '{{ verification.NDAS_OBS_DIR }}' + NOHRSC_OBS_DIR: '{{ verification.NOHRSC_OBS_DIR }}' PARTITION_DEFAULT: '{{ platform.get("PARTITION_DEFAULT") }}' PARTITION_FCST: '{{ platform.get("PARTITION_FCST") }}' PARTITION_HPSS: '{{ platform.get("PARTITION_HPSS") }}' diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index d3c2cb98ab..6a3e3bc7f4 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -207,13 +207,11 @@ def run_we2e_tests(homedir, args) -> None: # obs. If so, and if the config file does not explicitly set the observation locations, # fill these in with defaults from the machine files obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR'] - if 'platform' not in test_cfg: - test_cfg['platform'] = {} for obvar in obs_vars: mach_path = machine_defaults['platform'].get('TEST_'+obvar) - if not test_cfg['platform'].get(obvar) and mach_path: + if not test_cfg['verification'].get(obvar) and mach_path: logging.debug(f'Setting {obvar} = {mach_path} from machine file') - test_cfg['platform'][obvar] = mach_path + test_cfg['verification'][obvar] = mach_path if args.compiler == "gnu": # 2D decomposition doesn't work with GNU compilers. Deactivate 2D decomposition for GNU diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 8a02964cc2..9750724494 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -276,72 +276,6 @@ platform: # #----------------------------------------------------------------------- # - # Set METplus parameters. Definitions: - # - # CCPA_OBS_DIR: - # User-specified location of the directory where CCPA hourly - # precipitation files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure, as well as important caveats about - # errors in the metadata and file names. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NOHRSC_OBS_DIR: - # User-specified location of top-level directory where NOHRSC 6- and - # 24-hour snowfall accumulation files used by METplus are located (or, - # if retrieved by the workflow, where they will be placed). 
See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # MRMS_OBS_DIR: - # User-specified location of the directory where MRMS composite - # reflectivity and echo top files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in the scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NDAS_OBS_DIR: - # User-specified location of top-level directory where NDAS prepbufr - # files used by METplus are located (or, if retrieved by the workflow, - # where they will be placed). See comments in file - # scripts/exregional_get_verif_obs.sh for more details about files - # and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - #----------------------------------------------------------------------- - # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # - #----------------------------------------------------------------------- - # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: - # Boolean flag specifying whether to remove the "raw" observation - # directories after pulling the specified type of obs (CCPA, MRMS, - # NDAS, or NOHRSC). The raw directories are the ones in which the - # observation files are placed immediately after pulling them from - # a data store (e.g. NOAA's HPSS) but before performing any processing - # on them (e.g. renaming the files or reorganizing their directory - # structure). - # - #----------------------------------------------------------------------- - # - REMOVE_RAW_OBS_CCPA: true - REMOVE_RAW_OBS_MRMS: true - REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true - # - #----------------------------------------------------------------------- - # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, @@ -2423,37 +2357,66 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # - # Time interval (in hours) at which various types of obs are available on - # NOAA's HPSS. - CCPA_OBS_AVAIL_INTVL_HRS: 1 - NOHRSC_OBS_AVAIL_INTVL_HRS: 6 - MRMS_OBS_AVAIL_INTVL_HRS: 1 - NDAS_OBS_AVAIL_INTVL_HRS: 1 + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: + # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + # the verification tasks are located. If the files do not exist, they + # will be retrieved and placed under this directory. # - # Templates for CCPA, MRMS, and NDAS observation files. - # - # OBS_CCPA_APCP_FN_TEMPLATE: - # File name template for CCPA accumulated precipitation (APCP) observations. - # This template is used by the workflow tasks that call the METplus PcpCombine - # tool on CCPA obs to find the input observation files containing 1-hour - # APCP and then generate NetCDF files containing either 1-hour or greater - # than 1-hour APCP. - # - # OBS_NOHRSC_ASNOW_FN_TEMPLATE: - # File name template for NOHRSC snow observations. 
- # - # OBS_MRMS_REFC_FN_TEMPLATE: - # File name template for MRMS reflectivity observations. - # - # OBS_MRMS_RETOP_FN_TEMPLATE: - # File name template for MRMS echo top observations. + # Notes: # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: - # File name template for NDAS surface and upper air observations. - # This template is used by the workflow tasks that call the METplus Pb2nc - # tool on NDAS obs to find the input observation files containing ADP - # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate - # NetCDF versions of these files. + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), then + # the user must have write permission to this directory. Otherwise, + # the get_obs tasks that attempt to create these files will fail. + # + # * Do not set two or more of these directories to the same location. + # Otherwise, unexpected results and data loss may occur. + # + # * The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + # * CCPA obs contain errors in the metadata for a certain range of dates + # that need to be corrected during obs retrieval. This is described + # in more detail in ush/get_obs.py. + # + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # + # OBS_[CCPA_APCP|NOHRSC_ASNOW|MRMS_[REFC|RETOP]|NDAS_ADPSFCandADPUPA]_FN_TEMPLATE: + # File name templates for various obs type and vx field group combinations. + # + # Notes: + # + # * These are relative to the obs base directories + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # defined above. Thus, the full template to the obs files is given, e.g. + # for CCPA obs, by {CCPA_OBS_DIR}/{OBS_CCPA_APCP_FN_TEMPLATE}. + # + # * These may represent file names only, or they may include relative paths + # before the file names. + # + # * These templates must contain full information about the year, month, + # day, and hour by including METplus time strings that serve as templates + # for this information. Some of this information may be in the relative + # directory portion and the rest in the file name, or there may be no + # relative directory portion and all of it may be in the file name, but + # all four pieces of timing information must be present somewhere in + # this template as METplus time strings. Otherwise, obs files created + # by the get_obs tasks for different days might overwrite each other. + # + # * If one or more of the obs files specified by this full path do not + # exist on disk, all the files will be created by first retrieving "raw" + # versions of them from a data store (e.g. NOAA's HPSS) and then placing + # these raw files in the locations specified by this full path template. + # + # * The raw obs files, i.e. the obs files as they are named and arranged + # in the data stores, may be different than the file path/name specified + # in these variables. The list of templates for raw files to search + # for in the data stores is given in the data retrieval configuration + # file at parm/data_locations.yml. Once retrieved, these raw files are + # renamed and relocated on disk to the locations specified by + # {..._OBS_DIR}/{..._FN_TEMPLATE}. # OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." 
~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' @@ -2462,6 +2425,32 @@ verification: OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + # + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + # + # Note that MRMS files are in fact available every few minutes, but here + # we set the obs availability interval to 1 hour because the forecast + # cannot (yet) support sub-hourly output. + # + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # + # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # Boolean flag specifying whether to remove the "raw" observation + # directories after pulling the specified type of obs (CCPA, NOHRSC, + # MRMS, or NOHRSC). The raw directories are the ones in which the + # observation files are placed immediately after pulling them from + # a data store (e.g. NOAA's HPSS) but before performing any processing + # on them (e.g. renaming the files or reorganizing their directory + # structure). + # + REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_MRMS: true + REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_NOHRSC: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation @@ -2470,6 +2459,13 @@ verification: # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) # + # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: + # Template used to specify the names of the output NetCDF observation + # files generated by the worfklow verification tasks that call the METplus + # PcpCombine tool on NOHRSC observations. (These files will contain obs + # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF + # format.) + # # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: # Template used to specify the names of the output NetCDF observation # files generated by the worfklow verification tasks that call the diff --git a/ush/get_obs.py b/ush/get_obs.py index 84e49e6f40..b70d8c3ea9 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -229,7 +229,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Get the base directory for the observations. key = obtype + '_OBS_DIR' - obs_dir = config['platform'][key] + obs_dir = vx_config[key] # For each observation type, set the group of fields contained in those # observation files that we need for verification. Each group of fields @@ -603,7 +603,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Whether to remove raw observations after processed directories have # been created from them. key = 'REMOVE_RAW_OBS_' + obtype - remove_raw_obs = config['platform'][key] + remove_raw_obs = vx_config[key] # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. From 9ac85a30cf2cb20682398c9b488acb160e34ee08 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 17:21:33 -0600 Subject: [PATCH 105/208] Fixes to WE2E test config files to reflect moving of variables from "platform" section to "verification" section in config_defaults.yaml. 
--- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 5 +-- ...nsemble_verification_only_vx_time_lag.yaml | 10 ++--- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 37 ++++++++++--------- 10 files changed, 159 insertions(+), 152 deletions(-) diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index d773c632e2..0caffe5a46 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -55,10 +55,9 @@ task_run_fcst: task_run_post: POST_OUTPUT_DOMAIN_NAME: custom_ESGgrid_Michigan_Ontario verification: - VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] -platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' + VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km + VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index d0edccca01..f26ae7db21 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -28,14 +28,14 @@ global: NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' -# If the following is commented out, then the obs files staged on each -# platform will be (found and) used. -platform: + +verification: + # If the following is commented out, then the obs files staged on each + # platform will be (found and) used. CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ndas/proc' - -verification: + # VX_FCST_MODEL_NAME: FV3_GFS_v15p2_CONUS_25km VX_FCST_INPUT_BASEDIR: '{{ platform.get("TEST_VX_FCST_INPUT_BASEDIR") }}' VX_NDIGITS_ENSMEM_NAMES: 1 diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 418e47e95e..3286066021 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. 
If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 913d5093bb..3963a616b4 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. 
To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index a859a03ac8..23035f3a92 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. 
- CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 2e180e2714..10ceddd9a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 37c3eceb24..c4f62a679d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -55,6 +37,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. 
+ # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 563b8852a8..55cbf5b13f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 514dbed8d3..20cab966ef 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 6069ce8212..10ff318dd9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' verification: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 09f8531580c6eee8f806aa6cb8bb99f110bdd7aa Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 19:51:05 -0600 Subject: [PATCH 106/208] Fix bug found in latest develop branch. --- ush/get_crontab_contents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 5c651f3b0c..82bb350a0e 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -224,7 +224,7 @@ def _parse_args(argv): ) # Check that inputs are correct and consistent - args = parser._parse_args(argv) + args = parser.parse_args(argv) if args.remove: if args.line is None: From b43a9d223b5054e74dd7e0f6d2a3e89e78ed9574 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:15:22 -0600 Subject: [PATCH 107/208] Fix up documentation and comments. Minor code changes. --- ush/config_defaults.yaml | 3 +- ush/get_obs.py | 465 ++++++++++++++++++++++----------------- 2 files changed, 263 insertions(+), 205 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e50e51406d..1e967ef9e4 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2419,7 +2419,8 @@ verification: # NOAA's HPSS. # # Note that MRMS files are in fact available every few minutes, but here - # we set the obs availability interval to 1 hour because the forecast + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for the forecast, i.e. the forecast # cannot (yet) support sub-hourly output. # CCPA_OBS_AVAIL_INTVL_HRS: 1 diff --git a/ush/get_obs.py b/ush/get_obs.py index b70d8c3ea9..c831ad909e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -110,92 +110,184 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): def get_obs(config, obtype, yyyymmdd_task): """ -This script performs several important tasks for preparing data for -verification tasks. Depending on the value of the environment variable -OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -set. + This script checks for the existence of obs files of the specified type + at the locations specified by variables in the SRW App's configuration + file. If one or more of these files do not exist, it retrieves them from + a data store and places them in the locations specified by the configuration + variables, renaming them if necessary. -If data is not available on disk (in the location specified by -CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -the script attempts to retrieve the data from HPSS using the retrieve_data.py -script. Depending on the data set, there are a few strange quirks and/or -bugs in the way data is organized; see in-line comments for details. - - -CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 - -If data is retrieved from HPSS, it will be automatically staged by this -script. - -Notes about the data and how it's used for verification: - -1. 
Accumulation is currently hardcoded to 01h. The verification will -use MET/pcp-combine to sum 01h files into desired accumulations. - -2. There is a problem with the valid time in the metadata for files -valid from 19 - 00 UTC (or files under the '00' directory). This is -accounted for in this script for data retrieved from HPSS, but if you -have manually staged data on disk you should be sure this is accounted -for. See in-line comments below for details. - - -MRMS (Multi-Radar Multi-Sensor) radar observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, - -Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -data and EchoTop_18_00.50_ for echo top data. If data is not available -at the top of the hour, you should rename the file closest in time to -your hour(s) of interest to the above naming format. A script -"ush/mrms_pull_topofhour.py" is provided for this purpose. - -If data is retrieved from HPSS, it will automatically staged by this -this script. - - -NDAS (NAM Data Assimilation System) conventional observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} - -Note that data retrieved from HPSS and other sources may be in a -different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -either 00, 06, 12, or 18, and prevhour is the number of hours prior to -hh (00 through 05). If using custom staged data, you will have to -rename the files accordingly. - -If data is retrieved from HPSS, it will be automatically staged by this -this script. - - -NOHRSC snow accumulation observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 + Args: + config: + The final configuration dictionary (obtained from var_defns.yaml). -where AA is the 2-digit accumulation duration in hours: 06 or 24 + obtype: + The observation type. A string. -METplus is configured to verify snowfall using 06- and 24-h accumulated -snowfall from 6- and 12-hourly NOHRSC files, respectively. + yyyymmdd_task: + The date for which obs may be needed. A datetime object. -If data is retrieved from HPSS, it will automatically staged by this -this script. + Returns: + True if all goes well. + + + Detailed Description: + + In this script, the main (outer) loop to obtain obs files is over a + sequence of archive hours, where each archive hour in the sequence + represents one archive (tar) file in the data store, and archive hours + are with respect to hour 0 of the day. The number of archive hours in + this sequence depends on how the obs files are arranged into archives + for the given obs type. For example, if the obs files for a given day + are arranged into four archives, then the archive interval is 6 hours, + and in order to get all the obs files for that day, the loop must + iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, + 18, 24] (which of these it will be depends on how the obs files are + arranged into the archives). 
+ + Below, we give a description of archive layout for each obs type and + give the archive hours to loop over for the case in which we need to + obtain all available obs for the current day. + + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation + accumulation obs: + ---------- + For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled + into 6-hourly archives. The archives are organized such that each one + contains 6 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + = 1 hr/file + + i.e. there is one obs file for each hour of the day containing the + accumulation over that one hour. The archive corresponding to hour 0 + of the current day contains 6 files representing accumulations during + the 6 hours of the previous day. The archive corresponding to hour 6 + of the current day contains 6 files for the accumulations during the + first 6 hours of the current day, and the archives corresponding to + hours 12 and 18 of the current day each contain 6 files for accumulations + during hours 6-12 and 12-18, respectively, of the current day. Thus, + to obtain all the one-hour accumulations for the current day, we must + extract all the obs files from the three archives corresponding to hours + 6, 12, and 18 of the current day and from the archive corresponding to + hour 0 of the next day. This corresponds to an archive hour sequence + of [6, 12, 18, 24]. Thus, in the simplest case in which the observation + retrieval times include all hours of the current task's day at which + obs files are available and none of the obs files for this day already + exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, + the sequence we loop over will be a subset of [6, 12, 18, 24]. + + Note that CCPA files for 1-hour accumulation have incorrect metadata in + the files under the "00" directory (i.e. for hours-of-day 19 to 00 of + the next day) from 20180718 to 20210504. This script corrects these + errors if getting CCPA obs at these times. + + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow + accumulation observations: + ---------- + For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each one contains 4 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] + = 6 hr/file + + i.e. there is one obs file for each 6-hour interval of the day containing + the accumulation over those 6 hours. The 4 obs files within each archive + correspond to hours 0, 6, 12, and 18 of the current day. The obs file + for hour 0 contains accumulations during the last 6 hours of the previous + day, while those for hours 6, 12, and 18 contain accumulations for the + first, second, and third 6-hour chunks of the current day. Thus, to + obtain all the 6-hour accumulations for the current day, we must extract + from the archive for the current day the obs files for hours 6, 12, and + 18 and from the archive for the next day the obs file for hour 0. This + corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest + case in which the observation retrieval times include all hours of the + current task's day at which obs files are available and none of the obs + files for this day already exist on disk, this sequence will be [0, 24]. + In other cases, the sequence we loop over will be a subset of [0, 24]. 
+ + + MRMS (Multi-Radar Multi-Sensor) radar observations: + ---------- + For MRMS, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each contains gzipped grib2 files for that day that are usually only a + few minutes apart. However, since the forecasts cannot (yet) perform + sub-hourly output, we filter this data in time by using only those obs + files that are closest to each hour of the day for which obs are needed. + This effectively sets the obs availability interval for MRMS to one + hour, i.e. + + obs_avail_intvl_hrs = 1 hr/file + + i.e. there is one obs file for each hour of the day containing values + at that hour (but only after filtering in time; also see notes for + MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the + obs at all hours of the day, we only need to extract files from one + archive. Thus, in the simplest case in which the observation retrieval + times include all hours of the current task's day at which obs files + are available and none of the obs files for this day already exist on + disk, the sequence of archive hours over which we loop will be just + [0]. Note that: + + * For cases in which MRMS data are not needed for all hours of the day, + we still need to retrieve and extract from this single daily archive. + Thus, the archive hour sequence over which we loop over will always + be just [0] for MRMS obs. + + * Because MRMS obs are split into two sets of archives -- one for + composite reflectivity (REFC) and another for echo top (RETOP) -- + on any given day (and with an archive hour of 0) we actually retrive + and extract two different archive files (one per field). + + + NDAS (NAM Data Assimilation System) conventional observations: + ---------- + For NDAS, the archive interval is 6 hours, i.e. the obs files are + bundled into 6-hourly archives. The archives are organized such that + each one contains 7 files (not say 6). The archive associated with + time yyyymmddhh_arcv contains the hourly files at + + yyyymmddhh_arcv - 6 hours + yyyymmddhh_arcv - 5 hours + ... + yyyymmddhh_arcv - 2 hours + yyyymmddhh_arcv - 1 hours + yyyymmddhh_arcv - 0 hours + + These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + respectively. Thus, the tm06 file from the current archive, say the + one associated with time yyyymmddhh_arcv, has the same valid time as + the tm00 file from the previous archive, i.e. the one associated with + time (yyyymmddhh_arcv - 6 hours). It turns out that the tm06 file from + the current archive contains more/better observations than the tm00 + file from the previous archive. Thus, for a given archive time + yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not + the one at tm00, effectively resulting in 6 files per archive for NDAS + obs. The obs availability interval is then + + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + = 1 hr/file + + i.e. there is one obs file for each hour of the day containing values + at that hour. The archive corresponding to hour 0 of the current day + contains 6 files valid at hours 18 through 23 of the previous day. The + archive corresponding to hour 6 of the current day contains 6 files + valid at hours 0 through 5 of the current day, and the archives + corresponding to hours 12 and 18 of the current day each contain 6 + files valid at hours 6 through 11 and 12 through 17 of the current day. 
+ Thus, to obtain all the hourly values for the current day (from hour + 0 to hour 23), we must extract the 6 obs files (excluding the tm00 + ones) from the three archives corresponding to hours 6, 12, and 18 of + the current day and the archive corresponding to hour 0 of the next + day. This corresponds to an archive hour sequence set below of [6, 12, + 18, 24]. Thus, in the simplest case in which the observation retrieval + times include all hours of the current task's day at which obs files + are available and none of the obs files for this day already exist on + disk, this sequence will be [6, 12, 18, 24]. In other cases, the + sequence we loop over will be a subset of [6, 12, 18, 24]. """ # Convert obtype to upper case to simplify code below. @@ -355,100 +447,6 @@ def get_obs(config, obtype, yyyymmdd_task): # To generate this sequence, we first set the archive interval and then # set the starting and ending archive hour values. # - # For CCPA, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 6 files, so that the - # obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. there is one obs file for each hour of the day containing the - # accumulation over that one hour. The archive corresponding to hour 0 - # of the current day contains 6 files representing accumulations during - # the 6 hours of the previous day. The archive corresponding to hour 6 - # of the current day corresponds to accumulations during the first 6 - # hours of the current day, and the archives corresponding to hours 12 - # and 18 of the current day correspond to accumulations during the 2nd - # and 3rd 6-hourly intervals of the current day. Thus, to obtain all the - # one-hour accumulations for the current day, we must extract all the obs - # files from the archives corresponding to hours 6, 12, and 18 of the - # current day and hour 0 of the next day. This corresponds to an archive - # hour sequence set below of [6, 12, 18, 24]. Thus, in the simplest case - # in which the observation retrieval times include all hours of the - # current task's day at which obs files are available and none of the obs - # files for this day already exist on disk, this sequence will be [6, 12, - # 18, 24]. In other cases, the sequence we loop over will be a subset of - # [6, 12, 18, 24]. - # - # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are - # bundled into 24-hourly archives. This implies just 1 archive per day. - # The archives are organized such that each one contains 4 files, so that - # the obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] - # = 6 hr/file - # - # i.e. there is one obs file for each 6-hour interval of the day containing - # the accumulation over those 6 hours. The 4 obs files within each archive - # correspond to hours 0, 6, 12, and 18 of the current day. The obs file - # for hour 0 contains accumulations during the last 6 hours of the previous - # day, while those for hours 6, 12, and 18 contain accumulations for the - # first, second, and third 6-hour chunks of the current day. Thus, to - # obtain all the 6-hour accumulations for the current day, we must extract - # from the archive for the current day the obs files for hours 6, 12, and - # 18 and from the archive for the next day the obs file for hour 0. 
This - # corresponds to an archive hour sequence set below of [0, 24]. Thus, in - # the simplest case in which the observation retrieval times include all - # hours of the current task's day at which obs files are available and - # none of the obs files for this day already exist on disk, this sequence - # will be [0, 24]. In other cases, the sequence we loop over will be a - # subset of [0, 24]. - # - # For NDAS, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 7 files (not say 6). - # The archive associated with time yyyymmddhh_arcv contains the hourly - # files at - # - # yyyymmddhh_arcv - 6 hours - # yyyymmddhh_arcv - 5 hours - # ... - # yyyymmddhh_arcv - 2 hours - # yyyymmddhh_arcv - 1 hours - # yyyymmddhh_arcv - 0 hours - # - # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, - # respectively. Thus, the tm06 file from the current archive, say the - # one associated with time yyyymmddhh_arcv, has the same valid time as - # the tm00 file from the previous archive, i.e. the one associated with - # time (yyyymmddhh_arcv - 6 hours). It turns out the tm06 file from the - # current archive contains more/better observations than the tm00 file - # from the previous archive. Thus, for a given archive time yyyymmddhh_arcv, - # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, - # effectively resulting in an 6 files per archive for NDAS obs. The obs - # availability interval is then - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. there is one obs file for each hour of the day containing values - # at that hour. The archive corresponding to hour 0 of the current day - # contains 6 files valid at hours 18 through 23 of the previous day. The - # archive corresponding to hour 6 of the current day contains 6 files - # valid at hours 0 through 5 of the current day, and the archives - # corresponding to hours 12 and 18 of the current day each contain 6 - # files valid at hours 6 through 11 and 12 through 17 of the current day. - # Thus, to obtain all the hourly values for the current day (from hour - # 0 to hour 23), we must extract the 6 obs files (excluding the tm00 - # ones) from the archives corresponding to hours 6, 12, and 18 of the - # current day and the archive corresponding to hour 0 of the next day. - # This corresponds to an archive hour sequence set below of [6, 12, 18, - # 24]. Thus, in the simplest case in which the observation retrieval - # times include all hours of the current task's day at which obs files - # are available and none of the obs files for this day already exist on - # disk, this sequence will be [6, 12, 18, 24]. In other cases, the - # sequence we loop over will be a subset of [6, 12, 18, 24]. # #----------------------------------------------------------------------- # @@ -628,24 +626,62 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. + # Set the subdirectory under the raw base directory that will contain the + # files retrieved from the current archive. 
We refer to this as the "raw"
+    # archive subdirectory because it will contain the files as they are in
+    # the archive before any processing by this script.  Later below, this
+    # will be combined with the raw base directory (whose name depends on the
+    # year, month, and day of the current obs day) to obtain the full path to
+    # the raw archive directory (arcv_dir_raw).
+    #
+    # Notes on each obs type:
+    #
+    # CCPA:
+    # The raw subdirectory name must include the year, month, day, and hour
+    # in order to avoid get_obs tasks for different days clobbering each
+    # others' obs files.
+    #
+    # NOHRSC:
+    # The hour-of-day of the archive is irrelevant because there is only one
+    # archive per day, so we don't include it in the raw archive subdirectory's
+    # name.  However, we still need a subdirectory that contains the year,
+    # month, and day information of the archive because in the simplest case
+    # of having to get the NOHRSC obs for all hours of the current obs day,
+    # we need to extract obs files from two archives -- one for the current
+    # day (which includes the files for accumulations over hours 0-6, 6-12,
+    # and 12-18 of the current day) and another for the next day (which
+    # includes the file for accumulations over hours 18-24 of the current
+    # day).  To distinguish between the raw obs files from these two archives,
+    # we create an archive-time dependent raw subdirectory for each possible
+    # archive.
+    #
+    # MRMS:
+    # There is only one archive per day, and it contains all the raw obs
+    # files needed to generate processed obs files for all hours of the
+    # current day.  Thus, we will only ever need this one archive, so there
+    # is no need to include the archive's hour information (there really
+    # isn't any) in the raw subdirectory name.  In addition, the archive's
+    # year, month, and day is the same as that of the obs day, so it is
+    # already included in the name of the raw base directory.  Since this is
+    # the only info we need to avoid different get_obs tasks clobbering each
+    # other's output obs files, for simplicity we simply do not create a raw
+    # archive subdirectory.
+    #
+    # NDAS:
+    # Same as for CCPA.
     if obtype == 'CCPA':
-        arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str)
-    # For NOHRSC, the hour-of-day for the archive is irrelevant since there
-    # is only one archive per day, so don't include it in the raw archive
-    # directory's name.
+        arcv_subdir_raw = yyyymmddhh_arcv_str
     elif obtype == 'NOHRSC':
-        arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str)
-    # Since for MRMS data there is only one archive per day, that directory
-    # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw.
+        arcv_subdir_raw = yyyymmdd_arcv_str
     elif obtype == 'MRMS':
-        arcv_dir_raw = basedir_raw
+        arcv_subdir_raw = ''
     elif obtype == 'NDAS':
         arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str)
+    # Combine the raw archive base directory with the raw archive subdirectory
+    # name to obtain the full path to the raw archive directory.
+    arcv_dir_raw = os.path.join(basedir_raw, arcv_subdir_raw)
+
     # Check whether any of the obs retrieval times for the day associated with
     # this task fall in the time interval spanned by the current archive.  If
     # so, set the flag (do_retrieve) to retrieve the files in the current
@@ -739,23 +775,44 @@
     # Loop over the raw obs files extracted from the current archive and
     # generate from them the processed obs files.
     #
-    # For CCPA obs, for most dates this consists of simply copying or moving
-    # the files from the raw archive directory to the processed directory,
-    # possibly renaming them in the process.  However, for dates between
-    # 20180718 and 20210504 and hours-of-day 19 through the end of the day
-    # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
+    # Notes on each obs type:
+    #
+    # CCPA:
+    # For most dates, generating the processed obs files consists of simply
+    # copying or moving the files from the raw archive directory to the processed
+    # directory, possibly renaming them in the process.  However, for dates
+    # between 20180718 and 20210504 and hours-of-day 19 through the end of the
+    # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
     # error in the metadata of the raw file and writing the corrected data
     # to a new grib2 file in the processed location.
-    #
-    # For NOHRSC obs, this consists of simply copying or moving the files from
-    # the raw archive directory to the processed directory, possibly renaming
-    # them in the process.
-    #
-    # For NDAS obs, this consists of simply copying or moving the files from
-    # the raw archive directory to the processed directory, possibly renaming
-    # them in the process.  Note that the tm06 file in a given archive contain
-    # more/better observations than the tm00 file in the next archive (their
-    # valid times are equivalent), so we use the tm06 files.
+    #
+    # NOHRSC:
+    # Generating the processed obs files consists of simply copying or moving
+    # the files from the raw archive directory to the processed directory,
+    # possibly renaming them in the process.
+    #
+    # MRMS:
+    # The MRMS obs are in fact available every few minutes, but the smallest
+    # value we allow the obs availability interval to be set to is 1 hour
+    # because the forecasts cannot (yet) perform sub-hourly output (also see
+    # notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml).  For this
+    # reason, MRMS obs require an extra processing step on the raw files (before
+    # creating the processed files).  In this step, at each obs retrieval time
+    # we first select, from the set of all raw (and gzipped) grib2 files for
+    # the current day (the latter usually being only a few minutes apart), the
+    # file that is nearest in time to the obs retrieval time.  After selecting
+    # this gzipped grib2 file, we unzip it and place it in a temporary
+    # subdirectory under the raw base directory.  Only after this step do we
+    # then generate the processed file by moving this intermediate file to the
+    # processed directory, possibly renaming it in the process.
+    #
+    # NDAS:
+    # Generating the processed obs files consists of simply copying or moving
+    # the files from the raw archive directory to the processed directory,
+    # possibly renaming them in the process.  Note that for a given NDAS archive,
+    # the tm06 file in it contains more/better observations than the tm00 file
+    # in the previous archive (their valid times being equivalent), so we always
+    # use the tm06 files.
     for yyyymmddhh in obs_times_in_arcv:

         # Create the processed obs file from the raw one (by moving, copying, or
@@ -870,7 +927,7 @@ def parse_args(argv):
-    """Parse command line arguments"""
+    """Parse command line arguments."""

     parser = argparse.ArgumentParser(
         description="Get observations."
) From 50729f5dc02a20c982653a97025bbdfecd256d90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:56:36 -0600 Subject: [PATCH 108/208] Bug fix. --- ush/get_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index c831ad909e..d7833a70f7 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -676,7 +676,7 @@ def get_obs(config, obtype, yyyymmdd_task): elif obtype == 'MRMS': arcv_subdir_raw = '' elif obtype == 'NDAS': - arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + arcv_subdir_raw = yyyymmddhh_arcv_str # Combine the raw archive base directory with the raw archive subdirectory # name to obtain the full path to the raw archive directory. From 601284359644fd5547d91be0dc6c828bfa26814a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 11:30:29 -0600 Subject: [PATCH 109/208] Remove commented-out lines; remove trailing whitespace. --- parm/metplus/PcpCombine.conf | 1 - ush/get_obs.py | 76 ++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index de99871bed..04562dc14b 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -126,7 +126,6 @@ FCST_PCP_COMBINE_RUN = False # # Accumulation interval available in the input data. # -#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 {{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. diff --git a/ush/get_obs.py b/ush/get_obs.py index d7833a70f7..50b7c45ae3 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -42,7 +42,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): hod: The hour of the day. An integer. This must be between 0 and 23. For - cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the next day, i.e. as the 24th hour of the current day. Returns: @@ -132,7 +132,7 @@ def get_obs(config, obtype, yyyymmdd_task): Detailed Description: - In this script, the main (outer) loop to obtain obs files is over a + In this script, the main (outer) loop to obtain obs files is over a sequence of archive hours, where each archive hour in the sequence represents one archive (tar) file in the data store, and archive hours are with respect to hour 0 of the day. The number of archive hours in @@ -143,22 +143,22 @@ def get_obs(config, obtype, yyyymmdd_task): iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, 18, 24] (which of these it will be depends on how the obs files are arranged into the archives). - + Below, we give a description of archive layout for each obs type and give the archive hours to loop over for the case in which we need to obtain all available obs for the current day. - - + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs: ---------- For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 6 files, so that the obs availability interval is - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing the accumulation over that one hour. 
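For concreteness, the CCPA bookkeeping just described can be written out as a short calculation. The snippet below is only an illustrative sketch of that layout; the variable names are hypothetical and it is not code from get_obs.py:

    num_arcvs_per_day = 4       # CCPA obs are bundled into 6-hourly archives
    num_files_per_arcv = 6      # one obs file per 1-hour accumulation
    obs_avail_intvl_hrs = 24 // (num_arcvs_per_day * num_files_per_arcv)   # = 1 hr/file
    arcv_intvl_hrs = 24 // num_arcvs_per_day                               # = 6 hr
    # Archive hours to loop over to cover a full obs day (see the discussion that follows):
    arcv_hrs = list(range(arcv_intvl_hrs, 24 + 1, arcv_intvl_hrs))         # [6, 12, 18, 24]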
The archive corresponding to hour 0 of the current day contains 6 files representing accumulations during @@ -176,23 +176,23 @@ def get_obs(config, obtype, yyyymmdd_task): obs files are available and none of the obs files for this day already exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, the sequence we loop over will be a subset of [6, 12, 18, 24]. - + Note that CCPA files for 1-hour accumulation have incorrect metadata in the files under the "00" directory (i.e. for hours-of-day 19 to 00 of the next day) from 20180718 to 20210504. This script corrects these errors if getting CCPA obs at these times. - - + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow accumulation observations: ---------- For NOHRSC, the archive interval is 24 hours, i.e. the obs files are bundled into 24-hourly archives. The archives are organized such that - each one contains 4 files, so that the obs availability interval is - + each one contains 4 files, so that the obs availability interval is + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] = 6 hr/file - + i.e. there is one obs file for each 6-hour interval of the day containing the accumulation over those 6 hours. The 4 obs files within each archive correspond to hours 0, 6, 12, and 18 of the current day. The obs file @@ -201,14 +201,14 @@ def get_obs(config, obtype, yyyymmdd_task): first, second, and third 6-hour chunks of the current day. Thus, to obtain all the 6-hour accumulations for the current day, we must extract from the archive for the current day the obs files for hours 6, 12, and - 18 and from the archive for the next day the obs file for hour 0. This + 18 and from the archive for the next day the obs file for hour 0. This corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest case in which the observation retrieval times include all hours of the current task's day at which obs files are available and none of the obs files for this day already exist on disk, this sequence will be [0, 24]. In other cases, the sequence we loop over will be a subset of [0, 24]. - - + + MRMS (Multi-Radar Multi-Sensor) radar observations: ---------- For MRMS, the archive interval is 24 hours, i.e. the obs files are @@ -219,9 +219,9 @@ def get_obs(config, obtype, yyyymmdd_task): files that are closest to each hour of the day for which obs are needed. This effectively sets the obs availability interval for MRMS to one hour, i.e. - + obs_avail_intvl_hrs = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour (but only after filtering in time; also see notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the @@ -231,33 +231,33 @@ def get_obs(config, obtype, yyyymmdd_task): are available and none of the obs files for this day already exist on disk, the sequence of archive hours over which we loop will be just [0]. Note that: - + * For cases in which MRMS data are not needed for all hours of the day, we still need to retrieve and extract from this single daily archive. Thus, the archive hour sequence over which we loop over will always be just [0] for MRMS obs. - + * Because MRMS obs are split into two sets of archives -- one for composite reflectivity (REFC) and another for echo top (RETOP) -- on any given day (and with an archive hour of 0) we actually retrive and extract two different archive files (one per field). 
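Before moving on to NDAS, the CCPA bundling described above can also be illustrated as a mapping from an hour of the day to the archive hour whose file contains it. The helper below is hypothetical and is given only as a sketch consistent with that layout (it is not the actual implementation of get_obs_arcv_hr):

    import math

    def ccpa_arcv_hr_for_hour(hod, arcv_intvl_hrs=6):
        # For cumulative obs such as CCPA, hour 0 is treated as the 24th hour
        # of the current day, so it falls in the next day's 00z archive.
        hod = 24 if hod == 0 else hod
        return math.ceil(hod / arcv_intvl_hrs) * arcv_intvl_hrs

    # Hours 1-6 map to the hour-6 archive, 7-12 to hour 12, 13-18 to hour 18,
    # and hours 19-23 together with hour 0 (i.e. 24) to the hour-24 archive.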
- - + + NDAS (NAM Data Assimilation System) conventional observations: ---------- For NDAS, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 7 files (not say 6). The archive associated with - time yyyymmddhh_arcv contains the hourly files at - + time yyyymmddhh_arcv contains the hourly files at + yyyymmddhh_arcv - 6 hours yyyymmddhh_arcv - 5 hours ... yyyymmddhh_arcv - 2 hours yyyymmddhh_arcv - 1 hours yyyymmddhh_arcv - 0 hours - - These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + + These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, respectively. Thus, the tm06 file from the current archive, say the one associated with time yyyymmddhh_arcv, has the same valid time as the tm00 file from the previous archive, i.e. the one associated with @@ -267,10 +267,10 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, effectively resulting in 6 files per archive for NDAS obs. The obs availability interval is then - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour. The archive corresponding to hour 0 of the current day contains 6 files valid at hours 18 through 23 of the previous day. The @@ -327,7 +327,7 @@ def get_obs(config, obtype, yyyymmdd_task): # observation files that we need for verification. Each group of fields # is one that is verified together in the workflow. We assume there is # a separate set of obs files for each such field group in the observations, - # and in the code below we loop over these sets of files as necessary. + # and in the code below we loop over these sets of files as necessary. # There are several scenarios to consider: # # * An obs type consists of only one set of files containing only one @@ -336,7 +336,7 @@ def get_obs(config, obtype, yyyymmdd_task): # set of files that contain APCP data, and NOHRSC obs consist of only # one set of files that contain ASNOW data. # - # * An obs type consists of more than one set of files, with each file + # * An obs type consists of more than one set of files, with each file # containing a different field. # This is the case for MRMS obs. These consist of two sets of files. # The first set contains REFC data, and the second contains RETOP data. @@ -344,13 +344,13 @@ def get_obs(config, obtype, yyyymmdd_task): # * An obs type consists of only one set of files, but each file contains # multiple groups of fields needed for verification. # This is the case for NDAS obs. These consist of a single set of files, - # but each file contains both the ADPSFC fields (like 2-m temperature) + # but each file contains both the ADPSFC fields (like 2-m temperature) # and ADPUPA fields (like 500-mb temperature) that are verified separately # in the workflow tasks and thus are considered separate field groups. # # Other obs type and field group scenarios are also possible, but we do # not describe them since they are not applicable to any of the obs types - # considered here. + # considered here. if obtype == 'CCPA': field_groups_in_obs = ['APCP'] elif obtype == 'NOHRSC': @@ -659,7 +659,7 @@ def get_obs(config, obtype, yyyymmdd_task): # There is only one archive per day, and it contains all the raw obs # files needed to generate processed obs files for all hours of the # current day. 
Thus, we will only ever need this one archive, so there - # is no need to include the archive's hour information (there really + # is no need to include the archive's hour information (there really # isn't any) in the raw subdirectory name. In addition, the archive's # year, month, and day is the same as that of the obs day's, so it is # already included in the name of the raw base directory. Sine this is @@ -785,12 +785,12 @@ def get_obs(config, obtype, yyyymmdd_task): # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an # error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. - # + # # NOHRSC: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, # possibly renaming them in the process. - # + # # MRMS: # The MRMS obs are in fact available every few minutes, but the smallest # value we allow the obs availability interval to be set to is 1 hour @@ -800,12 +800,12 @@ def get_obs(config, obtype, yyyymmdd_task): # creating the processed files). In this step, at each obs retrieval time # we first generate an intermediate grib2 file from the set of all raw (and # gzipped) grib2 files for the current day (the latter usually being only a - # few minutes apart) the file that is nearest in time to the obs retrieval + # few minutes apart) the file that is nearest in time to the obs retrieval # time. After selecting this gzipped grib2 file, we unzip it and place it # in a temporary subdirectory under the raw base directory. Only after this # step do we then generate the processed file by moving this intermediate # file to the processed directory, possibly renaming it in the process. - # + # # NDAS: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, @@ -832,7 +832,7 @@ def get_obs(config, obtype, yyyymmdd_task): # For MRMS obs, first select from the set of raw files for the current day # those that are nearest in time to the current hour. Unzip these in a # temporary subdirectory under the raw base directory. - # + # # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up From 1c924a896664e6a815398c11299709bf4bf82465 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:13:08 -0600 Subject: [PATCH 110/208] Remove unnecessary bash utility function. --- ush/bash_utils/ceil.sh | 122 --------------------------------------- ush/source_util_funcs.sh | 9 --- 2 files changed, 131 deletions(-) delete mode 100644 ush/bash_utils/ceil.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh deleted file mode 100644 index dc8a21c90d..0000000000 --- a/ush/bash_utils/ceil.sh +++ /dev/null @@ -1,122 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This function returns the ceiling of the quotient of two numbers. The -# ceiling of a number is the number rounded up to the nearest integer. -# -#----------------------------------------------------------------------- -# -function ceil() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. 
-# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Check number of arguments. -# -#----------------------------------------------------------------------- -# - if [ "$#" -ne 2 ]; then - - print_err_msg_exit " -Incorrect number of arguments specified: - - Function name: \"${func_name}\" - Number of arguments specified: $# - -Usage: - - ${func_name} numer denom - -where denom is a nonnegative integer and denom is a positive integer. -" - - fi -# -#----------------------------------------------------------------------- -# -# Make sure arguments are of the right form. -# -#----------------------------------------------------------------------- -# - local numer="$1" - local denom="$2" - - if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The first argument to the \"${func_name}\" function (numer) must be a nonnegative -integer but isn't: - numer = ${numer} -" - fi - - if [[ "${denom}" -eq 0 ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) cannot be zero: - denom = ${denom} -" - fi - - if ! [[ "${denom}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) must be a positive -integer but isn't: - denom = ${denom} -" - fi -# -#----------------------------------------------------------------------- -# -# Let ceil(a,b) denote the ceiling of the quotient of a and b. It can be -# shown that for two positive integers a and b, we have: -# -# ceil(a,b) = floor((a+b-1)/b) -# -# where floor(a,b) is the integer obtained by rounding the quotient of -# a and b (i.e. a/b) down to the nearest integer. Since in bash a -# division returns only the integer part of the result, it is effectively -# the floor function. Thus the following. -# -#----------------------------------------------------------------------- -# - result=$(( (numer+denom-1)/denom )) - print_info_msg "${result}" -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/func- -# tion. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index ef7c669910..9feceaf68e 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -96,15 +96,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that returns the ceiling of -# the quotient of two positive integers. 
-# -#----------------------------------------------------------------------- -# - . ${bashutils_dir}/ceil.sh -# -#----------------------------------------------------------------------- -# # Source the file containing the functions that will echo given strings # as uppercase or lowercase # From 9435f7f29accbee047ac5a8c0c679996e03ac9d7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:14:10 -0600 Subject: [PATCH 111/208] Clean up comments, remove commented-out code. --- ush/run_eval_METplus_timestr_tmpl.sh | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh index b2df03c56c..f5438be2f4 100755 --- a/ush/run_eval_METplus_timestr_tmpl.sh +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -1,21 +1,13 @@ #!/usr/bin/env bash - # #----------------------------------------------------------------------- # -# Source the variable definitions file and the bash utility functions. +# This script is simply a wrapper to the eval_METplus_timestr_tmpl bash +# function. It is needed in order to enable the function to be called +# from a python script. # #----------------------------------------------------------------------- # -#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" -#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" -#yyyymmdd_task="20230217" -#lhr="22" -#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" set -u . $USHdir/source_util_funcs.sh eval_METplus_timestr_tmpl \ @@ -24,5 +16,3 @@ eval_METplus_timestr_tmpl \ METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ outvarname_evaluated_timestr="fp_proc" echo "${fp_proc}" - -# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ From 2218ca4e1ceabc9949fe8c1901066f8dcb1b0899 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:44:40 -0600 Subject: [PATCH 112/208] Remove unneeded variable from task. --- parm/wflow/verify_pre.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 567f045188..c239eae8d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -52,7 +52,6 @@ task_get_obs_mrms: envars: <<: *default_vars OBTYPE: 'MRMS' - MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" From ee5566b1aac7ee9ca27a2f98d50a3159c58031bd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 10:22:19 -0600 Subject: [PATCH 113/208] Fix typo. 
--- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 634e646745..cae3bc37ee 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. It then returns an updated version of - the verification configuration dictionary that satisfies these constranints. + the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the cumulative forecast fields and corresponding observation type pairs that From befe769c7a8fe8c01b3119f18ee17744713fc7e3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 16:43:31 -0600 Subject: [PATCH 114/208] Fix typo. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 3286066021..ced46215d0 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 3963a616b4..97e1393864 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. 
+ # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 23035f3a92..3ce4ff5f08 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 10ceddd9a8..3264c93eca 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c4f62a679d..a7af3f27c9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -38,7 +38,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. 
This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 55cbf5b13f..a0f10d8b05 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 20cab966ef..429e8e0086 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 10ff318dd9..aa4b731e3a 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -39,7 +39,7 @@ task_run_post: verification: # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these From 6dd8e20723f39d5e2cf628d18729614bea824b99 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 09:01:14 -0600 Subject: [PATCH 115/208] Modify old test for set_cycle_dates to fit new version of this function. 
This includes adding a new test for the case in which the output should be a list of datetime objects (the default is for the output to be a list of strings). --- tests/test_python/test_set_cycle_dates.py | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/test_python/test_set_cycle_dates.py b/tests/test_python/test_set_cycle_dates.py index eb76f579c6..8baae643ac 100644 --- a/tests/test_python/test_set_cycle_dates.py +++ b/tests/test_python/test_set_cycle_dates.py @@ -1,20 +1,22 @@ """ Test set_cycle_dates.py """ -from datetime import datetime +from datetime import datetime, timedelta import unittest -from set_cycle_dates import set_cycle_dates +from set_cycle_and_obs_timeinfo import set_cycle_dates class Testing(unittest.TestCase): """ Define the tests""" - def test_set_cycle_dates(self): + + def test_set_cycle_dates_string(self): """ Test that the proper list of dates are produced given the - intput data""" + input data and return_type left to its default value (so the + output should be a list of strings)""" cdates = set_cycle_dates( - date_start=datetime(2022, 1, 1, 6), - date_end=datetime(2022, 1, 2, 12), - incr_cycl_freq=6, + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), ) self.assertEqual( cdates, @@ -27,3 +29,26 @@ def test_set_cycle_dates(self): "2022010212", ], ) + + def test_set_cycle_dates_datetime(self): + + """ Test that the proper list of dates are produced given the + input data and return_type left set to "datetime" (so the output + should be a list of datetime objects)""" + cdates = set_cycle_dates( + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), + return_type="datetime", + ) + self.assertEqual( + cdates, + [ + datetime(2022, 1, 1, 6), + datetime(2022, 1, 1, 12), + datetime(2022, 1, 1, 18), + datetime(2022, 1, 2, 0), + datetime(2022, 1, 2, 6), + datetime(2022, 1, 2, 12), + ], + ) From 03d2ab6f4b7ae2d9de74fe355019d9cf8611f6d4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 12:04:35 -0600 Subject: [PATCH 116/208] First attempt at modifying documentation to see if I can view it in the PR page. --- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 4d88173028..14fccdd5e5 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -168,48 +168,6 @@ These settings define platform-specific run commands. Users should set run comma ``PRE_TASK_CMDS``: (Default: "") Pre-task commands such as ``ulimit`` needed by tasks. For example: ``'{ ulimit -s unlimited; ulimit -a; }'`` -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. - -.. _METParamNote: - -.. 
note:: - Where a date field is required: - * ``YYYY`` refers to the 4-digit valid year - * ``MM`` refers to the 2-digit valid month - * ``DD`` refers to the 2-digit valid day of the month - * ``HH`` refers to the 2-digit valid hour of the day - * ``mm`` refers to the 2-digit valid minutes of the hour - * ``SS`` refers to the two-digit valid seconds of the hour - -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) - User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - - .. note:: - Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. - -``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) - User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) - User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. Other Platform-Specific Directories -------------------------------------- @@ -1635,6 +1593,49 @@ General Verification Parameters ``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. +METplus Parameters +---------------------- + +:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. 
Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. + +.. _METParamNote: + +.. note:: + Where a date field is required: + * ``YYYY`` refers to the 4-digit valid year + * ``MM`` refers to the 2-digit valid month + * ``DD`` refers to the 2-digit valid day of the month + * ``HH`` refers to the 2-digit valid hour of the day + * ``mm`` refers to the 2-digit valid minutes of the hour + * ``SS`` refers to the two-digit valid seconds of the hour + +``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) + User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) + User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + + .. note:: + Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. + +``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) + User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) + User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + Templates for Observation Files --------------------------------- From c0a841e712a3555a93677cc1e6ad982efc9f5303 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 06:42:51 -0600 Subject: [PATCH 117/208] Bug fix. 
--- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index cae3bc37ee..9a7644ed29 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -261,7 +261,7 @@ def check_temporal_consistency_cumul_fields( field_fcst = {field_fcst} obtype = {obtype} accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {forecast_output_intvl} hr + fcst_output_intvl_hrs = {fcst_output_intvl} hr accum_hrs % fcst_output_intvl_hrs = {rem_fcst} Thus, this forecast field cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation From d3485729fec1b4699ecb6f5f4f045c34a67fdfd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 09:23:43 -0600 Subject: [PATCH 118/208] Fix up comments. --- ush/set_cycle_and_obs_timeinfo.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9a7644ed29..52271d2362 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -75,7 +75,7 @@ def check_temporal_consistency_cumul_fields( the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the - cumulative forecast fields and corresponding observation type pairs that + cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval are as follows: @@ -85,14 +85,18 @@ def check_temporal_consistency_cumul_fields( 2) The obs availability interval evenly divides the accumulation interval. This ensures that the obs can be added together to obtain accumulated - values of the obs field, e.g. the 6-hourly NOHRSC obs can be added - to obtain 24-hour observed snowfall accumulations. + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added to + obtain 24-hour observed snowfall accumulations. Note that this also + ensures that the accumulation interval is greater than or equal to the + obs availability interval. 3) The forecast output interval evenly divides the accumulation interval. This ensures that the forecast output can be added together to obtain accumulated values of the forecast field, e.g. if the forecast output - interval is 3 hours, the resulting 3-hourly APCP outputs from the - forecast can be added to obtain 6-hourly forecast APCP. + interval is 3 hours, the resulting 3-hourly APCP outputs from the forecast + can be added to obtain 6-hourly forecast APCP. Note that this also ensures + that the accumulation interval is greater than or equal to the forecast + output interval. 4) The hour-of-day at which the accumulated forecast values will be available are a subset of the ones at which the accumulated obs @@ -207,7 +211,8 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Check that accumulation inervals are shorter than the forecast length. + # Make sure that the accumulation interval is less than or equal to the + # forecast length. 
# if accum_hrs > fcst_len_hrs: msg = dedent(f""" @@ -225,7 +230,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the observation + # Make sure that accumulation interval is evenly divisible by the observation # availability interval. # if accum_hrs in accum_intvls_hrs: @@ -248,7 +253,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the forecast + # Make sure that accumulation interval is evenly divisible by the forecast # output interval. # if accum_hrs in accum_intvls_hrs: @@ -270,9 +275,9 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that the hours-of-day at which the current cumulative field will - # be output are a subset of the hours-of-day at which the corresponding - # obs type is output. + # Make sure that the hours-of-day at which the current cumulative field + # will be output are a subset of the hours-of-day at which the corresponding + # obs type is available. # if accum_hrs in accum_intvls_hrs: From 28140699caa8045b246998993d8183f11ffc6c9b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 16:24:33 -0600 Subject: [PATCH 119/208] In config.community.yaml, move [CCPA|MRMS|NDAS]_OBS_DIR variables from the "platform" to the "verification" section to be consistent with the changes in config_defaults.yaml. --- ush/config.community.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ush/config.community.yaml b/ush/config.community.yaml index 417b9edb91..f380bd28cc 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -5,10 +5,6 @@ user: RUN_ENVIR: community MACHINE: hera ACCOUNT: an_account -platform: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" workflow: USE_CRON_TO_RELAUNCH: false EXPT_SUBDIR: test_community @@ -35,6 +31,9 @@ global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 verification: + CCPA_OBS_DIR: "" + MRMS_OBS_DIR: "" + NDAS_OBS_DIR: "" VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: From 52ebd99c75daf96097df4579c92adf49d0a0adce Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 15 Oct 2024 14:00:19 -0600 Subject: [PATCH 120/208] Bug fix: the get_obs_nohrsc tasks need to be based on obs days for cumulative fields, not obs days for instantaneous fields (which is the default cycledef in verify_pre.yaml). --- parm/wflow/verify_pre.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c239eae8d3..a3b49cc169 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -37,6 +37,9 @@ task_get_obs_ccpa: task_get_obs_nohrsc: <<: *default_task_verify_pre + attrs: + cycledefs: cycledefs_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars From 42c3d6c06f0ebdc4c6b6b4111d5e410b40ded419 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:13:28 -0600 Subject: [PATCH 121/208] Add logging statements when exceptions occur; fix comments and code indentation. 
--- ush/set_cycle_and_obs_timeinfo.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 52271d2362..ded2f92fe2 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,6 +49,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, Valid values are: valid_values = {valid_values} """) + logging.error(msg) raise Exception(msg) # iterate over cycles @@ -184,6 +185,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_intvl_hrs = {obs_avail_intvl_hrs} 24 % obs_avail_intvl_hrs = {remainder}" """) + logging.error(msg) raise Exception(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. @@ -630,12 +632,12 @@ def get_obs_retrieve_times_by_day( """ # Convert string contents of input dictionaries to datetime objects. for time_type in ['cumul', 'inst']: - fcst_output_times_all_cycles[time_type] \ - = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles[time_type]))] - obs_days_all_cycles[time_type] \ - = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") - for i in range(len(obs_days_all_cycles[time_type]))] + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] # Get list of forecast fields to be verified. vx_fields = vx_config['VX_FIELDS'] @@ -650,8 +652,9 @@ def get_obs_retrieve_times_by_day( {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] } - # Keep only those items in the dictionary above that have forecast fields - # that appear in the list of forecast fields to be verified. + # Keep only those items in the dictionary vx_field_info defined above + # that have forecast fields that appear in the list of forecast fields to + # be verified. for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): obtype = obtypes_to_fcst_fields_dict['obtype'] From 5a6da53c82088208589f8361eee302708e384257 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 10:03:31 -0600 Subject: [PATCH 122/208] Minor moving of config variable. --- ush/config_defaults.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 1e967ef9e4..b0a6438111 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2428,7 +2428,7 @@ verification: MRMS_OBS_AVAIL_INTVL_HRS: 1 NDAS_OBS_AVAIL_INTVL_HRS: 1 # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Boolean flag specifying whether to remove the "raw" observation # directories after pulling the specified type of obs (CCPA, NOHRSC, # MRMS, or NOHRSC). The raw directories are the ones in which the @@ -2438,9 +2438,9 @@ verification: # structure). 
# REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_NOHRSC: true REMOVE_RAW_OBS_MRMS: true REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation From 7dc7db309eee5de53087e74d273647b182a8701d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 15:59:07 -0600 Subject: [PATCH 123/208] Add new parameter VX_FCST_OUTPUT_INTVL_HRS into config_defaults.yaml and use it as the forecast output interval when performing vx. --- scripts/exregional_check_post_output.sh | 2 +- ...egional_run_met_genensprod_or_ensemblestat.sh | 2 +- ...xregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 2 +- ush/config_defaults.yaml | 9 +++++++++ ush/setup.py | 16 ++++++---------- 8 files changed, 21 insertions(+), 16 deletions(-) diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 433aba1e4e..f176c9a12e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -126,7 +126,7 @@ set_leadhrs \ yyyymmddhh_init="${CDATE}" \ lhr_min="0" \ lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ + lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 67ae70c8b9..475417ee53 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -226,7 +226,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e16b06cb46..a6130ba50d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -227,7 +227,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index adecb68bcd..75332e4929 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -170,7 +170,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 2c27a9a597..382bd71ac8 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -169,7 +169,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_pcpcombine.sh 
b/scripts/exregional_run_met_pcpcombine.sh index 43da23ca2e..590ceb43ef 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -229,7 +229,7 @@ set_leadhrs_no_missing \ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" - subintvl="${FCST_OUTPUT_INTVL_HRS}" + subintvl="${VX_FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b0a6438111..b216ccdd72 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2498,6 +2498,15 @@ verification: VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # + # Set the forecast output interval to use for verification purposes. + # If the forecasts to be verified are being run in the SRW (i.e. they + # are not staged from another forecast model), then this should be + # set to the SRW's forecast output interval, but such a variable is + # currently not available in this configuration file. Instead, for + # now we set it to a default value of 1 hour. + # + VX_FCST_OUTPUT_INTVL_HRS: 1 + # # VX_FCST_INPUT_BASEDIR: # Template for top-level directory containing forecast (but not obs) # files that will be used as input into METplus for verification. diff --git a/ush/setup.py b/ush/setup.py index 3a034f7476..dfc59ffaba 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -578,22 +578,19 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + vx_config = expt_config["verification"] + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - - # Set the forecast output interval. Ideally, this should be obtained - # from the SRW App's configuration file, but such a variable doesn't - # yet exist in that file. - fcst_output_intvl_hrs = 1 - workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") # To enable arithmetic with dates and times, convert various time # intervals from integer to datetime.timedelta objects.
cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) # # ----------------------------------------------------------------------- # @@ -605,12 +602,11 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - vx_config = expt_config["verification"] vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( vx_config, date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- @@ -628,7 +624,7 @@ def _remove_tag(tasks, tag): fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] From 57fcbc6e04ce57569fb10b00ff66861611c7279b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:29:26 -0600 Subject: [PATCH 124/208] Change arguments so the cycle start times don't need to be called multiple times by different functions. --- ush/set_cycle_and_obs_timeinfo.py | 12 +++--------- ush/setup.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ded2f92fe2..f345008f04 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -361,8 +361,7 @@ def check_temporal_consistency_cumul_fields( def set_fcst_output_times_and_obs_days_all_cycles( - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + cycle_start_times, fcst_len, fcst_output_intvl): """ This function returns forecast output times and observation days (i.e. days on which obs are needed because there is forecast output on those @@ -401,12 +400,6 @@ def set_fcst_output_times_and_obs_days_all_cycles( Each element of these lists is a string of the form 'YYYYMMDD'. """ - # Get the list containing the starting times of the cycles. Each element - # of the list will be a datetime object. - cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Get the number of forecast output times per cycle/forecast. num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) @@ -603,7 +596,8 @@ def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): def get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles): + vx_config, cycle_start_times, fcst_len, + fcst_output_times_all_cycles, obs_days_all_cycles): """ This function generates dictionary of dictionaries that, for each combination of obs type needed and each obs day, contains a string list diff --git a/ush/setup.py b/ush/setup.py index dfc59ffaba..ce5dad28f3 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -611,6 +611,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. 
This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for @@ -623,8 +635,7 @@ def _remove_tag(tasks, tag): # fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] @@ -658,7 +669,8 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles) + vx_config, cycle_start_times, fcst_len_dt, + fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): for obs_day, obs_retrieve_times in obs_days_dict.items(): From a3a7996844acb26f440a4da5278ecc828d983f01 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:02:49 -0600 Subject: [PATCH 125/208] Further changes to avoid calling the function that calculates the cycle start times multiple times. --- ush/set_cycle_and_obs_timeinfo.py | 40 +++++++++++-------------------- ush/setup.py | 28 ++++++++++------------ 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index f345008f04..a354139352 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -66,9 +66,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, def check_temporal_consistency_cumul_fields( - vx_config, - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + vx_config, cycle_start_times, fcst_len, fcst_output_intvl): """ This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on @@ -113,14 +111,9 @@ def check_temporal_consistency_cumul_fields( vx_config: The verification configuration dictionary. - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -147,13 +140,6 @@ def check_temporal_consistency_cumul_fields( fcst_len_hrs = int(fcst_len/one_hour) fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. 
- cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Initialize one of the variables that will be returned to an empty # dictionary. fcst_obs_matched_times_all_cycles_cumul = dict() @@ -372,14 +358,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( accumulation interval smaller than this are obviously not allowed). Args: - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -608,6 +589,13 @@ def get_obs_retrieve_times_by_day( vx_config: The verification configuration dictionary. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. + + fcst_len: + The length of each forecast; a timedelta object. + fcst_output_times_all_cycles: Dictionary containing a list of forecast output times over all cycles for instantaneous fields and a second analogous list for cumulative fields. diff --git a/ush/setup.py b/ush/setup.py index ce5dad28f3..a4ba2f0001 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -594,6 +594,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Ensure that the configuration parameters associated with cumulative # fields (e.g. APCP) in the verification section of the experiment # dicitonary are temporally consistent, e.g. that accumulation intervals @@ -604,25 +616,11 @@ def _remove_tag(tasks, tag): # vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( - vx_config, - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- # - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - # - # ----------------------------------------------------------------------- - # - cycle_start_times \ - = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, - return_type='datetime') - # - # ----------------------------------------------------------------------- - # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for From 2685e37382d1da9a5a5de07bb3b5b917636ee115 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:43:00 -0600 Subject: [PATCH 126/208] Remove trailing whitespace. 
--- ush/setup.py | 56 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index a4ba2f0001..899f05586f 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -51,24 +51,24 @@ from link_fix import link_fix def load_config_for_setup(ushdir, default_config, user_config): - """Updates a Python dictionary in place with experiment configuration settings from the - default, machine, and user configuration files. + """Updates a Python dictionary in place with experiment configuration settings from the + default, machine, and user configuration files. Args: ushdir (str): Path to the ``ush`` directory for the SRW App default_config (str): Path to ``config_defaults.yaml`` - user_config (str): Path to the user-provided config YAML (usually named + user_config (str): Path to the user-provided config YAML (usually named ``config.yaml``) Returns: None - + Raises: - FileNotFoundError: If the user-provided configuration file or the machine file does not + FileNotFoundError: If the user-provided configuration file or the machine file does not exist. - Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains - invalid sections/keys or (3) it does not contain mandatory information or (4) - an invalid datetime format is used. + Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains + invalid sections/keys or (3) it does not contain mandatory information or (4) + an invalid datetime format is used. """ # Load the default config. @@ -268,7 +268,7 @@ def _add_jobname(tasks): Mandatory variable "{val}" not found in: user config file {user_config} OR - machine file {machine_file} + machine file {machine_file} """ ) ) @@ -300,17 +300,17 @@ def set_srw_paths(ushdir, expt_config): Other paths for the SRW App are set as defaults in ``config_defaults.yaml``. Args: - ushdir (str) : Path to the system location of the ``ush`` directory under the + ushdir (str) : Path to the system location of the ``ush`` directory under the SRW App clone expt_config (dict): Contains the configuration settings for the user-defined experiment Returns: Dictionary of configuration settings and system paths as keys/values - + Raises: - KeyError: If the external repository required is not listed in the externals + KeyError: If the external repository required is not listed in the externals configuration file (e.g., ``Externals.cfg``) - FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has + FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has not been cloned properly """ @@ -371,23 +371,23 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): time. 
Args: - USHdir (str): The full path of the ``ush/`` directory where this script + USHdir (str): The full path of the ``ush/`` directory where this script (``setup.py``) is located - user_config_fn (str): The name of a user-provided configuration YAML (usually + user_config_fn (str): The name of a user-provided configuration YAML (usually ``config.yaml``) debug (bool): Enable extra output for debugging Returns: None - - Raises: - ValueError: If checked configuration values are invalid (e.g., forecast length, + + Raises: + ValueError: If checked configuration values are invalid (e.g., forecast length, ``EXPTDIR`` path) - FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not + FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not set to a compatible handling method - FileNotFoundError: If the path to a particular file does not exist or if the file itself + FileNotFoundError: If the path to a particular file does not exist or if the file itself does not exist at the expected path - TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no + TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no corresponding custom configuration file or CRTM fix file directory is set KeyError: If an invalid value is provided (i.e., for ``GRID_GEN_METHOD``) """ @@ -480,7 +480,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): f""" EXPTDIR ({exptdir}) already exists, and PREEXISTING_DIR_METHOD = {preexisting_dir_method} - To ignore this error, delete the directory, or set + To ignore this error, delete the directory, or set PREEXISTING_DIR_METHOD = delete, or PREEXISTING_DIR_METHOD = rename in your config file. @@ -667,7 +667,7 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, cycle_start_times, fcst_len_dt, + vx_config, cycle_start_times, fcst_len_dt, fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): @@ -922,7 +922,7 @@ def _get_location(xcs, fmt, expt_cfg): if num_cycles != len(fcst_len_cycl): logger.error(f""" The number of entries in FCST_LEN_CYCL does not divide evenly into a 24 hour day or the number of cycles - in your experiment! + in your experiment! FCST_LEN_CYCL = {fcst_len_cycl} """ ) @@ -1303,7 +1303,7 @@ def _get_location(xcs, fmt, expt_cfg): post_output_domain_name = lowercase(post_output_domain_name) # Write updated value of POST_OUTPUT_DOMAIN_NAME back to dictionary - post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name + post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name # # ----------------------------------------------------------------------- @@ -1591,7 +1591,7 @@ def _dict_find(user_dict, substring): workflow_config["SDF_USES_THOMPSON_MP"] = has_tag_with_value(ccpp_suite_xml, "scheme", "mp_thompson") if workflow_config["SDF_USES_THOMPSON_MP"]: - + logging.debug(f'Selected CCPP suite ({workflow_config["CCPP_PHYS_SUITE"]}) uses Thompson MP') logging.debug(f'Setting up links for additional fix files') @@ -1701,8 +1701,8 @@ def clean_rocoto_dict(rocotodict): 1. A task dictionary containing no "command" key 2. 
A metatask dictionary containing no task dictionaries - - Args: + + Args: rocotodict (dict): A dictionary containing Rocoto workflow settings """ From dbcbcaf679e404b7d1a6553ed4e3c95042816eca Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:45:29 -0600 Subject: [PATCH 127/208] Remove trailing whitespace. --- ush/set_cycle_and_obs_timeinfo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index a354139352..108615516c 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -39,7 +39,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, """ print_input_args(locals()) - + valid_values = ['string', 'datetime'] if return_type not in valid_values: raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. It then returns an updated version of the verification configuration dictionary that satisfies these constraints. - + The constraints are on the accumulation intervals associated with the cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval @@ -181,7 +181,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # - # Get the array of accumulation intervals for the current cumulative field. + # Get the array of accumulation intervals for the current cumulative field. # Then loop over them to ensure that the constraints listed above are # satisfied. If for a given accumulation one or more of the constraints # is not satisfied, remove that accumulation from the list of accumulations @@ -199,7 +199,7 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Make sure that the accumulation interval is less than or equal to the + # Make sure that the accumulation interval is less than or equal to the # forecast length. # if accum_hrs > fcst_len_hrs: @@ -386,7 +386,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Initialize dictionaries that will contain the various forecast output # time and obs day information. Note that we initialize the contents of - # these dictionaries as sets because that better suites the data manipulation + # these dictionaries as sets because that better suites the data manipulation # we will need to do, but these sets will later be converted to lists. fcst_output_times_all_cycles = dict() fcst_output_times_all_cycles['inst'] = set() From 7545d253888786b6d6ee3d8171dbd0b48ca29bdd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:19:52 -0600 Subject: [PATCH 128/208] In order for the temporal consistency checks on various vx parameters and corresponding adjustments to them to be effective (i.e. 
in order for any necessary adjustments to make it into the rocoto xml file), move the call to the function that performs these checks and adjustments to a place BEFORE the call to extend_yaml() that "freezes" (hard-codes) the accumulations for which the PcpCombine and other tasks are run (this freezing should happen AFTER any adjustments are made to the list of user-specified accumulations). --- ush/setup.py | 59 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index 899f05586f..975c22264a 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -241,7 +241,46 @@ def _add_jobname(tasks): except: pass cfg_d["workflow"]["EXPT_BASEDIR"] = os.path.abspath(expt_basedir) + # + # ----------------------------------------------------------------------- + # + # Ensure that the configuration parameters associated with cumulative + # fields (e.g. APCP) in the verification section of the experiment + # dicitonary are temporally consistent, e.g. that accumulation intervals + # are less than or equal to the forecast length. Update the verification + # section of the dictionary to remove inconsistencies. + # + # ----------------------------------------------------------------------- + # + vx_config = cfg_d["verification"] + workflow_config = cfg_d["workflow"] + + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # Convert various times and time intervals from integers or strings to + # datetime or timedelta objects. + date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") + date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + + # Generate a list containing the starting times of the cycles. + cycle_start_times \ + = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, + return_type='datetime') + + # Call function that runs the consistency checks on the vx parameters. + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + + cfg_d['verification'] = vx_config extend_yaml(cfg_d) # Do any conversions of data types @@ -603,21 +642,11 @@ def _remove_tag(tasks, tag): cycle_start_times \ = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, return_type='datetime') - # - # ----------------------------------------------------------------------- - # - # Ensure that the configuration parameters associated with cumulative - # fields (e.g. APCP) in the verification section of the experiment - # dicitonary are temporally consistent, e.g. that accumulation intervals - # are less than or equal to the forecast length. Update the verification - # section of the dictionary to remove inconsistencies. 
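In short, the reordered flow inside load_config_for_setup() is (a condensed sketch using the names shown above; surrounding load/validation steps omitted):

    vx_config, fcst_obs_matched_times_all_cycles_cumul = \
        check_temporal_consistency_cumul_fields(
            vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt)
    cfg_d["verification"] = vx_config
    extend_yaml(cfg_d)  # accumulations are "frozen" into templates only after any pruning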
- # - # ----------------------------------------------------------------------- - # - vx_config, fcst_obs_matched_times_all_cycles_cumul \ - = check_temporal_consistency_cumul_fields( - vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) - expt_config["verification"] = vx_config + print(f"") + print(f"IIIIIIIIIIIIIII") + print(f"cycle_start_times = ") + pprint(cycle_start_times) + #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 21374ca6c643363cc09f8094a7704774a1816921 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:54:27 -0600 Subject: [PATCH 129/208] Remove debugging code and add a blank line. --- ush/set_cycle_and_obs_timeinfo.py | 1 + ush/setup.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 108615516c..ddc948b583 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -612,6 +612,7 @@ def get_obs_retrieve_times_by_day( Dictionary of dictionaries containing times at which each type of obs is needed on each obs day. """ + # Convert string contents of input dictionaries to datetime objects. for time_type in ['cumul', 'inst']: fcst_output_times_all_cycles[time_type] \ diff --git a/ush/setup.py b/ush/setup.py index 975c22264a..0aae872b68 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -642,11 +642,6 @@ def _remove_tag(tasks, tag): cycle_start_times \ = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, return_type='datetime') - print(f"") - print(f"IIIIIIIIIIIIIII") - print(f"cycle_start_times = ") - pprint(cycle_start_times) - #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 5401569f1904d9d1940b7a532b05bd3a778325b5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 20 Oct 2024 08:28:23 -0600 Subject: [PATCH 130/208] Drop the "_NDAS" and "_ndas" suffixes from pb2nc tasks since prepbufr files can come from sources other than NDAS (e.g. GDAS). --- ...IONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} | 2 +- parm/wflow/verify_pre.yaml | 2 +- ...un_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} (98%) rename scripts/{exregional_run_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} (100%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS index a6ed90a1a3..89c9bb73f4 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a3b49cc169..d5ce7885e2 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -76,7 +76,7 @@ task_run_MET_Pb2nc_obs_NDAS: attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars VAR: ADPSFC diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs.sh similarity index 100% rename from scripts/exregional_run_met_pb2nc_obs_ndas.sh rename to scripts/exregional_run_met_pb2nc_obs.sh From 88e48e29bb06f2c829ce2eb5119bf3fbe1a39bf7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 21 Oct 2024 15:53:10 -0600 Subject: [PATCH 131/208] Modifications to address Mike K's PR review comments. --- scripts/exregional_get_verif_obs.sh | 3 +- ush/get_obs.py | 207 ++++++++++++---------------- ush/set_cycle_and_obs_timeinfo.py | 81 ++++++----- ush/setup.py | 1 - 4 files changed, 126 insertions(+), 166 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index d1ee4116e8..a07deecc25 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -61,9 +61,8 @@ Valid observation types are: " fi -script_bn="get_obs" cmd="\ -python3 -u ${USHdir}/${script_bn}.py \ +python3 -u ${USHdir}/get_obs.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" diff --git a/ush/get_obs.py b/ush/get_obs.py index 50b7c45ae3..666c6f1298 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -11,6 +11,7 @@ from pprint import pprint from math import ceil, floor import subprocess +import retrieve_data from python_utils import ( load_yaml_config, ) @@ -26,7 +27,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): Note that for cumulative fields (like CCPA and NOHRSC, as opposed to instantaneous ones like MRMS and NDAS), the archive files corresponding to hour 0 of the day represent accumulations over the previous day. Thus, - here, we never return an achive hour of 0 for cumulative fields. Instead, + here, we never return an archive hour of 0 for cumulative fields. Instead, if the specified hour-of-day is 0, we consider that to represent the 0th hour of the NEXT day (i.e. the 24th hour of the current day) and set the archive hour to 24. @@ -57,32 +58,32 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): msg = dedent(f""" The specified observation type (after converting to upper case) is not supported: - obtype_upper = {obtype_upper} + {obtype_upper = } Valid observation types are: {valid_obtypes} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) - # Ensure that the archive inerval divides evenly into 24 hours. + # Ensure that the archive interval divides evenly into 24 hours. remainder = 24 % arcv_intvl_hrs if remainder != 0: msg = dedent(f""" The archive interval for obs of type {obtype} must divide evenly into 24 but doesn't: - arcv_intvl_hrs = {arcv_intvl_hrs} + {arcv_intvl_hrs = } 24 % arcv_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) if (hod < 0) or (hod > 23): msg = dedent(f""" The specified hour-of-day must be between 0 and 23, inclusive, but isn't: - hod = {hod} + {hod = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Set the archive hour. 
This depends on the obs type because each obs # type can organize its observation files into archives in a different @@ -113,8 +114,10 @@ def get_obs(config, obtype, yyyymmdd_task): This script checks for the existence of obs files of the specified type at the locations specified by variables in the SRW App's configuration file. If one or more of these files do not exist, it retrieves them from - a data store and places them in the locations specified by the configuration - variables, renaming them if necessary. + a data store (using the retrieve_data.py script and as specified by the + configuration file parm/data_locations.yml for that script) and places + them in the locations specified by the App's configuration variables, + renaming them if necessary. Args: config: @@ -298,10 +301,9 @@ def get_obs(config, obtype, yyyymmdd_task): vx_config = cfg['verification'] # Get the time interval (in hours) at which the obs are available. - key = obtype + '_OBS_AVAIL_INTVL_HRS' - obs_avail_intvl_hrs = vx_config[key] + obs_avail_intvl_hrs = vx_config[f'{obtype}_OBS_AVAIL_INTVL_HRS'] - # The obs availability inerval must divide evenly into 24 hours. Otherwise, + # The obs availability interval must divide evenly into 24 hours. Otherwise, # different days would have obs available at different hours-of-day. Make # sure this is the case. remainder = 24 % obs_avail_intvl_hrs @@ -309,19 +311,18 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + {obs_avail_intvl_hrs = } 24 % obs_avail_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # For convenience, convert the obs availability interval to a datetime # object. obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) # Get the base directory for the observations. - key = obtype + '_OBS_DIR' - obs_dir = vx_config[key] + obs_dir = vx_config[f'{obtype}_OBS_DIR'] # For each observation type, set the group of fields contained in those # observation files that we need for verification. Each group of fields @@ -368,8 +369,7 @@ def get_obs(config, obtype, yyyymmdd_task): # locations, they will be retrieved from HPSS and placed at these locations. 
fp_proc_templates = [] for fg in field_groups_in_obs: - key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE' - fn_proc_template = vx_config[key] + fn_proc_template = vx_config[f'OBS_{obtype}_{fg}_FN_TEMPLATE'] fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) # #----------------------------------------------------------------------- @@ -393,45 +393,41 @@ def get_obs(config, obtype, yyyymmdd_task): fields_in_filenames = [] levels_in_filenames = [] if obtype == 'MRMS': + valid_mrms_field_groups = ['REFC', 'RETOP'] for fg in field_groups_in_obs: + if fg not in valid_mrms_field_groups: + msg = dedent(f""" + Invalid field group specified for obs type: + {obtype = } + {fg = } + Valid field group are: + {valid_mrms_field_groups} + """) + logging.error(msg) + raise ValueError(msg) if fg == 'REFC': fields_in_filenames.append('MergedReflectivityQCComposite') levels_in_filenames.append('00.50') elif fg == 'RETOP': fields_in_filenames.append('EchoTop') levels_in_filenames.append('18_00.50') - else: - msg = dedent(f""" - Invalid field specified for obs type: - obtype = {obtype} - field = {field} - """) - logging.error(msg) - raise Exception(msg) # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. Set these starting # and ending dates as datetime objects for later use. - yyyymmdd_bad_metadata_start_str = None - yyyymmdd_bad_metadata_end_str = None - yyyymmdd_bad_metadata_start = None - yyyymmdd_bad_metadata_end = None - if obtype == 'CCPA': - yyyymmdd_bad_metadata_start_str = '20180718' - yyyymmdd_bad_metadata_end_str = '20210504' - yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d') - yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d') + ccpa_bad_metadata_start = dt.datetime.strptime('20180718', '%Y%m%d') + ccpa_bad_metadata_end = dt.datetime.strptime('20210504', '%Y%m%d') + # #----------------------------------------------------------------------- # - # Get the list of all the times in the current day at which to retrieve - # obs. This is an array with elements having format "YYYYMMDDHH". + # Form a string list of all the times in the current day (each in the + # format "YYYYMMDDHH") at which to retrieve obs. # #----------------------------------------------------------------------- # yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d') - key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str - obs_retrieve_times_crnt_day_str = vx_config[key] + obs_retrieve_times_crnt_day_str = vx_config[f'OBS_RETRIEVE_TIMES_{obtype}_{yyyymmdd_task_str}'] obs_retrieve_times_crnt_day \ = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str] # @@ -465,17 +461,14 @@ def get_obs(config, obtype, yyyymmdd_task): # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first obs retrieval time of the day. - hod_first = obs_retrieve_times_crnt_day[0].hour - arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first) + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[0].hour) # Ending archive hour. This is set to the archive hour containing obs at # the last obs retrieval time of the day. 
- hod_last = obs_retrieve_times_crnt_day[-1].hour - arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last) + arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[-1].hour) # Set other variables needed below when evaluating the METplus template for # the full path to the processed observation files. - one_hour = dt.timedelta(hours=1) ushdir = config['user']['USHdir'] # Create dictionary containing the paths to all the processed obs files @@ -491,7 +484,7 @@ def get_obs(config, obtype, yyyymmdd_task): for yyyymmddhh in obs_retrieve_times_crnt_day: # Set the lead hour, i.e. the number of hours from the beginning of the # day at which the file is valid. - lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + lhr = int((yyyymmddhh - yyyymmdd_task)/dt.timedelta(hours=1)) # Call a bash script to evaluate the template for the full path to the # file containing METplus timestrings at the current time. This should # be upgraded to a python script at some point. @@ -517,18 +510,17 @@ def get_obs(config, obtype, yyyymmdd_task): num_existing_files += 1 msg = dedent(f""" File already exists on disk: - fp_proc = {fp_proc} + {fp_proc = } """) - logging.info(msg) + logging.debug(msg) else: - hod = yyyymmddhh.hour - arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, yyyymmddhh.hour) msg = dedent(f""" File does not exist on disk: - fp_proc = {fp_proc} + {fp_proc = } Setting the hour (since hour 0 of the current task day) of the first archive to retrieve to: - arcv_hr_start = {arcv_hr_start} + {arcv_hr_start = } """) logging.info(msg) do_break = True @@ -537,14 +529,13 @@ def get_obs(config, obtype, yyyymmdd_task): # If the number of obs files that already exist on disk is equal to the # number of obs files needed, then there is no need to retrieve any files. - num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day) - num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups + num_files_needed = len(obs_retrieve_times_crnt_day)*num_field_groups if num_existing_files == num_files_needed: msg = dedent(f""" All obs files needed for the current day (yyyymmdd_task) already exist on disk: - yyyymmdd_task = {yyyymmdd_task} + {yyyymmdd_task = } Thus, there is no need to retrieve any files. """) logging.info(msg) @@ -554,22 +545,20 @@ def get_obs(config, obtype, yyyymmdd_task): # the number of obs files needed, then we will need to retrieve files. # In this case, set the sequence of hours corresponding to the archives # from which files will be retrieved. 
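# For example (illustrative values only): with arcv_hr_start = 6, arcv_hr_end = 24,
# and arcv_intvl_hrs = 6, the list constructed below is [6, 12, 18, 24], i.e. one
# entry per archive that must be pulled to cover the day's obs retrieval times.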
- else: - - arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] - msg = dedent(f""" - At least some obs files needed needed for the current day (yyyymmdd_task) - do not exist on disk: - yyyymmdd_task = {yyyymmdd_task} - The number of obs files needed for the current day is: - num_files_needed = {num_files_needed} - The number of obs files that already exist on disk is: - num_existing_files = {num_existing_files} - Will retrieve remaining files by looping over archives corresponding to - the following hours (since hour 0 of the current day): - arcv_hrs = {arcv_hrs} - """) - logging.info(msg) + arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] + msg = dedent(f""" + At least some obs files needed needed for the current day (yyyymmdd_task) + do not exist on disk: + {yyyymmdd_task = } + The number of obs files needed for the current day is: + {num_files_needed = } + The number of obs files that already exist on disk is: + {num_existing_files = } + Will retrieve remaining files by looping over archives corresponding to + the following hours (since hour 0 of the current day): + {arcv_hrs = } + """) + logging.info(msg) # #----------------------------------------------------------------------- # @@ -595,18 +584,9 @@ def get_obs(config, obtype, yyyymmdd_task): #----------------------------------------------------------------------- # - # Whether to move the files or copy them from their raw to their processed - # locations. - mv_or_cp = 'cp' # Whether to remove raw observations after processed directories have # been created from them. - key = 'REMOVE_RAW_OBS_' + obtype - remove_raw_obs = vx_config[key] - # If the raw directories and files are to be removed at the end of this - # script, no need to copy the files since the raw directories are going - # to be removed anyway. - if remove_raw_obs: - mv_or_cp = 'mv' + remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_{obtype}'] # Base directory that will contain the archive subdirectories in which # the files extracted from each archive (tar) file will be placed. We @@ -657,15 +637,9 @@ def get_obs(config, obtype, yyyymmdd_task): # # MRMS: # There is only one archive per day, and it contains all the raw obs - # files needed to generate processed obs files for all hours of the - # current day. Thus, we will only ever need this one archive, so there - # is no need to include the archive's hour information (there really - # isn't any) in the raw subdirectory name. In addition, the archive's - # year, month, and day is the same as that of the obs day's, so it is - # already included in the name of the raw base directory. Sine this is - # the only info we need to avoid differnt get_obs tasks clobbering each - # other's output obs files, for simplicity we simply do not create a raw - # archive subdirectory. + # files needed to generate processed obs files for the current day. + # Since we will only ever need this one archive for a given day, + # for simplicity we simply do not create a raw archive subdirectory. # # NDAS: # Same as for CCPA. 
@@ -703,8 +677,8 @@ def get_obs(config, obtype, yyyymmdd_task): for obs_retrieve_time in obs_retrieve_times_crnt_day: if (obs_retrieve_time >= arcv_contents_start) and \ (obs_retrieve_time <= arcv_contents_end): - do_retrieve = True - break + do_retrieve = True + break if not do_retrieve: msg = dedent(f""" @@ -712,10 +686,10 @@ def get_obs(config, obtype, yyyymmdd_task): hour 0 of the next day if considering a cumulative obs type) fall in the range spanned by the current {arcv_intvl_hrs}-hourly archive file. The bounds of the data in the current archive are: - arcv_contents_start = {arcv_contents_start} - arcv_contents_end = {arcv_contents_end} + {arcv_contents_start = } + {arcv_contents_end = } The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + {obs_retrieve_times_crnt_day = } """) logging.info(msg) @@ -747,18 +721,15 @@ def get_obs(config, obtype, yyyymmdd_task): # files in the current archive, although we will make use of only 6 of # these (we will not use the tm00 file). parmdir = config['user']['PARMdir'] - cmd = ' '.join(['python3', \ - '-u', os.path.join(ushdir, 'retrieve_data.py'), \ - '--debug', \ - '--file_set', 'obs', \ - '--config', os.path.join(parmdir, 'data_locations.yml'), \ - '--cycle_date', yyyymmddhh_arcv_str, \ - '--data_stores', 'hpss', \ - '--data_type', obtype + '_obs', \ - '--output_path', arcv_dir_raw, \ - '--summary_file', 'retrieve_data.log']) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - rc = result.returncode + args = ['--debug', \ + '--file_set', 'obs', \ + '--config', os.path.join(parmdir, 'data_locations.yml'), \ + '--cycle_date', yyyymmddhh_arcv_str, \ + '--data_stores', 'hpss', \ + '--data_type', obtype + '_obs', \ + '--output_path', arcv_dir_raw, \ + '--summary_file', 'retrieve_data.log'] + retrieve_data.main(args) # Get the list of times corresponding to the obs files in the current # archive. This is a list of datetime objects. @@ -836,7 +807,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given + # retrieving. The list of possible templates for these names is given # in parm/data_locations.yml, but which of those is actually used is not # known until retrieve_data.py completes. Thus, that information needs # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. @@ -854,7 +825,7 @@ def get_obs(config, obtype, yyyymmdd_task): rc = result.returncode # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates + # script called above ends up retrieving. The list of possible templates # for this name is given in parm/data_locations.yml, but which of those # is actually used is not known until retrieve_data.py completes. Thus, # that information needs to be passed back by the script and used here. @@ -893,20 +864,20 @@ def get_obs(config, obtype, yyyymmdd_task): {fp_raw} ... """) - logging.info(msg) + logging.debug(msg) yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure. 
if (obtype == 'CCPA') and \ - ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \ + ((yyyymmdd >= ccpa_bad_metadata_start) and (yyyymmdd <= ccpa_bad_metadata_end)) and \ (((hr >= 19) and (hr <= 23)) or (hr == 0)): cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s']) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - elif mv_or_cp == 'mv': + elif remove_raw_obs: shutil.move(fp_raw, fp_proc) - elif mv_or_cp == 'cp': + else: shutil.copy(fp_raw, fp_proc) # #----------------------------------------------------------------------- @@ -916,10 +887,7 @@ def get_obs(config, obtype, yyyymmdd_task): #----------------------------------------------------------------------- # if remove_raw_obs: - msg = dedent(f""" - Removing raw obs directories ..." - """) - logging.info(msg) + logging.info("Removing raw obs directories ...") shutil.rmtree(basedir_raw) return True @@ -934,7 +902,6 @@ def parse_args(argv): parser.add_argument( "--obtype", - dest="obtype", type=str, required=True, choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], @@ -943,7 +910,6 @@ def parse_args(argv): parser.add_argument( "--obs_day", - dest="obs_day", type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), required=True, help="Date of observation day, in the form 'YYYMMDD'.", @@ -951,7 +917,6 @@ def parse_args(argv): parser.add_argument( "--var_defns_path", - dest="var_defns_path", type=str, required=True, help="Path to variable definitions file.", @@ -961,7 +926,6 @@ def parse_args(argv): for pair in (str.lower(lvl), str.upper(lvl))] parser.add_argument( "--log_level", - dest="log_level", type=str, required=False, default='info', @@ -972,7 +936,6 @@ def parse_args(argv): parser.add_argument( "--log_fp", - dest="log_fp", type=str, required=False, default='', diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ddc948b583..9029731a94 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -42,15 +42,14 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, valid_values = ['string', 'datetime'] if return_type not in valid_values: - raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") msg = dedent(f""" Invalid value for optional argument "return_type": - return_type = {return_type} + {return_type = } Valid values are: - valid_values = {valid_values} + {valid_values = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # iterate over cycles all_cdates = [] @@ -168,17 +167,17 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. obs_avail_hr_start = 0 obs_avail_hr_end = obs_avail_hr_start + 24 # Construct list of obs availability hours-of-day. - obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] + obs_avail_hrs_of_day = list(range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)) obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # # Get the array of accumulation intervals for the current cumulative field. 
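As an illustration of the hours-of-day construction above (assuming, say, an obs availability interval of 6 hours):

    obs_avail_hrs_of_day = list(range(0, 24, 6))                                  # [0, 6, 12, 18]
    obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day]  # ['00', '06', '12', '18']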
@@ -207,13 +206,13 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is greater than the forecast length (fcst_len_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - fcst_len_hrs = {fcst_len_hrs} - Thus, this forecast field cannot be accumulated over this interval. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_len_hrs = } + Thus, this forecast field cannot be accumulated over this interval. Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -229,11 +228,11 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the observation type's availability interval (obs_avail_intvl_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - accum_hrs % obs_avail_intvl_hrs = {rem_obs} + {field_fcst = } + {obtype = } + {accum_hrs = } + {obs_avail_intvl_hrs = } + accum_hrs % obs_avail_intvl_hrs = {rem_obs} Thus, this observation type cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation intervals to verify for this field/obtype. @@ -251,14 +250,14 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the forecast output interval (fcst_output_intvl): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {fcst_output_intvl} hr - accum_hrs % fcst_output_intvl_hrs = {rem_fcst} - Thus, this forecast field cannot be accumulated over this interval. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_output_intvl_hrs = } + accum_hrs % fcst_output_intvl_hrs = {rem_fcst} + Thus, this forecast field cannot be accumulated over this interval. Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -303,17 +302,17 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) is such that the forecast will output the field on at least one of hour-of-day on which the corresponding observation type is not available: - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr + {field_fcst = } + {obtype = } + {accum_hrs = } The forecast output hours-of-day for this field/accumulation interval combination are: - fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str} + {fcst_output_hrs_of_day_str = } The hours-of-day at which the obs are available are: - obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str} - Thus, at least some of the forecast output cannot be verified. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {obs_avail_hrs_of_day_str = } + Thus, at least some of the forecast output cannot be verified. 
Will remove + this accumulation interval from the list of accumulation intervals to + verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -335,8 +334,8 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The list of accumulation intervals (accum_intvls_hrs) for the current cumulative field to verify (field_fcst) is empty: - field_fcst = {field_fcst} - accum_intvls_hrs = {accum_intvls_hrs} + {field_fcst = } + {accum_intvls_hrs = } Removing this field from the list of fields to verify. The updated list is: {vx_config["VX_FIELDS"]} @@ -683,10 +682,10 @@ def get_obs_retrieve_times_by_day( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) - raise Exception(msg) + raise ValueError(msg) obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) diff --git a/ush/setup.py b/ush/setup.py index 0aae872b68..703bc094fd 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -740,7 +740,6 @@ def _remove_tag(tasks, tag): # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - vx_config = expt_config["verification"] vx_fields = vx_config["VX_FIELDS"] if not vx_fields: metatask = "metatask_check_post_output_all_mems" From eb06d428e9590c693f988739804e8cae0e90d622 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 22 Oct 2024 09:12:25 -0600 Subject: [PATCH 132/208] Additional mods for Mike K.'s PR review. --- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a07deecc25..d457a6b5d8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -68,7 +68,7 @@ python3 -u ${USHdir}/get_obs.py \ --obs_day ${PDY}" print_info_msg " CALLING: ${cmd}" -${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." +${cmd} || print_err_msg_exit "Error calling get_obs.py" # #----------------------------------------------------------------------- # diff --git a/ush/get_obs.py b/ush/get_obs.py index 666c6f1298..f88ab9a27e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -53,12 +53,10 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] - obtype_upper = obtype.upper() - if obtype_upper not in valid_obtypes: + if obtype not in valid_obtypes: msg = dedent(f""" - The specified observation type (after converting to upper case) is not - supported: - {obtype_upper = } + The specified observation type is not supported: + {obtype = } Valid observation types are: {valid_obtypes} """) @@ -91,19 +89,19 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): # through 6 of the day in the archive labeled with hour 6 while an # instantaneous obs type may put the obs files for hours 0 through 5 of # the day in the archive labeled with hour 6. 
- if obtype_upper in ['CCPA']: + if obtype in ['CCPA']: if hod == 0: arcv_hr = 24 else: arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs - elif obtype_upper in ['NOHRSC']: + elif obtype in ['NOHRSC']: if hod == 0: arcv_hr = 24 else: arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs - elif obtype_upper in ['MRMS']: + elif obtype in ['MRMS']: arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs - elif obtype_upper in ['NDAS']: + elif obtype in ['NDAS']: arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs return arcv_hr From 4f6bdda1b038b3a1078cf160adc666308e2fac4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 22 Oct 2024 09:12:25 -0600 Subject: [PATCH 133/208] Additional mods for Mike K.'s PR review. --- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a07deecc25..d457a6b5d8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -68,7 +68,7 @@ python3 -u ${USHdir}/get_obs.py \ --obs_day ${PDY}" print_info_msg " CALLING: ${cmd}" -${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." +${cmd} || print_err_msg_exit "Error calling get_obs.py" # #----------------------------------------------------------------------- # diff --git a/ush/get_obs.py b/ush/get_obs.py index 666c6f1298..f88ab9a27e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -53,12 +53,10 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] - obtype_upper = obtype.upper() - if obtype_upper not in valid_obtypes: + if obtype not in valid_obtypes: msg = dedent(f""" - The specified observation type (after converting to upper case) is not - supported: - {obtype_upper = } + The specified observation type is not supported: + {obtype = } Valid observation types are: {valid_obtypes} """) @@ -91,19 +89,19 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): # through 6 of the day in the archive labeled with hour 6 while an # instantaneous obs type may put the obs files for hours 0 through 5 of # the day in the archive labeled with hour 6. - if obtype_upper in ['CCPA']: + if obtype in ['CCPA']: if hod == 0: arcv_hr = 24 else: arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs - elif obtype_upper in ['NOHRSC']: + elif obtype in ['NOHRSC']: if hod == 0: arcv_hr = 24 else: arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs - elif obtype_upper in ['MRMS']: + elif obtype in ['MRMS']: arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs - elif obtype_upper in ['NDAS']: + elif obtype in ['NDAS']: arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs return arcv_hr From 105d1d839a33a927ecd60f41eeb41b38346b6ca8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 23 Oct 2024 14:37:09 -0600 Subject: [PATCH 134/208] Change name of App variable VX_FIELDS to VX_FIELD_GROUPS to more correctly represent its contents since it contains a list of field groups (not just fields) to verify; fix comments and output messages related to this issue; rename some local variables related to this issue. 
--- parm/wflow/verify_det.yaml | 4 +- parm/wflow/verify_ens.yaml | 8 +- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 2 +- ...g.MET_ensemble_verification_winter_wx.yaml | 2 +- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 2 +- ush/config_defaults.yaml | 25 +-- ush/set_cycle_and_obs_timeinfo.py | 145 +++++++++--------- ush/setup.py | 93 +++++------ ush/valid_param_vals.yaml | 2 +- 9 files changed, 144 insertions(+), 139 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index a08fe69e3e..f416ce7974 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -94,7 +94,7 @@ metatask_GridStat_MRMS_all_mems: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' metatask_GridStat_MRMS_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_#VAR#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' @@ -141,7 +141,7 @@ metatask_PointStat_NDAS_all_mems: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' metatask_PointStat_NDAS_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_#VAR#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index f92aef4c60..8aed2d02b3 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -92,7 +92,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: metatask_GenEnsProd_EnsembleStat_MRMS: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_MRMS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -144,7 +144,7 @@ metatask_GenEnsProd_EnsembleStat_MRMS: metatask_GenEnsProd_EnsembleStat_NDAS: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -248,7 +248,7 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: metatask_GridStat_MRMS_ensprob: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' 
task_run_MET_GridStat_vx_ensprob_#VAR#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' @@ -275,7 +275,7 @@ metatask_PointStat_NDAS_ensmeanprob: statlc: mean prob metatask_PointStat_NDAS_ens#statlc#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_ens#statlc#_#VAR#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index 0caffe5a46..a55cc5f91a 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -60,4 +60,4 @@ verification: NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 1845255f54..018b8abbc6 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -31,7 +31,7 @@ global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 10 verification: - VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index aa4b731e3a..11eaf7b63c 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -58,5 +58,5 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' - VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b216ccdd72..220c5e6a2b 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2473,28 +2473,29 @@ verification: # This is used in forming the names of the verification output files as # well as in the contents of those files. 
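As an aside, the VAR: metatask entries in the verify_det.yaml and verify_ens.yaml hunks above filter VX_FIELD_GROUPS down to the field groups handled by each obs type using inline Jinja. A minimal sketch of how one such expression expands, assuming the jinja2 package and a purely hypothetical field-group list (not taken from any particular experiment):

    # Render the MRMS metatask filter from verify_det.yaml above against a
    # hypothetical VX_FIELD_GROUPS list (requires the jinja2 package).
    from jinja2 import Template

    expr = ('{% for var in verification.VX_FIELD_GROUPS %}'
            '{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}'
            '{% endfor %}')

    rendered = Template(expr).render(
        verification={"VX_FIELD_GROUPS": ["APCP", "REFC", "RETOP", "ADPSFC"]})
    print(repr(rendered))  # -> 'REFC RETOP '

Rocoto then expands the metatask once per entry in that space-separated list.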
# - # VX_FIELDS: - # The fields or groups of fields on which to run verification. Because - # accumulated snow (ASNOW) is often not of interest in non-winter cases - # and because observation files for ASNOW are not available on NOAA - # HPSS for retrospective cases before March 2020, by default ASNOW is - # not included VX_FIELDS, but it may be added to this list in order to - # include the verification tasks for ASNOW in the workflow. + # VX_FIELD_GROUPS: + # The groups of fields (some of which may consist of a single field) on + # which to run verification. Because accumulated snow (ASNOW) is often + # not of interest in non-winter cases and because observation files for + # ASNOW are not available on NOAA HPSS for retrospective cases before + # March 2020, by default ASNOW is not included VX_FIELD_GROUPS, but it + # may be added to this list in order to include the verification tasks + # for ASNOW in the workflow. # # VX_APCP_ACCUMS_HRS: # The 2-digit accumulation periods (in units of hours) to consider for - # APCP (accumulated precipitation). If VX_FIELDS contains "APCP", then - # VX_APCP_ACCUMS_HRS must contain at least one element. If not, + # APCP (accumulated precipitation). If VX_FIELD_GROUPS contains "APCP", + # then VX_APCP_ACCUMS_HRS must contain at least one element. If not, # VX_APCP_ACCUMS_HRS will be ignored. # # VX_ASNOW_ACCUMS_HRS: # The 2-digit accumulation periods (in units of hours) to consider for - # ASNOW (accumulated snowfall). If VX_FIELDS contains "ASNOW", then - # VX_ASNOW_ACCUMS_HRS must contain at least one element. If not, + # ASNOW (accumulated snowfall). If VX_FIELD_GROUPS contains "ASNOW", + # then VX_ASNOW_ACCUMS_HRS must contain at least one element. If not, # VX_ASNOW_ACCUMS_HRS will be ignored. # VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9029731a94..2130ad99ea 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -73,13 +73,13 @@ def check_temporal_consistency_cumul_fields( the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the - cumulative forecast fields (and corresponding observation type pairs) that + cumulative field groups (and the corresponding observation types) that are to be verified. The constraints on each such accumulation interval are as follows: - 1) The accumulation interval is less than or equal to the forecast length - (since otherwise, the forecast field cannot be accumulated over that - interval). + 1) The accumulation interval is less than or equal to the forecast length. + This ensures that the forecast(s) can accumulate the field(s) in the + field group over that interval. 2) The obs availability interval evenly divides the accumulation interval. This ensures that the obs can be added together to obtain accumulated @@ -90,11 +90,11 @@ def check_temporal_consistency_cumul_fields( 3) The forecast output interval evenly divides the accumulation interval. This ensures that the forecast output can be added together to obtain - accumulated values of the forecast field, e.g. 
if the forecast output - interval is 3 hours, the resulting 3-hourly APCP outputs from the forecast - can be added to obtain 6-hourly forecast APCP. Note that this also ensures - that the accumulation interval is greater than or equal to the forecast - output interval. + accumulated values of the fields in the field group. For example, if + the forecast output interval is 3 hours, the resulting 3-hourly APCP + outputs from the forecast can be added to obtain 6-hourly forecast APCP. + Note that this also ensures that the accumulation interval is greater + than or equal to the forecast output interval. 4) The hour-of-day at which the accumulated forecast values will be available are a subset of the ones at which the accumulated obs @@ -129,10 +129,11 @@ def check_temporal_consistency_cumul_fields( which various field/accumlation combinations are output and at which the corresponding obs type is also available. """ - # Set dictionary containing all cumulative fields (i.e. whether or not - # they are to be verified). The keys are the observation types and the - # values are the field names in the forecasts. - vx_cumul_fields_all = {"CCPA": "APCP", "NOHRSC": "ASNOW"} + + # Set dictionary containing all field groups that consist of cumulative + # fields (i.e. whether or not those field groups are to be verified). + # The keys are the observation types and the field groups. + obtype_to_fg_dict_cumul = {"CCPA": "APCP", "NOHRSC": "ASNOW"} # Convert from datetime.timedelta objects to integers. one_hour = timedelta(hours=1) @@ -143,15 +144,15 @@ def check_temporal_consistency_cumul_fields( # dictionary. fcst_obs_matched_times_all_cycles_cumul = dict() - for obtype, field_fcst in vx_cumul_fields_all.items(): + for obtype, fg in obtype_to_fg_dict_cumul.items(): # If the current cumulative field is not in the list of fields to be # verified, just skip to the next field. - if field_fcst not in vx_config["VX_FIELDS"]: + if fg not in vx_config["VX_FIELD_GROUPS"]: continue # Initialize a sub-dictionary in one of the dictionaries to be returned. - fcst_obs_matched_times_all_cycles_cumul.update({field_fcst: {}}) + fcst_obs_matched_times_all_cycles_cumul.update({fg: {}}) # # Get the availability interval of the current observation type from the @@ -186,7 +187,7 @@ def check_temporal_consistency_cumul_fields( # is not satisfied, remove that accumulation from the list of accumulations # for the current field. # - accum_intvls_array_name = "".join(["VX_", field_fcst, "_ACCUMS_HRS"]) + accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"]) accum_intvls_hrs = vx_config[accum_intvls_array_name] # # Loop through the accumulation intervals and check the temporal constraints @@ -196,23 +197,23 @@ def check_temporal_consistency_cumul_fields( accum_hh = f"{accum_hrs:02d}" # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. - fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = [] # # Make sure that the accumulation interval is less than or equal to the # forecast length. 
# if accum_hrs > fcst_len_hrs: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is greater - than the forecast length (fcst_len_hrs): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is greater than + the forecast length (fcst_len_hrs): + {fg = } {obtype = } {accum_hrs = } {fcst_len_hrs = } - Thus, this forecast field cannot be accumulated over this interval. Will - remove this accumulation interval from the list of accumulation intervals - to verify for this field/obtype. + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -224,18 +225,17 @@ def check_temporal_consistency_cumul_fields( rem_obs = accum_hrs % obs_avail_intvl_hrs if rem_obs != 0: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is not - evenly divisible by the observation type's availability interval - (obs_avail_intvl_hrs): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the observation type's availability interval (obs_avail_intvl_hrs): + {fg = } {obtype = } {accum_hrs = } {obs_avail_intvl_hrs = } accum_hrs % obs_avail_intvl_hrs = {rem_obs} Thus, this observation type cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -247,17 +247,17 @@ def check_temporal_consistency_cumul_fields( rem_fcst = accum_hrs % fcst_output_intvl_hrs if rem_fcst != 0: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is not - evenly divisible by the forecast output interval (fcst_output_intvl): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the forecast output interval (fcst_output_intvl): + {fg = } {obtype = } {accum_hrs = } {fcst_output_intvl_hrs = } accum_hrs % fcst_output_intvl_hrs = {rem_fcst} - Thus, this forecast field cannot be accumulated over this interval. Will - remove this accumulation interval from the list of accumulation intervals - to verify for this field/obtype. + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -298,26 +298,26 @@ def check_temporal_consistency_cumul_fields( # interval from the list of intervals to verify. 
if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str): msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) is such that the forecast will output the field on at - least one of hour-of-day on which the corresponding observation type is - not available: - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) is such that the forecast will output the field(s) in the + field group at at least one hour-of-day at which the corresponding + observation type is not available: + {fg = } {obtype = } {accum_hrs = } - The forecast output hours-of-day for this field/accumulation interval + The forecast output hours-of-day for this field group/accumulation interval combination are: {fcst_output_hrs_of_day_str = } The hours-of-day at which the obs are available are: {obs_avail_hrs_of_day_str = } Thus, at least some of the forecast output cannot be verified. Will remove - this accumulation interval from the list of accumulation intervals to - verify for this field/obtype. + this accumulation interval from the list of accumulation intervals to verify + for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) else: - fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = fcst_output_times_all_cycles_str + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = fcst_output_times_all_cycles_str # # Update the value in the experiment configuration dictionary of the list # of accumulation intervals to verify for this cumulative field (since @@ -330,15 +330,15 @@ def check_temporal_consistency_cumul_fields( # verification configuration dictionary. # if not accum_intvls_hrs: - vx_config["VX_FIELDS"].remove(field_fcst) + vx_config["VX_FIELD_GROUPS"].remove(fg) msg = dedent(f""" The list of accumulation intervals (accum_intvls_hrs) for the current - cumulative field to verify (field_fcst) is empty: - {field_fcst = } + cumulative field group to verify (fg) is empty: + {fg = } {accum_intvls_hrs = } Removing this field from the list of fields to verify. The updated list is: - {vx_config["VX_FIELDS"]} + {vx_config["VX_FIELD_GROUPS"]} """) logging.info(msg) @@ -621,36 +621,35 @@ def get_obs_retrieve_times_by_day( = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") for i in range(len(obs_days_all_cycles[time_type]))] - # Get list of forecast fields to be verified. - vx_fields = vx_config['VX_FIELDS'] + # Get list of field groups to be verified. + vx_field_groups = vx_config['VX_FIELD_GROUPS'] - # Define dictionary containing information about all fields that may - # possibly be verified. This information includes their temporal + # Define dictionary containing information about all field groups that + # can possibly be verified. This information includes their temporal # characteristics (cumulative vs. instantaneous) and the mapping between - # the observation type and the forecast field. - vx_field_info = {'cumul': [{'obtype': 'CCPA', 'fcst_fields': ['APCP']}, - {'obtype': 'NOHRSC', 'fcst_fields': ['ASNOW']}], - 'inst': [{'obtype': 'MRMS', 'fcst_fields': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] + # the observation type and the field group. 
+ vx_field_info = {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']}, + {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}], + 'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'field_groups': ['ADPSFC', 'ADPUPA']}] } - # Keep only those items in the dictionary vx_field_info defined above - # that have forecast fields that appear in the list of forecast fields to - # be verified. - for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): - for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): - obtype = obtypes_to_fcst_fields_dict['obtype'] - fcst_fields = obtypes_to_fcst_fields_dict['fcst_fields'] - fcst_fields = [field for field in fcst_fields if field in vx_fields] - obtypes_to_fcst_fields_dict['fcst_fields'] = fcst_fields - if not fcst_fields: obtypes_to_fcst_fields_dict_list.remove(obtypes_to_fcst_fields_dict) - if not obtypes_to_fcst_fields_dict_list: vx_field_info.pop(obs_time_type) + # Keep only those items in the dictionary vx_field_info defined above that + # have field groups that appear in the list of field groups to verify. + for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.copy().items(): + for obtypes_to_field_groups_dict in obtypes_to_field_groups_dict_list.copy(): + obtype = obtypes_to_field_groups_dict['obtype'] + field_groups = obtypes_to_field_groups_dict['field_groups'] + field_groups = [fg for fg in field_groups if fg in vx_field_groups] + obtypes_to_field_groups_dict['field_groups'] = field_groups + if not field_groups: obtypes_to_field_groups_dict_list.remove(obtypes_to_field_groups_dict) + if not obtypes_to_field_groups_dict_list: vx_field_info.pop(obs_time_type) # Create dictionary containing the temporal characteristics as keys and # a string list of obs types to verify as the values. obs_time_type_to_obtypes_dict = dict() - for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.items(): - obtype_list = [the_dict['obtype'] for the_dict in obtypes_to_fcst_fields_dict_list] + for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.items(): + obtype_list = [a_dict['obtype'] for a_dict in obtypes_to_field_groups_dict_list] obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list # Initialize the return variable. 
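For reference, a standalone sketch (not part of the patch) of what the filtering above yields for a hypothetical VX_FIELD_GROUPS setting; the obs-type/field-group mapping mirrors the vx_field_info dictionary defined in this hunk:

    # Keep only obs types that have at least one field group selected for
    # verification, grouped by their time type (cumulative vs. instantaneous).
    vx_field_groups = ["APCP", "REFC", "ADPSFC"]   # hypothetical VX_FIELD_GROUPS

    vx_field_info = {
        'cumul': [{'obtype': 'CCPA',   'field_groups': ['APCP']},
                  {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}],
        'inst':  [{'obtype': 'MRMS',   'field_groups': ['REFC', 'RETOP']},
                  {'obtype': 'NDAS',   'field_groups': ['ADPSFC', 'ADPUPA']}],
    }

    obs_time_type_to_obtypes_dict = {}
    for obs_time_type, obtype_dicts in vx_field_info.items():
        obtypes = [d['obtype'] for d in obtype_dicts
                   if any(fg in vx_field_groups for fg in d['field_groups'])]
        if obtypes:
            obs_time_type_to_obtypes_dict[obs_time_type] = obtypes

    print(obs_time_type_to_obtypes_dict)
    # -> {'cumul': ['CCPA'], 'inst': ['MRMS', 'NDAS']}

Obs types whose field groups are all excluded (NOHRSC in this hypothetical case) simply drop out, so no get_obs or vx tasks are generated for them.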
diff --git a/ush/setup.py b/ush/setup.py index 703bc094fd..8a1b8e21c5 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -706,57 +706,62 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - vx_fields_all = {} - vx_metatasks_all = {} - - vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] - - vx_fields_all["NOHRSC"] = ["ASNOW"] - vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] - - vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] - - vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_get_obs_ndas", - "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] - - # If there are no vx fields specified, remove those tasks that are necessary - # for all observation types. - vx_fields = vx_config["VX_FIELDS"] - if not vx_fields: + vx_field_groups_all_by_obtype = {} + vx_metatasks_all_by_obtype = {} + + vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] + vx_metatasks_all_by_obtype["CCPA"] \ + = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", + "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", + "metatask_GridStat_CCPA_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_CCPA", + "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] + vx_metatasks_all_by_obtype["NOHRSC"] \ + = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] + vx_metatasks_all_by_obtype["MRMS"] \ + = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", + "metatask_GenEnsProd_EnsembleStat_MRMS", + "metatask_GridStat_MRMS_ensprob"] + + vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_metatasks_all_by_obtype["NDAS"] \ + = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", + "metatask_PointStat_NDAS_all_mems", + "metatask_GenEnsProd_EnsembleStat_NDAS", + "metatask_PointStat_NDAS_ensmeanprob"] + + # If there are no field groups specified for verification, remove those + # tasks that are common to all observation types. + vx_field_groups = vx_config["VX_FIELD_GROUPS"] + if not vx_field_groups: metatask = "metatask_check_post_output_all_mems" rocoto_config['tasks'].pop(metatask) - # If for a given obstype no fields are specified, remove all vx metatasks - # for that obstype. 
- for obstype in vx_fields_all: - vx_fields_by_obstype = [field for field in vx_fields if field in vx_fields_all[obstype]] - if not vx_fields_by_obstype: - for metatask in vx_metatasks_all[obstype]: + # If for a given obs type none of its field groups are specified for + # verification, remove all vx metatasks for that obs type. + for obtype in vx_field_groups_all_by_obtype: + #vx_field_groups_crnt_obtype = [field for field in vx_fields if field in vx_fields_all[obtype]] + vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) + if not vx_field_groups_crnt_obtype: + for metatask in vx_metatasks_all_by_obtype[obtype]: if metatask in rocoto_config['tasks']: logging.info(dedent( f""" - Removing verification [meta]task + Removing verification (meta)task "{metatask}" - from workflow since no fields belonging to observation type "{obstype}" + from workflow since no fields belonging to observation type "{obtype}" are specified for verification.""" )) rocoto_config['tasks'].pop(metatask) diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 18bc4d453f..017404aa2e 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] +valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From 3817033cbdcd83484b101cdc97c446430d3a2202 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 24 Oct 2024 06:25:31 -0600 Subject: [PATCH 135/208] Rename and reformat the App variables containing METplus templates for obs files so that they are lists of strings (rather than just string scalars) that mimic python dictionaries, with pairs of elements specifying the field group(s) and corresponding file name template, respectively; fix comments accordingly; rename some local variables for clarity. 
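The "lists of strings ... that mimic python dictionaries" referred to above are flat [field_group, template, field_group, template, ...] lists; the get_obs.py hunk in this patch folds them into real dictionaries. A minimal standalone sketch of that pairing, using the MRMS template values introduced by this patch:

    # Fold a flat [key1, value1, key2, value2, ...] list into a dict.  The
    # entries below mirror the OBS_MRMS_FN_TEMPLATES default in this patch.
    obs_mrms_fn_templates = [
        'REFC',  '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
        'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
    ]

    obs_fn_templates_by_fg = {}
    for i in range(0, len(obs_mrms_fn_templates), 2):
        field_group = obs_mrms_fn_templates[i]
        fn_template = obs_mrms_fn_templates[i + 1]
        obs_fn_templates_by_fg[field_group] = fn_template

    print(obs_fn_templates_by_fg['REFC'])
    # -> {valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2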
--- ...onal_run_met_genensprod_or_ensemblestat.sh | 4 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 4 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 4 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 4 +- scripts/exregional_run_met_pb2nc_obs.sh | 4 +- scripts/exregional_run_met_pcpcombine.sh | 2 +- ...g.MET_ensemble_verification_winter_wx.yaml | 5 +- ush/config_defaults.yaml | 185 ++++++++++++------ ush/get_obs.py | 109 +++++------ 9 files changed, 180 insertions(+), 141 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 475417ee53..40cb510f3e 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -146,12 +146,12 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index a6130ba50d..3cb3658588 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -185,13 +185,13 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 75332e4929..4d6ae4fedb 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -132,11 +132,11 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 382bd71ac8..32a987e96c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -132,11 +132,11 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + 
OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index e93387ed0a..a7b4e691a8 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -136,7 +136,7 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_FN_TEMPLATES[1]} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" @@ -163,7 +163,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do eval_METplus_timestr_tmpl \ init_time="${yyyymmdd_task}00" \ fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_FN_TEMPLATES[1]}" \ outvarname_evaluated_timestr="fp" if [[ -f "${fp}" ]]; then diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 590ceb43ef..ee06ef8df7 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -195,7 +195,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + fn_template=$(eval echo \${OBS_${OBTYPE}_FN_TEMPLATES[1]}) OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 018b8abbc6..fc6c9f56af 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -32,6 +32,7 @@ global: NUM_ENS_MEMBERS: 10 verification: VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} - {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', + '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 220c5e6a2b..a4867261dd 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2372,48 +2372,108 @@ verification: MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # - # OBS_[CCPA_APCP|NOHRSC_ASNOW|MRMS_[REFC|RETOP]|NDAS_ADPSFCandADPUPA]_FN_TEMPLATE: - # File name templates for various obs type and vx field group combinations. + # OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES: + # File name templates for various obs types. These are meant to be used + # in METplus configuration files and thus contain METplus time formatting + # strings. Each of these variables is a python list containing pairs of + # values. The first element of each pair specifies the verification field + # group(s) for which the file name template will be needed, and the second + # element is the file name template itself, which may include a leading + # relative directory. 
(Here, by "verification field group" we mean a + # group of fields that is verified together in the workflow.) For example, + # for the CCPA obs type, the variable name is OBS_CCPA_FN_TEMPLATES, and + # its default value contains only one pair of values given by + # + # [ 'APCP', + # '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + # {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] + # + # Thus, if CCPA_OBS_AVAIL_INTVL_HRS is set to 1 above (i.e. the CCPA obs + # are available every 1 hour), then for a valid time of 2024042903, the + # obs file name (including a relative path) to look for and, if necessary, + # create, will be + # + # 20240429/ccpa.t03z.01h.hrap.conus.gb2 + # + # This file will be used in the verification of fields under the APCP + # field group (which consist of accumulated precipitation for various + # accumulation intervals). # # Notes: # - # * These are relative to the obs base directories + # * The file name templates are relative to the obs base directories given + # in the variables + # # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR - # defined above. Thus, the full template to the obs files is given, e.g. - # for CCPA obs, by {CCPA_OBS_DIR}/{OBS_CCPA_APCP_FN_TEMPLATE}. - # - # * These may represent file names only, or they may include relative paths - # before the file names. - # - # * These templates must contain full information about the year, month, - # day, and hour by including METplus time strings that serve as templates - # for this information. Some of this information may be in the relative - # directory portion and the rest in the file name, or there may be no - # relative directory portion and all of it may be in the file name, but - # all four pieces of timing information must be present somewhere in - # this template as METplus time strings. Otherwise, obs files created - # by the get_obs tasks for different days might overwrite each other. - # - # * If one or more of the obs files specified by this full path do not - # exist on disk, all the files will be created by first retrieving "raw" - # versions of them from a data store (e.g. NOAA's HPSS) and then placing - # these raw files in the locations specified by this full path template. + # + # defined above. Thus, the template for the full path to the obs files + # is given, e.g. for CCPA obs, by + # + # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]}, + # + # where the [1] indicates the second element of the list OBS_CCPA_FN_TEMPLATES. + # + # * The file name templates may represent file names only, or they may + # include leading relative directories. + # + # * The default values of these variables for the CCPA, NOHRSC, and NDAS + # obs types contain only one pair of values (because these obs types + # contain only one set of file that we use in the verification) while + # the default value for the MRMS obs type contains two pairs of values, + # one for obs files that contain composite reflectivity data and another + # for the ones that contain echo top data (simply because the MRMS obs + # do not group these two fields together in one set of file as do, for + # example, the NDAS obs). + # + # * Each template must contain full information about the year, month, + # day, and hour by including METplus time formatting strings that serve + # as templates for this information. Some of this information (e.g. + # the year, month, and day) may be in the relative directory portion + # of the template and the rest (e.g. 
the hour) in the file name, or + # there may be no relative directory portion and all of this information + # may be in the file name, but all four pieces of timing information + # must be present somewhere in each template as METplus time formatting + # strings. If not, obs files created by the get_obs tasks for different + # days might overwrite each other. + # + # * The workflow creates a get_obs task for each obs type that is needed + # in the verification and for each day on which that obs type is needed + # at at least some hours. That get_obs task first checks whether all + # the necessary obs files for that day already exist at the locations + # specified by the full path template(s) (which is formed by combining + # the base directory and the file name template). If for a given day + # one or more of these obs files do not exist on disk, the get_obs task + # will retrieve "raw" versions of these files from a data store (e.g. + # NOAA's HPSS) and will place them in a temporary "raw" directory. It + # will then move or copy these raw files to the locations specified by + # the full path template(s). # # * The raw obs files, i.e. the obs files as they are named and arranged - # in the data stores, may be different than the file path/name specified - # in these variables. The list of templates for raw files to search - # for in the data stores is given in the data retrieval configuration - # file at parm/data_locations.yml. Once retrieved, these raw files are - # renamed and relocated on disk to the locations specified by - # {..._OBS_DIR}/{..._FN_TEMPLATE}. - # - OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} - {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} - {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' - OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + # in the data stores and retrieved to the raw directories, may be + # arranged differently and/or have names that are different from the + # ones specified in the file name templates. If so, they are renamed + # while being moved or copied from the raw directories to the locations + # specified by the full path template(s). (The lists of templates for + # searching for and retrieving files from the data stores is different + # than the METplus templates described here; the former are given in + # the data retrieval configuration file at parm/data_locations.yml.) + # + # * When the ex-scripts for the various vx tasks are converted from bash + # to python scripts, these variables should be converted from python + # lists to python dictionaries, where the first element of each pair + # becomes the key and the second becomes the value. This currently + # cannot be done due to limitations in the workflow on converting + # python dictionaries to bash variables. + # + OBS_CCPA_FN_TEMPLATES: [ 'APCP', + '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." 
~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', + '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] + OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] # # Time interval (in hours) at which various types of obs are available on # NOAA's HPSS. @@ -2429,13 +2489,13 @@ verification: NDAS_OBS_AVAIL_INTVL_HRS: 1 # # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: - # Boolean flag specifying whether to remove the "raw" observation - # directories after pulling the specified type of obs (CCPA, NOHRSC, - # MRMS, or NOHRSC). The raw directories are the ones in which the - # observation files are placed immediately after pulling them from - # a data store (e.g. NOAA's HPSS) but before performing any processing - # on them (e.g. renaming the files or reorganizing their directory - # structure). + # Flag specifying whether to remove the "raw" observation directories + # after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or + # NOHRSC) from a data store (e.g. NOAA's HPSS). The raw directories + # are the ones in which the observation files are placed immediately + # after pulling them from the data store but before performing any + # processing on them such as renaming the files and/or reorganizing + # their directory structure. # REMOVE_RAW_OBS_CCPA: true REMOVE_RAW_OBS_NOHRSC: true @@ -2443,30 +2503,29 @@ verification: REMOVE_RAW_OBS_NDAS: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # Template used to specify the names of the output NetCDF observation - # files generated by the worfklow verification tasks that call the METplus - # PcpCombine tool on CCPA observations. (These files will contain obs - # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF - # format.) + # METplus file name template used to specify the names of the NetCDF + # files generated by the worfklow verification tasks that call METplus's + # PcpCombine tool on CCPA observations. These files will contain observed + # accumulated precip in NetCDF format for various accumulation intervals. # # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # Template used to specify the names of the output NetCDF observation - # files generated by the worfklow verification tasks that call the METplus - # PcpCombine tool on NOHRSC observations. (These files will contain obs - # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF - # format.) + # METplus file name template used to specify the names of the NetCDF + # files generated by the worfklow verification tasks that call METplus's + # PcpCombine tool on NOHRSC observations. These files will contain + # observed observed accumulated snow for various accumulaton intervals. # # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: - # Template used to specify the names of the output NetCDF observation - # files generated by the worfklow verification tasks that call the - # METplus Pb2nc tool on NDAS observations. (These files will contain - # obs ADPSFC or ADPUPA fields in NetCDF format.) - # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} - {{- "ccpa.t{valid?fmt=%H}z." 
~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' - OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} - {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' - OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}.nc' + # METplus file name template used to specify the names of the NetCDF + # files generated by the worfklow verification tasks that call METplus's + # Pb2nc tool on NDAS observations. These files will contain the observed + # ADPSFC or ADPUPA fields in NetCDF format (instead of NDAS's native + # prepbufr format). + # + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' # # VX_FCST_MODEL_NAME: # String that specifies a descriptive name for the model being verified. diff --git a/ush/get_obs.py b/ush/get_obs.py index f88ab9a27e..4079b15cea 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -322,53 +322,30 @@ def get_obs(config, obtype, yyyymmdd_task): # Get the base directory for the observations. obs_dir = vx_config[f'{obtype}_OBS_DIR'] - # For each observation type, set the group of fields contained in those - # observation files that we need for verification. Each group of fields - # is one that is verified together in the workflow. We assume there is - # a separate set of obs files for each such field group in the observations, - # and in the code below we loop over these sets of files as necessary. - # There are several scenarios to consider: + # Get from the verification configuration dictionary the list of METplus + # file name template(s) corresponding to the obs type. + obs_fn_templates = vx_config[f'OBS_{obtype}_FN_TEMPLATES'] + + # Note that the list obs_fn_templates consists of pairs of elements such + # that the first element of the pair represents the verification field + # group(s) for which an obs file name template will be needed and the + # second element is the template itself. For convenience, convert this + # information to a dictionary in which the field groups are the keys and + # the templates are the values. # - # * An obs type consists of only one set of files containing only one - # field. - # This is the case for CCPA and NOHRSC obs. CCPA obs consist only one - # set of files that contain APCP data, and NOHRSC obs consist of only - # one set of files that contain ASNOW data. - # - # * An obs type consists of more than one set of files, with each file - # containing a different field. - # This is the case for MRMS obs. These consist of two sets of files. - # The first set contains REFC data, and the second contains RETOP data. - # - # * An obs type consists of only one set of files, but each file contains - # multiple groups of fields needed for verification. - # This is the case for NDAS obs. 
These consist of a single set of files, - # but each file contains both the ADPSFC fields (like 2-m temperature) - # and ADPUPA fields (like 500-mb temperature) that are verified separately - # in the workflow tasks and thus are considered separate field groups. - # - # Other obs type and field group scenarios are also possible, but we do - # not describe them since they are not applicable to any of the obs types - # considered here. - if obtype == 'CCPA': - field_groups_in_obs = ['APCP'] - elif obtype == 'NOHRSC': - field_groups_in_obs = ['ASNOW'] - elif obtype == 'MRMS': - field_groups_in_obs = ['REFC', 'RETOP'] - elif obtype == 'NDAS': - field_groups_in_obs = ['ADPSFCandADPUPA'] - num_field_groups = len(field_groups_in_obs) - - # For each field group in the observations, get the METplus file name - # template for the observation files. Then combine these with the base - # directory to get the METplus template for the full path on disk to - # the processed obs files. If obs files do not already exist at these - # locations, they will be retrieved from HPSS and placed at these locations. - fp_proc_templates = [] - for fg in field_groups_in_obs: - fn_proc_template = vx_config[f'OBS_{obtype}_{fg}_FN_TEMPLATE'] - fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) + # Note: + # Once the ex-scripts for the vx tasks are converted from bash to python, + # the lists in the SRW App's configuration file containing the METplus + # obs file name template(s) (from which the variable obs_fn_templates + # was obtained above) can be converted to python dictionaries. Then the + # list-to-dictionary conversion step here will no longer be needed. + obs_fn_templates_by_fg = dict() + for i in range(0, len(obs_fn_templates), 2): + obs_fn_templates_by_fg[obs_fn_templates[i]] = obs_fn_templates[i+1] + + # For convenience, get the list of verification field groups for which + # the various obs file templates will be used. + field_groups_in_obs = obs_fn_templates_by_fg.keys() # #----------------------------------------------------------------------- # @@ -388,27 +365,25 @@ def get_obs(config, obtype, yyyymmdd_task): # For MRMS obs, set field-dependent parameters needed in forming grib2 # file names. - fields_in_filenames = [] - levels_in_filenames = [] + mrms_fields_in_obs_filenames = [] + mrms_levels_in_obs_filenames = [] if obtype == 'MRMS': - valid_mrms_field_groups = ['REFC', 'RETOP'] for fg in field_groups_in_obs: - if fg not in valid_mrms_field_groups: + if fg == 'REFC': + mrms_fields_in_obs_filenames.append('MergedReflectivityQCComposite') + mrms_levels_in_obs_filenames.append('00.50') + elif fg == 'RETOP': + mrms_fields_in_obs_filenames.append('EchoTop') + mrms_levels_in_obs_filenames.append('18_00.50') + else: msg = dedent(f""" - Invalid field group specified for obs type: + Field and level names have not been specified for this {obtype} field + group: {obtype = } {fg = } - Valid field group are: - {valid_mrms_field_groups} """) logging.error(msg) raise ValueError(msg) - if fg == 'REFC': - fields_in_filenames.append('MergedReflectivityQCComposite') - levels_in_filenames.append('00.50') - elif fg == 'RETOP': - fields_in_filenames.append('EchoTop') - levels_in_filenames.append('18_00.50') # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. Set these starting @@ -477,7 +452,8 @@ def get_obs(config, obtype, yyyymmdd_task): # files, i.e. the files as they are named and arranged within the archive # (tar) files on HPSS. 
all_fp_proc_dict = {} - for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): + for fg, fn_proc_tmpl in obs_fn_templates_by_fg.items(): + fp_proc_tmpl = os.path.join(obs_dir, fn_proc_tmpl) all_fp_proc_dict[fg] = [] for yyyymmddhh in obs_retrieve_times_crnt_day: # Set the lead hour, i.e. the number of hours from the beginning of the @@ -489,7 +465,7 @@ def get_obs(config, obtype, yyyymmdd_task): cmd = '; '.join(['export USHdir=' + ushdir, 'export yyyymmdd_task=' + yyyymmdd_task_str, 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, + 'export METplus_timestr_tmpl=' + fp_proc_tmpl, os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) fp_proc = result.stdout.strip() @@ -527,7 +503,10 @@ def get_obs(config, obtype, yyyymmdd_task): # If the number of obs files that already exist on disk is equal to the # number of obs files needed, then there is no need to retrieve any files. - num_files_needed = len(obs_retrieve_times_crnt_day)*num_field_groups + # The number of obs files needed (i.e. that need to be staged) is equal + # to the number of times in the current day that obs are needed times the + # number of sets of files that the current obs type contains. + num_files_needed = len(obs_retrieve_times_crnt_day)*len(obs_fn_templates_by_fg) if num_existing_files == num_files_needed: msg = dedent(f""" @@ -545,7 +524,7 @@ def get_obs(config, obtype, yyyymmdd_task): # from which files will be retrieved. arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] msg = dedent(f""" - At least some obs files needed needed for the current day (yyyymmdd_task) + At least some obs files needed for the current day (yyyymmdd_task) do not exist on disk: {yyyymmdd_task = } The number of obs files needed for the current day is: @@ -817,7 +796,7 @@ def get_obs(config, obtype, yyyymmdd_task): '--valid_time', yyyymmddhh_str, \ '--source', basedir_raw, \ '--outdir', os.path.join(basedir_raw, 'topofhour'), \ - '--product', fields_in_filenames[i], \ + '--product', mrms_fields_in_obs_filenames[i], \ '--no-add_vdate_subdir']) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) rc = result.returncode @@ -836,7 +815,7 @@ def get_obs(config, obtype, yyyymmdd_task): fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2' elif obtype == 'MRMS': hr = yyyymmddhh.hour - fn_raw = fields_in_filenames[i] + '_' + levels_in_filenames[i] \ + fn_raw = mrms_fields_in_obs_filenames[i] + '_' + mrms_levels_in_obs_filenames[i] \ + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2' fn_raw = os.path.join('topofhour', fn_raw) elif obtype == 'NDAS': From c1bfb8f1cf2ee710d590b11fe0e07f29eded1c70 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 24 Oct 2024 08:11:53 -0600 Subject: [PATCH 136/208] Fix comments. --- ush/config_defaults.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index a4867261dd..6a2e31a94e 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2558,12 +2558,12 @@ verification: VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # - # Set the forecast output interval to use for verification purposes. - # If the forecasts to be verified are being run in the SRW (i.e. 
they - # are not staged from another forecast model), then this should be set - # set to the SRW's forecast output interval, but such a variable is - # currently not available in this configuration file. Instead, for - # now we set it to a default value of 1 hour. + # VX_FCST_OUTPUT_INTVL_HRS: + # The forecast output interval to use for verification purposes. The + # default value is currently 1 hour, but if/when a variable is created + # in this configuration file that specifies the forecast output interval + # for native SRW forecasts, then the default value of VX_FCST_OUTPUT_INTVL_HRS + # should be set to that. # VX_FCST_OUTPUT_INTVL_HRS: 1 # From 95a372c084a8ac7734634212b5ee75150b831919 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 24 Oct 2024 12:33:00 -0600 Subject: [PATCH 137/208] Add a check to make sure obs base directories are distinct for the various types of obs. --- ush/setup.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ush/setup.py b/ush/setup.py index 8a1b8e21c5..a4bcab3b32 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -768,6 +768,36 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # If there are at least some field groups to verify, then make sure that + # the base directories in which retrieved obs files will be placed are + # distinct for the different obs types. + # + # ----------------------------------------------------------------------- + # + if vx_field_groups: + obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] + obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} + obs_basedirs_orig = list(obs_basedirs_dict.values()) + obs_basedirs_uniq = list(set(obs_basedirs_orig)) + if len(obs_basedirs_orig) != len(obs_basedirs_uniq): + msg1 = dedent(f""" + The base directories for the obs files must be distinct, but at least two + are identical:""") + msg2 = '' + for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): + msg2 = msg2 + dedent(f""" + {obs_basedir_var_name} = {obs_dir}""") + msg3 = dedent(f""" + Modify these in the SRW App's user configuration file to make them distinct + and rerun. + """) + msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 + logging.error(msg) + raise ValueError(msg) + # + # ----------------------------------------------------------------------- + # # The "cycled_from_second" cycledef in the default workflow configuration # file (default_workflow.yaml) requires the starting date of the second # cycle. That is difficult to calculate in the yaml file itself because From e47cfe60aabe33b255cec636ba421084371b0f87 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 26 Oct 2024 18:46:33 -0600 Subject: [PATCH 138/208] (1) Reorganize verification section in config_defaults.yaml so that parameters are grouped together in a way that is more natural; (2) update and improve description of variables in the verification section of config_defaults.yaml; (3) update documentation in ConfigWorkflow.rst to reflect the updates in config_defaults.yaml. 
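As a side note on the obs base-directory uniqueness check in the setup.py hunk above: the duplicate detection relies on a list-to-set conversion dropping repeated values. A small standalone sketch with hypothetical directories (not from any real configuration):

    # Two obs types pointing at the same base directory should be rejected.
    obs_basedirs_dict = {
        'CCPA_OBS_DIR':   '/data/obs/ccpa',
        'NOHRSC_OBS_DIR': '/data/obs/nohrsc',
        'MRMS_OBS_DIR':   '/data/obs/shared',
        'NDAS_OBS_DIR':   '/data/obs/shared',   # duplicate of MRMS_OBS_DIR
    }
    obs_basedirs_orig = list(obs_basedirs_dict.values())
    obs_basedirs_uniq = list(set(obs_basedirs_orig))

    # Converting to a set drops duplicates, so a length mismatch means at
    # least two obs types share a base directory.
    if len(obs_basedirs_orig) != len(obs_basedirs_uniq):
        print("ERROR: at least two obs base directories are identical")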
--- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 404 ++++++++++++++---- ush/config_defaults.yaml | 382 +++++++++-------- 2 files changed, 526 insertions(+), 260 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 14fccdd5e5..df9a0dfa22 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -293,6 +293,9 @@ Directory Parameters ``EXPTDIR``: (Default: ``'{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}'``) The full path to the experiment directory. By default, this value will point to ``"${EXPT_BASEDIR}/${EXPT_SUBDIR}"``, but the user can define it differently in the configuration file if desired. +``WFLOW_FLAG_FILES_DIR``: (Default: ``'{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}'``) + Directory in which flag files marking completion of various workflow tasks can be placed. + Pre-Processing File Separator Parameters -------------------------------------------- @@ -1582,26 +1585,21 @@ Pressure Tendency Diagnostic ``PRINT_DIFF_PGR``: (Default: false) Option to turn on/off the pressure tendency diagnostic. -Verification Parameters -========================== +Verification (VX) Parameters +================================= Non-default parameters for verification tasks are set in the ``verification:`` section of the ``config.yaml`` file. -General Verification Parameters ---------------------------------- - -``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) - Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. - -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. +.. note:: + The verification tasks in the SRW App are based on the :ref:`METplus ` + verification software developed at the Developmental Testbed Center (:ref:`DTC`). + :ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. + Full documentation for METplus is available on the `METplus website `__. .. _METParamNote: .. note:: - Where a date field is required: + Where a date field is required: * ``YYYY`` refers to the 4-digit valid year * ``MM`` refers to the 2-digit valid month * ``DD`` refers to the 2-digit valid day of the month @@ -1609,111 +1607,345 @@ METplus Parameters * ``mm`` refers to the 2-digit valid minutes of the hour * ``SS`` refers to the two-digit valid seconds of the hour -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. 
-``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) - User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. +General VX Parameters +--------------------------------- - .. note:: - Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. +``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]) + The groups of fields (some of which may consist of only a single field) on which + to run verification. -``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) - User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + Since accumulated snowfall (``ASNOW``) is often not of interest in non-winter + cases and because observation files for ``ASNOW`` are not available on NOAA + HPSS for retrospective cases before March 2020, by default ``ASNOW`` is not + included ``VX_FIELD_GROUPS``, but it may be added to this list in order to + include the verification tasks for ``ASNOW`` in the workflow. Valid values: + ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"ADPSFC"`` | ``"ADPUPA"`` -``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) - User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. +``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) + The accumulation intervals (in hours) to include in the verification of + accumulated precipitation (APCP). If ``VX_FIELD_GROUPS`` contains ``"APCP"``, + then ``VX_APCP_ACCUMS_HRS`` must contain at least one element. Otherwise, + ``VX_APCP_ACCUMS_HRS`` will be ignored. Valid values: ``1`` | ``3`` | ``6`` | ``24`` -Templates for Observation Files ---------------------------------- +``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ]) + The accumulation intervals (in hours) to include in the verification of + accumulated snowfall (ASNOW). If ``VX_FIELD_GROUPS`` contains ``"ASNOW"``, + then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. Otherwise, + ``VX_ASNOW_ACCUMS_HRS`` will be ignored. 
Valid values: ``6`` | ``12`` | ``18`` | ``24`` + +``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_config_[det|ens].yaml``) + Names of configuration files for deterministic and ensemble verification + that specify the field groups, field names, levels, and (if applicable) + thresholds for which to run verification. These are relative to the + directory ``METPLUS_CONF`` in which the METplus config templates are + located. They may include leading relative paths before the file + names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. -This section includes template variables for :term:`CCPA`, :term:`MRMS`, :term:`NOHRSC`, and :term:`NDAS` observation files. +``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) + Template for base (i.e. top-level) directory in which METplus will place + its output. -``OBS_CCPA_APCP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2'``) - File name template for CCPA accumulated precipitation (APCP) observations. This template is used by the workflow tasks that call the METplus *PcpCombine* tool on CCPA obs to find the input observation files containing 1-hour APCP and then generate NetCDF files containing either 1-hour or greater than 1-hour APCP. -``OBS_NOHRSC_ASNOW_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2'``) - File name template for NOHRSC snow observations. +METplus-Specific Parameters +----------------------------------- -``OBS_MRMS_REFC_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for :term:`MRMS` reflectivity observations. +``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) + Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loudest. -``OBS_MRMS_RETOP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for MRMS echo top observations. -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE``: (Default: ``'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'``) - File name template for :term:`NDAS` surface and upper air observations. This template is used by the workflow tasks that call the METplus *Pb2nc* tool on NDAS obs to find the input observation files containing ADP surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate NetCDF versions of these files. +VX Parameters for Observations +------------------------------------- -``OBS_NDAS_SFCorUPA_FN_METPROC_TEMPLATE``: (Default: ``'${OBS_NDAS_SFCorUPA_FN_TEMPLATE}.nc'``) - File name template for NDAS surface and upper air observations after processing by MET's *pb2nc* tool (to change format to NetCDF). +.. note:: + The observation types that the SRW App can currently retrieve (if necessary) + and use in verification are: + * CCPA (Climatology-Calibrated Precipitation Analysis) + * NOHRSC (National Operational Hydrologic Remote Sensing Center) + * MRMS (Multi-Radar Multi-Sensor) + * NDAS (NAM Data Assimilation System) + The script ``ush/get_obs.py`` contains further details on the files and + directory structure of each obs type. -``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: (Default: ``'${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc'``) - Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus *PcpCombine* tool on CCPA observations. 
(These files will contain observations of accumulated precipitation [APCP], both for 1 hour and for > 1 hour accumulation periods, in NetCDF format.) +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS``: (Defaults: [1|6|1|1]) + Time interval (in hours) at which the various types of obs are available + on NOAA's HPSS. + + Note that MRMS files are in fact available every few minutes, but here + we set the obs availability interval to 1 hour because currently that + is the shortest output interval for forecasts, i.e. the forecasts cannot + (yet) support sub-hourly output. + +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/[ccpa|nohrsc|mrms|ndas]"``) + Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + the verification tasks are located. If the files do not exist, they + will be retrieved and placed under this directory. Note that: + + * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + they are not already staged on disk, then the user must have write + permission to this directory. Otherwise, the ``get_obs`` workflow + tasks that attempt to create these files will fail. + + * CCPA obs contain errors in the metadata for a certain range of dates + that need to be corrected during obs retrieval. This is described + in more detail in the script ``ush/get_obs.py``. + +``OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES``: + **Defaults:** + + ``OBS_CCPA_FN_TEMPLATES``: + .. code-block:: console + + [ 'APCP', + '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] + + ``OBS_NOHRSC_FN_TEMPLATES``: + .. code-block:: console + + [ 'ASNOW', + '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] + + ``OBS_MRMS_FN_TEMPLATES``: + .. code-block:: console + + [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + + ``OBS_NDAS_FN_TEMPLATES``: + .. code-block:: console + + [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + + File name templates for various obs types. These are meant to be used + in METplus configuration files and thus contain METplus time formatting + strings. Each of these variables is a python list containing pairs of + values. The first element of each pair specifies the verification field + group(s) for which the file name template will be needed, and the second + element is the file name template itself, which may include a leading + relative directory. (Here, by "verification field group", we mean a + group of fields that is verified together in the workflow; see the + description of the variable ``VX_FIELD_GROUPS``.) For example, for CCPA + obs, the variable name is ``OBS_CCPA_FN_TEMPLATES``. From the default value + of this variable given above, we see that if ``CCPA_OBS_AVAIL_INTVL_HRS`` + is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + and the valid time is 2024042903, then the obs file (including a relative + path) to look for and, if necessary, create is + + ``20240429/ccpa.t03z.01h.hrap.conus.gb2`` + + This file will be used in the verification of fields under the APCP + field group (which consist of accumulated precipitation for the + accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``). 
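As a rough stand-in for the METplus templating that performs this expansion (the real workflow delegates it to METplus's own string-template routines), the following sketch shows how the default CCPA template resolves to the file name quoted above for a valid time of 2024042903. The function name here is illustrative only and handles just the "{valid?fmt=...}" tokens.

    import re
    from datetime import datetime

    def expand_valid_tokens(template, valid):
        # Substitute each "{valid?fmt=...}" token with the strftime-formatted valid time.
        return re.sub(r"\{valid\?fmt=([^}]+)\}",
                      lambda m: valid.strftime(m.group(1)), template)

    tmpl = "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2"
    print(expand_valid_tokens(tmpl, datetime(2024, 4, 29, 3)))
    # -> 20240429/ccpa.t03z.01h.hrap.conus.gb2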
+ + Note that: + + * The file name templates are relative to the obs base directories given in + the variables + + ``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR`` + + defined above. Thus, the template for the full path to the obs files + is given, e.g. for CCPA obs, by + + .. code-block:: console + + CCPA_OBS_DIR/OBS_CCPA_FN_TEMPLATES[1] + + where the ``[1]`` indicates the second element of the list ``OBS_CCPA_FN_TEMPLATES``. + + * The file name templates may represent file names only, or they may + include leading relative directories. + + * The default values of these variables for the CCPA, NOHRSC, and NDAS + obs types contain only one pair of values (because these obs types + contain only one set of files that we use in the verification) while + the default value for the MRMS obs type contains two pairs of values, + one for the set of files that contains composite reflectivity data + and another for the set that contains echo top data. This is simply + because the MRMS obs type does not group all its fields together into + one set of files as does, for example, the NDAS obs type. + + * Each file name template must contain full information about the year, + month, day, and hour by including METplus time formatting strings for + this information. Some of this information (e.g. the year, month, + and day) may be in the relative directory portion of the template and + the rest (e.g. the hour) in the file name, or there may be no relative + directory portion and all of this information may be in the file name, + but all four pieces of timing information must be present somewhere in + each template as METplus time formatting strings. If not, obs files + created by the ``get_obs`` tasks for different days might overwrite each + other. + + * The workflow generation scripts create a ``get_obs`` task for each obs + type that is needed in the verification and for each day on which that + obs type is needed at at least some hours. That ``get_obs`` task first + checks whether all the necessary obs files for that day already exist + at the locations specified by the full path template(s) (which are + obtained by combining the base directories [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + with the file name template(s)). If for a given day one or more of + these obs files do not exist on disk, the ``get_obs`` task will retrieve + "raw" versions of these files from a data store (e.g. NOAA's HPSS) + and will place them in a temporary "raw" directory. It will then + move or copy these raw files to the locations specified by the full + path template(s). + + * The raw obs files, i.e. the obs files as they are named and arranged + in the data stores and retrieved and placed in the raw directories, + may be arranged differently and/or have names that are different from + the ones specified in the file name templates. If so, they are renamed + while being moved or copied from the raw directories to the locations + specified by the full path template(s). (The lists of templates for + searching for and retrieving files from the data stores is different + than the METplus templates described here; the former are given in + the data retrieval configuration file at ``parm/data_locations.yml``.) + + * When the ex-scripts for the various vx tasks are converted from bash + to python scripts, these variables should be converted from python + lists to python dictionaries, where the first element of each pair + becomes the key and the second becomes the value. 
This currently + cannot be done due to limitations in the workflow on converting + python dictionaries to bash variables. + +``REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]``: (Defaults: [True|True|True|True]) + Flag specifying whether to remove the "raw" observation directories + after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or + NOHRSC) from a data store (e.g. NOAA's HPSS). The raw directories + are the ones in which the observation files are placed immediately + after pulling them from the data store but before performing any + processing on them such as renaming the files and/or reorganizing + their directory structure. + +``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc'``) - Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus Pb2nc tool on NDAS observations. (These files will contain obs ADPSFC or ADPUPA fields in NetCDF format.) + .. code-block:: console + {%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }} + METplus template for the names of the NetCDF files generated by the + worfklow verification tasks that call METplus's PcpCombine tool on + CCPA observations. These files will contain observed accumulated + precipitation in NetCDF format for various accumulation intervals. -VX Forecast Model Name ------------------------- +``OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** -``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``) - String that specifies a descriptive name for the model being verified. This is used in forming the names of the verification output files as well as in the contents of those files. + .. code-block:: console -``VX_FIELDS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]) - The fields or groups of fields for which verification tasks will run. Because ``ASNOW`` is often not of interest in cases outside of winter, and because observation files are not located for retrospective cases on NOAA HPSS before March 2020, ``ASNOW`` is not included by default. ``"ASNOW"`` may be added to this list in order to include the related verification tasks in the workflow. Valid values: ``"APCP"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` | ``"ASNOW"`` - -``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) - The accumulation periods (in hours) to consider for accumulated precipitation (APCP). If ``VX_FIELDS`` contains ``"APCP"``, then ``VX_APCP_ACCUMS_HRS`` must contain at least one element. If ``VX_FIELDS`` does not contain ``"APCP"``, ``VX_APCP_ACCUMS_HRS`` will be ignored. Valid values: ``1`` | ``3`` | ``6`` | ``24`` + {%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }} -``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ]) - The accumulation periods (in hours) to consider for ``ASNOW`` (accumulated snowfall). If ``VX_FIELDS`` contains ``"ASNOW"``, then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. If ``VX_FIELDS`` does not contain ``"ASNOW"``, ``VX_ASNOW_ACCUMS_HRS`` will be ignored. 
Valid values: ``6`` | ``24`` + METplus template for the names of the NetCDF files generated by the + worfklow verification tasks that call METplus's PcpCombine tool on + NOHRSC observations. These files will contain observed accumulated + snowfall for various accumulaton intervals. -Verification (VX) Directories ------------------------------- +``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) + METplus template for the names of the NetCDF files generated by the + worfklow verification tasks that call METplus's Pb2nc tool on NDAS + observations. These files will contain the observed ADPSFC or ADPUPA + fields in NetCDF format (instead of NDAS's native prepbufr format). -``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) - Template for top-level directory containing forecast (but not obs) files that will be used as input into METplus for verification. +``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) + For verification tasks that need observational data, this specifies + the maximum number of observation files that may be missing. If more + than this number are missing, the verification task will error out. + This is a crude way of checking that there are enough obs to conduct + verification (crude because this number should probably depend on the + field being verified, the time interval between observations, the + length of the forecast, etc; an alternative may be to specify the + maximum allowed fraction of obs files that can be missing). -``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) - Template for top-level directory in which METplus will place its output. -``VX_NDIGITS_ENSMEM_NAMES``: 3 - Number of digits in the ensemble member names. This is a configurable variable to allow users to change its value (e.g., to go from "mem004" to "mem04") when using staged forecast files that do not use the same number of digits as the SRW App. +VX Parameters for Forecasts +---------------------------------- -Verification (VX) File Name and Path Templates ------------------------------------------------- +``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``) + String that specifies a descriptive name for the model being verified. + This is used in forming the names of the verification output files and + is also included in the contents of those files. + +``VX_FCST_OUTPUT_INTVL_HRS``: (Default: 1) + The forecast output interval (in hours) to assume for verification + purposes. + + .. note:: + If/when a variable is created in this configuration file that specifies + the forecast output interval for native SRW forecasts, it should be + used as the default value of this variable. + +``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) + METplus template for the name of the base (i.e. top-level) directory + containing the forecast files to use as inputs to the verification + tasks. -This section contains file name and path templates used in the verification (VX) tasks. +``FCST_SUBDIR_TEMPLATE``: + **Default:** + + .. 
code-block:: console + + {%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %} + + METplus template for the name of the subdirectory containing forecast + files to use as inputs to the verification tasks. + +``FCST_FN_TEMPLATE``: + **Default:** -``FCST_SUBDIR_TEMPLATE``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}'``) - A template for the subdirectory containing input forecast files for VX tasks. + .. code-block:: console + + {{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }} -``FCST_FN_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2'``) - A template for the forecast file names used as input to verification tasks. + METplus template for the names of the forecast files to use as inputs + to the verification tasks. -``FCST_FN_METPROC_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc'``) - A template for how to name the forecast files for accumulated precipitation (APCP) with greater than 1-hour accumulation (i.e., 3-, 6-, and 24-hour accumulations) after processing by ``PcpCombine``. +``FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** -``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) - For verification tasks that need observational data, this specifies the maximum number of observation files that may be missing. If more than this number are missing, the verification task will error out. - Note that this is a crude way of checking that there are enough observations to conduct verification since this number should probably depend on the field being verified, the time interval between observations, the length of the forecast, etc. An alternative may be to specify the maximum allowed fraction of observation files that can be missing (i.e., the number missing divided by the number that are expected to exist). + .. code-block:: console + + {{- "${NET_default}.t{init?fmt=%H}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }} + + METplus template for the names of the NetCDF files generated by the + worfklow verification tasks that call METplus's PcpCombine tool on + forecast output. These files will contain forecast accumulated + precipitation in NetCDF format for various accumulation intervals. + +``VX_NDIGITS_ENSMEM_NAMES``: (Default: 3) + Number of digits to assume/use in the forecast ensemble member identifier + string used in directory and file names and other instances in which the + ensemble member needs to be identified. 
For example, if this is set to + 3, the identifier for ensemble member 4 will be "mem004", while if it's + set to 2, the identifier will be "mem04". This is useful when verifying + staged forecast files from a forecasting model/system other than the + SRW that uses a different number of digits in the ensemble member + identifier string. ``NUM_MISSING_FCST_FILES_MAX``: (Default: 0) - For verification tasks that need forecast data, this specifies the maximum number of post-processed forecast files that may be missing. If more than this number are missing, the verification task will not be run. + For verification tasks that need forecast data, this specifies the + maximum number of post-processed forecast files that may be missing. + If more than this number are missing, the verification task will exit + with an error. + Coupled AQM Configuration Parameters ===================================== diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 6a2e31a94e..3957c3c0db 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2339,33 +2339,110 @@ global: # verification (vx) parameters #----------------------------- verification: + # + # General Verification Parameters + # ------------------------------- + # + # VX_FIELD_GROUPS: + # The groups of fields (some of which may consist of only a single field) + # on which to run verification. + # + # Since accumulated snowfall (ASNOW) is often not of interest in non-winter + # cases and because observation files for ASNOW are not available on NOAA + # HPSS for retrospective cases before March 2020, by default ASNOW is not + # included VX_FIELD_GROUPS, but it may be added to this list in order to + # include the verification tasks for ASNOW in the workflow. + # + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + # + # VX_APCP_ACCUMS_HRS: + # The accumulation intervals (in hours) to include in the verification of + # accumulated precipitation (APCP). If VX_FIELD_GROUPS contains "APCP", + # then VX_APCP_ACCUMS_HRS must contain at least one element. Otherwise, + # VX_APCP_ACCUMS_HRS will be ignored. + # + VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] + # + # VX_ASNOW_ACCUMS_HRS: + # The accumulation intervals (in hours) to include in the verification of + # accumulated snowfall (ASNOW). If VX_FIELD_GROUPS contains "ASNOW", + # then VX_ASNOW_ACCUMS_HRS must contain at least one element. Otherwise, + # VX_ASNOW_ACCUMS_HRS will be ignored. + # + VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] + # + # VX_CONFIG_[DET|ENS]_FN: + # Names of configuration files for deterministic and ensemble verification + # that specify the field groups, field names, levels, and (if applicable) + # thresholds for which to run verification. These are relative to the + # directory METPLUS_CONF in which the METplus config templates are + # located. They may include leading relative paths before the file + # names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. + # + VX_CONFIG_DET_FN: 'vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' + # + # VX_OUTPUT_BASEDIR: + # Template for base (i.e. top-level) directory in which METplus will place + # its output. + # + VX_OUTPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}' + # + # METplus-Specific Parameters + # ------------------------------- # # METPLUS_VERBOSITY_LEVEL: # Logging verbosity level used by METplus verification tools. 0 to 5, - # with 0 quiet and 5 loud. + # with 0 quiet and 5 loudest. 
# METPLUS_VERBOSITY_LEVEL: 2 # + # Observation-Specific Parameters + # ------------------------------- + # + # Note: + # The observation types that the SRW App can currently retrieve (if + # necessary) and use in verification are: + # + # * CCPA (Climatology-Calibrated Precipitation Analysis) + # * NOHRSC (National Operational Hydrologic Remote Sensing Center) + # * MRMS (Multi-Radar Multi-Sensor) + # * NDAS (NAM Data Assimilation System) + # + # The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + + # + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS: + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + # + # Note that MRMS files are in fact available every few minutes, but here + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for forecasts, i.e. the forecasts cannot + # (yet) support sub-hourly output. + # + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by # the verification tasks are located. If the files do not exist, they # will be retrieved and placed under this directory. # - # Notes: - # - # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), then - # the user must have write permission to this directory. Otherwise, - # the get_obs tasks that attempt to create these files will fail. - # - # * Do not set two or more of these directories to the same location. - # Otherwise, unexpected results and data loss may occur. + # Note that: # - # * The script ush/get_obs.py contains further details on the files and - # directory structure of each obs type. + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + # they are not already staged on disk, then the user must have write + # permission to this directory. Otherwise, the "get_obs" workflow tasks + # that attempt to create these files will fail. # # * CCPA obs contain errors in the metadata for a certain range of dates # that need to be corrected during obs retrieval. This is described - # in more detail in ush/get_obs.py. + # in more detail in the script ush/get_obs.py. # CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" @@ -2379,37 +2456,32 @@ verification: # values. The first element of each pair specifies the verification field # group(s) for which the file name template will be needed, and the second # element is the file name template itself, which may include a leading - # relative directory. (Here, by "verification field group" we mean a - # group of fields that is verified together in the workflow.) For example, - # for the CCPA obs type, the variable name is OBS_CCPA_FN_TEMPLATES, and - # its default value contains only one pair of values given by - # - # [ 'APCP', - # '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} - # {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] - # - # Thus, if CCPA_OBS_AVAIL_INTVL_HRS is set to 1 above (i.e. the CCPA obs - # are available every 1 hour), then for a valid time of 2024042903, the - # obs file name (including a relative path) to look for and, if necessary, - # create, will be + # relative directory. 
(Here, by "verification field group", we mean a + # group of fields that is verified together in the workflow; see the + # description of the variable VX_FIELD_GROUPS.) For example, for CCPA + # obs, the variable name is OBS_CCPA_FN_TEMPLATES. From the default value + # of this variable given above, we see that if CCPA_OBS_AVAIL_INTVL_HRS + # is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + # and the valid time is 2024042903, then the obs file (including a relative + # path) to look for and, if necessary, create is # # 20240429/ccpa.t03z.01h.hrap.conus.gb2 # # This file will be used in the verification of fields under the APCP - # field group (which consist of accumulated precipitation for various - # accumulation intervals). + # field group (which consist of accumulated precipitation for the + # accumulation intervals specified in VX_APCP_ACCUMS_HRS). # # Notes: # - # * The file name templates are relative to the obs base directories given - # in the variables + # * The file name templates are relative to the base directories given in + # the variables # # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR # # defined above. Thus, the template for the full path to the obs files # is given, e.g. for CCPA obs, by # - # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]}, + # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]} # # where the [1] indicates the second element of the list OBS_CCPA_FN_TEMPLATES. # @@ -2418,40 +2490,41 @@ verification: # # * The default values of these variables for the CCPA, NOHRSC, and NDAS # obs types contain only one pair of values (because these obs types - # contain only one set of file that we use in the verification) while + # contain only one set of files that we use in the verification) while # the default value for the MRMS obs type contains two pairs of values, - # one for obs files that contain composite reflectivity data and another - # for the ones that contain echo top data (simply because the MRMS obs - # do not group these two fields together in one set of file as do, for - # example, the NDAS obs). - # - # * Each template must contain full information about the year, month, - # day, and hour by including METplus time formatting strings that serve - # as templates for this information. Some of this information (e.g. - # the year, month, and day) may be in the relative directory portion - # of the template and the rest (e.g. the hour) in the file name, or - # there may be no relative directory portion and all of this information - # may be in the file name, but all four pieces of timing information - # must be present somewhere in each template as METplus time formatting - # strings. If not, obs files created by the get_obs tasks for different - # days might overwrite each other. - # - # * The workflow creates a get_obs task for each obs type that is needed - # in the verification and for each day on which that obs type is needed - # at at least some hours. That get_obs task first checks whether all - # the necessary obs files for that day already exist at the locations - # specified by the full path template(s) (which is formed by combining - # the base directory and the file name template). If for a given day - # one or more of these obs files do not exist on disk, the get_obs task - # will retrieve "raw" versions of these files from a data store (e.g. - # NOAA's HPSS) and will place them in a temporary "raw" directory. It - # will then move or copy these raw files to the locations specified by - # the full path template(s). 
+ # one for the set of files that contains composite reflectivity data + # and another for the set that contains echo top data. This is simply + # because the MRMS obs type does not group these two fields together + # one set of files as does, for example, the NDAS obs type. + # + # * Each file name template must contain full information about the year, + # month, day, and hour by including METplus time formatting strings for + # this information. Some of this information (e.g. the year, month, + # and day) may be in the relative directory portion of the template and + # the rest (e.g. the hour) in the file name, or there may be no relative + # directory portion and all of this information may be in the file name, + # but all four pieces of timing information must be present somewhere in + # each template as METplus time formatting strings. If not, obs files + # created by the "get_obs" tasks for different days might overwrite each + # other. + # + # * The workflow generation scripts create a "get_obs" task for each obs + # type that is needed in the verification and for each day on which that + # obs type is needed at at least some hours. That "get_obs" task first + # checks whether all the necessary obs files for that day already exist + # at the locations specified by the full path template(s) (which are + # obtained by combining the base directories [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # with the file name template(s)). If for a given day one or more of + # these obs files do not exist on disk, the "get_obs" task will retrieve + # "raw" versions of these files from a data store (e.g. NOAA's HPSS) + # and will place them in a temporary "raw" directory. It will then + # move or copy these raw files to the locations specified by the full + # path template(s). # # * The raw obs files, i.e. the obs files as they are named and arranged - # in the data stores and retrieved to the raw directories, may be - # arranged differently and/or have names that are different from the - # ones specified in the file name templates. If so, they are renamed + # in the data stores and retrieved and placed in the raw directories, + # may be arranged differently and/or have names that are different from + # the ones specified in the file name templates. If so, they are renamed # while being moved or copied from the raw directories to the locations # specified by the full path template(s). (The lists of templates for # searching for and retrieving files from the data stores is different @@ -2475,19 +2548,6 @@ verification: 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] # - # Time interval (in hours) at which various types of obs are available on - # NOAA's HPSS. - # - # Note that MRMS files are in fact available every few minutes, but here - # we set the obs availability interval to 1 hour because currently that - # is the shortest output interval for the forecast, i.e. the forecast - # cannot (yet) support sub-hourly output. - # - CCPA_OBS_AVAIL_INTVL_HRS: 1 - NOHRSC_OBS_AVAIL_INTVL_HRS: 6 - MRMS_OBS_AVAIL_INTVL_HRS: 1 - NDAS_OBS_AVAIL_INTVL_HRS: 1 - # # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Flag specifying whether to remove the "raw" observation directories # after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or @@ -2497,111 +2557,80 @@ verification: # processing on them such as renaming the files and/or reorganizing # their directory structure. 
# - REMOVE_RAW_OBS_CCPA: true - REMOVE_RAW_OBS_NOHRSC: true - REMOVE_RAW_OBS_MRMS: true - REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_CCPA: True + REMOVE_RAW_OBS_NOHRSC: True + REMOVE_RAW_OBS_MRMS: True + REMOVE_RAW_OBS_NDAS: True # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # METplus file name template used to specify the names of the NetCDF - # files generated by the worfklow verification tasks that call METplus's - # PcpCombine tool on CCPA observations. These files will contain observed - # accumulated precip in NetCDF format for various accumulation intervals. - # - # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # METplus file name template used to specify the names of the NetCDF - # files generated by the worfklow verification tasks that call METplus's - # PcpCombine tool on NOHRSC observations. These files will contain - # observed observed accumulated snow for various accumulaton intervals. - # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: - # METplus file name template used to specify the names of the NetCDF - # files generated by the worfklow verification tasks that call METplus's - # Pb2nc tool on NDAS observations. These files will contain the observed - # ADPSFC or ADPUPA fields in NetCDF format (instead of NDAS's native - # prepbufr format). + # METplus template for the names of the NetCDF files generated by the + # worfklow verification tasks that call METplus's PcpCombine tool on + # CCPA observations. These files will contain observed accumulated + # precipitation in NetCDF format for various accumulation intervals. # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + # + # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: + # METplus template for the names of the NetCDF files generated by the + # worfklow verification tasks that call METplus's PcpCombine tool on + # NOHRSC observations. These files will contain observed accumulated + # snowfall for various accumulaton intervals. + # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' + # + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # METplus template for the names of the NetCDF files generated by the + # worfklow verification tasks that call METplus's Pb2nc tool on NDAS + # observations. These files will contain the observed ADPSFC or ADPUPA + # fields in NetCDF format (instead of NDAS's native prepbufr format). + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' # - # VX_FCST_MODEL_NAME: - # String that specifies a descriptive name for the model being verified. - # This is used in forming the names of the verification output files as - # well as in the contents of those files. + # NUM_MISSING_OBS_FILES_MAX: + # For verification tasks that need observational data, this specifies + # the maximum number of observation files that may be missing. If more + # than this number are missing, the verification task will error out. 
+ # This is a crude way of checking that there are enough obs to conduct + # verification (crude because this number should probably depend on the + # field being verified, the time interval between observations, the + # length of the forecast, etc; an alternative may be to specify the + # maximum allowed fraction of obs files that can be missing). # - # VX_FIELD_GROUPS: - # The groups of fields (some of which may consist of a single field) on - # which to run verification. Because accumulated snow (ASNOW) is often - # not of interest in non-winter cases and because observation files for - # ASNOW are not available on NOAA HPSS for retrospective cases before - # March 2020, by default ASNOW is not included VX_FIELD_GROUPS, but it - # may be added to this list in order to include the verification tasks - # for ASNOW in the workflow. + NUM_MISSING_OBS_FILES_MAX: 2 # - # VX_APCP_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # APCP (accumulated precipitation). If VX_FIELD_GROUPS contains "APCP", - # then VX_APCP_ACCUMS_HRS must contain at least one element. If not, - # VX_APCP_ACCUMS_HRS will be ignored. + # Forecast-Specific Parameters + # ---------------------------- # - # VX_ASNOW_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # ASNOW (accumulated snowfall). If VX_FIELD_GROUPS contains "ASNOW", - # then VX_ASNOW_ACCUMS_HRS must contain at least one element. If not, - # VX_ASNOW_ACCUMS_HRS will be ignored. + # VX_FCST_MODEL_NAME: + # String that specifies a descriptive name for the model being verified. + # This is used in forming the names of the verification output files and + # is also included in the contents of those files. # VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] - VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] - VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # # VX_FCST_OUTPUT_INTVL_HRS: - # The forecast output interval to use for verification purposes. The - # default value is currently 1 hour, but if/when a variable is created - # in this configuration file that specifies the forecast output interval - # for native SRW forecasts, then the default value of VX_FCST_OUTPUT_INTVL_HRS - # should be set to that. + # The forecast output interval (in hours) to assume for verification + # purposes. + # Note: + # If/when a variable is created in this configuration file that specifies + # the forecast output interval for native SRW forecasts, it should be + # used as the default value of this variable. # VX_FCST_OUTPUT_INTVL_HRS: 1 # # VX_FCST_INPUT_BASEDIR: - # Template for top-level directory containing forecast (but not obs) - # files that will be used as input into METplus for verification. - # - # VX_OUTPUT_BASEDIR: - # Template for top-level directory in which METplus will place its - # output. + # METplus template for the name of the base (i.e. top-level) directory + # containing the forecast files to use as inputs to the verification + # tasks. # VX_FCST_INPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}' - VX_OUTPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}' - # - # Number of digits in the ensemble member names. This is a configurable - # variable to allow users to change its value (e.g. 
to go from "mem004" - # to "mem04") when using staged forecast files that do not use the same - # number of digits as the SRW App. - # - VX_NDIGITS_ENSMEM_NAMES: 3 - # - # File name and path templates used in the verification tasks. # # FCST_SUBDIR_TEMPLATE: - # Template for the subdirectory containing forecast files that are - # inputs to the verification tasks. + # METplus template for the name of the subdirectory containing forecast + # files to use as inputs to the verification tasks. # - # FCST_FN_TEMPLATE: - # Template for the names of the forecast files that are inputs to the - # verification tasks. - # - # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # Template used to specify the names of the output NetCDF forecast files - # generated by the worfklow verification tasks that call the METplus - # PcpCombine tool on forecasts. (These files will contain forecast APCP, - # both for 1 hour and for > 1 hour accumulation periods, in NetCDF - # format.) - # FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %} {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} {%- else %} @@ -2611,43 +2640,48 @@ verification: {%- endif %} {{- "/postprd" }} {%- endif %}' + # + # FCST_FN_TEMPLATE: + # METplus template for the names of the forecast files to use as inputs + # to the verification tasks. + # FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} {{- ".${ensmem_name}" }} {%- endif %} {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}' + # + # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: + # METplus template for the names of the NetCDF files generated by the + # worfklow verification tasks that call METplus's PcpCombine tool on + # forecast output. These files will contain forecast accumulated + # precipitation in NetCDF format for various accumulation intervals. + # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }} {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} {{- ".${ensmem_name}" }} {%- endif %} {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' # - # For verification tasks that need observational data, this specifies - # the maximum number of observation files that may be missing. If more - # than this number are missing, the verification task will error out. - # - # Note that this is a crude way of checking that there are enough obs to - # conduct verification since this number should probably depend on the - # field being verified, the time interval between observations, the - # length of the forecast, etc. An alternative may be to specify the - # maximum allowed fraction of obs files that can be missing (i.e. the - # number missing divided by the number that are expected to exist). + # VX_NDIGITS_ENSMEM_NAMES: + # Number of digits to assume/use in the forecast ensemble member identifier + # string used in directory and file names and other instances in which the + # ensemble member needs to be identified. For example, if this is set to + # 3, the identifier for ensemble member 4 will be "mem004", while if it's + # set to 2, the identifier will be "mem04". This is useful when verifying + # staged forecast files from a forecasting model/system other than the + # SRW that uses a different number of digits in the ensemble member + # identifier string. 
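The member-name formatting described in the comment above amounts to zero-padding the ensemble member index to VX_NDIGITS_ENSMEM_NAMES digits. An illustrative one-liner (the helper name is hypothetical, not SRW code):

    def ensmem_name(member, ndigits=3):
        # Zero-pad the member index, e.g. 4 -> "mem004" (ndigits=3) or "mem04" (ndigits=2).
        return f"mem{member:0{ndigits}d}"

    print(ensmem_name(4, 3))  # mem004
    print(ensmem_name(4, 2))  # mem04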
# - NUM_MISSING_OBS_FILES_MAX: 2 + VX_NDIGITS_ENSMEM_NAMES: 3 # + # NUM_MISSING_FCST_FILES_MAX: # For verification tasks that need forecast data, this specifies the # maximum number of post-processed forecast files that may be missing. - # If more than this number are missing, the verification task will not - # be run. + # If more than this number are missing, the verification task will exit + # with an error. # NUM_MISSING_FCST_FILES_MAX: 0 - # - # Names of configuration files for deterministic and ensemble vx that - # specify the field groups, field names, levels, and (if applicable) - # thresholds for which to run verification. - # - VX_CONFIG_DET_FN: 'vx_config_det.yaml' - VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' #---------------------------- # CPL_AQM config parameters From fd635cbc5d4c5f21b2b5155ed5a5051f94d721f4 Mon Sep 17 00:00:00 2001 From: Michael Kavulich Date: Mon, 28 Oct 2024 10:23:34 -0600 Subject: [PATCH 139/208] Refactor some bash to python, import filename templating directly from METplus (#3) * Replace call to custom templating script with direct invocation of metplus templating routines in get_obs.py * Replace set_leadhrs.sh with set_leadhrs.py * Fixes from pylint * Convert eval_METplus_timestr_tmpl.sh to python, redistribute some logic from set_leadhrs.py to this new script for efficiency; also fix some problems in set_leadhrs.py found by pylint * Refactor mrms_pull_topofhour.py to make it easily importable, call that function directly from get_obs.py * Remove the deprecated shell scripts * time_lag needs to be passed as an integer to set_leadhrs.py * Fixes to refactored mrms_pull_topofhour.py * Suggested test names --- modulefiles/tasks/derecho/get_obs.local.lua | 1 + modulefiles/tasks/gaea/get_obs.local.lua | 1 + modulefiles/tasks/hera/get_obs.local.lua | 3 +- modulefiles/tasks/hercules/get_obs.local.lua | 1 + modulefiles/tasks/jet/get_obs.local.lua | 3 +- modulefiles/tasks/noaacloud/get_obs.local.lua | 1 + modulefiles/tasks/orion/get_obs.local.lua | 1 + scripts/exregional_check_post_output.sh | 28 +- ...onal_run_met_genensprod_or_ensemblestat.sh | 34 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 21 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 21 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 21 +- scripts/exregional_run_met_pb2nc_obs.sh | 11 +- scripts/exregional_run_met_pcpcombine.sh | 32 +- tests/WE2E/run_WE2E_tests.py | 8 +- ...x-det_long-fcst_winter-wx_SRW-staged.yaml} | 0 ...-det_multicyc_fcst-overlap_ncep-hrrr.yaml} | 0 ...det_multicyc_first-obs-00z_ncep-hrrr.yaml} | 0 ...-det_multicyc_last-obs-00z_ncep-hrrr.yaml} | 0 ...ticyc_long-fcst-no-overlap_nssl-mpas.yaml} | 0 ...multicyc_long-fcst-overlap_nssl-mpas.yaml} | 0 ...vx-det_multicyc_no-00z-obs_nssl-mpas.yaml} | 0 ...t_multicyc_no-fcst-overlap_ncep-hrrr.yaml} | 0 ush/bash_utils/eval_METplus_timestr_tmpl.sh | 438 ------------------ ush/get_obs.py | 47 +- ush/mrms_pull_topofhour.py | 77 +-- ush/run_eval_METplus_timestr_tmpl.sh | 18 - ush/run_eval_metplus_timestr_tmpl.py | 63 +++ ush/set_leadhrs.py | 94 ++++ ush/set_leadhrs.sh | 334 ------------- ush/source_util_funcs.sh | 10 - 31 files changed, 313 insertions(+), 955 deletions(-) create mode 100644 modulefiles/tasks/derecho/get_obs.local.lua create mode 100644 modulefiles/tasks/gaea/get_obs.local.lua create mode 100644 modulefiles/tasks/hercules/get_obs.local.lua create mode 100644 modulefiles/tasks/noaacloud/get_obs.local.lua create mode 100644 modulefiles/tasks/orion/get_obs.local.lua rename 
tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml => config.vx-det_long-fcst_winter-wx_SRW-staged.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml => config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml => config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml} (100%) delete mode 100644 ush/bash_utils/eval_METplus_timestr_tmpl.sh delete mode 100755 ush/run_eval_METplus_timestr_tmpl.sh create mode 100644 ush/run_eval_metplus_timestr_tmpl.py create mode 100644 ush/set_leadhrs.py delete mode 100644 ush/set_leadhrs.sh diff --git a/modulefiles/tasks/derecho/get_obs.local.lua b/modulefiles/tasks/derecho/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/derecho/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/gaea/get_obs.local.lua b/modulefiles/tasks/gaea/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/gaea/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/hera/get_obs.local.lua b/modulefiles/tasks/hera/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/hera/get_obs.local.lua +++ b/modulefiles/tasks/hera/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/hercules/get_obs.local.lua b/modulefiles/tasks/hercules/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/hercules/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/jet/get_obs.local.lua b/modulefiles/tasks/jet/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/jet/get_obs.local.lua +++ b/modulefiles/tasks/jet/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/noaacloud/get_obs.local.lua b/modulefiles/tasks/noaacloud/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/noaacloud/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/orion/get_obs.local.lua 
b/modulefiles/tasks/orion/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/orion/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index f176c9a12e..2a66a2fecf 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -12,6 +12,7 @@ # ENSMEM_INDX # GLOBAL_VAR_DEFNS_FP # VAR +# METPLUS_ROOT (used by ush/set_leadhrs.py) # # Experiment variables # @@ -52,14 +53,6 @@ done # #----------------------------------------------------------------------- # -# Source files defining auxiliary functions for verification. -# -#----------------------------------------------------------------------- -# -. $USHdir/set_leadhrs.sh -# -#----------------------------------------------------------------------- -# # Save current shell options (in a global array). Then set new options # for this script/function. # @@ -122,15 +115,16 @@ ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="0" \ - lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ - base_dir="${VX_FCST_INPUT_BASEDIR}" \ - fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_lhrs_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="0" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ + --base_dir="${VX_FCST_INPUT_BASEDIR}" \ + --fn_template="${FCST_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 475417ee53..89f92e8e55 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -232,23 +231,28 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" +set -x if [ "${MetplusToolName}" = "GenEnsProd" ]; then - set_leadhrs_no_missing \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
+ elif [ "${MetplusToolName}" = "EnsembleStat" ]; then - set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" fi +echo "VX_LEADHR_LIST=$VX_LEADHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index a6130ba50d..27ac6f11e6 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -233,15 +232,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 75332e4929..70f13c27c1 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -176,15 +175,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
+ # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 382bd71ac8..b4e279218b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -175,15 +174,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index e93387ed0a..046f4fea67 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -160,11 +160,12 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do # create. sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp" + + fp=$( python3 $USHdir/run_eval_metplus_timestr_tmpl.py \ + --init_time="${yyyymmdd_task}00" \ + --fhr="${lhr}" \ + --fn_template="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}") || \ +print_err_msg_exit "Call to run_eval_metplus_timestr_tmpl.py failed with return code: $?" if [[ -f "${fp}" ]]; then print_info_msg " diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 590ceb43ef..23b14ce154 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -212,12 +211,14 @@ fi # #----------------------------------------------------------------------- # +set -x vx_intvl="$((10#${ACCUM_HH}))" -set_leadhrs_no_missing \ - lhr_min="${vx_intvl}" \ - lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${vx_intvl}" \ - outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_intvl}" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
# #----------------------------------------------------------------------- # @@ -250,15 +251,16 @@ for hr_end in ${subintvl_end_hrs[@]}; do Checking for the presence of files that will contribute to the ${vx_intvl}-hour accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... " - set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${hr_start}" \ - lhr_max="${hr_end}" \ - lhr_intvl="${subintvl}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_lhrs_list="tmp" + python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${hr_start}" \ + --lhr_max="${hr_end}" \ + --lhr_intvl="${subintvl}" \ + --base_dir="${base_dir}" \ + --fn_template="${fn_template}" \ + --num_missing_files_max="${num_missing_files_max}" \ + --time_lag="${time_lag%.*}" || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" done print_info_msg " diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index f983d3452e..992fac88a3 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -159,13 +159,7 @@ def run_we2e_tests(homedir, args) -> None: # test-specific options, then write resulting complete config.yaml starttime = datetime.now() starttime_string = starttime.strftime("%Y%m%d%H%M%S") - test_fn = os.path.basename(test) - # Set the test name to all characters between the initial "config." and - # the final ".yaml" in the file name. This will allow any characters to - # be used as part of the test name, in particular a ".". - prefix = 'config.' - suffix = '.yaml' - test_name = test_fn[test_fn.find(prefix)+len(prefix):test_fn.rfind(suffix)] + test_name = os.path.basename(test).split('.')[1] logging.debug(f"For test {test_name}, constructing config.yaml") test_cfg = load_config_file(test) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml similarity 
index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh deleted file mode 100644 index a4421958ee..0000000000 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ /dev/null @@ -1,438 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This function evaluates a METplus time-string template, i.e. a string -# (e.g. a file name template) containing one or more METplus time- -# formatting strings. -# -#----------------------------------------------------------------------- -# -function eval_METplus_timestr_tmpl() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). 
-# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "init_time" \ - "fhr" \ - "METplus_timestr_tmpl" \ - "outvarname_evaluated_timestr" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# -# print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local crnt_timefmt \ - crnt_timefmt_esc \ - evaluated_timestr \ - regex_search_tmpl \ - the_time \ - tmpl_remainder -# -#----------------------------------------------------------------------- -# -# Loop over all METplus time-formatting strings in the given METplus -# template and evaluate each using the given initial time (init_time) and -# forecast hour (fhr). -# -# Note that the while-loop below is over all METplus time-formatting -# strings of the form {...} in the template METplus_timestr_tmpl; it -# continues until all such time-formatting strings have been evaluated -# to actual times. -# -#----------------------------------------------------------------------- -# -# Regular expression used by the sed utility below to pick out the next -# METplus time-formatting string in the given METplus time-string template. -# - regex_search_tmpl="(.*)(\{.*\})(.*)" -# -# Initialize while-loop variables. -# - evaluated_timestr="${METplus_timestr_tmpl}" - - crnt_timefmt=$( printf "%s" "${METplus_timestr_tmpl}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - tmpl_remainder=$( printf "%s" "${METplus_timestr_tmpl}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - while [ ! -z "${crnt_timefmt}" ]; do - - eval_single_METplus_timefmt \ - init_time="${init_time}" \ - fhr="${fhr}" \ - METplus_timefmt="${crnt_timefmt}" \ - outvarname_evaluated_timefmt="the_time" -# -# Replace the next METplus time string in evaluated_timestr with an actual -# time. -# -# Note that when using sed, we need to escape various characters (question -# mark, closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. -# - crnt_timefmt_esc=$( echo "${crnt_timefmt}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - evaluated_timestr=$( echo "${evaluated_timestr}" | \ - $SED -n -r "s|(.*)(${crnt_timefmt_esc})(.*)|\1${the_time}\3|p" ) -# -# Set up values for the next iteration of the while-loop. 
-# - crnt_timefmt=$( printf "%s" "${tmpl_remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - tmpl_remainder=$( printf "%s" "${tmpl_remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_evaluated_timestr}" ]; then - printf -v ${outvarname_evaluated_timestr} "%s" "${evaluated_timestr}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - -# -#----------------------------------------------------------------------- -# -# This function uses the specified initial forecast time and forecast -# hour to evaluate a single METplus time-formatting string and return -# the corresponding time. -# -#----------------------------------------------------------------------- -# -function eval_single_METplus_timefmt() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "init_time" \ - "fhr" \ - "METplus_timefmt" \ - "outvarname_evaluated_timefmt" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# -# print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Declare local variables. 
-# -#----------------------------------------------------------------------- -# - local evaluated_timefmt \ - fmt \ - hh_init \ - init_time_str \ - lead_hrs \ - len \ - METplus_time_codes \ - METplus_time_shift \ - METplus_time_type \ - mn_init \ - regex_search \ - ss_init \ - valid_time_str \ - yyyymmdd_init -# -#----------------------------------------------------------------------- -# -# Run checks on input arguments. -# -#----------------------------------------------------------------------- -# - if [ -z "${METplus_timefmt}" ]; then - print_err_msg_exit "\ -The specified METplus time-formatting string (METplus_timefmt) cannot be -empty: - METplus_timefmt = \"${METplus_timefmt}\"" - fi - - len=${#init_time} - if [[ ${init_time} =~ ^[0-9]+$ ]]; then - if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then - print_err_msg_exit "\ -The specified initial time (init_time) must contain 10, 12, or 14 digits -but instead contains $len: - init_time = \"${init_time}\"" - fi - else - print_err_msg_exit "\ -The specified initial time (init_time) must consist of digits only and -cannot be empty: - init_time = \"${init_time}\"" - fi - - if ! [[ $fhr =~ ^[0-9]+$ ]]; then - print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of digits only and cannot -be empty: - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set strings for the initial and valid times that can be passed to the -# "date" utility for evaluation. -# -#----------------------------------------------------------------------- -# - yyyymmdd_init=${init_time:0:8} - hh_init=${init_time:8:2} - - mn_init="00" - if [ "$len" -gt "10" ]; then - mn_init=${init_time:10:2} - fi - - ss_init="00" - if [ "$len" -gt "12" ]; then - ss_init=${init_time:12:2} - fi - - init_time_str=$( printf "%s" "${yyyymmdd_init} + ${hh_init} hours + ${mn_init} minutes + ${ss_init} seconds" ) - valid_time_str=$( printf "%s" "${init_time_str} + ${fhr} hours" ) -# -#----------------------------------------------------------------------- -# -# Parse the input METplus time string template. -# -#----------------------------------------------------------------------- -# - regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" - METplus_time_type=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_codes=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\4/p" ) - METplus_time_shift=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\7/p" ) -# -#----------------------------------------------------------------------- -# -# Get strings for the time format and time shift that can be passed to -# the "date" utility or the "printf" command. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_codes}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_codes}" - ;; - "%H") -# -# The "%H" format needs to be treated differently depending on if it's -# formatting a "lead" time type or another (e.g. "init" or "vald") because -# for "lead", the printf function is used below (which doesn't understand -# the "%H" format) whereas for the others, the date utility is used (which -# does understand "%H"). -# - if [ "${METplus_time_type}" = "lead" ]; then - fmt="%02.0f" - else - fmt="${METplus_time_codes}" - fi - ;; - "%HHH") -# -# Print format assumes that the argument to printf (i.e. the number to -# print out) may be a float. 
If we instead assume an integer and use -# "%03d" as the format, the printf function below will fail if the argument -# happens to be a float. The "%03.0f" format will work for both a float -# and an integer argument (and will truncate the float and print out a -# 3-digit integer). -# - fmt="%03.0f" - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time codes: - METplus_time_codes = \"${METplus_time_codes}\" -METplus time-formatting string passed to this function is: - METplus_timefmt = \"${METplus_timefmt}\"" - ;; - esac -# -# Calculate the time shift as an integer in units of seconds. -# - time_shift_str=$(( $(printf "%.0f" "${METplus_time_shift}") + 0 ))" seconds" -# -#----------------------------------------------------------------------- -# -# Set the formatted time string. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_type}" in - "init") - evaluated_timefmt=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "valid") - evaluated_timefmt=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "lead") - lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ - - $( ${DATE_UTIL} --date="${init_time_str}" +"%s" ) )) - lead_hrs=$( bc -l <<< "${lead_secs}/${SECS_PER_HOUR}" ) -# -# Check to make sure lead_hrs is an integer. -# - lead_hrs_trunc=$( bc <<< "${lead_secs}/${SECS_PER_HOUR}" ) - lead_hrs_rem=$( bc -l <<< "${lead_hrs} - ${lead_hrs_trunc}" ) - if [ "${lead_hrs_rem}" != "0" ]; then - print_err_msg_exit "\ -The lead in hours (lead_hrs) must be an integer but isn't: - lead_hrs = ${lead_hrs} -The lead in seconds (lead_secs) is: - lead_secs = ${lead_secs} -The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR -= ${SECS_PER_HOUR} is: - lead_hrs_rem = ${lead_hrs_rem}" - fi -# -# Get the lead in the proper format. -# - evaluated_timefmt=$( printf "${fmt}" "${lead_hrs}" ) - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time type: - METplus_time_type = \"${METplus_time_type}\" -METplus time-formatting string passed to this function is: - METplus_timefmt = \"${METplus_timefmt}\"" - ;; - esac - - if [ -z "${evaluated_timefmt}" ]; then - print_err_msg_exit "\ -The specified METplus time-formatting string (METplus_timefmt) could not -be evaluated for the given initial time (init_time) and forecast hour -(fhr): - METplus_timefmt = \"${METplus_timefmt}\" - init_time = \"${init_time}\" - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_evaluated_timefmt}" ]; then - printf -v ${outvarname_evaluated_timefmt} "%s" "${evaluated_timefmt}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. 
-# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/get_obs.py b/ush/get_obs.py index 666c6f1298..9681eb8c69 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -15,7 +15,14 @@ from python_utils import ( load_yaml_config, ) - +from mrms_pull_topofhour import mrms_pull_topofhour +try: + sys.path.append(os.environ['METPLUS_ROOT']) +except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise +from metplus.util import string_template_substitution as sts def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ @@ -482,20 +489,14 @@ def get_obs(config, obtype, yyyymmdd_task): for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): all_fp_proc_dict[fg] = [] for yyyymmddhh in obs_retrieve_times_crnt_day: - # Set the lead hour, i.e. the number of hours from the beginning of the + # Set the lead time, a timedelta object from the beginning of the # day at which the file is valid. - lhr = int((yyyymmddhh - yyyymmdd_task)/dt.timedelta(hours=1)) - # Call a bash script to evaluate the template for the full path to the - # file containing METplus timestrings at the current time. This should - # be upgraded to a python script at some point. - cmd = '; '.join(['export USHdir=' + ushdir, - 'export yyyymmdd_task=' + yyyymmdd_task_str, - 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, - os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - fp_proc = result.stdout.strip() - all_fp_proc_dict[fg].append(fp_proc) + leadtime = yyyymmddhh - yyyymmdd_task + # Call METplus subroutine to evaluate the template for the full path to + # the file containing METplus timestrings at the current time. + fn = sts.do_string_sub(tmpl=fp_proc_templ,init=yyyymmdd_task,valid=yyyymmddhh, + lead=leadtime.total_seconds()) + all_fp_proc_dict[fg].append(fn) # Check whether any obs files already exist on disk in their processed # (i.e. final) locations. If so, adjust the starting archive hour. In @@ -804,25 +805,21 @@ def get_obs(config, obtype, yyyymmdd_task): # those that are nearest in time to the current hour. Unzip these in a # temporary subdirectory under the raw base directory. # - # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # Note that the function we call to do this (mrms_pull_topofhour) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up # retrieving. The list of possible templates for these names is given # in parm/data_locations.yml, but which of those is actually used is not # known until retrieve_data.py completes. Thus, that information needs - # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. + # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour. # For now, we hard-code the file name here. 
if obtype == 'MRMS': yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') - cmd = ' '.join(['python3', \ - '-u', os.path.join(ushdir, 'mrms_pull_topofhour.py'), \ - '--valid_time', yyyymmddhh_str, \ - '--source', basedir_raw, \ - '--outdir', os.path.join(basedir_raw, 'topofhour'), \ - '--product', fields_in_filenames[i], \ - '--no-add_vdate_subdir']) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - rc = result.returncode + mrms_pull_topofhour(valid_time=yyyymmddhh_str, + source=basedir_raw, + outdir=os.path.join(basedir_raw, 'topofhour'), + product=fields_in_filenames[i], + add_vdate_subdir=False) # The raw file name needs to be the same as what the retrieve_data.py # script called above ends up retrieving. The list of possible templates diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index cad54e74dc..58d24aeff1 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -6,7 +6,7 @@ import shutil import gzip -def main(): +def mrms_pull_topofhour(valid_time, outdir, source, product, level=None, add_vdate_subdir=True, debug=False): """Identifies the MRMS file closest to the valid time of the forecast. METplus is configured to look for a MRMS composite reflectivity file for the valid time of the forecast being verified; since MRMS composite @@ -22,61 +22,45 @@ def main(): time of the forecast """ - #Parse input arguments - parser = argparse.ArgumentParser() - parser.add_argument('-v', '--valid_time', type=str, required=True, - help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') - parser.add_argument('-o', '--outdir', type=str, required=True, - help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') - parser.add_argument('-s', '--source', type=str, required=True, - help='Source directory where zipped MRMS data is found') - parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], - help='Name of MRMS product') - parser.add_argument('-l', '--level', type=str, help='MRMS product level', - choices=['_00.50_','_18_00.50_']) - parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, - help='Flag to add valid-date subdirectory to source and destination directories') - parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') - args = parser.parse_args() # Level is determined by MRMS product; set if not provided - if args.level is None: - if args.product == "MergedReflectivityQCComposite": - args.level = "_00.50_" - elif args.product == "EchoTop": - args.level = "_18_00.50_" + if level is None: + if product == "MergedReflectivityQCComposite": + level = "_00.50_" + elif product == "EchoTop": + level = "_18_00.50_" else: raise Exception("This should never have happened") # Copy and unzip MRMS files that are closest to top of hour # Done every hour on a 20-minute lag - YYYY = int(args.valid_time[0:4]) - MM = int(args.valid_time[4:6]) - DD = int(args.valid_time[6:8]) - HH = int(args.valid_time[8:19]) + YYYY = int(valid_time[0:4]) + MM = int(valid_time[4:6]) + DD = int(valid_time[6:8]) + HH = int(valid_time[8:19]) valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling MRMS product {args.product} for valid time: {args.valid_time}") + print(f"Pulling MRMS product {product} for valid time: {valid_time}") # Set up working directory valid_str_or_empty = '' - if args.add_vdate_subdir: 
+ if add_vdate_subdir: valid_str_or_empty = valid_str - dest_dir = os.path.join(args.outdir, valid_str_or_empty) + dest_dir = os.path.join(outdir, valid_str_or_empty) if not os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS product - if args.debug: + if debug: print(f"Valid date: {valid_str}") - search_path = os.path.join(args.source, valid_str_or_empty, args.product + "*.gz") + search_path = os.path.join(source, valid_str_or_empty, product + "*.gz") file_list = [f for f in glob.glob(search_path)] - if args.debug: + if debug: print(f"Files found: \n{file_list}") time_list = [file_list[x][-24:-9] for x in range(len(file_list))] int_list = [ @@ -96,12 +80,12 @@ def main(): # Check to make sure closest file is within +/- 15 mins of top of the hour difference = abs(closest_timestamp - valid) if difference.total_seconds() <= 900: - filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" - filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str_or_empty, filename1) + filename1 = f"{product}{level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" + filename2 = f"{product}{level}{valid.strftime('%Y%m%d-%H')}0000.grib2" + origfile = os.path.join(source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) - if args.debug: + if debug: print(f"Unzipping file {origfile} to {target}") @@ -113,4 +97,23 @@ def main(): raise FileNotFoundError(f"Did not find a valid file within 15 minutes of {valid}") if __name__ == "__main__": - main() + #Parse input arguments + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--valid_time', type=str, required=True, + help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') + parser.add_argument('-o', '--outdir', type=str, required=True, + help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') + parser.add_argument('-s', '--source', type=str, required=True, + help='Source directory where zipped MRMS data is found') + parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], + help='Name of MRMS product') + parser.add_argument('-l', '--level', type=str, help='MRMS product level', + choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') + parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') + args = parser.parse_args() + + #Consistency checks + + mrms_pull_topofhour(**vars(args)) diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh deleted file mode 100755 index f5438be2f4..0000000000 --- a/ush/run_eval_METplus_timestr_tmpl.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# -#----------------------------------------------------------------------- -# -# This script is simply a wrapper to the eval_METplus_timestr_tmpl bash -# function. It is needed in order to enable the function to be called -# from a python script. -# -#----------------------------------------------------------------------- -# -set -u -. 
$USHdir/source_util_funcs.sh -eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ - outvarname_evaluated_timestr="fp_proc" -echo "${fp_proc}" diff --git a/ush/run_eval_metplus_timestr_tmpl.py b/ush/run_eval_metplus_timestr_tmpl.py new file mode 100644 index 0000000000..259531ea8d --- /dev/null +++ b/ush/run_eval_metplus_timestr_tmpl.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +import argparse +import os +import sys +from datetime import datetime, timedelta +try: + sys.path.append(os.environ['METPLUS_ROOT']) +except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise +from metplus.util import string_template_substitution as sts + +def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): + """ + Calls native METplus routine for evaluating filename templates + + Args: + init_time (str): Date string for initial time in YYYYMMDD[mmss] format, where minutes and + seconds are optional. + fhr (int): Forecast hour (number of hours since init_time) + time_lag (int): Hours of time lag for a time-lagged ensemble member + fn_template (str): The METplus filename template for finding the files + verbose (bool): By default this script only outputs the list of forecast hours + Returns: + str: The fully resolved filename based on the input parameters + """ + + if len(init_time) == 10: + initdate=datetime.strptime(init_time, '%Y%m%d%H') + elif len(init_time) == 12: + initdate=datetime.strptime(init_time, '%Y%m%d%H%M') + elif len(init_time) == 14: + initdate=datetime.strptime(init_time, '%Y%m%d%H%M%S') + else: + raise ValueError(f"Invalid {init_time=}; must be 10, 12, or 14 characters in length") + + validdate=initdate + timedelta(hours=fhr) + leadsec=fhr*3600 + # Evaluate the METplus timestring template for the current lead hour + if verbose: + print("Resolving METplus template for:") + print(f"{fn_template=}\ninit={initdate}\nvalid={validdate}\nlead={leadsec}\n{time_lag=}\n") + # Return the full path with templates resolved + return sts.do_string_sub(tmpl=fn_template,init=initdate,valid=validdate, + lead=leadsec,time_lag=time_lag) + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Print a list of forecast hours in bash-readable comma-separated format such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') + parser.add_argument("-f", "--fhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) + parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? 
for details on template settings", type=str, default='') + + args = parser.parse_args() + + filename = eval_tmpl(**vars(args)) + # If called from command line, we want to print the resolved filename + print(filename) diff --git a/ush/set_leadhrs.py b/ush/set_leadhrs.py new file mode 100644 index 0000000000..64d483f652 --- /dev/null +++ b/ush/set_leadhrs.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +import argparse +import os +from run_eval_metplus_timestr_tmpl import eval_tmpl + +def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_template, num_missing_files_max, + skip_check_files=False, verbose=False): + """ + Creates a list of lead hours based on the provided range and interval, + checks for the existence of corresponding files, and returns a list + of lead hours for which files exist. If too many files are missing, it fails with an exception. + + Args: + date_init (str): Date string for initial time in YYYYMMDD[mmss] format, where + minutes and seconds are optional. + lhr_min (int): Minimum lead hour to check + lhr_max (int): Maximum lead hour to check + lhr_intvl (int): Interval between lead hours + base_dir (str): Base directory for forecast/observation file + time_lag (int): Hours of time lag for a time-lagged ensemble member + fn_template (str): The METplus filename template for finding the files + verbose (bool): By default this script only outputs the list of forecast hours + (for easier parsing from bash contexts). Set the verbose flag + to True for additional debugging output. + num_missing_files_max (int): If more files than this value are not found, raise exception + skip_check_files (bool): If true, return the list of forecast hours, skipping the file check + Returns: + A list of forecast hours where files were found + """ + + # Step 1: Generate lead hours without filtering for missing files + lhrs_list = list(range(lhr_min, lhr_max + 1, lhr_intvl)) + if verbose: + print(f"Initial set of lead hours (relative to {date_init}): {lhrs_list}") + + if skip_check_files: + return lhrs_list + + # Step 2: Loop through lead hours and check for corresponding file existence + final_list = [] + num_missing_files = 0 + for lhr in lhrs_list: + + # Evaluate the METplus timestring template for the current lead hour + fn = eval_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) + + # Get the full path and check if the file exists + fp = os.path.join(base_dir, fn) + if os.path.isfile(fp): + if verbose: + print(f"Found file for lead hour {lhr} (relative to {date_init}): {fp}") + final_list.append(lhr) + else: + num_missing_files += 1 + + if verbose: + print(f"File for lead hour {lhr} (relative to {date_init}) is MISSING: {fp}") + + if verbose: + print(f"Final set of lead hours relative to {date_init}: {final_list}") + + # Step 3: Check if the number of missing files exceeds the maximum allowed + if num_missing_files > num_missing_files_max: + raise Exception(f"Number of missing files ({num_missing_files}) exceeds maximum allowed ({num_missing_files_max}).") + + return final_list + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Print a list of forecast hours in bash-readable comma-separated format such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-d", "--date_init", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') + parser.add_argument("-min", 
"--lhr_min", help="Minimum lead hour to check", type=int, required=True) + parser.add_argument("-max", "--lhr_max", help="Maximum lead hour to check", type=int, required=True) + parser.add_argument("-int", "--lhr_intvl", help="Interval between lead hours", type=int, required=True) + parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) + parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='') + parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') + parser.add_argument("-n", "--num_missing_files_max", type=int, default=5, + help="Number of missing files to tolerate; if more files than this number can not be found, raise an exception") + parser.add_argument("-s", "--skip_check_files", action="store_true", + help="Flag to skip file check and just return the list of lead hours") + + args = parser.parse_args() + + #Consistency checks + if not args.skip_check_files and not args.date_init: + raise argparse.ArgumentTypeError('--date_init must be specified unless --skip_check_files is specified') + + leadhr_list = set_leadhrs(**vars(args)) + # If called from command line, we want to print a bash-parsable list + print(', '.join(str(x) for x in leadhr_list)) diff --git a/ush/set_leadhrs.sh b/ush/set_leadhrs.sh deleted file mode 100644 index aa3b4b338f..0000000000 --- a/ush/set_leadhrs.sh +++ /dev/null @@ -1,334 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This file defines functions used to generate sets of lead hours for -# which verification will be performed. -# -#----------------------------------------------------------------------- -# - -function set_leadhrs_no_missing() { -# -#----------------------------------------------------------------------- -# -# This function sets the lead hours (relative to some unspecified initial/ -# reference time) for which verification will be performed under the -# assumption that the data file (which may be a forecast output file or -# an observation file) for each hour is available (i.e. it assumes that -# there are no missing files). -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; set -u +x; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. 
Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "lhr_min" \ - "lhr_max" \ - "lhr_intvl" \ - "outvarname_lhrs_list_no_missing" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args valid_args -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local lhrs_array \ - lhrs_list -# -#----------------------------------------------------------------------- -# -# Create the array of lead hours. -# -#----------------------------------------------------------------------- -# - lhrs_array=($( seq ${lhr_min} ${lhr_intvl} ${lhr_max} )) - - # Express the array of lead hours as a (scalar) string containing a comma - # (and space) separated list of the elements of lhrs_array. - lhrs_list=$( printf "%s, " "${lhrs_array[@]}" ) - lhrs_list=$( echo "${lhrs_list}" | $SED "s/, $//g" ) -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_lhrs_list_no_missing}" ]; then - printf -v ${outvarname_lhrs_list_no_missing} "%s" "${lhrs_list}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - - -# -#----------------------------------------------------------------------- -# -# This function generates a list of lead hours (relative to an initial or -# reference time yyyymmddhh_init) such that for each such hour, there -# exists a corresponding data file with a name of the form specified by -# the template fn_template. Depending on fn_template, this file may -# contain forecast or observation data. -# -#----------------------------------------------------------------------- -# -function set_leadhrs() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; set -u +x; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. 
-# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "yyyymmddhh_init" \ - "lhr_min" \ - "lhr_max" \ - "lhr_intvl" \ - "base_dir" \ - "fn_template" \ - "num_missing_files_max" \ - "outvarname_lhrs_list" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args valid_args -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local crnt_tmpl \ - crnt_tmpl_esc \ - fn \ - fp \ - i \ - lhr \ - lhrs_array \ - lhrs_list \ - num_hrs \ - num_missing_files \ - remainder \ - skip_this_hour -# -#----------------------------------------------------------------------- -# -# For the specified field, generate the set of lead hours at which -# verification will be performed under the assumption that for each such -# hour, the corresponding or observation file exists. Thus, this set is -# an initial guess for the lead hours at which vx will be performed. -# -#----------------------------------------------------------------------- -# - set_leadhrs_no_missing \ - lhr_min="${lhr_min}" \ - lhr_max="${lhr_max}" \ - lhr_intvl="${lhr_intvl}" \ - outvarname_lhrs_list_no_missing="lhrs_list_no_missing" - - # For convenience, save the scalar variable lhrs_list_no_missing to a - # bash array. - lhrs_array=($( printf "%s" "${lhrs_list_no_missing}" | $SED "s/,//g" )) - - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of lead hours -(relative to ${yyyymmddhh_init}) is: - lhrs_array = ( $( printf "\"%s\" " "${lhrs_array[@]}" )) -" -# -#----------------------------------------------------------------------- -# -# Loop through the array of lead hours generated above and construct the -# variable lhrs_list that will be scalar (string) containing a comma- -# separated list of hours for which corresponding forecast or observation -# files have been confirmed to exist. Also, use the variable -# num_missing_files to keep track of the number of files that are missing. -# -#----------------------------------------------------------------------- -# - lhrs_list="" - num_missing_files="0" - num_hrs=${#lhrs_array[@]} - for (( i=0; i<${num_hrs}; i++ )); do - - lhr="${lhrs_array[$i]}" - skip_this_hour="FALSE" -# -# Evaluate the METplus file name template containing METplus timestrings -# for the specified yyyymmddhh_init and current hour (lhr) to obtain the -# name of the current file (including possibly a relative directory). -# - eval_METplus_timestr_tmpl \ - init_time="${yyyymmddhh_init}" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${fn_template}" \ - outvarname_evaluated_timestr="fn" -# -# Get the full path to the file and check if it exists. 
-# - fp="${base_dir}/${fn}" - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}): - fp = \"${fp}\" -" - else - skip_this_hour="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}) is MISSING: - fp = \"${fp}\" -Excluding this hour from the list of lead hours to return. -" - break - fi - - if [[ ! $(boolify "${skip_this_hour}") == "TRUE" ]]; then - lhrs_list="${lhrs_list},${lhr}" - fi - - done -# -# Remove leading comma from lhrs_list. -# - lhrs_list=$( echo "${lhrs_list}" | $SED "s/^,//g" ) - print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of lead hours relative -to ${yyyymmddhh_init} (saved in a scalar string variable) is: - lhrs_list = \"${lhrs_list}\" -" -# -#----------------------------------------------------------------------- -# -# If the number of missing files is greater than the maximum allowed -# (specified by num_missing_files_max), print out an error message and -# exit. -# -#----------------------------------------------------------------------- -# - if [ "${num_missing_files}" -gt "${num_missing_files_max}" ]; then - print_err_msg_exit "\ -The number of missing files (num_missing_files) is greater than the -maximum allowed number (num_missing_files_max): - num_missing_files = ${num_missing_files} - num_missing_files_max = ${num_missing_files_max}" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_lhrs_list}" ]; then - printf -v ${outvarname_lhrs_list} "%s" "${lhrs_list}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index 9feceaf68e..266975e97d 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -214,16 +214,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that evaluates a METplus time -# string template. -# -#----------------------------------------------------------------------- -# - . ${bashutils_dir}/eval_METplus_timestr_tmpl.sh - -# -#----------------------------------------------------------------------- -# # Source the file that sources YAML files as if they were bash # #----------------------------------------------------------------------- From f3d21bdbc438f2fd2aba67539a12291dea84767f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 28 Oct 2024 12:24:22 -0600 Subject: [PATCH 140/208] Minor modifications to Mike's PR changes. 
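For reference, the template evaluation that the new Python helpers delegate to METplus can be sketched as follows. The template string, dates, and expected output here are illustrative assumptions only, not values taken from the workflow; as in the helpers above, METPLUS_ROOT must point at a METplus installation so that metplus.util is importable.

# Minimal sketch (assumed example values; not part of this patch):
import os, sys
from datetime import datetime, timedelta
sys.path.append(os.environ['METPLUS_ROOT'])   # same approach as the helpers above
from metplus.util import string_template_substitution as sts

init = datetime(2024, 7, 9, 12)                # hypothetical init time
lead = timedelta(hours=6)
tmpl = "{valid?fmt=%Y%m%d}/obs.t{valid?fmt=%H}z.f{lead?fmt=%HHH}.nc"   # hypothetical template
path = sts.do_string_sub(tmpl=tmpl, init=init, valid=init + lead,
                         lead=int(lead.total_seconds()))
print(path)   # expected to resolve to something like "20240709/obs.t18z.f006.nc"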
--- ...exregional_run_met_genensprod_or_ensemblestat.sh | 6 ++---- .../exregional_run_met_gridstat_or_pointstat_vx.sh | 3 +-- ...onal_run_met_gridstat_or_pointstat_vx_ensmean.sh | 3 +-- ...onal_run_met_gridstat_or_pointstat_vx_ensprob.sh | 3 +-- scripts/exregional_run_met_pb2nc_obs.sh | 13 ++++--------- scripts/exregional_run_met_pcpcombine.sh | 5 ++--- ush/eval_metplus_timestr_tmpl.py | 12 ++++++------ ush/set_leadhrs.py | 4 ++-- 8 files changed, 19 insertions(+), 30 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 2ff346442a..934ba63283 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -231,14 +231,13 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x if [ "${MetplusToolName}" = "GenEnsProd" ]; then VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --lhr_min="${vx_hr_start}" \ --lhr_max="${vx_hr_end}" \ --lhr_intvl="${vx_intvl}" \ --skip_check_files ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" elif [ "${MetplusToolName}" = "EnsembleStat" ]; then VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ @@ -250,9 +249,8 @@ elif [ "${MetplusToolName}" = "EnsembleStat" ]; then --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ --time_lag="${time_lag%.*}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" fi -echo "VX_LEADHR_LIST=$VX_LEADHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 1f4b65a7c9..6200b0ba7e 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -232,7 +232,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -242,7 +241,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ --time_lag="${time_lag%.*}") || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 6c6be42a52..0bfcff36d6 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -175,7 +175,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -184,7 +183,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --base_dir="${OBS_INPUT_DIR}" \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
# #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 3fe23d7510..0e8d44578c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -174,7 +174,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -183,7 +182,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --base_dir="${OBS_INPUT_DIR}" \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index afe91f14a1..3e6631cd1d 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -160,17 +160,12 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do # create. sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp" - fp=$( python3 $USHdir/run_eval_metplus_timestr_tmpl.py \ + fp=$( python3 $USHdir/eval_metplus_timestr_tmpl.py \ --init_time="${yyyymmdd_task}00" \ - --fhr="${lhr}" \ - --fn_template="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}") || \ -print_err_msg_exit "Call to run_eval_metplus_timestr_tmpl.py failed with return code: $?" + --lhr="${lhr}" \ + --fn_template="${OBS_DIR}/${OBS_NDAS_FN_TEMPLATES[1]}") || \ + print_err_msg_exit "Call to eval_metplus_timestr_tmpl.py failed with return code: $?" if [[ -f "${fp}" ]]; then print_info_msg " diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 9a1eb33bc8..c60ac30e36 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -211,14 +211,13 @@ fi # #----------------------------------------------------------------------- # -set -x vx_intvl="$((10#${ACCUM_HH}))" VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --lhr_min="${vx_intvl}" \ --lhr_max="${FCST_LEN_HRS}" \ --lhr_intvl="${vx_intvl}" \ --skip_check_files ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # @@ -260,7 +259,7 @@ accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... --fn_template="${fn_template}" \ --num_missing_files_max="${num_missing_files_max}" \ --time_lag="${time_lag%.*}" || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
done print_info_msg " diff --git a/ush/eval_metplus_timestr_tmpl.py b/ush/eval_metplus_timestr_tmpl.py index 259531ea8d..edbe0e7012 100644 --- a/ush/eval_metplus_timestr_tmpl.py +++ b/ush/eval_metplus_timestr_tmpl.py @@ -11,14 +11,14 @@ raise from metplus.util import string_template_substitution as sts -def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): +def eval_metplus_timestr_tmpl(init_time, lhr, time_lag, fn_template, verbose=False): """ Calls native METplus routine for evaluating filename templates Args: init_time (str): Date string for initial time in YYYYMMDD[mmss] format, where minutes and seconds are optional. - fhr (int): Forecast hour (number of hours since init_time) + lhr (int): Lead hour (number of hours since init_time) time_lag (int): Hours of time lag for a time-lagged ensemble member fn_template (str): The METplus filename template for finding the files verbose (bool): By default this script only outputs the list of forecast hours @@ -35,8 +35,8 @@ def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): else: raise ValueError(f"Invalid {init_time=}; must be 10, 12, or 14 characters in length") - validdate=initdate + timedelta(hours=fhr) - leadsec=fhr*3600 + validdate=initdate + timedelta(hours=lhr) + leadsec=lhr*3600 # Evaluate the METplus timestring template for the current lead hour if verbose: print("Resolving METplus template for:") @@ -52,12 +52,12 @@ def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): ) parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') - parser.add_argument("-f", "--fhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-f", "--lhr", help="Forecast hour", type=int, required=True) parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') args = parser.parse_args() - filename = eval_tmpl(**vars(args)) + filename = eval_metplus_timestr_tmpl(**vars(args)) # If called from command line, we want to print the resolved filename print(filename) diff --git a/ush/set_leadhrs.py b/ush/set_leadhrs.py index 64d483f652..3256297af2 100644 --- a/ush/set_leadhrs.py +++ b/ush/set_leadhrs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import argparse import os -from run_eval_metplus_timestr_tmpl import eval_tmpl +from eval_metplus_timestr_tmpl import eval_metplus_timestr_tmpl def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_template, num_missing_files_max, skip_check_files=False, verbose=False): @@ -42,7 +42,7 @@ def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_t for lhr in lhrs_list: # Evaluate the METplus timestring template for the current lead hour - fn = eval_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) + fn = eval_metplus_timestr_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) # Get the full path and check if the file exists fp = os.path.join(base_dir, fn) From 879fd98d92b746c9d4b790eed7c176d15a665c81 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 28 Oct 2024 13:43:48 -0600 Subject: [PATCH 141/208] Bug fixes to the merge. 
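Two variable names in ush/get_obs.py were left over from before the merge and
raise NameError at run time: the processed-obs path template is stored in
fp_proc_tmpl (not fp_proc_templ), and the MRMS field names used to build file
names live in mrms_fields_in_obs_filenames (not fields_in_filenames). The
first of the two fixed lines just hands the template to METplus; stripped of
its surrounding loop it amounts to the sketch below (the dates and the
template string are illustrative, not actual workflow settings):

    from datetime import datetime
    from metplus.util import string_template_substitution as sts

    yyyymmdd_task = datetime(2024, 7, 30)   # obs day the task is processing
    yyyymmddhh = datetime(2024, 7, 30, 6)   # one obs time within that day
    fp_proc_tmpl = "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2"

    leadtime = yyyymmddhh - yyyymmdd_task
    # do_string_sub() expects the lead time in seconds, hence total_seconds()
    fn = sts.do_string_sub(tmpl=fp_proc_tmpl, init=yyyymmdd_task,
                           valid=yyyymmddhh, lead=leadtime.total_seconds())
    print(fn)   # -> 20240730/ccpa.t06z.01h.hrap.conus.gb2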
--- ush/get_obs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index c6f8732be3..361426c16c 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -469,7 +469,7 @@ def get_obs(config, obtype, yyyymmdd_task): leadtime = yyyymmddhh - yyyymmdd_task # Call METplus subroutine to evaluate the template for the full path to # the file containing METplus timestrings at the current time. - fn = sts.do_string_sub(tmpl=fp_proc_templ,init=yyyymmdd_task,valid=yyyymmddhh, + fn = sts.do_string_sub(tmpl=fp_proc_tmpl,init=yyyymmdd_task,valid=yyyymmddhh, lead=leadtime.total_seconds()) all_fp_proc_dict[fg].append(fn) @@ -796,7 +796,7 @@ def get_obs(config, obtype, yyyymmdd_task): mrms_pull_topofhour(valid_time=yyyymmddhh_str, source=basedir_raw, outdir=os.path.join(basedir_raw, 'topofhour'), - product=fields_in_filenames[i], + product=mrms_fields_in_obs_filenames[i], add_vdate_subdir=False) # The raw file name needs to be the same as what the retrieve_data.py From 476eb15be578f92de7fe4bc1e314406da88ff7a3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 29 Oct 2024 10:41:03 -0600 Subject: [PATCH 142/208] First shot at modifications to enable variable forecast output interval in the verification. --- ush/set_cycle_and_obs_timeinfo.py | 295 +++++++++++++++++++++--------- 1 file changed, 211 insertions(+), 84 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 2130ad99ea..36c20e126c 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -624,97 +624,224 @@ def get_obs_retrieve_times_by_day( # Get list of field groups to be verified. vx_field_groups = vx_config['VX_FIELD_GROUPS'] - # Define dictionary containing information about all field groups that - # can possibly be verified. This information includes their temporal - # characteristics (cumulative vs. instantaneous) and the mapping between - # the observation type and the field group. - vx_field_info = {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']}, - {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}], - 'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'field_groups': ['ADPSFC', 'ADPUPA']}] - } - - # Keep only those items in the dictionary vx_field_info defined above that - # have field groups that appear in the list of field groups to verify. - for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.copy().items(): - for obtypes_to_field_groups_dict in obtypes_to_field_groups_dict_list.copy(): - obtype = obtypes_to_field_groups_dict['obtype'] - field_groups = obtypes_to_field_groups_dict['field_groups'] - field_groups = [fg for fg in field_groups if fg in vx_field_groups] - obtypes_to_field_groups_dict['field_groups'] = field_groups - if not field_groups: obtypes_to_field_groups_dict_list.remove(obtypes_to_field_groups_dict) - if not obtypes_to_field_groups_dict_list: vx_field_info.pop(obs_time_type) - - # Create dictionary containing the temporal characteristics as keys and - # a string list of obs types to verify as the values. - obs_time_type_to_obtypes_dict = dict() - for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.items(): - obtype_list = [a_dict['obtype'] for a_dict in obtypes_to_field_groups_dict_list] - obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list - - # Initialize the return variable. - obs_retrieve_times_by_day = dict() - - # Define timedelta object representing a single day. 
+ # Define a list of dictionaries containing information about all the obs + # types that can possibly be used for verification in the SRW App. Each + # dictionary in the list contains the name of the obs type, the temporal + # nature of that obs type (i.e. whether the obs type contains cumulative + # or instantaneous fields), and a list of the field groups that the obs + # type may be used to verify. + all_obs_info \ + = [{'obtype': 'CCPA', 'time_type': 'cumul', 'field_groups': ['APCP']}, + {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']}, + {'obtype': 'MRMS', 'time_type': 'inst', 'field_groups': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['ADPSFC', 'ADPUPA']} + ] + + # Create new list that has the same form as the list of dictionaries + # defined above but contains only those obs types that have at least one + # field group that appears in the list of field groups to verify. Note + # that for those obs types that are retained in the list, the field groups + # that will not be verified are discarded. + obs_info = [] + for obs_dict in all_obs_info.copy(): + obtype = obs_dict['obtype'] + field_groups = obs_dict['field_groups'] + field_groups = [field for field in field_groups if field in vx_field_groups] + obs_dict = obs_dict.copy() + obs_dict['field_groups'] = field_groups + if field_groups: obs_info.append(obs_dict) + + # For convenience, define timedelta object representing a single day. one_day = timedelta(days=1) - # Loop over all obs types to be verified (by looping over the temporal - # type and the specific obs under that type). For each obs type, loop - # over each obs day and find the times within that that at which the obs - # need to be retrieved. - for obs_time_type, obtypes in obs_time_type_to_obtypes_dict.items(): + # Generate a dictionary (of dictionaries) that, for each obs type to be + # used in the vx and for each day for which there is forecast output, + # will contain the times at which verification will be performed, i.e. + # the times at which the forecast output will be compared to observations. + # We refer to these times as the vx comparison times. + vx_compare_times_by_day = dict() + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type] obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] - for obtype in obtypes: + vx_compare_times_by_day[obtype] = dict() - obs_retrieve_times_by_day[obtype] = dict() + # Get the availability interval for the current observation type from the + # verification configuration dictionary. Then make sure it divides evenly + # into 24. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + logging.error(msg) + raise Exception(msg) + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) + + # Loop over all obs days over all cycles (for the current obs type). For + # each such day, get the list forecast output times and the list of obs + # availability times. 
Finally, set the times (on that day) that verification + # will be performed to the intersection of these two lists. + for obs_day in obs_days_all_cycles_crnt_ttype: + + next_day = obs_day + one_day + if obs_time_type == "cumul": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day] + elif obs_time_type == "inst": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day] + fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day] + + if obs_time_type == "cumul": + obs_avail_times_crnt_day \ + = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + elif obs_time_type == "inst": + obs_avail_times_crnt_day \ + = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day] + + vx_compare_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day)) + vx_compare_times_crnt_day.sort() + + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + vx_compare_times_by_day[obtype][obs_day_str] = vx_compare_times_crnt_day + + # For each obs type to be used in the vx and for each day for which there + # is forecast output, calculate the times at which obs need to be retrieved. + # For instantaneous fields, the obs retrieval times are the same as the + # times at which vx will be performed. For cumulative fields, each field + # value needs to be constructed by adding values from previous times. For + # example, if we're verifying 6-hourly precipitation and the obs availability + # interval for precip obs (CCPA) is 1 hour, then the 6-hourly values must + # be built by adding the 1-hour values. Thus, this requires obs at every + # hour, not just every 6 hours. + # + # First, initialze the dictionary (of dictionaries) that will contain the + # obs retreival times (for all obs types and each day for which there is + # forecast output), and set the values for instantaneous obs to the vx + # comparison times calculated above. + obs_retrieve_times_by_day = dict() + for obs_dict in obs_info: + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + if obs_time_type == 'inst': + obs_retrieve_times_by_day[obtype] = vx_compare_times_by_day[obtype] + + # Next, calculate the obs retrieval times for cumulative fields. We want + # these times grouped into days because the get_obs workflow tasks that + # will use this information are day-based (i.e. each task will get obs + # for a single day). However, it is easier to first calculate these + # times as a single group over all cycles. We do this next. + obs_retrieve_times_all_cycles = dict() + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + field_groups = obs_dict['field_groups'] + + # Consider only cumulative fields. + if obs_time_type != 'cumul': + continue - # Get the availability interval for the current observation type from the - # verification configuration dictionary. Then make sure it divides evenly - # into 24. 
- config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) - obs_avail_intvl_hrs = vx_config[config_var_name] - remainder = 24 % obs_avail_intvl_hrs - if remainder != 0: - msg = dedent(f""" - The obs availability interval for obs of type {obtype} must divide evenly - into 24 but doesn't: - {obs_avail_intvl_hrs = } - 24 % obs_avail_intvl_hrs = {remainder}" - """) - raise ValueError(msg) - obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) - num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) - - # Loop over all obs days over all cycles (for the current obs type). For - # each such day, get the list forecast output times and the list of obs - # availability times. Finally, set the times (on that day) that obs need - # to be retrieved to the intersection of these two lists. - for obs_day in obs_days_all_cycles_crnt_ttype: - - next_day = obs_day + one_day - if obs_time_type == "cumul": - fcst_output_times_crnt_day \ - = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day] - elif obs_time_type == "inst": - fcst_output_times_crnt_day \ - = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day] - fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day] - - if obs_time_type == "cumul": - obs_avail_times_crnt_day \ - = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] - elif obs_time_type == "inst": - obs_avail_times_crnt_day \ - = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] - obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day] - - obs_retrieve_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day)) - obs_retrieve_times_crnt_day.sort() - - obs_day_str = datetime.strftime(obs_day, "%Y%m%d") - obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day + # Initialize the set that will contain the obs retrieval times over all + # cycles. + obs_retrieve_times_all_cycles[obtype] = set() + + # Get the availability interval for the current observation type from the + # verification configuration dictionary. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + + # Consider all field groups to be verified for the current obs type. + for fg in field_groups: + + # Get the list of accumulation intervals for the current cumulative obs + # type and field group combination. + accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"]) + accum_intvls_hrs = vx_config[accum_intvls_array_name] + + for cycle_start_time in cycle_start_times: + + # Loop through the accumulation intervals for this obs type and field + # group combination. + for accum_intvl_hrs in accum_intvls_hrs: + accum_intvl = timedelta(hours=accum_intvl_hrs) + # Get the number of accumulation intervals that fits in the duration of + # the forecast. Note that the accumulation interval doesn't necessarily + # have to evenly divide the forecast duration; we simply drop any fractional + # accumulation intervals by rounding down to the nearest integer. + num_accum_intvls_in_fcst = int(fcst_len/accum_intvl) + # Calulate the times at which the current cumulative obs field will be + # compared to the forecast field(s) in the corresponding cumulative field + # group (for the current accumulation interval). 
+ vx_compare_times_crnt_cycl = [cycle_start_time + (i+1)*accum_intvl + for i in range(0,num_accum_intvls_in_fcst)] + # For each such comparison time, get the times at which obs are needed + # to form that accumulation. For example, if the current accumulation + # interval is 6 hours and the obs are available every hour, then the + # times at which obs are needed will be the comparison time as well as + # the five hours preceeding it. Then put all such times over all vx + # comparison times within all cycles into a single array of times (which + # is stored in the dictionary obs_retrieve_times_all_cycles). + for vx_compare_time in vx_compare_times_crnt_cycl: + remainder = accum_intvl_hrs % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into the current accumulation interval (accum_intvl) but doesn't: + accum_intvl_hrs = {accum_intvl_hrs} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + accum_intvl_hrs % obs_avail_intvl_hrs = {remainder}" + """) + logging.error(msg) + raise Exception(msg) + num_obs_avail_times_in_accum_intvl = int(accum_intvl/obs_avail_intvl) + obs_retrieve_times_crnt_accum_intvl \ + = [vx_compare_time - i*obs_avail_intvl \ + for i in range(0,num_obs_avail_times_in_accum_intvl)] + obs_retrieve_times_all_cycles[obtype] \ + = obs_retrieve_times_all_cycles[obtype] | set(obs_retrieve_times_crnt_accum_intvl) + + # Convert the final set of obs retrieval times for the current obs type + # to a sorted list. Note that the sorted() function will convert a set + # to a sorted list (a set itself cannot be sorted). + obs_retrieve_times_all_cycles[obtype] = sorted(obs_retrieve_times_all_cycles[obtype]) + + # Now that the obs retrival times for cumulative fields have been obtained + # but grouped by cycle start date, regroup them by day and save results + # in obs_retrieve_times_by_day. + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + + # Consider only cumulative obs/fields. + if obs_time_type != 'cumul': + continue + + # Initialize variables before looping over obs days. + obs_retrieve_times_by_day[obtype] = dict() + obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] + obs_retrieve_times_all_cycles_crnt_obtype = obs_retrieve_times_all_cycles[obtype] + + for obs_day in obs_days_all_cycles_crnt_ttype: + next_day = obs_day + one_day + obs_retrieve_times_crnt_day \ + = [time for time in obs_retrieve_times_all_cycles_crnt_obtype if obs_day < time <= next_day] + obs_retrieve_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_retrieve_times_crnt_day] + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day return obs_retrieve_times_by_day From fcc7b1b26ed0d854ef79ad074e8295f3f003ef46 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 29 Oct 2024 12:57:46 -0600 Subject: [PATCH 143/208] Introduce flag that specifies whether or not vx tasks are being run in the workflow; run checks on and/or adjustments to vx parameters only if this flag is true. 
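load_config_for_setup() now also returns a do_vx flag that is True only when
at least one of the verification task-group files (verify_pre.yaml,
verify_det.yaml, verify_ens.yaml) appears in the final taskgroups string, and
setup() uses the same flag to skip the vx consistency checks, the obs-day
cycledefs, and the obs retrieval-time calculations when the workflow contains
no vx tasks. Reduced to a sketch (written with a generator expression that is
equivalent to the filtered list comprehension used in setup.py; the
taskgroups value below is an example):

    # Example value; in setup.py this comes from cfg_wflow['rocoto']['tasks'].
    taskgroups = '{{ ["parm/wflow/prep.yaml", "parm/wflow/verify_det.yaml"]|include }}'

    vx_taskgroup_fns = ['verify_pre.yaml', 'verify_det.yaml', 'verify_ens.yaml']
    do_vx = any(fn in taskgroups for fn in vx_taskgroup_fns)

    if do_vx:
        # Run the temporal-consistency checks and derive the obs retrieval
        # times only when a vx task group is actually in the workflow.
        ...
    print(do_vx)   # -> True for the example above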
--- ush/setup.py | 434 +++++++++++++++++++++++++++------------------------ 1 file changed, 229 insertions(+), 205 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index a4bcab3b32..01db240362 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -61,7 +61,9 @@ def load_config_for_setup(ushdir, default_config, user_config): ``config.yaml``) Returns: - None + cfg_d (dict): Experiment configuration dictionary based on default, + machine, and user config files + do_vx (bool): Flag specifying whether workflow will run vx tasks Raises: FileNotFoundError: If the user-provided configuration file or the machine file does not @@ -170,11 +172,13 @@ def load_config_for_setup(ushdir, default_config, user_config): if taskgroups: cfg_wflow['rocoto']['tasks']['taskgroups'] = taskgroups + # Save string specifying final workflow taskgroups for use later on. + taskgroups = cfg_wflow['rocoto']['tasks']['taskgroups'] + # Extend yaml here on just the rocoto section to include the # appropriate groups of tasks extend_yaml(cfg_wflow) - # Put the entries expanded under taskgroups in tasks rocoto_tasks = cfg_wflow["rocoto"]["tasks"] cfg_wflow["rocoto"]["tasks"] = yaml.load(rocoto_tasks.pop("taskgroups"),Loader=yaml.SafeLoader) @@ -244,40 +248,51 @@ def _add_jobname(tasks): # # ----------------------------------------------------------------------- # - # Ensure that the configuration parameters associated with cumulative - # fields (e.g. APCP) in the verification section of the experiment - # dicitonary are temporally consistent, e.g. that accumulation intervals - # are less than or equal to the forecast length. Update the verification - # section of the dictionary to remove inconsistencies. + # If the workflow includes at least one verification task, ensure that + # the configuration parameters associated with cumulative fields (e.g. + # APCP) in the verification section of the experiment dicitonary are + # temporally consistent, e.g. that accumulation intervals are less than + # or equal to the forecast length. Update the verification section of + # the dictionary to remove inconsistencies. # # ----------------------------------------------------------------------- # + # List containing the names of all workflow config files for vx (i.e. + # whether or not they're included in the workflow). + vx_taskgroup_fns = ['verify_pre.yaml', 'verify_det.yaml', 'verify_ens.yaml'] + # Flag that specifies whether the workflow will be running any vx tasks. + do_vx = any([fn for fn in vx_taskgroup_fns if fn in taskgroups]) + + # Initialize variable containing the vx configuration. This may be + # modified within the if-statement below. vx_config = cfg_d["verification"] - workflow_config = cfg_d["workflow"] - date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") - date_last_cycl = workflow_config.get("DATE_LAST_CYCL") - incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + if do_vx: + workflow_config = cfg_d["workflow"] - # Convert various times and time intervals from integers or strings to - # datetime or timedelta objects. 
- date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") - date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") - cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) - fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") - # Generate a list containing the starting times of the cycles. - cycle_start_times \ - = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, - return_type='datetime') + # Convert various times and time intervals from integers or strings to + # datetime or timedelta objects. + date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") + date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) - # Call function that runs the consistency checks on the vx parameters. - vx_config, fcst_obs_matched_times_all_cycles_cumul \ - = check_temporal_consistency_cumul_fields( - vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + # Generate a list containing the starting times of the cycles. + cycle_start_times \ + = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, + return_type='datetime') + + # Call function that runs the consistency checks on the vx parameters. + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) cfg_d['verification'] = vx_config @@ -326,7 +341,7 @@ def _add_jobname(tasks): ) ) - return cfg_d + return cfg_d, do_vx def set_srw_paths(ushdir, expt_config): @@ -445,7 +460,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): # user config files. default_config_fp = os.path.join(USHdir, "config_defaults.yaml") user_config_fp = os.path.join(USHdir, user_config_fn) - expt_config = load_config_for_setup(USHdir, default_config_fp, user_config_fp) + expt_config, do_vx = load_config_for_setup(USHdir, default_config_fp, user_config_fp) # Set up some paths relative to the SRW clone expt_config["user"].update(set_srw_paths(USHdir, expt_config)) @@ -609,192 +624,201 @@ def _remove_tag(tasks, tag): post_meta = rocoto_tasks.get("metatask_run_ens_post", {}) post_meta.pop("metatask_run_sub_hourly_post", None) post_meta.pop("metatask_sub_hourly_last_hour_post", None) - # - # ----------------------------------------------------------------------- - # - # Set some variables needed for running checks on and creating new - # (derived) configuration variables for the verification. 
- # - # ----------------------------------------------------------------------- - # - vx_config = expt_config["verification"] + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") - - # To enable arithmetic with dates and times, convert various time - # intervals from integer to datetime.timedelta objects. cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) - fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) - # - # ----------------------------------------------------------------------- - # - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - # - # ----------------------------------------------------------------------- - # - cycle_start_times \ - = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, - return_type='datetime') - # - # ----------------------------------------------------------------------- - # - # Generate a list of forecast output times and a list of obs days (i.e. - # days on which observations are needed to perform verification because - # there is forecast output on those days) over all cycles, both for - # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones - # (e.g. APCP). Then add these lists to the dictionary containing workflow - # configuration variables. These will be needed in generating the ROCOTO - # XML. # # ----------------------------------------------------------------------- # - fcst_output_times_all_cycles, obs_days_all_cycles, \ - = set_fcst_output_times_and_obs_days_all_cycles( - cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) - - workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] - workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] - # - # ----------------------------------------------------------------------- - # - # Generate lists of ROCOTO cycledef strings corresonding to the obs days - # for instantaneous fields and those for cumulative ones. Then save the - # lists of cycledefs in the dictionary containing values needed to - # construct the ROCOTO XML. - # - # ----------------------------------------------------------------------- - # - cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) - cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) - - rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst - rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul - # - # ----------------------------------------------------------------------- - # - # Generate dictionary of dictionaries that, for each combination of obs - # type needed and obs day, contains a string list of the times at which - # that type of observation is needed on that day. The elements of each - # list are formatted as 'YYYYMMDDHH'. This information is used by the - # day-based get_obs tasks in the workflow to get obs only at those times - # at which they are needed (as opposed to for the whole day). 
- # - # ----------------------------------------------------------------------- - # - vx_config = expt_config["verification"] - obs_retrieve_times_by_day \ - = get_obs_retrieve_times_by_day( - vx_config, cycle_start_times, fcst_len_dt, - fcst_output_times_all_cycles, obs_days_all_cycles) - - for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): - for obs_day, obs_retrieve_times in obs_days_dict.items(): - array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) - vx_config[array_name] = obs_retrieve_times - expt_config["verification"] = vx_config - # - # ----------------------------------------------------------------------- - # - # Remove all verification (meta)tasks for which no fields are specified. - # - # ----------------------------------------------------------------------- - # - vx_field_groups_all_by_obtype = {} - vx_metatasks_all_by_obtype = {} - - vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] - vx_metatasks_all_by_obtype["CCPA"] \ - = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] - - vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] - vx_metatasks_all_by_obtype["NOHRSC"] \ - = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] - - vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all_by_obtype["MRMS"] \ - = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] - - vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all_by_obtype["NDAS"] \ - = ["task_get_obs_ndas", - "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] - - # If there are no field groups specified for verification, remove those - # tasks that are common to all observation types. - vx_field_groups = vx_config["VX_FIELD_GROUPS"] - if not vx_field_groups: - metatask = "metatask_check_post_output_all_mems" - rocoto_config['tasks'].pop(metatask) - - # If for a given obs type none of its field groups are specified for - # verification, remove all vx metatasks for that obs type. - for obtype in vx_field_groups_all_by_obtype: - #vx_field_groups_crnt_obtype = [field for field in vx_fields if field in vx_fields_all[obtype]] - vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) - if not vx_field_groups_crnt_obtype: - for metatask in vx_metatasks_all_by_obtype[obtype]: - if metatask in rocoto_config['tasks']: - logging.info(dedent( - f""" - Removing verification (meta)task - "{metatask}" - from workflow since no fields belonging to observation type "{obtype}" - are specified for verification.""" - )) - rocoto_config['tasks'].pop(metatask) + # If running vx tasks, check and possibly reset values in expt_config + # and rocoto_config. 
# # ----------------------------------------------------------------------- # - # If there are at least some field groups to verify, then make sure that - # the base directories in which retrieved obs files will be placed are - # distinct for the different obs types. - # - # ----------------------------------------------------------------------- - # - if vx_field_groups: - obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] - obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] - obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} - obs_basedirs_orig = list(obs_basedirs_dict.values()) - obs_basedirs_uniq = list(set(obs_basedirs_orig)) - if len(obs_basedirs_orig) != len(obs_basedirs_uniq): - msg1 = dedent(f""" - The base directories for the obs files must be distinct, but at least two - are identical:""") - msg2 = '' - for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): - msg2 = msg2 + dedent(f""" - {obs_basedir_var_name} = {obs_dir}""") - msg3 = dedent(f""" - Modify these in the SRW App's user configuration file to make them distinct - and rerun. - """) - msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 - logging.error(msg) - raise ValueError(msg) + if do_vx: + # + # ----------------------------------------------------------------------- + # + # Set some variables needed for running checks on and creating new + # (derived) configuration variables for the verification. + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + # + # ----------------------------------------------------------------------- + # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification because + # there is forecast output on those days) over all cycles, both for + # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones + # (e.g. APCP). Then add these lists to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. 
+ # + # ----------------------------------------------------------------------- + # + fcst_output_times_all_cycles, obs_days_all_cycles, \ + = set_fcst_output_times_and_obs_days_all_cycles( + cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + + workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] + workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] + # + # ----------------------------------------------------------------------- + # + # Generate lists of ROCOTO cycledef strings corresonding to the obs days + # for instantaneous fields and those for cumulative ones. Then save the + # lists of cycledefs in the dictionary containing values needed to + # construct the ROCOTO XML. + # + # ----------------------------------------------------------------------- + # + cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) + cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) + + rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst + rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul + # + # ----------------------------------------------------------------------- + # + # Generate dictionary of dictionaries that, for each combination of obs + # type needed and obs day, contains a string list of the times at which + # that type of observation is needed on that day. The elements of each + # list are formatted as 'YYYYMMDDHH'. This information is used by the + # day-based get_obs tasks in the workflow to get obs only at those times + # at which they are needed (as opposed to for the whole day). + # + # ----------------------------------------------------------------------- + # + obs_retrieve_times_by_day \ + = get_obs_retrieve_times_by_day( + vx_config, cycle_start_times, fcst_len_dt, + fcst_output_times_all_cycles, obs_days_all_cycles) + + for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): + for obs_day, obs_retrieve_times in obs_days_dict.items(): + array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) + vx_config[array_name] = obs_retrieve_times + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Remove all verification (meta)tasks for which no fields are specified. 
+ # + # ----------------------------------------------------------------------- + # + vx_field_groups_all_by_obtype = {} + vx_metatasks_all_by_obtype = {} + + vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] + vx_metatasks_all_by_obtype["CCPA"] \ + = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", + "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", + "metatask_GridStat_CCPA_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_CCPA", + "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] + vx_metatasks_all_by_obtype["NOHRSC"] \ + = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] + vx_metatasks_all_by_obtype["MRMS"] \ + = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", + "metatask_GenEnsProd_EnsembleStat_MRMS", + "metatask_GridStat_MRMS_ensprob"] + + vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_metatasks_all_by_obtype["NDAS"] \ + = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", + "metatask_PointStat_NDAS_all_mems", + "metatask_GenEnsProd_EnsembleStat_NDAS", + "metatask_PointStat_NDAS_ensmeanprob"] + + # If there are no field groups specified for verification, remove those + # tasks that are common to all observation types. + vx_field_groups = vx_config["VX_FIELD_GROUPS"] + if not vx_field_groups: + metatask = "metatask_check_post_output_all_mems" + rocoto_config['tasks'].pop(metatask) + + # If for a given obs type none of its field groups are specified for + # verification, remove all vx metatasks for that obs type. + for obtype in vx_field_groups_all_by_obtype: + vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) + if not vx_field_groups_crnt_obtype: + for metatask in vx_metatasks_all_by_obtype[obtype]: + if metatask in rocoto_config['tasks']: + logging.info(dedent( + f""" + Removing verification (meta)task + "{metatask}" + from workflow since no fields belonging to observation type "{obtype}" + are specified for verification.""" + )) + rocoto_config['tasks'].pop(metatask) + # + # ----------------------------------------------------------------------- + # + # If there are at least some field groups to verify, then make sure that + # the base directories in which retrieved obs files will be placed are + # distinct for the different obs types. + # + # ----------------------------------------------------------------------- + # + if vx_field_groups: + obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] + obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} + obs_basedirs_orig = list(obs_basedirs_dict.values()) + obs_basedirs_uniq = list(set(obs_basedirs_orig)) + if len(obs_basedirs_orig) != len(obs_basedirs_uniq): + msg1 = dedent(f""" + The base directories for the obs files must be distinct, but at least two + are identical:""") + msg2 = '' + for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): + msg2 = msg2 + dedent(f""" + {obs_basedir_var_name} = {obs_dir}""") + msg3 = dedent(f""" + Modify these in the SRW App's user configuration file to make them distinct + and rerun. 
+ """) + msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 + logging.error(msg) + raise ValueError(msg) # # ----------------------------------------------------------------------- # From 2213e93074298944e2cbc7830a42cd367974d36b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 29 Oct 2024 14:44:40 -0600 Subject: [PATCH 144/208] Add WE2E tests for AI/ML models (FourCastNet, GraphCast, and PanguWeather) and GFS that all use a forecast output interval other than 1 hour (all use 6 hours) and use GDAS (instead of NDAS) as the obs in the verification. --- ...cst_custom-vx-config_aiml-fourcastnet.yaml | 63 ++++++++++++++++++ ...-fcst_custom-vx-config_aiml-graphcast.yaml | 63 ++++++++++++++++++ ...st_custom-vx-config_aiml-panguweather.yaml | 63 ++++++++++++++++++ ...vx-det_long-fcst_custom-vx-config_gfs.yaml | 66 +++++++++++++++++++ .../vx_config_det.obs_gdas.model_aiml.yaml | 54 +++++++++++++++ .../vx_config_det.obs_gdas.model_gfs.yaml | 54 +++++++++++++++ 6 files changed, 363 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml create mode 100644 tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml create mode 100644 tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml new file mode 100644 index 0000000000..47116561ed --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + FourCastNet (fcnv2) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'fcnv2' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml new file mode 100644 index 0000000000..a9067a9114 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + GraphCast (gc) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gc' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml new file mode 100644 index 0000000000..aa3e3a834d --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Pangu-Weather (pw) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'pw' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml new file mode 100644 index 0000000000..1672bd4fc9 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -0,0 +1,66 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Global Forecast System (GFS) model in a way that is comparable to vx + for several AI models [GraphCast (gc), FourCastNet (fcnv2), and Pangu- + Weather (pw)]. The idea is for this test to serve as a baseline to + which the AI vx can be compared. Thus, this test uses a custom vx + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gfs' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml new file mode 100644 index 0000000000..11bcb2e568 --- /dev/null +++ b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. +# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. +# +ADPSFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRES%%PRMSL: + Z0: [] diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml new file mode 100644 index 0000000000..9b8e25ff59 --- /dev/null +++ b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. 
+# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. +# +ADPSFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRMSL: + Z0: [] From 51972efaa52307f406e78bae30530f7e88b6f4aa Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 30 Oct 2024 10:01:38 -0600 Subject: [PATCH 145/208] Move vx configuration files, both the defaults and the custom ones used for some WE2E tests, to parm/metplus/vx_configs. --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 2 +- .../vx_configs/vx_config_det.obs_gdas.model_aiml.yaml | 0 .../vx_configs/vx_config_det.obs_gdas.model_gfs.yaml | 0 parm/metplus/{ => vx_configs}/vx_config_det.yaml | 0 parm/metplus/{ => vx_configs}/vx_config_ens.yaml | 0 ....vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...ig.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 2 +- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- ush/config_defaults.yaml | 6 +++--- 10 files changed, 8 insertions(+), 8 deletions(-) rename {tests/WE2E => parm/metplus}/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml (100%) rename {tests/WE2E => parm/metplus}/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml (100%) rename parm/metplus/{ => vx_configs}/vx_config_det.yaml (100%) rename parm/metplus/{ => vx_configs}/vx_config_ens.yaml (100%) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index df9a0dfa22..8689fdda8e 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1634,7 +1634,7 @@ General VX Parameters then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. Otherwise, ``VX_ASNOW_ACCUMS_HRS`` will be ignored. Valid values: ``6`` | ``12`` | ``18`` | ``24`` -``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_config_[det|ens].yaml``) +``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_configs/vx_config_[det|ens].yaml``) Names of configuration files for deterministic and ensemble verification that specify the field groups, field names, levels, and (if applicable) thresholds for which to run verification. 
These are relative to the diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml similarity index 100% rename from tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml rename to parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml similarity index 100% rename from tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml rename to parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml similarity index 100% rename from parm/metplus/vx_config_det.yaml rename to parm/metplus/vx_configs/vx_config_det.yaml diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_configs/vx_config_ens.yaml similarity index 100% rename from parm/metplus/vx_config_ens.yaml rename to parm/metplus/vx_configs/vx_config_ens.yaml diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index 47116561ed..b21755f3cf 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'fcnv2' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index a9067a9114..6c330f9ba5 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gc' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index aa3e3a834d..a354793981 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ 
b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'pw' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 1672bd4fc9..b7ded54bb9 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -56,7 +56,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gfs' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 3957c3c0db..fb1ff66142 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2377,10 +2377,10 @@ verification: # thresholds for which to run verification. These are relative to the # directory METPLUS_CONF in which the METplus config templates are # located. They may include leading relative paths before the file - # names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. + # names, e.g. "some_dir/another_dir/vx_config_det.yaml". # - VX_CONFIG_DET_FN: 'vx_config_det.yaml' - VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_configs/vx_config_ens.yaml' # # VX_OUTPUT_BASEDIR: # Template for base (i.e. top-level) directory in which METplus will place From 36b647fe76a9a822d254c5b79b46fc4f140a70a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 30 Oct 2024 10:49:26 -0600 Subject: [PATCH 146/208] Add the 12 new WE2E vx tests to the various test suites. 
--- tests/WE2E/machine_suites/comprehensive | 12 ++++++++++++ tests/WE2E/machine_suites/coverage.hera.gnu.com | 4 ++++ tests/WE2E/machine_suites/coverage.hera.intel.nco | 4 ++++ tests/WE2E/machine_suites/coverage.jet | 4 ++++ 4 files changed, 24 insertions(+) diff --git a/tests/WE2E/machine_suites/comprehensive b/tests/WE2E/machine_suites/comprehensive index 8397e5d0c0..8c42aa4599 100644 --- a/tests/WE2E/machine_suites/comprehensive +++ b/tests/WE2E/machine_suites/comprehensive @@ -75,3 +75,15 @@ MET_verification_only_vx pregen_grid_orog_sfc_climo specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS specify_template_filenames +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.gnu.com b/tests/WE2E/machine_suites/coverage.hera.gnu.com index c2018a6e78..09dadbaedd 100644 --- a/tests/WE2E/machine_suites/coverage.hera.gnu.com +++ b/tests/WE2E/machine_suites/coverage.hera.gnu.com @@ -9,3 +9,7 @@ MET_verification_only_vx MET_ensemble_verification_only_vx_time_lag 2019_halloween_storm 2020_jan_cold_blast +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.intel.nco b/tests/WE2E/machine_suites/coverage.hera.intel.nco index d5ab0d6fe8..cf8b92b59f 100644 --- a/tests/WE2E/machine_suites/coverage.hera.intel.nco +++ b/tests/WE2E/machine_suites/coverage.hera.intel.nco @@ -10,3 +10,7 @@ grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR pregen_grid_orog_sfc_climo +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_no-00z-obs_nssl-mpas diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 53308090b1..5078e127ef 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,3 +9,7 @@ grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_last-obs-00z_ncep-hrrr +vx-det_multicyc_no-fcst-overlap_ncep-hrrr From f426302d29fd1552cd597a15e16f90305466d178 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 2 Nov 2024 13:32:52 -0600 Subject: [PATCH 147/208] Rename vx tasks, metatasks, and variables and update documentation. Details below. * Since the vx tasks under the deterministic and ensemble vx metatasks are grouped by verification field group, rename the metatasks (in verify_det.yaml and veriify_ens.yaml and elsewhere as necessary) so that they refer to the vx field groups they apply to instead of the obs types. 
For example: * Change "metatask_GridStat_NOHRSC_all_accums_all_mems" to "metatask_GridStat_ASNOW_all_accums_all_mems". * Change "metatask_GenEnsProd_EnsembleStat_CCPA" to "metatask_GenEnsProd_EnsembleStat_APCP_all_accums" (add the "_all_accums" at the end to be consistent with the way other metatask names for cumulative field groups are named). * Change "metatask_GridStat_MRMS_ensprob" to "metatask_GridStat_ensprob_REFC_RETOP". * Replace the "ADPSFC" and "ADPUPA" field groups with "SFC" and "UPA", respectively, since the "ADP" part is specific to NDAS obs, and we want the verification field groups to have general names that are not connected to the obs type. * Move "_obs" and "_fcst" substrings in the names of several pre-vx tasks/metatasks (in verify_pre.yaml) towards the end for clarity/consistency. * For clarity, change the environment variable "VAR" in the vx tasks (where it is set to the name of the field group to be verified) to "FIELD_GROUP". Make corresponding necessary changes elsewhere (e.g. in the ex-scripts for the vx tasks). * Expand jinja2 code in verify_pre.yaml from a one-liner to multiple lines to make it easier to understand. * Make corresponding modifications in documentation. --- .../BuildingRunningTesting/RunSRW.rst | 366 +++++++++++++----- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 15 +- parm/wflow/verify_det.yaml | 40 +- parm/wflow/verify_ens.yaml | 74 ++-- parm/wflow/verify_pre.yaml | 41 +- scripts/exregional_check_post_output.sh | 1 - ...onal_run_met_genensprod_or_ensemblestat.sh | 8 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 10 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 10 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 9 +- scripts/exregional_run_met_pb2nc_obs.sh | 4 +- scripts/exregional_run_met_pcpcombine.sh | 6 +- ...cst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...st_custom-vx-config_aiml-panguweather.yaml | 2 +- ...vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- ush/config_defaults.yaml | 15 +- ush/set_vx_params.sh | 81 ++-- ush/setup.py | 36 +- ush/valid_param_vals.yaml | 2 +- 20 files changed, 449 insertions(+), 277 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 0eb10e1519..de4e5e54d8 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -203,15 +203,6 @@ The user must set the specifics of their experiment configuration in a ``config. * - ACCOUNT - "" - "an_account" - * - CCPA_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - - "" - * - MRMS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - - "" - * - NDAS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" - - "" * - USE_CRON_TO_RELAUNCH - false - false @@ -269,9 +260,6 @@ The user must set the specifics of their experiment configuration in a ``config. * - NUM_ENS_MEMBERS - 1 - 2 - * - VX_FCST_MODEL_NAME - - '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - - FV3_GFS_v16_CONUS_25km .. _GeneralConfig: @@ -639,7 +627,7 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. 
Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) [what would need to change in the machine file?] or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). .. note:: If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: @@ -658,7 +646,12 @@ To use METplus verification, MET and METplus modules need to be installed. To t tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' -:numref:`Table %s ` indicates which functions each ``verify_*.yaml`` file configures. Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary for both deterministic and ensemble VX. Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. +:numref:`Table %s ` indicates which verification capabilities/workflow tasks each ``verify_*.yaml`` file enables. +Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary +for both deterministic and ensemble VX, including retrieval of obs files from various data stores (e.g. NOAA's HPSS) if those +files do not already exist on disk at the locations specified by some of the parameters in the ``verification:`` section of +``config_defaults.yaml`` and/or ``config.yaml`` (see ?? for details). +Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. .. _VX-yamls: @@ -669,11 +662,11 @@ To use METplus verification, MET and METplus modules need to be installed. To t * - File - Description * - verify_pre.yaml - - Contains (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) + - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) * - verify_det.yaml - - Perform deterministic vx + - Enables (meta)tasks that perform deterministic vx on a single forecast or on each member of an ensemble forecast * - verify_ens.yaml - - Perform ensemble vx (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) + - Enables (meta)tasks that perform ensemble vx on an ensemble of forecasts as a whole (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) The ``verify_*.yaml`` files include the definitions of several common verification tasks by default. Individual verification tasks appear in :numref:`Table %s `. The tasks in the ``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in ``config.yaml``. 
For example, to turn off PointStat tasks: @@ -688,21 +681,51 @@ The ``verify_*.yaml`` files include the definitions of several common verificati More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the data, the default ``verify_pre.yaml`` taskgroup will activate the tasks, and the workflow will attempt to download the appropriate data from NOAA HPSS. In this case, the ``*_OBS_DIR`` paths must be set to the location where users want the downloaded data to reside. +If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the default ``verify_pre.yaml`` +taskgroup will activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required +files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must +be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` +must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) +that will be used to name the obs files. (Here, the ``*`` represents any one of the obs types :term:`CCPA`, +:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`.) -Users who do not have access to NOAA HPSS and do not have the data on their system will need to download :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, such as the ones listed `here `__. +Users who do not have access to NOAA HPSS and do not have the data on their system will need to download +:term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, +such as the ones listed here [is there supposed to be a link here?]. -Users who have already staged the observation data needed for METplus (i.e., the :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data) on their system should set the path to this data in ``config.yaml``. +Users who have already staged the observation data needed for verification on their system should set +``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and +file names For example, for a case in which all four types of obs are needed for vx, these variables +might be set as follows: .. 
code-block:: console - platform: - CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa/proc - NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc/proc - MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms/proc - NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas/proc + verification: + + CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa + NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc + MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms + NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas + + OBS_CCPA_FN_TEMPLATES: [ 'APCP', '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' ] + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', 'sfav2_CONUS_6h_{valid?fmt=%Y%m%d%H}_grid184.grb2' ] + OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + +If one of the days encompassed by the experiment was 20240429, and if one of the hours during +that day at which vx will be performed was 03, then, taking the CCPA obs type as an example, +one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk +corresponding to this day and hour at + +``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2`` -After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``platform:`` section, users can proceed to generate the experiment, which will perform VX tasks in addition to the default workflow tasks. +As described above, if this file does not exist, it will try to retrieve it from a data store +and place it at this location. + +After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:`` +section, users can proceed to generate the experiment, which will perform VX tasks in addition +to the default workflow tasks. .. _GenerateWorkflow: @@ -800,94 +823,231 @@ In addition to the baseline tasks described in :numref:`Table %s ` below. The column "taskgroup" indicates the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` (see :numref:`Section %s ` for more details). For each task, ``mem###`` refers to either ``mem000`` (if running a deterministic forecast) or a specific forecast member number (if running an ensemble forecast). "Metatasks" indicate task definitions that will become more than one workflow task based on different variables, number of hours, etc., as described in the Task Description column. See :numref:`Section %s ` for more details about metatasks. + +METplus verification tasks and metatasks are described in :numref:`Table %s ` below. +The ``taskgroup`` entry after the name of each task or metatask indicates the taskgroup file that must be +included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` in order for that task or +metatask to be considered for inclusion in the workflow (see :numref:`Section %s ` for more +details). Metatasks define a set of tasks in the workflow based on multiple values of one or more parameters +such as the ensemble member index, the accumulation interval (for cumulative fields such as accumulated +precipitation), and the name of the verificaiton field group (see description of ``VX_FIELD_GROUPS`` in +:numref:`Section %s `). 
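A minimal shell sketch of this expansion (an illustration only, not part of the SRW App or of this patch: it assumes the default APCP accumulation intervals and a two-member ensemble, whereas the real expansion is performed by rocoto) is:

.. code-block:: console

   # Mimic how a single vx metatask expands into individual rocoto task names
   # by looping over the accumulation interval and the ensemble member index.
   for accum_hh in 01 03 06 24 ; do      # VX_APCP_ACCUMS_HRS, zero-padded
     for mem in 001 002 ; do             # assumed member indices; a single deterministic forecast uses 000
       echo "run_MET_GridStat_vx_APCP${accum_hh}h_mem${mem}"
     done
   done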
See :numref:`Section %s ` for more details +about metatasks. .. _VXWorkflowTasksTable: -.. list-table:: Verification (VX) Workflow Tasks in the SRW App - :widths: 20 20 50 +.. list-table:: Verification (VX) Workflow Tasks and Metatasks in the SRW App + :widths: 5 95 :header-rows: 1 - * - Workflow Task - - ``taskgroup`` + * - Workflow Task (``taskgroup``) - Task Description - * - :bolditalic:`task_get_obs_ccpa` - - ``verify_pre.yaml`` - - If user has staged :term:`CCPA` data for verification, checks to ensure that data exists in the specified location (``CCPA_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA :term:`HPSS`. - * - :bolditalic:`task_get_obs_ndas` - - ``verify_pre.yaml`` - - If user has staged :term:`NDAS` data for verification, checks to ensure that data exists in the specified location (``NDAS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. - * - :bolditalic:`task_get_obs_nohrsc` - - ``verify_pre.yaml`` - - Retrieves and organizes hourly :term:`NOHRSC` data from NOAA HPSS. Can only be run if ``verify_pre.yaml`` is included in a ``tasksgroups`` list *and* user has access to NOAA :term:`HPSS` data. ``ASNOW`` should also be added to the ``VX_FIELDS`` list. - * - :bolditalic:`task_get_obs_mrms` - - ``verify_pre.yaml`` - - If user has staged :term:`MRMS` data for verification, checks to ensure that data exists in the specified location (``MRMS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. - * - :bolditalic:`task_run_MET_Pb2nc_obs` - - ``verify_pre.yaml`` - - Converts files from prepbufr to NetCDF format. - * - :bolditalic:`metatask_PcpCombine_obs` - - ``verify_pre.yaml`` - - Derives 3-hr, 6-hr, and 24-hr accumulated precipitation observations from the 1-hr observation files. In log files, tasks will be named like ``MET_PcpCombine_obs_APCP##h``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_check_post_output_all_mems` - - ``verify_pre.yaml`` - - Ensures that required post-processing tasks have completed and that the output exists in the correct form and location for each forecast member. In log files, tasks will be named like ``check_post_output_mem###``. - * - :bolditalic:`metatask_PcpCombine_fcst_APCP_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated precipitation forecast for 3-hr, 6-hr, and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_APCP##h_mem###``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated snow forecast for 6-hr and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_ASNOW##h_mem###``, where ``##h`` is 06h or 24h. - * - :bolditalic:`metatask_GridStat_CCPA_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_GridStat_vx_APCP##h_mem###``. - * - :bolditalic:`metatask_GridStat_NOHRSC_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_GridStat_vx_ASNOW##h_mem###``. 
- * - :bolditalic:`metatask_GridStat_MRMS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_REFC_mem###`` or ``run_MET_GridStat_vx_RETOP_mem###``. - * - :bolditalic:`metatask_PointStat_NDAS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-point verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_SFC_mem###`` or ``run_MET_PointStat_vx_UPA_mem###``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_CCPA` :raw-html:`
` - (formerly *VX_ENSGRID_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_APCP##h`` or ``run_MET_GenEnsProd_vx_APCP##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NOHRSC` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_ASNOW##h`` or ``run_MET_GenEnsProd_vx_ASNOW##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_MRMS` :raw-html:`
` - (formerly *VX_ENSGRID_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[REFC|RETOP]`` or ``run_MET_EnsembleStat_vx_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_CCPA_ensmeanprob_all_accums` :raw-html:`
` - (formerly *VX_ENSGRID_MEAN_##h* and *VX_ENSGRID_PROB_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation and (2) 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_APCP##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_APCP##h``, where ``##h`` is 01h, 03h, 06h, or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_NOHRSC_ensmeanprob_all_accums` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 6-h and 24h (i.e., daily) accumulated snow and (2) 6-h and 24h (i.e., daily) accumulated snow probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_ASNOW##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_ASNOW##h``, where ``##h`` is 06h or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_MRMS_ensprob` :raw-html:`
` - (formerly *VX_ENSGRID_PROB_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for ensemble probabilities for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_ensprob_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NDAS` :raw-html:`
` - (formerly *VX_ENSPOINT*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point ensemble verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[SFC|UPA]`` or ``run_MET_EnsembleStat_vx_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_PointStat_NDAS_ensmeanprob` :raw-html:`
` - (formerly *VX_ENSPOINT_[MEAN|PROB]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point verification for (1) ensemble mean surface and upper-air variables and (2) ensemble probabilities for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_ensmean_[SFC|UPA]`` or ``run_MET_PointStat_vx_ensprob_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. + + * - :bolditalic:`task_get_obs_ccpa` (``verify_pre.yaml``) + - Checks for existence of staged :term:`CCPA` obs files at locations specified by ``CCPA_OBS_DIR`` + and ``OBS_CCPA_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_nohrsc` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NOHRSC` obs files at locations specified by ``NOHRSC_OBS_DIR`` + and ``OBS_NOHRSC_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_mrms` (``verify_pre.yaml``) + - Checks for existence of staged :term:`MRMS` obs files at locations specified by ``MRMS_OBS_DIR`` + and ``OBS_MRMS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'REFC'`` and/or ``'RETOP'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_ndas` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NDAS` obs files at locations specified by ``NDAS_OBS_DIR`` + and ``OBS_NDAS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if `'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_run_MET_Pb2nc_obs_NDAS` (``verify_pre.yaml``) + - Converts NDAS obs prepbufr files to NetCDF format. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_obs_CCPA` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals, + e.g. if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, + then groups of 3 successive 1-hour APCP values in the obs are added to obtain the 3-hour values. + In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g. ``01``, ``03``, ``06``, etc) + for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. 
Files for accumulation intervals larger than the one + provided in the obs are obtained by adding ASNOW values over multiple obs accumulation intervals, + e.g. if the obs contain 6-hour accumulations and 24-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, + then groups of 4 successive 6-hour ASNOW values in the obs are added to obtain the 24-hour values. + In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_obs_NOHRSC``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g. ``06``, ``24``, etc) for which + the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_check_post_output_all_mems` (``verify_pre.yaml``) + - Set of tasks that ensure that the post-processed forecast files required for verification exist in + the locations specified by ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE``. + In rocoto, the tasks under this metatask are named ``check_post_output_mem{mem_indx}``, where ``{mem_indx}`` + is the index of the ensemble forecast member. This takes on the values ``001``, ``002``, ... for an + ensemble of forecasts or just ``000`` for a single deterministic forecast. This metatask is included + in the workflow if at least one other verification task or metatask is included. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding APCP values over multiple forecast accumulation + intervals, e.g. if the forecasts contain 1-hour accumulations and 3-hr accumulation is specified + in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the forecasts are + added to obtain the 3-hour values. In rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_APCP{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) and + the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for + which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding ASNOW values over multiple forecast accumulation + intervals, e.g. if the forecasts contain 1-hour accumulations and 6-hr accumulation is specified + in ``VX_ASNOW_ACCUMS_HRS``, then groups of 6 successive 1-hour ASNOW values in the forecasts are + added to obtain 6-hour values. In rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_ASNOW{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is + being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. 
+ + * - :bolditalic:`metatask_GridStat_APCP_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated precipitation (represented by the + verification field group ``APCP``) for the intervals specified in ``VX_APCP_ACCUMS_HRS``. In rocoto, + the tasks under this metatask are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_mem{mem_indx}``, + where ``{accum_intvl}`` and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, + ``03``, ``06``, etc) and the ensemble forecast member index (or just ``000`` for a single deterministic + forecast) for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated snowfall (represented by the verification + field group ``ASNOW``) for the intervals specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under + this metatask are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_mem{mem_indx}``, where ``{accum_intvl}`` + and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being + run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_REFC_RETOP_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of :term:`composite reflectivity` (represented by + the verification field group ``REFC``) and :term:`echo top` (represented by the verification field + group ``RETOP``). In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_mem{mem_indx}``, + where ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``REFC`` or ``RETOP``) + and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which + the task is being run. The tasks for ``REFC`` are included in the workflow only if ``'REFC'`` is + included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``'RETOP'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_SFC_UPA_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-point verification of surface fields (represented by the verification field + group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``). In rocoto, + the tasks under this metatask are named ``run_MET_PointStat_vx_{field_group}_mem{mem_indx}``, where + ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``SFC`` or ``UPA``) and the + ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task + is being run. The tasks for the surface fields are included in the workflow only if ``'SFC'`` is included + in ``VX_FIELD_GROUPS``, and the ones for the upper-air fields are included only if ``'UPA'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_APCP_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on APCP for the intervals + specified in ``VX_APCP_ACCUMS_HRS``. 
In rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_APCP{accum_intvl}h``, and the ones that run `EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_APCP{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. This metatask + is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on ASNOW for the intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_ASNOW{accum_intvl}h`` and the ones that run `EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_ASNOW{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval (in hours, e.g. ``06``, ``24``, etc) for which the tasks are being run. This metatask will be + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_REFC_RETOP` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on REFC (:term:`composite + reflectivity`) and RETOP (:term:`echo top`). In rocoto, the tasks under this metatask that run + ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, and the ones that run `EnsembleStat`` + are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` is the field group (in + this case either ``REFC`` or ``RETOP``) for which the tasks are being run. The tasks for ``REFC`` are + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``DO_ENSEMBLE`` is + set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_SFC_UPA` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on surface fields (represented + by the verification field group ``SFC``) and upper-air fields (represented by the verification field group + ``UPA``). In rocoto, the tasks under this metatask that run ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, + and the ones that run `EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` + is the field group (in this case either ``SFC`` or ``UPA``) for which the tasks are being run. The tasks for + ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to + ``True`` in ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensmeanprob_APCP_all_accums` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic + verification of the ensemble of APCP forecasts as a whole. 
In rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_APCP{accum_intvl}h``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_APCP{accum_intvl}h``, where + ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensmeanprob_ASNOW_all_accums` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic + verification of the ensemble of ASNOW forecasts as a whole. In rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_ASNOW{accum_intvl}h``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_ASNOW{accum_intvl}h``, where + ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensprob_REFC_RETOP` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` + (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group + ``RETOP``). (Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) + In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_ensprob_{field_group}``, where + ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is + being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included + only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_ensmeanprob_SFC_UPA` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of surface fields (represented by the + verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) + as well as grid-to-grid probabilistic verification of the ensemble of the surface and upper-air field + forecasts as a whole. In rocoto, the tasks under this metatask for ensemble mean verification are named + ``run_MET_PointStat_vx_ensmean_{field_group}``, and the ones for ensemble probabilistic verification are + named ``run_MET_PointStat_vx_ensprob_{field_group}``, where ``{field_group}`` is the field group (in this + case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the + workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in + ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in + ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + .. _Run: @@ -1181,7 +1341,7 @@ Each task should finish with error code 0. 
For example: End exregional_get_extrn_mdl_files.sh at Wed Nov 16 18:08:19 UTC 2022 with error code 0 (time elapsed: 00:00:01) -Check the batch script output file in your experiment directory for a “SUCCESS” message near the end of the file. +Check the batch script output file in your experiment directory for a "SUCCESS" message near the end of the file. .. _RegionalWflowTasks: diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 8689fdda8e..231dc49dd5 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1611,7 +1611,7 @@ Non-default parameters for verification tasks are set in the ``verification:`` s General VX Parameters --------------------------------- -``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]) +``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]) The groups of fields (some of which may consist of only a single field) on which to run verification. @@ -1620,7 +1620,7 @@ General VX Parameters HPSS for retrospective cases before March 2020, by default ``ASNOW`` is not included ``VX_FIELD_GROUPS``, but it may be added to this list in order to include the verification tasks for ``ASNOW`` in the workflow. Valid values: - ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"ADPSFC"`` | ``"ADPUPA"`` + ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` ``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) The accumulation intervals (in hours) to include in the verification of @@ -1716,7 +1716,7 @@ VX Parameters for Observations ``OBS_NDAS_FN_TEMPLATES``: .. code-block:: console - [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] File name templates for various obs types. These are meant to be used in METplus configuration files and thus contain METplus time formatting @@ -1843,11 +1843,12 @@ VX Parameters for Observations NOHRSC observations. These files will contain observed accumulated snowfall for various accumulaton intervals. -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) +``OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's Pb2nc tool on NDAS - observations. These files will contain the observed ADPSFC or ADPUPA - fields in NetCDF format (instead of NDAS's native prepbufr format). + worfklow verification tasks that call METplus's Pb2nc tool on the + prepbufr files in NDAS observations. These files will contain the + observed surface (SFC) and upper-air (UPA) fields in NetCDF format + (instead of NDAS's native prepbufr format). 
``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) For verification tasks that need observational data, this specifies diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index f416ce7974..c090ea8b0c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -21,10 +21,10 @@ default_task_verify_det: &default_task_verify_det queue: '&QUEUE_DEFAULT;' walltime: 00:30:00 -metatask_GridStat_CCPA_all_accums_all_mems: +metatask_GridStat_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_CCPA_APCP#ACCUM_HH#h_all_mems: + metatask_GridStat_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: @@ -36,7 +36,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' @@ -50,15 +50,15 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_NOHRSC_all_accums_all_mems: +metatask_GridStat_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_NOHRSC_ASNOW#ACCUM_HH#h_all_mems: + metatask_GridStat_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: @@ -70,7 +70,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' @@ -84,24 +84,24 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_MRMS_all_mems: +metatask_GridStat_REFC_RETOP_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_GridStat_MRMS_mem#mem#: + metatask_GridStat_REFC_RETOP_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' 
OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" @@ -136,19 +136,19 @@ metatask_GridStat_MRMS_all_mems: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' -metatask_PointStat_NDAS_all_mems: +metatask_PointStat_SFC_UPA_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_PointStat_NDAS_mem#mem#: + metatask_PointStat_SFC_UPA_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' ENSMEM_INDX: "#mem#" diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 8aed2d02b3..63d5392af1 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -21,7 +21,7 @@ default_task_verify_ens: &default_task_verify_ens queue: '&QUEUE_DEFAULT;' walltime: 01:00:00 -metatask_GenEnsProd_EnsembleStat_CCPA: +metatask_GenEnsProd_EnsembleStat_APCP_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h: &task_GenEnsProd_CCPA @@ -31,7 +31,7 @@ metatask_GenEnsProd_EnsembleStat_CCPA: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'CCPA' FCST_LEVEL: 'A#ACCUM_HH#' @@ -39,7 +39,7 @@ metatask_GenEnsProd_EnsembleStat_CCPA: dependency: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metatask: PcpCombine_APCP#ACCUM_HH#h_fcst_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -50,12 +50,12 @@ metatask_GenEnsProd_EnsembleStat_CCPA: and: taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_NOHRSC: +metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h: &task_GenEnsProd_NOHRSC @@ -65,7 +65,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NOHRSC' FCST_LEVEL: 'A#ACCUM_HH#' @@ -74,7 +74,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: and: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems + metatask: PcpCombine_ASNOW#ACCUM_HH#h_fcst_all_mems task_run_MET_EnsembleStat_vx_ASNOW#ACCUM_HH#h: <<: *task_GenEnsProd_NOHRSC envars: @@ -85,22 +85,22 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: and: taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: 
run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_MRMS: +metatask_GenEnsProd_EnsembleStat_REFC_RETOP: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_MRMS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_MRMS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_MRMS <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' @@ -110,7 +110,7 @@ metatask_GenEnsProd_EnsembleStat_MRMS: metataskdep_check_post_output: &check_post_output attrs: metatask: check_post_output_all_mems - task_run_MET_EnsembleStat_vx_#VAR#: + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_MRMS envars: <<: *envars_GenEnsProd_MRMS @@ -140,18 +140,18 @@ metatask_GenEnsProd_EnsembleStat_MRMS: {%- endfor %}' taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GenEnsProd_EnsembleStat_NDAS: +metatask_GenEnsProd_EnsembleStat_SFC_UPA: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_NDAS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_NDAS <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NDAS' ACCUM_HH: '01' @@ -161,7 +161,7 @@ metatask_GenEnsProd_EnsembleStat_NDAS: dependency: metataskdep_check_post_output: <<: *check_post_output - task_run_MET_EnsembleStat_vx_#VAR#: + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_NDAS envars: <<: *envars_GenEnsProd_NDAS @@ -190,13 +190,13 @@ metatask_GenEnsProd_EnsembleStat_NDAS: {%- endfor %}' taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GridStat_CCPA_ensmeanprob_all_accums: +metatask_GridStat_ensmeanprob_APCP_all_accums: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_CCPA_ens#statlc#_all_accums: + metatask_GridStat_ens#statlc#_APCP_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_ens#statlc#_APCP#ACCUM_HH#h: @@ -205,7 +205,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' ACCUM_HH: '#ACCUM_HH#' @@ -219,11 +219,11 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GridStat_NOHRSC_ensmeanprob_all_accums: 
+metatask_GridStat_ensmeanprob_ASNOW_all_accums: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_NOHRSC_ens#statlc#_all_accums: + metatask_GridStat_ens#statlc#_ASNOW_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_ens#statlc#_ASNOW#ACCUM_HH#h: @@ -232,7 +232,7 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' ACCUM_HH: '#ACCUM_HH#' @@ -246,17 +246,17 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GridStat_MRMS_ensprob: +metatask_GridStat_ensprob_REFC_RETOP: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ensprob_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_ensprob_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' envars: <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' @@ -267,22 +267,22 @@ metatask_GridStat_MRMS_ensprob: <<: *all_get_obs_mrms_complete taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_PointStat_NDAS_ensmeanprob: +metatask_PointStat_ensmeanprob_SFC_UPA: var: stat: MEAN PROB statlc: mean prob - metatask_PointStat_NDAS_ens#statlc#: + metatask_PointStat_ens#statlc#_SFC_UPA: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_ens#statlc#_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' ACCUM_HH: '01' @@ -294,4 +294,4 @@ metatask_PointStat_NDAS_ensmeanprob: <<: *all_pb2nc_obs_ndas_complete taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index d5ce7885e2..2b86772565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -79,7 +79,7 @@ task_run_MET_Pb2nc_obs_NDAS: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars - VAR: ADPSFC + FIELD_GROUP: 'SFC' ACCUM_HH: '01' FCST_OR_OBS: OBS OBTYPE: NDAS @@ -98,10 +98,10 @@ task_run_MET_Pb2nc_obs_NDAS: attrs: task: get_obs_ndas -metatask_PcpCombine_obs_APCP_all_accums_CCPA: +metatask_PcpCombine_APCP_all_accums_obs_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA: + 
task_run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -109,7 +109,7 @@ metatask_PcpCombine_obs_APCP_all_accums_CCPA: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: OBS OBTYPE: CCPA @@ -140,10 +140,10 @@ metatask_PcpCombine_obs_APCP_all_accums_CCPA: {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' -metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: +metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC: + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -151,7 +151,7 @@ metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: OBS OBTYPE: NOHRSC @@ -193,7 +193,6 @@ metatask_check_post_output_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_CHECK_POST_OUTPUT"' envars: <<: *default_vars - VAR: APCP ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast @@ -232,7 +231,15 @@ metatask_check_post_output_all_mems: # metatask: run_post_mem#mem#_all_fhrs taskdep: attrs: - task: '{% for h in range(0, workflow.LONG_FCST_LEN+1) %}{% if h > 0 %}{{" \n"}}{% endif %}{%- endfor -%}' + task: '{%- for h in range(0, workflow.LONG_FCST_LEN+1) %} + {%- if h > 0 %} + {{- " \n" }} + {%- endif %} + {%- endfor %}' # This "and" is to check whether post is being run inline (i.e. as part of # the weather model), and if so, to ensure that the forecast task for the # current member has completed. 
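For readers following the renamed PcpCombine metatasks above, the ``ACCUM_HH`` metatask variable is produced by a Jinja expression. Below is a minimal sketch of how that expression expands, rendering it directly with jinja2 and hypothetical configuration values; it is not the workflow-generation code itself.

.. code-block:: python

   from jinja2 import Template

   # Same Jinja expression as the ACCUM_HH metatask variable above.
   accum_expr = ('{% for ah in verification.VX_APCP_ACCUMS_HRS %}'
                 '{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}'
                 '{% endfor %}')

   ctx = {
       "verification": {"VX_APCP_ACCUMS_HRS": [1, 3, 6, 24]},  # hypothetical settings
       "workflow": {"FCST_LEN_HRS": 6},
   }
   # Accumulations longer than the forecast are dropped; prints "01 03 06 "
   print(Template(accum_expr).render(**ctx))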
@@ -259,13 +266,13 @@ metatask_check_post_output_all_mems: taskvalid: <<: *fcst_task_exists -metatask_PcpCombine_fcst_APCP_all_accums_all_mems: +metatask_PcpCombine_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems: + metatask_PcpCombine_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -273,7 +280,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: CCPA @@ -287,13 +294,13 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' walltime: 00:30:00 -metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: +metatask_PcpCombine_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems: + metatask_PcpCombine_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -301,7 +308,7 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: NOHRSC diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 2a66a2fecf..4d5836519c 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -11,7 +11,6 @@ # CDATE # ENSMEM_INDX # GLOBAL_VAR_DEFNS_FP -# VAR # METPLUS_ROOT (used by ush/set_leadhrs.py) # # Experiment variables diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 934ba63283..d02adddf77 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -107,7 +107,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -158,7 +158,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi @@ -183,7 +183,7 @@ for (( i=0; i<${NUM_ENS_MEMBERS}; i++ )); do time_lag=$( bc -l <<< 
"${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) - if [ "${VAR}" = "APCP" ] || [ "${VAR}" = "ASNOW" ]; then + if [ "${FIELD_GROUP}" = "APCP" ] || [ "${FIELD_GROUP}" = "ASNOW" ]; then template="${cdate_ensmem_subdir_or_null:+${cdate_ensmem_subdir_or_null}/}metprd/PcpCombine_fcst/${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" else template="${FCST_SUBDIR_TEMPLATE}/${FCST_FN_TEMPLATE}" @@ -387,7 +387,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 6200b0ba7e..2641080fed 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -69,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform deterministic verification of the specified field (VAR) -for a single forecast. +tool to perform deterministic verification of the specified field gropup +(FIELD_GROUP) for a single forecast. ========================================================================" # #----------------------------------------------------------------------- @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" # ADPUPA field groups. set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -199,7 +199,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" @@ -378,7 +378,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 0bfcff36d6..424756d72b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -69,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble -mean. +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble mean. 
========================================================================" # #----------------------------------------------------------------------- @@ -96,7 +96,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -143,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi @@ -330,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 0e8d44578c..e6ad107e81 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -69,7 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble frequencies/probabilities. ========================================================================" # @@ -96,7 +97,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -142,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) @@ -329,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 3e6631cd1d..d1d055fe66 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -118,7 +118,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="ADP${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -140,7 +140,7 @@ OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_FN_TEMPLATES[1]} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) 
+OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index c60ac30e36..23b10f6ff8 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -199,7 +199,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + fn_template=$(eval echo \${OBS_${OBTYPE}_${FIELD_GROUP}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -394,7 +394,7 @@ settings="\ 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' - 'input_field_group': '${VAR:-}' + 'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' " diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index b21755f3cf..e5db0cd451 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'fcnv2' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 6c330f9ba5..5411b0a34f 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gc' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index a354793981..9a088cf468 100644 --- 
a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'pw' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index b7ded54bb9..9fbca68833 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -53,7 +53,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gfs' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index fb1ff66142..aa4cdf99e0 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2353,7 +2353,7 @@ verification: # included VX_FIELD_GROUPS, but it may be added to this list in order to # include the verification tasks for ASNOW in the workflow. # - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ] # # VX_APCP_ACCUMS_HRS: # The accumulation intervals (in hours) to include in the verification of @@ -2546,7 +2546,7 @@ verification: {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] # # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Flag specifying whether to remove the "raw" observation directories @@ -2580,13 +2580,14 @@ verification: OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: # METplus template for the names of the NetCDF files generated by the - # worfklow verification tasks that call METplus's Pb2nc tool on NDAS - # observations. These files will contain the observed ADPSFC or ADPUPA - # fields in NetCDF format (instead of NDAS's native prepbufr format). + # worfklow verification tasks that call METplus's Pb2nc tool on the + # prepbufr files in NDAS observations. These files will contain the + # observed surface (SFC) and upper-air (UPA) fields in NetCDF format + # (instead of NDAS's native prepbufr format). 
# - OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' + OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' # # NUM_MISSING_OBS_FILES_MAX: # For verification tasks that need observational data, this specifies diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 267cd6902f..e17a360c38 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -3,9 +3,10 @@ # # This file defines a function that sets various parameters needed when # performing verification. The way these parameters are set depends on -# the field being verified and, if the field is cumulative (e.g. -# accumulated precipitation or snowfall), the accumulation period -# (both of which are inputs to this function). +# the field group being verified and, if the field group consists of a +# set of cumulative fields (e.g. accumulated precipitation or accumulated +# snowfall), the accumulation interval (both of which are inputs to this +# function). # # As of 20220928, the verification tasks in the SRW App workflow use the # MET/METplus software (MET = Model Evaluation Tools) developed at the @@ -54,7 +55,7 @@ function set_vx_params() { # local valid_args=( \ "obtype" \ - "field" \ + "field_group" \ "accum_hh" \ "outvarname_grid_or_point" \ "outvarname_fieldname_in_obs_input" \ @@ -108,15 +109,17 @@ be a 2-digit integer: # # grid_or_point: # String that is set to either "grid" or "point" depending on whether -# the field in consideration has obs that are gridded or point-based. +# obs type containing the field group is gridded or point-based. # # fieldname_in_obs_input: -# String used to search for the field in the input observation files -# read in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input observation files read in by MET. +# If not, this is set to a null string. # # fieldname_in_fcst_input: -# String used to search for the field in the input forecast files read -# in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input forecast files read in by MET. +# If not, this is set to a null string. 
# # fieldname_in_MET_output: # String that will be used in naming arrays defined in MET output files @@ -140,21 +143,21 @@ be a 2-digit integer: "CCPA") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "APCP") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -163,21 +166,21 @@ this observation type (obtype) and field (field) combination: "NOHRSC") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "ASNOW") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -186,28 +189,28 @@ this observation type (obtype) and field (field) combination: "MRMS") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "REFC") fieldname_in_obs_input="MergedReflectivityQCComposite" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; "RETOP") fieldname_in_obs_input="EchoTop18" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -216,28 +219,28 @@ this observation type (obtype) and field (field) combination: "NDAS") _grid_or_point_="point" - case "${field}" in + case "${field_group}" in "ADPSFC") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; "ADPUPA") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; *) print_err_msg_exit "\ 
A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac diff --git a/ush/setup.py b/ush/setup.py index 01db240362..b9ca7df848 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -737,35 +737,35 @@ def _remove_tag(tasks, tag): vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] vx_metatasks_all_by_obtype["CCPA"] \ = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + "metatask_PcpCombine_APCP_all_accums_obs_CCPA", + "metatask_PcpCombine_APCP_all_accums_all_mems", + "metatask_GridStat_APCP_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_APCP_all_accums", + "metatask_GridStat_ensmeanprob_APCP_all_accums"] vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] vx_metatasks_all_by_obtype["NOHRSC"] \ = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + "metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC", + "metatask_PcpCombine_ASNOW_all_accums_all_mems", + "metatask_GridStat_ASNOW_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums", + "metatask_GridStat_ensmeanprob_ASNOW_all_accums"] vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] vx_metatasks_all_by_obtype["MRMS"] \ = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] + "metatask_GridStat_REFC_RETOP_all_mems", + "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", + "metatask_GridStat_ensprob_REFC_RETOP"] vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] vx_metatasks_all_by_obtype["NDAS"] \ = ["task_get_obs_ndas", "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] + "metatask_PointStat_SFC_UPA_all_mems", + "metatask_GenEnsProd_EnsembleStat_SFC_UPA", + "metatask_PointStat_ensmeanprob_SFC_UPA"] # If there are no field groups specified for verification, remove those # tasks that are common to all observation types. 
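To make the ``setup.py`` changes above easier to follow, here is a simplified, hypothetical sketch (not the actual ``setup.py`` code) of the pruning step these hunks feed into: all verification (meta)tasks tied to an observation type are dropped when none of that type's field groups appear in ``VX_FIELD_GROUPS``. The helper name and the ``task_make_grid`` entry are illustrative only.

.. code-block:: python

   def prune_vx_metatasks(rocoto_tasks, vx_field_groups,
                          field_groups_by_obtype, metatasks_by_obtype):
       # Drop every (meta)task of an obs type whose field groups were not requested.
       for obtype, groups in field_groups_by_obtype.items():
           if not any(g in vx_field_groups for g in groups):
               for metatask in metatasks_by_obtype[obtype]:
                   rocoto_tasks.pop(metatask, None)  # remove if present
       return rocoto_tasks

   # Hypothetical example: no MRMS field groups requested, so MRMS tasks go away.
   tasks = {"task_get_obs_mrms": {},
            "metatask_GridStat_REFC_RETOP_all_mems": {},
            "task_make_grid": {}}
   pruned = prune_vx_metatasks(
       tasks, ["APCP", "SFC"],
       {"MRMS": ["REFC", "RETOP"]},
       {"MRMS": ["task_get_obs_mrms", "metatask_GridStat_REFC_RETOP_all_mems"]},
   )
   print(sorted(pruned))  # ['task_make_grid']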
@@ -785,8 +785,8 @@ def _remove_tag(tasks, tag): f""" Removing verification (meta)task "{metatask}" - from workflow since no fields belonging to observation type "{obtype}" - are specified for verification.""" + from workflow since no field groups from observation type "{obtype}" are + specified for verification.""" )) rocoto_config['tasks'].pop(metatask) # diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 017404aa2e..16f0aeb9ae 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] +valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From 522c573aab91a21ff053fc0b2e1c32066c747c2f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 2 Nov 2024 20:49:15 -0600 Subject: [PATCH 148/208] Changes from the feature/daily_obs_tasks_doc_mods that were accidentally left out. --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 1 + ush/config.community.yaml | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 231dc49dd5..01bc917594 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1607,6 +1607,7 @@ Non-default parameters for verification tasks are set in the ``verification:`` s * ``mm`` refers to the 2-digit valid minutes of the hour * ``SS`` refers to the two-digit valid seconds of the hour +.. _GeneralVXParams: General VX Parameters --------------------------------- diff --git a/ush/config.community.yaml b/ush/config.community.yaml index f380bd28cc..1ce7fc0108 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -30,11 +30,6 @@ task_plot_allvars: global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 -verification: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" - VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: metatask_run_ensemble: From 2c8b015d5007b39039747fedd350e43dadecefea Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 02:53:15 -0700 Subject: [PATCH 149/208] Move the 4 new WE2E vx tests that were recently added to coverage.jet to the set of coverage tests for Hera since there is no data staged on Jet yet. 
--- tests/WE2E/machine_suites/coverage.hera.gnu.com | 2 ++ tests/WE2E/machine_suites/coverage.hera.intel.nco | 2 ++ tests/WE2E/machine_suites/coverage.jet | 4 ---- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/machine_suites/coverage.hera.gnu.com b/tests/WE2E/machine_suites/coverage.hera.gnu.com index 09dadbaedd..e820e6327e 100644 --- a/tests/WE2E/machine_suites/coverage.hera.gnu.com +++ b/tests/WE2E/machine_suites/coverage.hera.gnu.com @@ -10,6 +10,8 @@ MET_ensemble_verification_only_vx_time_lag 2019_halloween_storm 2020_jan_cold_blast vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-panguweather vx-det_long-fcst_custom-vx-config_gfs vx-det_long-fcst_winter-wx_SRW-staged vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.intel.nco b/tests/WE2E/machine_suites/coverage.hera.intel.nco index cf8b92b59f..ba90d0f5dc 100644 --- a/tests/WE2E/machine_suites/coverage.hera.intel.nco +++ b/tests/WE2E/machine_suites/coverage.hera.intel.nco @@ -11,6 +11,8 @@ grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR pregen_grid_orog_sfc_climo vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_multicyc_long-fcst-overlap_nssl-mpas vx-det_multicyc_long-fcst-no-overlap_nssl-mpas vx-det_multicyc_first-obs-00z_ncep-hrrr vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 5078e127ef..53308090b1 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,7 +9,3 @@ grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta -vx-det_long-fcst_custom-vx-config_aiml-panguweather -vx-det_multicyc_long-fcst-overlap_nssl-mpas -vx-det_multicyc_last-obs-00z_ncep-hrrr -vx-det_multicyc_no-fcst-overlap_ncep-hrrr From f13988926c33ddceca456dbc9a1b2b575658f86f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 02:59:47 -0700 Subject: [PATCH 150/208] Bug fix related to removing "ADP" from "ADPSFC" and "ADPUPA". 
--- .../config.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 2 +- .../config.MET_ensemble_verification_winter_wx.yaml | 2 +- ...nfig.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...fig.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 2 +- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- .../config.vx-det_long-fcst_winter-wx_SRW-staged.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index a55cc5f91a..ffacb0a8cb 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -60,4 +60,4 @@ verification: NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index fc6c9f56af..7f761117bb 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -31,7 +31,7 @@ global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 10 verification: - VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index e5db0cd451..6e6caff5eb 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 5411b0a34f..95b63a3d0c 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git 
a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index 9a088cf468..796042fd81 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 9fbca68833..d755752d5f 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -63,4 +63,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml index 11eaf7b63c..87b9f44631 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -58,5 +58,5 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' - VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 3ef4e810f2ef7ad35e3a57e471b448d7b2983bd5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 03:50:31 -0700 Subject: [PATCH 151/208] Bug fixes. 
--- scripts/exregional_run_met_pcpcombine.sh | 2 +- ush/config_defaults.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 23b10f6ff8..9ff0ee5ada 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="${FIELD_GROUP}" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index aa4cdf99e0..10bf82c0a4 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2662,7 +2662,7 @@ verification: {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} {{- ".${ensmem_name}" }} {%- endif %} - {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${FIELD_GROUP}_a${ACCUM_HH}h.nc" }}' # # VX_NDIGITS_ENSMEM_NAMES: # Number of digits to assume/use in the forecast ensemble member identifier From 1fce8276c0ba8bd183aabaa461273bdbde9c9fb1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 03:50:52 -0700 Subject: [PATCH 152/208] Address Mike K.'s comment. --- ush/eval_metplus_timestr_tmpl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/eval_metplus_timestr_tmpl.py b/ush/eval_metplus_timestr_tmpl.py index edbe0e7012..205fee1593 100644 --- a/ush/eval_metplus_timestr_tmpl.py +++ b/ush/eval_metplus_timestr_tmpl.py @@ -52,7 +52,7 @@ def eval_metplus_timestr_tmpl(init_time, lhr, time_lag, fn_template, verbose=Fal ) parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') - parser.add_argument("-f", "--lhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-l", "--lhr", help="Lead hour", type=int, required=True) parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') From 22cdd37d3a0ef98736d1defcb7fed8375b6917c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 14:54:35 -0700 Subject: [PATCH 153/208] Bug fixes related to (meta)task name and variable name changes. 
--- parm/wflow/verify_ens.yaml | 6 +++--- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 4 ++-- .../config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 2 +- ...nfig.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 2 +- ush/set_cycle_and_obs_timeinfo.py | 2 +- ush/setup.py | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 63d5392af1..07444cf757 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -39,7 +39,7 @@ metatask_GenEnsProd_EnsembleStat_APCP_all_accums: dependency: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_APCP#ACCUM_HH#h_fcst_all_mems + metatask: PcpCombine_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -144,7 +144,7 @@ metatask_GenEnsProd_EnsembleStat_REFC_RETOP: metatask_GenEnsProd_EnsembleStat_SFC_UPA: var: - FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -275,7 +275,7 @@ metatask_PointStat_ensmeanprob_SFC_UPA: statlc: mean prob metatask_PointStat_ens#statlc#_SFC_UPA: var: - FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 2641080fed..654983c70e 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -94,8 +94,8 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" -# Note that ACCUM_HH will not be defined for the REFC, RETOP, ADPSFC, and -# ADPUPA field groups. +# Note that ACCUM_HH will not be defined for the REFC, RETOP, SFC, and +# UPA field groups. 
set_vx_params \ obtype="${OBTYPE}" \ field_group="${FIELD_GROUP}" \ diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml index ced46215d0..c7caeec015 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml index 3ce4ff5f08..834c83d4f4 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml index a0f10d8b05..ba711145b5 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml index 429e8e0086..b82dfb5e42 100644 --- 
a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml index a7af3f27c9..5cd1b35ac2 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -59,4 +59,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml index 3264c93eca..5972bfa002 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml index 97e1393864..f3e18104d2 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: 
'/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 36c20e126c..31f615d573 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -634,7 +634,7 @@ def get_obs_retrieve_times_by_day( = [{'obtype': 'CCPA', 'time_type': 'cumul', 'field_groups': ['APCP']}, {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']}, {'obtype': 'MRMS', 'time_type': 'inst', 'field_groups': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['ADPSFC', 'ADPUPA']} + {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['SFC', 'UPA']} ] # Create new list that has the same form as the list of dictionaries diff --git a/ush/setup.py b/ush/setup.py index b9ca7df848..e19864cee6 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -759,7 +759,7 @@ def _remove_tag(tasks, tag): "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", "metatask_GridStat_ensprob_REFC_RETOP"] - vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_field_groups_all_by_obtype["NDAS"] = ["SFC", "UPA"] vx_metatasks_all_by_obtype["NDAS"] \ = ["task_get_obs_ndas", "task_run_MET_Pb2nc_obs_NDAS", From cc7dcdd838e3abd0775eb16a44084ae3ff3ff6d2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:47:06 -0700 Subject: [PATCH 154/208] Bug fix in METplus conf templates to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. 
--- parm/metplus/EnsembleStat.conf | 32 ++++++++++++------------- parm/metplus/GenEnsProd.conf | 4 ++-- parm/metplus/GridStat_ensmean.conf | 8 +++---- parm/metplus/GridStat_or_PointStat.conf | 8 +++---- parm/metplus/PointStat_ensmean.conf | 8 +++---- parm/metplus/PointStat_ensprob.conf | 6 ++--- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index ce38b2d209..15ba1d9321 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -87,7 +87,7 @@ OBS_FILE_WINDOW_BEGIN = -300 OBS_FILE_WINDOW_END = 300 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = 0 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = 0 -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} OBS_WINDOW_BEGIN = -1799 OBS_WINDOW_END = 1800 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = {OBS_WINDOW_BEGIN} @@ -105,7 +105,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} # ens.vld_thresh value in the MET config file {{METPLUS_TOOL_NAME}}_ENS_VLD_THRESH = 1.0 -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_OBS_QUALITY_INC = 0, 1, 2, 3, 9, NA #{{METPLUS_TOOL_NAME}}_OBS_QUALITY_EXC = @@ -118,7 +118,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- endif %} @@ -135,7 +135,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- set regrid_to_grid = 'FCST' %} {%- set regrid_method = 'BUDGET' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- set regrid_to_grid = 'NONE' %} {%- set regrid_method = 'BILIN' %} @@ -160,8 +160,8 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = TRUE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} -{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{input_field_group}} +{%- elif input_field_group in ['SFC', 'UPA'] %} +{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{fieldname_in_met_filedir_names}} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = FALSE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE @@ -197,7 +197,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- elif input_field_group in ['REFC', 'RETOP'] %} {%- set comment_or_null = '' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- endif %} {{comment_or_null}}{{METPLUS_TOOL_NAME}}_CLIMO_CDF_BINS = 1 @@ -207,7 +207,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MASK_GRID = {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID = FULL -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID = {%- endif %} @@ -483,7 +483,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert 
{{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -499,7 +499,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width FCST_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -608,7 +608,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = censor_thresh = lt-20; {{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_obs in ['DPT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -616,7 +616,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; d OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215 {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs in ['DPT', 'HGT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -649,7 +649,7 @@ OUTPUT_BASE = {{output_base}} # # Point observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = {{obs_input_dir}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = @@ -657,7 +657,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = # # Grid observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {{obs_input_dir}} @@ -691,7 +691,7 @@ STAGING_DIR = {{staging_dir}} # Template for point observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = {{obs_input_fn_template}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = @@ -700,7 +700,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = # Template for gridded observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {{obs_input_fn_template}} diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 17005ecd1a..153eae196b 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -328,7 +328,7 @@ Set forecast field options. ENS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert from meters to kilofeet. 
{%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} ENS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -344,7 +344,7 @@ ENS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width ENS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} ENS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 21d23ac4eb..7c3b3b7ad9 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -174,8 +174,8 @@ following dictionary. 'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -383,7 +383,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -462,7 +462,7 @@ Set observation field options. OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = 100.0*x; {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 865f1c8d14..155b028291 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -500,7 +500,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_fcst in ['WIND']) %} {{opts_indent}}GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. @@ -518,7 +518,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}desc = "CEILING"; {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_fcst in ['HGT']) %} {%- if (levels_fcst[0] in ['L0']) %} @@ -599,7 +599,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3280.84 * 0.001; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_obs in ['WIND']) %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. @@ -612,7 +612,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; {{opts_indent}}interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_obs in ['CAPE', 'MLCAPE']) %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index 8637a7501d..fc9ccec85b 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -238,8 +238,8 @@ following dictionary. 
'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -419,7 +419,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -481,7 +481,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 885ba121be..42ac254a4b 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -355,7 +355,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = desc = "CEILING"; @@ -400,7 +400,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_obs == 'CEILING' %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -409,7 +409,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; OBS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; From dc91a690d1f1f83e5ccb78218e7bf998bdf04c40 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:51:21 -0700 Subject: [PATCH 155/208] Bug fix in vx configuration files to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. --- .../metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml | 2 +- parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml | 2 +- parm/metplus/vx_configs/vx_config_det.yaml | 4 ++-- parm/metplus/vx_configs/vx_config_ens.yaml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml index 11bcb2e568..81425cc1a1 100644 --- a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml @@ -41,7 +41,7 @@ # it means the name of the field in the forecast data is RETOP while its # name in the observations is EchoTop18. # -ADPSFC: +SFC: TMP: Z2: [] UGRD: diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml index 9b8e25ff59..dde2dd3302 100644 --- a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml @@ -41,7 +41,7 @@ # it means the name of the field in the forecast data is RETOP while its # name in the observations is EchoTop18. 
# -ADPSFC: +SFC: TMP: Z2: [] UGRD: diff --git a/parm/metplus/vx_configs/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml index c20e192dcb..48b8aff97b 100644 --- a/parm/metplus/vx_configs/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -57,7 +57,7 @@ REFC: RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: [] DPT: @@ -90,7 +90,7 @@ ADPSFC: L0%%Z0: ['ge1.0%%ge164&&le166'] CICEP%%PRWE: L0%%Z0: ['ge1.0%%ge174&&le176'] -ADPUPA: +UPA: TMP: P1000: &adpupa_tmp_threshes [] diff --git a/parm/metplus/vx_configs/vx_config_ens.yaml b/parm/metplus/vx_configs/vx_config_ens.yaml index 2608490565..4eb1524648 100644 --- a/parm/metplus/vx_configs/vx_config_ens.yaml +++ b/parm/metplus/vx_configs/vx_config_ens.yaml @@ -21,7 +21,7 @@ REFC: RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: ['ge268', 'ge273', 'ge278', 'ge293', 'ge298', 'ge303'] DPT: @@ -34,7 +34,7 @@ ADPSFC: L0: ['lt1609', 'lt8045', 'ge8045'] HGT%%CEILING: L0: ['lt152', 'lt305', 'lt914'] -ADPUPA: +UPA: TMP: P850: ['ge288', 'ge293', 'ge298'] P700: ['ge273', 'ge278', 'ge283'] From 8eb38f240635e85366260cd913fa64b5296df850 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:52:18 -0700 Subject: [PATCH 156/208] Fix typo. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 654983c70e..84db54ea6f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -69,7 +69,7 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform deterministic verification of the specified field gropup +tool to perform deterministic verification of the specified field group (FIELD_GROUP) for a single forecast. ========================================================================" # From 8caae0a49aa52d1db6a2292d11faa314ae62a04d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:53:40 -0700 Subject: [PATCH 157/208] Bug fix in ex-script and an auxiliary bash script to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. 
--- scripts/exregional_run_met_pb2nc_obs.sh | 2 +- ush/set_vx_params.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index d1d055fe66..63ea5ca760 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -118,7 +118,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field_group="ADP${FIELD_GROUP}" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index e17a360c38..993e45ac67 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -221,18 +221,18 @@ this observation type (obtype) and field group (field_group) combination: _grid_or_point_="point" case "${field_group}" in - "ADPSFC") + "SFC") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field_group}" - fieldname_in_MET_filedir_names="${field_group}" + fieldname_in_MET_output="ADP${field_group}" + fieldname_in_MET_filedir_names="ADP${field_group}" ;; - "ADPUPA") + "UPA") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field_group}" - fieldname_in_MET_filedir_names="${field_group}" + fieldname_in_MET_output="ADP${field_group}" + fieldname_in_MET_filedir_names="ADP${field_group}" ;; *) From f0ccb7829e9fdb6b31808d18f0229d2676942981 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 18:38:54 -0700 Subject: [PATCH 158/208] Fix typo. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 84db54ea6f..0531d21755 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -282,7 +282,7 @@ export LOGDIR # if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: +The list of lead hours for which to run METplus is empty: VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # From f1bd90a2da4a415b2f8634874dcd27f09463ea2a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 18:39:52 -0700 Subject: [PATCH 159/208] Bug fix: For ensemble vx tasks, add omitted cycle dates to the end of the names of the METplus log files. --- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +- .../exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 4 ++-- .../exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index d02adddf77..a7ec52ad6a 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -307,7 +307,7 @@ fi # metplus_config_tmpl_bn="${MetplusToolName}" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 424756d72b..5ecc588316 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -249,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensmean" -metplus_config_bn="${MetplusToolName}_ensmean_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index e6ad107e81..c7693fe06c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -249,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensprob" -metplus_config_bn="${MetplusToolName}_ensprob_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # From ddf1a552ee6a35ff602c744f6b9c43df1a85ca13 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 04:03:32 -0700 Subject: [PATCH 160/208] Modify ensemble vx task and metatask names to match those for deterministic vx. Make corresponding changes in the docs. --- .../BuildingRunningTesting/RunSRW.rst | 26 +++++++++---------- parm/wflow/verify_ens.yaml | 22 ++++++++-------- ush/setup.py | 8 +++--- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index fb8261697f..4689b94308 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -839,11 +839,11 @@ about metatasks. .. _VXWorkflowTasksTable: -.. list-table:: Verification (VX) Workflow Tasks and Metatasks in the SRW App +.. list-table:: Default Verification (VX) Workflow Tasks and Metatasks in the SRW App :widths: 5 95 :header-rows: 1 - * - Workflow Task (``taskgroup``) + * - Workflow (Meta)Task (``taskgroup``) - Task Description * - :bolditalic:`task_get_obs_ccpa` (``verify_pre.yaml``) @@ -1004,41 +1004,41 @@ about metatasks. is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensmeanprob_APCP_all_accums` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_APCP_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic verification of the ensemble of APCP forecasts as a whole. 
In rocoto, the tasks under this metatask for - ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_APCP{accum_intvl}h``, and the ones for - ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_APCP{accum_intvl}h``, where + ensemble mean verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensprob``, where ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensmeanprob_ASNOW_all_accums` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic verification of the ensemble of ASNOW forecasts as a whole. In rocoto, the tasks under this metatask for - ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_ASNOW{accum_intvl}h``, and the ones for - ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_ASNOW{accum_intvl}h``, where + ensemble mean verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensprob``, where ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensprob_REFC_RETOP` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_REFC_RETOP_ensprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group ``RETOP``). (Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) - In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_ensprob_{field_group}``, where + In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_PointStat_ensmeanprob_SFC_UPA` (``verify_ens.yaml``) + * - :bolditalic:`metatask_PointStat_SFC_UPA_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of surface fields (represented by the verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) as well as grid-to-grid probabilistic verification of the ensemble of the surface and upper-air field forecasts as a whole. 
In rocoto, the tasks under this metatask for ensemble mean verification are named - ``run_MET_PointStat_vx_ensmean_{field_group}``, and the ones for ensemble probabilistic verification are - named ``run_MET_PointStat_vx_ensprob_{field_group}``, where ``{field_group}`` is the field group (in this + ``run_MET_PointStat_vx_{field_group}_ensmean``, and the ones for ensemble probabilistic verification are + named ``run_MET_PointStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 07444cf757..eb13e97e1c 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -192,14 +192,14 @@ metatask_GenEnsProd_EnsembleStat_SFC_UPA: attrs: task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GridStat_ensmeanprob_APCP_all_accums: +metatask_GridStat_APCP_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_ens#statlc#_APCP_all_accums: + metatask_GridStat_APCP_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_APCP#ACCUM_HH#h: + task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: @@ -219,14 +219,14 @@ metatask_GridStat_ensmeanprob_APCP_all_accums: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GridStat_ensmeanprob_ASNOW_all_accums: +metatask_GridStat_ASNOW_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_ens#statlc#_ASNOW_all_accums: + metatask_GridStat_ASNOW_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_ASNOW#ACCUM_HH#h: + task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: @@ -246,10 +246,10 @@ metatask_GridStat_ensmeanprob_ASNOW_all_accums: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GridStat_ensprob_REFC_RETOP: +metatask_GridStat_REFC_RETOP_ensprob: var: FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ensprob_#FIELD_GROUP#: + task_run_MET_GridStat_vx_#FIELD_GROUP#_ensprob: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' envars: @@ -269,14 +269,14 @@ metatask_GridStat_ensprob_REFC_RETOP: attrs: task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_PointStat_ensmeanprob_SFC_UPA: +metatask_PointStat_SFC_UPA_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_PointStat_ens#statlc#_SFC_UPA: + metatask_PointStat_SFC_UPA_ens#statlc#: var: FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - 
task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#: + task_run_MET_PointStat_vx_#FIELD_GROUP#_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: diff --git a/ush/setup.py b/ush/setup.py index e19864cee6..85e0cedca1 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -741,7 +741,7 @@ def _remove_tag(tasks, tag): "metatask_PcpCombine_APCP_all_accums_all_mems", "metatask_GridStat_APCP_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_APCP_all_accums", - "metatask_GridStat_ensmeanprob_APCP_all_accums"] + "metatask_GridStat_APCP_all_accums_ensmeanprob"] vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] vx_metatasks_all_by_obtype["NOHRSC"] \ @@ -750,14 +750,14 @@ def _remove_tag(tasks, tag): "metatask_PcpCombine_ASNOW_all_accums_all_mems", "metatask_GridStat_ASNOW_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums", - "metatask_GridStat_ensmeanprob_ASNOW_all_accums"] + "metatask_GridStat_ASNOW_all_accums_ensmeanprob"] vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] vx_metatasks_all_by_obtype["MRMS"] \ = ["task_get_obs_mrms", "metatask_GridStat_REFC_RETOP_all_mems", "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", - "metatask_GridStat_ensprob_REFC_RETOP"] + "metatask_GridStat_REFC_RETOP_ensprob"] vx_field_groups_all_by_obtype["NDAS"] = ["SFC", "UPA"] vx_metatasks_all_by_obtype["NDAS"] \ @@ -765,7 +765,7 @@ def _remove_tag(tasks, tag): "task_run_MET_Pb2nc_obs_NDAS", "metatask_PointStat_SFC_UPA_all_mems", "metatask_GenEnsProd_EnsembleStat_SFC_UPA", - "metatask_PointStat_ensmeanprob_SFC_UPA"] + "metatask_PointStat_SFC_UPA_ensmeanprob"] # If there are no field groups specified for verification, remove those # tasks that are common to all observation types. From 9dcec551ff42e7db51fc25ae975d555e4aab84e2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 05:40:46 -0700 Subject: [PATCH 161/208] Minor fixes to code comments. --- ush/config_defaults.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 10bf82c0a4..8c406040ab 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2336,11 +2336,11 @@ global: PRINT_DIFF_PGR: false #---------------------------- -# verification (vx) parameters +# Verification (VX) parameters #----------------------------- verification: # - # General Verification Parameters + # General VX Parameters # ------------------------------- # # VX_FIELD_GROUPS: @@ -2397,7 +2397,7 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # - # Observation-Specific Parameters + # VX Parameters for Observations # ------------------------------- # # Note: @@ -2601,7 +2601,7 @@ verification: # NUM_MISSING_OBS_FILES_MAX: 2 # - # Forecast-Specific Parameters + # VX Parameters for Forecasts # ---------------------------- # # VX_FCST_MODEL_NAME: From 81bd5b31a7c9c939937b257d7290a2814b606029 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 05:41:35 -0700 Subject: [PATCH 162/208] Bug fixe for ensemble vx of ASNOW related to (meta)task name changes. 
--- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index eb13e97e1c..45cf5385be 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -74,7 +74,7 @@ metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums: and: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_ASNOW#ACCUM_HH#h_fcst_all_mems + metatask: PcpCombine_ASNOW#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_ASNOW#ACCUM_HH#h: <<: *task_GenEnsProd_NOHRSC envars: From 46af6f7277265796440757ba94344ce7edfe39cf Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 08:33:34 -0700 Subject: [PATCH 163/208] Updates to docs. --- .../BuildingRunningTesting/RunSRW.rst | 109 +++++++++++++----- 1 file changed, 81 insertions(+), 28 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 4689b94308..7ee105dc61 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -641,12 +641,21 @@ To use METplus verification, MET and METplus modules need to be installed. To t tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' -:numref:`Table %s ` indicates which verification capabilities/workflow tasks each ``verify_*.yaml`` file enables. -Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary -for both deterministic and ensemble VX, including retrieval of obs files from various data stores (e.g. NOAA's HPSS) if those -files do not already exist on disk at the locations specified by some of the parameters in the ``verification:`` section of -``config_defaults.yaml`` and/or ``config.yaml`` (see ?? for details). -Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. +:numref:`Table %s ` indicates which workflow (meta)tasks each ``verify_*.yaml`` file enables. +Users must include ``verify_pre.yaml`` anytime they want to run verification (VX) because this contains +preprocessing tasks that are necessary for both deterministic and ensemble VX, including retrieval of +obs files from various data stores (e.g. NOAA HPSS) if those files do not already exist on disk (the +files must exist at the locations specified by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` +in the ``verification:`` section of ``config.yaml``; see discussion below for details). +Then users can add ``verify_det.yaml`` for deterministic VX, ``verify_ens.yaml`` for ensemble VX, +or both if they want to run ensemble VX on an ensemble forecast but also run deterministic VX on +each ensemble member. + +Note that ensemble VX requires the user to either run an ensemble forecast with the SRW App or to stage +ensemble forecast files (at the locations specified by the variables ``VX_FCST_INPUT_BASEDIR``, +``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``). +In either case, ``DO_ENSEMBLE`` in ``config.yaml`` must be set to ``True``. + .. 
_VX-yamls: @@ -659,11 +668,16 @@ Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml * - verify_pre.yaml - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) * - verify_det.yaml - - Enables (meta)tasks that perform deterministic vx on a single forecast or on each member of an ensemble forecast + - Enables (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble forecast * - verify_ens.yaml - - Enables (meta)tasks that perform ensemble vx on an ensemble of forecasts as a whole (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) + - Enables (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE`` + to be set to ``True`` in ``config.yaml``) -The ``verify_*.yaml`` files include the definitions of several common verification tasks by default. Individual verification tasks appear in :numref:`Table %s `. The tasks in the ``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in ``config.yaml``. For example, to turn off PointStat tasks: +The ``verify_*.yaml`` files include by default the definitions of several common verification tasks and metatasks. +These default verification (meta)tasks are described in :numref:`Table %s `. The tasks in the +``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of +their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in +``config.yaml``. For example, to turn off PointStat tasks: .. code-block:: console @@ -676,22 +690,23 @@ The ``verify_*.yaml`` files include the definitions of several common verificati More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the default ``verify_pre.yaml`` -taskgroup will activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required +If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the taskgroup in ``verify_pre.yaml`` +will by default activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) that will be used to name the obs files. (Here, the ``*`` represents any one of the obs types :term:`CCPA`, -:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`.) +:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`, and the ``[1]`` in ``OBS_*_FN_TEMPLATES[1]`` refers to the second +element of ``OBS_*_FN_TEMPLATES``; the first element should not be changed). Users who do not have access to NOAA HPSS and do not have the data on their system will need to download -:term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, -such as the ones listed `here `__. +:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data manually from collections of publicly +available data, such as the ones listed `here `__. 
Users who have already staged the observation data needed for verification on their system (i.e., the
:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data) should set ``*_OBS_DIR`` and
``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and
-file names For example, for a case in which all four types of obs are needed for vx, these variables
+file names. For example, for a case in which all four types of obs are needed for VX, these variables
might be set as follows:

.. code-block:: console

   CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa/proc
   NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc/proc
   MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms/proc
   NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas/proc
   OBS_CCPA_FN_TEMPLATES: [ 'APCP', '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' ]
   OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', 'sfav2_CONUS_{accum?fmt=%-d}h_{valid?fmt=%Y%m%d%H}_grid184.grb2' ]
   OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
                            'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ]
   OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ]

-If one of the days encompassed by the experiment was 20240429, and if one of the hours during
-that day at which vx will be performed was 03, then, taking the CCPA obs type as an example,
+If one of the days encompassed by the experiment is 20240429, and if one of the hours during
+that day at which VX will be performed is 03, then, taking the CCPA obs type as an example,
one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk
corresponding to this day and hour at

``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2``

-As described above, if this file does not exist, it will try to retrieve it from a data store
-and place it at this location.
+As described above, if this file does not exist, the ``get_obs`` task will try to retrieve it
+from a data store and place it at this location.

After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:``
section, users can proceed to generate the experiment, which will perform VX tasks in addition to
the default workflow tasks.
+
+Note that inclusion of the ``verify_*.yaml`` files under the ``rocoto: tasks: taskgroups:`` section of
+``config.yaml`` does not mean all the (meta)tasks in those files will necessarily be included in the workflow.
+This is because the VX tasks are grouped into field groups, and only those (meta)tasks in ``verify_*.yaml``
+associated with field groups that are included in the list ``VX_FIELD_GROUPS`` in ``config.yaml``
+are included in the workflow.
+Each field group represents one or more meteorological fields that can be verified. The valid field
+groups and their descriptions are given in :numref:`Table %s <VXFieldGroupDescsTable>`.
+Thus, setting
+
+.. code-block:: console
+
+   VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ]
+
+will run the VX (meta)tasks for all field groups except accumulated snowfall.
+
+
+.. _VXFieldGroupDescsTable:
+
+.. list-table:: Valid Verification Field Groups and Descriptions
+   :widths: 20 50
+   :header-rows: 1
+
+   * - Field Group
+     - Description
+   * - APCP
+     - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``
+   * - ASNOW
+     - Accumulated snowfall for the accumulation intervals specified in ``VX_ASNOW_ACCUMS_HRS``
+   * - REFC
+     - Composite reflectivity
+   * - RETOP
+     - Echo top
+   * - SFC
+     - Surface fields
+   * - UPA
+     - Upper-air fields
+
+
.. _GenerateWorkflow:

Generate the SRW App Workflow
@@ -827,15 +881,14 @@ In addition to the baseline tasks described in :numref:`Table %s ` below.
-The ``taskgroup`` entry after the name of each task or metatask indicates the taskgroup file that must be -included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` in order for that task or -metatask to be considered for inclusion in the workflow (see :numref:`Section %s ` for more -details). Metatasks define a set of tasks in the workflow based on multiple values of one or more parameters -such as the ensemble member index, the accumulation interval (for cumulative fields such as accumulated -precipitation), and the name of the verificaiton field group (see description of ``VX_FIELD_GROUPS`` in -:numref:`Section %s `). See :numref:`Section %s ` for more details -about metatasks. +The METplus verification tasks and metatasks that are included by default in ``verify_*.yaml`` are described +in :numref:`Table %s `. The ``taskgroup`` entry after the name of each (meta)task indicates +the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` +in order for that (meta)task to be considered for inclusion in the workflow (see :numref:`Section %s ` +for details). As described in :numref:`Section %s `, metatasks define a set of tasks in the +workflow based on multiple values of one or more parameters such as the ensemble member index, the accumulation +interval (for cumulative fields such as accumulated precipitation), and the name of the verificaiton field group +(see description of ``VX_FIELD_GROUPS`` in :numref:`Section %s `). .. _VXWorkflowTasksTable: From 4484bc0172848b78f08b32c509c00b6ab2d1ada0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 11:02:36 -0700 Subject: [PATCH 164/208] Get minor changes from the feature/daily_obs_tasks_doc_mods branch. --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- doc/UsersGuide/Reference/Glossary.rst | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 7ee105dc61..72cf85f494 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -622,7 +622,7 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) [what would need to change in the machine file?] or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). .. 
note:: If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index 7ffc569b21..7b8489f82d 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -26,7 +26,7 @@ Glossary chgres_cube The preprocessing software used to create initial and boundary condition files to - “cold start” the forecast model. It is part of :term:`UFS_UTILS`. + "cold start" the forecast model. It is part of :term:`UFS_UTILS`. CIN Convective Inhibition. @@ -87,7 +87,7 @@ Glossary Extended Schmidt Gnomonic (ESG) grid. The ESG grid uses the map projection developed by Jim Purser of NOAA :term:`EMC` (:cite:t:`Purser_2020`). ESMF - `Earth System Modeling Framework `__. The ESMF defines itself as “a suite of software tools for developing high-performance, multi-component Earth science modeling applications.” + `Earth System Modeling Framework `__. The ESMF defines itself as "a suite of software tools for developing high-performance, multi-component Earth science modeling applications." ex-scripts Scripting layer (contained in ``ufs-srweather-app/scripts/``) that should be called by a :term:`J-job ` for each workflow componentto run a specific task or sub-task in the workflow. The different scripting layers are described in detail in the :nco:`NCO Implementation Standards document ` @@ -215,7 +215,7 @@ Glossary The branch of physical geography dealing with mountains. Parameterizations - Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid’s representation of the earth. + Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid's representation of the earth. RAP `Rapid Refresh `__. The continental-scale NOAA hourly-updated assimilation/modeling system operational at :term:`NCEP`. RAP covers North America and is comprised primarily of a numerical forecast model and an analysis/assimilation system to initialize that model. RAP is complemented by the higher-resolution 3km High-Resolution Rapid Refresh (:term:`HRRR`) model. From 3ddc95c72bdce4acfd6c8dc6842a0f4203adbb4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 7 Nov 2024 08:41:05 -0700 Subject: [PATCH 165/208] Update documentation on how to run the verification capabilities in the SRW. --- .../BuildingRunningTesting/RunSRW.rst | 371 +++++++++++++----- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 3 + 2 files changed, 278 insertions(+), 96 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 72cf85f494..c27332ae38 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -617,15 +617,21 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo * To configure an experiment to run METplus verification tasks, see the :ref:`next section `. * Otherwise, skip to :numref:`Section %s ` to generate the workflow. + .. 
_VXConfig: Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification (VX) suite to evaluate their forecasts or to evaluate +a staged forecast (e.g. from another forecasting system) need to add additional information to their +machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip +to the next step (:numref:`Section %s: Generate the SRW App Workflow `). + +To use METplus verification, MET and METplus modules need to be installed on the system. .. note:: - If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: + If users update their METplus installation, they must also update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: .. code-block:: console @@ -633,81 +639,228 @@ Users who want to use the METplus verification suite to evaluate their forecasts module load met/ module load metplus/ -To use METplus verification, MET and METplus modules need to be installed. To turn on verification tasks in the workflow, include the ``parm/wflow/verify_*.yaml`` file(s) in the ``rocoto: tasks: taskgroups:`` section of ``config.yaml``. For example: + +Background +`````````````` +Whether generated by the SRW App or another forecasting system, a forecasting experiment consists +of one or more forecast periods known as cycles. If there is one forecast per cycle, the experiment +is referred to briefly as a deterministic forecast, and if there are multiple, it is referred to as +an ensemble forecast. Verification of a deterministic forecast is known (unsurprisingly) as +deterministic VX, while verification of an ensemble forecast as a whole is known as ensemble VX. +It is also possible to consider each member of an ensemble separately and verify each such member +deterministically. + +The SRW App allows users to include in the Rocoto XML that defines the workflow various tasks that +perform deterministic and/or ensemble VX. The forecast files to be verified may be generated as part +of the SRW experiment that is performing the verification, or they may be pre-generated files that +are staged somewhere on disk. In the latter case, the forecast files may have been generated from a +previous SRW experiment, or they may have been generated from another forecasting system altogether +(see :numref:`Section %s ` for the procedure to stage forecast files). In the SRW +App, the flag ``DO_ENSEMBLE`` in the ``global:`` section of ``config.yaml`` specifies whether the +(generated or staged) forecast files to be verified constitute a deterministic or an ensemble forecast. +Setting ``DO_ENSEMBLE: False`` (the default) causes the SRW App workflow to assume that the forecast +is deterministic, while setting ``DO_ENSEMBLE: True`` causes it to assume that the forecast is ensemble. +In the latter case, the number of ensemble members must be specified via the variable ``NUM_ENS_MEMBERS``, +also found in the ``global:`` section of ``config.yaml``. 
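+
+For example, a minimal sketch of these settings for an ensemble forecast (the member count shown here is
+purely illustrative) would be the following entries in the ``global:`` section of ``config.yaml``:
+
+.. code-block:: console
+
+   global:
+     DO_ENSEMBLE: True
+     NUM_ENS_MEMBERS: 2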
+
+Both deterministic and ensemble VX require observation and forecast files as well as possible preprocessing
+of those files. Thus, whenever deterministic or ensemble VX tasks are included in the workflow, preprocessing
+(meta)tasks must also be included that check for the presence of the required obs and forecast files on disk,
+retrieve obs files if necessary from a data store such as NOAA HPSS (see note below regarding forecast files),
+and preprocess both types of files as needed. We refer to these collectively as the VX preprocessing tasks.
+
+.. note::
+   Currently, the SRW App workflow does not support the ability to retrieve forecast files from data stores;
+   these must either be generated by the forecast model in the SRW App or be manually staged by the user.
+   See :numref:`Section %s ` for details.
+
+
+Adding VX Tasks to the Workflow
+``````````````````````````````````
+To add verification tasks to the workflow, users must include the VX taskgroup files ``verify_pre.yaml``,
+``verify_det.yaml``, and/or ``verify_ens.yaml`` (all located in the ``parm/wflow`` directory) in the ``rocoto:
+tasks: taskgroups:`` section of ``config.yaml``. :numref:`Table %s ` specifies the set of workflow
+VX (meta)tasks that each ``verify_*.yaml`` file defines. As implied above, users must add ``verify_pre.yaml``
+to ``rocoto: tasks: taskgroups:`` anytime they want to run deterministic and/or ensemble VX because this
+contains VX preprocessing tasks that are required by both VX types. Then users can add ``verify_det.yaml``
+to run deterministic VX on either a deterministic forecast or on each member of an ensemble forecast, they
+can add ``verify_ens.yaml`` to run ensemble VX on an ensemble forecast, or they can add both if they want to
+run both deterministic and ensemble VX on an ensemble forecast (where the deterministic VX is performed on
+each member of the ensemble).
+
+.. _VX-yamls:
+
+.. list-table:: Verification YAML Task Groupings
+   :widths: 20 50
+   :header-rows: 1
+
+   * - Taskgroup File
+     - Taskgroup Description
+   * - ``verify_pre.yaml``
+     - Defines (meta)tasks that run the VX preprocessing tasks that are prerequisites for both deterministic
+       and ensemble VX.
+   * - ``verify_det.yaml``
+     - Defines (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble
+       forecast (the latter requires ``DO_ENSEMBLE`` and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to
+       ``True`` and the number of ensemble members, respectively).
+   * - ``verify_ens.yaml``
+     - Defines (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE``
+       and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to ``True`` and the number of ensemble members,
+       respectively).
+
+For example, to enable deterministic VX, ``rocoto: tasks: taskgroups:`` may be set as follows:

 .. code-block:: console

    rocoto:
      tasks:
        taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'

+
+This setting can apply to either a deterministic or an ensemble forecast. In the latter case, it will
+cause deterministic VX to be performed on each member of the ensemble (but not on the ensemble as a
+whole).
Note that with this setting, the weather model will be run as part of the workflow to generate
+forecast output because ``prep.yaml``, ``coldstart.yaml``, and ``post.yaml`` are also included in
+``rocoto: tasks: taskgroups:``. Whether these forecasts are deterministic or ensemble depends on
+whether ``DO_ENSEMBLE`` in ``config.yaml`` is set to ``False`` or ``True``, respectively (and, if
+``True``, ``NUM_ENS_MEMBERS`` must be set to the number of ensemble members). Similarly, to enable
+ensemble VX for an ensemble forecast as well as deterministic VX for each member of that ensemble,
+``rocoto: tasks: taskgroups:`` may be set as follows:

-:numref:`Table %s ` indicates which workflow (meta)tasks each ``verify_*.yaml`` file enables.
-Users must include ``verify_pre.yaml`` anytime they want to run verification (VX) because this contains
-preprocessing tasks that are necessary for both deterministic and ensemble VX, including retrieval of
-obs files from various data stores (e.g. NOAA HPSS) if those files do not already exist on disk (the
-files must exist at the locations specified by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]``
-in the ``verification:`` section of ``config.yaml``; see discussion below for details).
-Then users can add ``verify_det.yaml`` for deterministic VX, ``verify_ens.yaml`` for ensemble VX,
-or both if they want to run ensemble VX on an ensemble forecast but also run deterministic VX on
-each ensemble member.

 .. code-block:: console

-Note that ensemble VX requires the user to either run an ensemble forecast with the SRW App or to stage
-ensemble forecast files (at the locations specified by the variables ``VX_FCST_INPUT_BASEDIR``,
-``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``).
-In either case, ``DO_ENSEMBLE`` in ``config.yaml`` must be set to ``True``.
+   rocoto:
+     tasks:
+       taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml",
+         "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}'
+
+If deterministic VX of each ensemble member is not desired, ``verify_det.yaml`` must be left out of the
+above. Note that, as in the previous example, this setting of ``rocoto: tasks: taskgroups:`` will cause
+the workflow to run the weather model to generate forecast output because ``prep.yaml``, ``coldstart.yaml``,
+and ``post.yaml`` are again included, but in this case, ``DO_ENSEMBLE`` **must be** set to ``True`` (and
+``NUM_ENS_MEMBERS`` set appropriately) in ``config.yaml`` because inclusion of ``verify_ens.yaml`` requires
+that the forecast be an ensemble one.
+
+If users want to manually stage the forecast files instead of generating them with the SRW App's native weather
+model (see :numref:`Section %s ` for the procedure), they must exclude ``prep.yaml``,
+``coldstart.yaml``, and ``post.yaml`` from the examples above. Also, regardless of whether the forecast
+files are generated by the SRW App or staged manually by the user, if the forecast to be verified is an
+ensemble one, in the ``global:`` section of ``config.yaml`` users must set ``DO_ENSEMBLE`` to ``True``
+and ``NUM_ENS_MEMBERS`` to the number of ensemble members. This tells the workflow to look for multiple
+forecasts for each cycle instead of just one (as well as the number of such forecasts).
+More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `.

-.. _VX-yamls:
-..
list-table:: Verification YAML Task Groupings
-   :widths: 20 50
-   :header-rows: 1
-
-   * - File
-     - Description
-   * - verify_pre.yaml
-     - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx)
-   * - verify_det.yaml
-     - Enables (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble forecast
-   * - verify_ens.yaml
-     - Enables (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE``
-       to be set to ``True`` in ``config.yaml``)
+VX Taskgroup Organization and VX Field Groups
+`````````````````````````````````````````````````
+The VX (meta)tasks in the ``verify_*.yaml`` taskgroup files are described in detail in :numref:`Table %s
+`. They are organized as follows.
+
+The (meta)tasks in ``verify_pre.yaml`` each
+operate on a single obs type (except for ``metatask_check_post_output_all_mems``, which operates on the
+forecast(s) and checks for the presence of all necessary forecast files), while the ones in ``verify_det.yaml``
+and ``verify_ens.yaml`` operate on one or more verification field groups. A verification field group
+represents one or more meteorological fields that are operated on (e.g. verified) together in a single
+call to one of the METplus tools (such as GridStat, PointStat, GenEnsProd, and EnsembleStat), and each
+field group has associated with it an obs type against which those forecast fields are verified. The
+set of valid VX field groups, the obs types they are associated with, and a brief description of the
+fields they include are given in :numref:`Table %s `.
+
+.. _VXFieldGroupDescsTable:
+
+.. list-table:: Valid Verification Field Groups and Descriptions
+   :widths: 20 20 60
+   :header-rows: 1
+
+   * - VX Field Group
+     - Associated Obs Type
+     - Fields Included in Group
+   * - APCP
+     - CCPA
+     - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``
+   * - ASNOW
+     - NOHRSC
+     - Accumulated snowfall for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``
+   * - REFC
+     - MRMS
+     - Composite reflectivity
+   * - RETOP
+     - MRMS
+     - Echo top
+   * - SFC
+     - NDAS
+     - Various surface and near-surface fields (e.g. at the surface, 2 m, 10 m, etc)
+   * - UPA
+     - NDAS
+     - Various upper-air fields (e.g. at 800 mb, 500 mb, etc)
+
+The list ``VX_FIELD_GROUPS`` in the ``verification:`` section of ``config.yaml`` specifies the VX field
+groups for which to run verification. Thus, inclusion of a ``verify_*.yaml`` taskgroup file under the
+``rocoto: tasks: taskgroups:`` section of ``config.yaml`` does not mean that all the (meta)tasks in that
+file will necessarily be included in the workflow. This is because, in order to avoid unwanted computation,
+only those (meta)tasks in ``verify_det.yaml`` and/or ``verify_ens.yaml`` that operate on field groups
+included in ``VX_FIELD_GROUPS`` will appear in the Rocoto XML, and only those (meta)tasks in ``verify_pre.yaml``
+that operate on obs types associated with one of the field groups in ``VX_FIELD_GROUPS`` will appear in
+the Rocoto XML. Thus, for example, setting

-The ``verify_*.yaml`` files include by default the definitions of several common verification tasks and metatasks.
-These default verification (meta)tasks are described in :numref:`Table %s `. The tasks in the
-``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of
-their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in
-``config.yaml``.
For example, to turn off PointStat tasks: +.. code-block:: console + + VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + +in ``config.yaml`` and including all three taskgroups ``verify_*.yaml`` in ``rocoto: tasks: taskgroups:`` +will add to the Rocoto XML the VX (meta)tasks for all valid field groups except those for accumulated +snowfall (``'ASNOW'``) and its associated obs type (:term:`NOHRSC`). In other words, all the (meta)tasks +in :numref:`Table %s `. will be included in the Rocoto XML except for those +associated with the :term:`NOHRSC` obs type and the ``'ASNOW'`` field group. Users might want to set +``VX_FIELD_GROUPS`` in this way for example because the forecast experiment they are verifying is for a +summer period for which ``ASNOW`` is not relevant. + + +Staging Observation Files +`````````````````````````````````` +The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*`` --- where the ``*`` +represents any one of the supported obs types :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS` --- +that will first check for the existence of the obs files required for VX at the locations on disk specified +by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section of +``config.yaml``. The ``*_OBS_DIR`` are the base directories in which the obs files are or should be +staged, and the ``OBS_*_FN_TEMPLATES[1,3,...]`` are the file name templates (with METplus time strings +used for templating; see example below). The ``[1,3,...]`` in ``OBS_*_FN_TEMPLATES[1,3,...]`` refer +to the second, fourth, etc elements of ``OBS_*_FN_TEMPLATES`` and correspond to the various sets of files +that the obs type contains. (The first, third, etc elements, i.e. ``OBS_*_FN_TEMPLATES[0,2,...]``, +indicate the VX field groups for which the respective sets of obs files are used to verify and should +not be changed.) Note that ``OBS_*_FN_TEMPLATES[1,3,...]`` may include leading subdirectories and are +relative to the obs type's ``*_OBS_DIR``. Thus, the templates for the full paths to the obs files are +given by .. code-block:: console - rocoto: - tasks: - taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' - metatask_vx_ens_member: - metatask_PointStat_mem#mem#: + {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]} +If the obs files exist at the locations specified by these variables, then the ``get_obs_*`` tasks will +succeed and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the +``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and +place them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. Assuming +that attempt is successful, the workflow will move on to subsequent tasks. Thus: + + * Users who have the obs files already available (staged) on their system only need to set ``*_OBS_DIR`` + and ``OBS_*_FN_TEMPLATES[1,3,...]`` in ``config.yaml`` to match those staging locations and file names. + + * Users who do not have the obs files available on their systems and do not have access to NOAA HPSS + need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` files manually + from collections of publicly available data such as the ones listed `here `__. 
+ Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those + staging locations and file names. + + * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to + manually stage the obs data because the ``get_obs_*`` tasks will retrieve the necessary obs and place + them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. The default + values of these variables are such that the files are placed under the experiment directory, but + users may change these if they want the retrieved files to be placed elsewhere. -More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the taskgroup in ``verify_pre.yaml`` -will by default activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required -files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must -be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` -must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) -that will be used to name the obs files. (Here, the ``*`` represents any one of the obs types :term:`CCPA`, -:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`, and the ``[1]`` in ``OBS_*_FN_TEMPLATES[1]`` refers to the second -element of ``OBS_*_FN_TEMPLATES``; the first element should not be changed). - -Users who do not have access to NOAA HPSS and do not have the data on their system will need to download -:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data manually from collections of publicly -available data, such as the ones listed `here `__. - -Users who have already staged the observation data needed for verification on their system (i.e., the -:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data) should set -``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and -file names. For example, for a case in which all four types of obs are needed for VX, these variables -might be set as follows: +As an example, consider a case in which all four types of obs are needed for verification. Then ``*_OBS_DIR`` +and ``OBS_*_FN_TEMPLATES`` might be set as follows: .. code-block:: console @@ -724,57 +877,83 @@ might be set as follows: 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] -If one of the days encompassed by the experiment is 20240429, and if one of the hours during -that day at which VX will be performed is 03, then, taking the CCPA obs type as an example, -one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk -corresponding to this day and hour at +Now further consider the CCPA obs type. If one of the days encompassed by the forecast(s) is 20240429, +then the ``get_obs_ccpa`` task associated with this day will check for the existence of the set of obs +files given by -``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2`` +``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t{HH}z.01h.hrap.conus.gb2`` -As described above, if this file does not exist, the ``get_obs`` task will try to retrieve it -from a data store and place it at this location. 
+where ``{HH}`` takes on all hours of this day at which the verification requires CCPA obs. For example, +if performing (deterministic or ensemble) VX on 1-hour APCP for a 3-hour forecast that starts at 06z, +``{HH}`` will take on the values 07, 08, and 09. Then the files that ``get_obs_ccpa`` will look for +are: + +.. code-block:: console -After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:`` -section, users can proceed to generate the experiment, which will perform VX tasks in addition -to the default workflow tasks. + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t07z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t08z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t09z.01h.hrap.conus.gb2 +If all these exist, ``get_obs_ccpa`` will simply confirm their existence and will not need to retrieve +any files. If not, it will try to retrieve the files from a data store such as NOAA HPSS and place them +at the above locations. -Note that inclusion of the ``verify_*.yaml`` files under the ``rocoto: tasks: taskgroups:`` section of -``config.yaml`` does not mean all the (eta)tasks in those files will necessarily be included in the workflow. -This is because the VX tasks are grouped into field groups, and only those (meta)tasks in ``verify_*.yaml`` -associated with field groups that are included in the list ``VX_FIELD_GROUPS`` in ``config.yaml`` -are included in the worklow. -Each field group represents one or more meteorologial fields that can be verified. The valid field -groups and their descriptions are given in :numref:`Table %s `. -Thus, setting + +.. _VXStageFcstFiles: + +Staging Forecast Files +`````````````````````````````````` +As noted above, the SRW App currently does not support the ability to retrieve forecast files from +data stores. Thus, the forecast files must either be generated by the forecast model in the SRW App, +or they must be manually staged by the user. Note that manually staged forecast files do not have +to be ones generated by the SRW App; they can be outputs from another forecasting system. + +The locations of the forecast files are defined by the variables ``VX_FCST_INPUT_BASEDIR``, +``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``. +``VX_FCST_INPUT_BASEDIR`` is the base directory in which the files are located, ``FCST_SUBDIR_TEMPLATE`` +is a template specifying a set of subdirectories under ``VX_FCST_INPUT_BASEDIR``, and ``FCST_FN_TEMPLATE`` +is the file name template. As with the obs, the templating in ``FCST_SUBDIR_TEMPLATE`` and +``FCST_FN_TEMPLATE`` uses METplus time strings. Thus, the full template to the forecast files +is given by .. code-block:: console - VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + {VX_FCST_INPUT_BASEDIR}/{FCST_SUBDIR_TEMPLATE}/{FCST_FN_TEMPLATE} -will run the VX (meta)tasks for all field groups except accumulated snowfall. +If the forecast files are manually staged, then these three variables must be set such that they +together point to the locations of the staged files. If they are generated by the SRW App, then +the user does not need to set these variables; they will by default be set to point to the forecast +files. -.. _VXFieldGroupDescsTable: +Summary +`````````````` +In summary, users must take the following steps to enable VX tasks in the SRW App workflow: -.. 
list-table:: Valid Verification Field Groups and Descriptions - :widths: 20 50 - :header-rows: 1 + #. Add the necessary VX taskgroup files ``verify_*.yaml`` to the ``rocoto: tasks: taskgroups:`` + section of ``config.yaml``. ``verify_pre.yaml`` must always be added; ``verify_det.yaml`` + must be added to enable deterministic VX (either of a deterministic forecast or of each + member of an ensemble forecast); and ``verify_ens.yaml`` must be added to enable ensemble + VX (of an ensemble forecast as a whole). - * - Field Group - - Description - * - APCP - - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` - * - ASNOW - - Accumulated snowfall for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` - * - REFC - - Composite reflectivity - * - RETOP - - Echo top - * - SFC - - Surface fields - * - UPA - - Upper-air fields + #. If performing ensemble verification and/or deterministic verification of ensemble members + (i.e. if the forecast to be verified is an ensemble), in the ``global:`` section of ``config.yaml`` + set ``DO_ENSEMBLE`` to ``True`` and ``NUM_ENS_MEMBERS`` to the number of ensemble members. + + #. If manually staging the obs files (e.g. because users don't have access to NOAA HPSS), set + the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section + of ``config.yaml`` to the locations of these files on disk (where the ``*`` in these variable + names can be any of the supported obs types). + + #. If manually staging the forecast files (as opposed to generating them by running the weather + model in the SRW App), set the forecast file paths to the locations of these files on disk + using the variables ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` + in the ``verification:`` section of ``config.yaml``. + + #. Specify the field groups to verify in the list ``VX_FIELD_GROUPS`` in the ``verification:`` + section of ``config.yaml``. Valid values for field groups are given in :numref:`Table %s `. + +After completing these steps, users can proceed to generate the experiment (see :numref:`Section %s `) .. _GenerateWorkflow: diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 81d72d5144..04a45e1b02 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1659,12 +1659,15 @@ VX Parameters for Observations ------------------------------------- .. note:: + The observation types that the SRW App can currently retrieve (if necessary) and use in verification are: + * CCPA (Climatology-Calibrated Precipitation Analysis) * NOHRSC (National Operational Hydrologic Remote Sensing Center) * MRMS (Multi-Radar Multi-Sensor) * NDAS (NAM Data Assimilation System) + The script ``ush/get_obs.py`` contains further details on the files and directory structure of each obs type. From 9a61ca3a0e00e067145ac59fefca44d946b5deac Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 8 Nov 2024 11:04:02 -0700 Subject: [PATCH 166/208] Change locations of staged forecast and obs files to cleaned-up directories. 
--- ...ig.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 4 ++-- ...nfig.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 4 ++-- ...g.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 4 ++-- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 4 ++-- .../config.vx-det_long-fcst_winter-wx_SRW-staged.yaml | 2 +- .../config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 2 +- ...config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index 6e6caff5eb..8c5d16bb99 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'fcnv2' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 95b63a3d0c..fe0d7f57cc 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'gc' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index 796042fd81..940744ef6e 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'pw' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index d755752d5f..55be708544 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -45,7 +45,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
REMOVE_RAW_OBS_CCPA: false @@ -57,7 +57,7 @@ verification: # VX_FCST_MODEL_NAME: 'gfs' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml index 87b9f44631..c57c95cefe 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -59,4 +59,4 @@ verification: # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/SRW' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml index c7caeec015..7edbc2471c 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml index 834c83d4f4..e892c124e8 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml index ba711145b5..ccd35e1068 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml index b82dfb5e42..28076456df 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml index 5cd1b35ac2..8d97a7f337 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -57,6 +57,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml 
b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml index 5972bfa002..6e47df886c 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml index f3e18104d2..f8fd100b85 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' From 94bf72e6cfadbbad73f3383e4c0a8c1b23ec7aa1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 8 Nov 2024 13:51:41 -0700 Subject: [PATCH 167/208] Change locations of staged forecast and obs files in the new vx WE2E test to use the "TEST_" variables from the machine files. This makes it easier to get the tests to run from EPIC directories. 
--- ...ig.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 4 ++-- ...nfig.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 4 ++-- ...g.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 4 ++-- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 4 ++-- .../config.vx-det_long-fcst_winter-wx_SRW-staged.yaml | 2 +- .../config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 2 +- ...config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index 8c5d16bb99..f91c3a84be 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'fcnv2' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index fe0d7f57cc..2a3dfad03d 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'gc' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index 940744ef6e..d6b2e22020 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -42,7 +42,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). REMOVE_RAW_OBS_CCPA: false @@ -54,7 +54,7 @@ verification: # VX_FCST_MODEL_NAME: 'pw' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 55be708544..e6a8333c9a 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -45,7 +45,7 @@ verification: CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
REMOVE_RAW_OBS_CCPA: false @@ -57,7 +57,7 @@ verification: # VX_FCST_MODEL_NAME: 'gfs' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml index c57c95cefe..2a9fe731a0 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -59,4 +59,4 @@ verification: # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/SRW' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_VX_FCST_INPUT_BASEDIR, "..", "custom_ESGgrid_Michigan_Ontario_8km"]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml index 7edbc2471c..0be883f1e8 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml index e892c124e8..80654ec42d 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml index ccd35e1068..18508af72e 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -56,6 +56,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml index 28076456df..2745c580e3 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml index 8d97a7f337..fbd67884a5 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -57,6 +57,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml 
b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml index 6e47df886c..85f55c8fe4 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml index f8fd100b85..c65fb74ec4 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -58,6 +58,6 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/{{- verification.VX_FCST_MODEL_NAME }}' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' From 6c4d679b12698b29b3a02eb610a335898c186175 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 8 Nov 2024 14:17:24 -0700 Subject: [PATCH 168/208] Set the locations of the staging directories for WE2E testing to personal directories; add new testing directory for GDAS. 
--- ush/machine/hera.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 189689f30d..b334f827d1 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -3,9 +3,10 @@ platform: NCORES_PER_NODE: 40 SCHED: slurm TEST_CCPA_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ccpa/proc + TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc TEST_MRMS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/mrms/proc TEST_NDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc + TEST_GDAS_OBS_DIR: /scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final DOMAIN_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen PARTITION_DEFAULT: hera QUEUE_DEFAULT: batch @@ -23,12 +24,12 @@ platform: SCHED_NATIVE_CMD: "--export=NONE" SCHED_NATIVE_CMD_HPSS: "-n 1 --export=NONE" PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' - TEST_EXTRN_MDL_SOURCE_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/input_model_data + TEST_EXTRN_MDL_SOURCE_BASEDIR: /scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final TEST_AQM_INPUT_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/aqm_data TEST_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir - TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' + TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' FIXaer: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_aer FIXgsm: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_am FIXlut: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_lut From 74738afb2334b517665971638293d1056e8e5cbf Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 12 Nov 2024 13:50:44 -0700 Subject: [PATCH 169/208] Change forecast and obs staging locations for WE2E tests to EPIC's directories since the files for the new tests in PR #1137 are staged now. 
--- ush/machine/hera.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index b334f827d1..2151375377 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -6,7 +6,7 @@ platform: TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc TEST_MRMS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/mrms/proc TEST_NDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_GDAS_OBS_DIR: /scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_SRW_WE2E_tests.final + TEST_GDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/gdas DOMAIN_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen PARTITION_DEFAULT: hera QUEUE_DEFAULT: batch @@ -24,12 +24,13 @@ platform: SCHED_NATIVE_CMD: "--export=NONE" SCHED_NATIVE_CMD_HPSS: "-n 1 --export=NONE" PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' - TEST_EXTRN_MDL_SOURCE_BASEDIR: /scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final + TEST_EXTRN_MDL_SOURCE_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/input_model_data TEST_AQM_INPUT_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/aqm_data TEST_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir - TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' + #TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' + TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' FIXaer: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_aer FIXgsm: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_am FIXlut: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_lut From 858b43d1ec7621ee459fd800437b54e8b5bf91ef Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 12 Nov 2024 16:04:12 -0700 Subject: [PATCH 170/208] Bug fixes to get the AI/ML and GFS tests to work with data staged at EPIC locations on Hera. 
--- ...nfig.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...fig.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 2 +- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index f91c3a84be..f4d71ceeb8 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'fcnv2' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 2a3dfad03d..caa917be41 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gc' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index d6b2e22020..cf1fd79ad3 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'pw' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index e6a8333c9a..5ea940f055 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -53,7 +53,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 
'{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ]
 #
   VX_FCST_MODEL_NAME: 'gfs'
   VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml'

From 7b7f161541a8da40ba12cecf496e04b436ad76a4 Mon Sep 17 00:00:00 2001
From: gsketefian <31046882+gsketefian@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:07:29 -0700
Subject: [PATCH 171/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst

Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com>
---
 doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
index c27332ae38..9ff082f37b 100644
--- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
+++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
@@ -719,7 +719,7 @@ For example, to enable deterministic VX, ``rocoto: tasks: taskgroups:`` may be s
 
 This setting can apply to either a deterministic or an ensemble forecast. In the latter case, it will
 cause deterministic VX will be performed on each member of the ensemble (but not on the ensemble as a
-whole). Note that with this setting, the weather model will be run as part of the workflow to generate
+whole). Note that with this setting, the UFS Weather Model will be run as part of the workflow to generate
 forecast output because ``prep.yaml``, ``coldstart.yaml``, and ``post.yaml`` are also included in
 ``rocoto: tasks: taskgroups:``. Whether these forecasts are deterministic or ensemble depends on whether
 ``DO_ENSEMBLE`` in ``config.yaml`` is set to ``False`` or ``True``, respectively (and, if

From c0726d6d12225a523dda4dfd5d08bc6aef49ad2f Mon Sep 17 00:00:00 2001
From: gsketefian <31046882+gsketefian@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:07:54 -0700
Subject: [PATCH 172/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst

Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com>
---
 doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
index 9ff082f37b..8045ea7fae 100644
--- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
+++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
@@ -736,7 +736,7 @@ ensemble VX for an ensemble forecast as well as deterministic VX for each member
 
 If deterministic VX of each ensemble member is not desired, ``verify_det.yaml`` must be left out of the
 above. Note that, as in the previous example, this setting of ``rocoto: tasks: taskgroups:`` will cause
-the workflow to run the weather model to generate forecast output because ``prep.yaml``, ``coldstart.yaml``,
+the workflow to run the UFS Weather Model to generate forecast output because ``prep.yaml``, ``coldstart.yaml``,
 and ``post.yaml`` are again included, but in this case, ``DO_ENSEMBLE`` **must be** set to ``True``
 (and ``NUM_ENS_MEMBERS`` set appropriately) in ``config.yaml`` because inclusion of ``verify_ens.yaml``
 requires that the forecast be an ensemble one.
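[Note: the two documentation patches above describe enabling VX through ``rocoto: tasks: taskgroups:`` together with ``DO_ENSEMBLE`` and ``NUM_ENS_MEMBERS``. A minimal config.yaml sketch of the ensemble case might look as follows; the taskgroup include expression and the member count are illustrative assumptions, not values taken from these patches.]

    # Sketch only: enable ensemble VX plus per-member deterministic VX.
    # The exact taskgroup include syntax should be checked against the SRW App defaults.
    rocoto:
      tasks:
        taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}'
    global:
      DO_ENSEMBLE: true        # required when verify_ens.yaml is included
      NUM_ENS_MEMBERS: 2       # hypothetical member count
    verification:
      VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]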
From ca93bbad08e39bfbc761473207df4caad41b0b91 Mon Sep 17 00:00:00 2001
From: gsketefian <31046882+gsketefian@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:08:26 -0700
Subject: [PATCH 173/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst

Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com>
---
 doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
index 8045ea7fae..6c0d8b57ce 100644
--- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
+++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
@@ -758,7 +758,7 @@ The VX (meta)tasks in the ``verify_*.yaml`` taskgroup files are described in det
 `. They are organized as follows.
 
 The (meta)tasks in ``verify_pre.yaml`` each
-operate on a single obs type (except for ``metatask_check_post_output_all_mems``, which operates on the
+operate on a single observation (obs) type (except for ``metatask_check_post_output_all_mems``, which operates on the
 forecast(s) and checks for the presence of all necessary forecast files), while the ones in ``verify_det.yaml``
 and ``verify_ens.yaml`` operate on one or more verification field groups. A verification field group represents
 one or more meteorologial fields that are operated on (e.g. verified) together in a single

From 708f96d535e86e4ca469704df2dd37a43a7fca80 Mon Sep 17 00:00:00 2001
From: gsketefian <31046882+gsketefian@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:10:33 -0700
Subject: [PATCH 174/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst

Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com>
---
 doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
index 6c0d8b57ce..29f134b36c 100644
--- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
+++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
@@ -837,7 +837,7 @@ given by
 
    {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}
 
-If the obs files exist at the locations specified by these variables, then the ``get_obs_*`` tasks will
+If the obs files exist at the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``, then the ``get_obs_*`` tasks will
 succeed and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the
 ``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and
 place them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``.
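[Note: as a concrete illustration of the ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}`` convention referred to in the patch above, the GDAS-style prepbufr template used in the earlier test-config patches would resolve as sketched below; the staging directory is a hypothetical example, not a default.]

    # Sketch only: how an obs path is assembled from a staging directory and a filename template.
    NDAS_OBS_DIR: /path/to/staged/obs/gdas   # hypothetical user-chosen staging directory
    OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ]
    # For a valid time of 2024-06-15 12Z, the template resolves to
    #   /path/to/staged/obs/gdas/20240615/gdas.t12z.prepbufr.nr
    # If that file is absent, the corresponding get_obs task attempts to retrieve it from a data
    # store (e.g., NOAA HPSS) and place it at that same location.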
Assuming From 3fa1fb887b4f4868f0e27d4206383b226dcf5068 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:11:42 -0700 Subject: [PATCH 175/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 29f134b36c..d6fc8c77ce 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1119,11 +1119,11 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC` (``verify_pre.yaml``) - Set of tasks that generate NetCDF files containing observed ASNOW for the accumulation intervals specified in ``VX_ASNOW_ACCUMS_HRS``. Files for accumulation intervals larger than the one - provided in the obs are obtained by adding ASNOW values over multiple obs accumulation intervals, - e.g. if the obs contain 6-hour accumulations and 24-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, + provided in the obs are obtained by adding ASNOW values over multiple obs accumulation intervals. + For example, if the obs contain 6-hour accumulations and 24-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, then groups of 4 successive 6-hour ASNOW values in the obs are added to obtain the 24-hour values. - In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_obs_NOHRSC``, - where ``{accum_intvl}`` is the accumulation interval in hours (e.g. ``06``, ``24``, etc) for which + In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_obs_NOHRSC``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``06``, ``24``, etc.) for which the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. From f6962f752d908d0f3e42888e8c8e8acf4e5005a3 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:12:07 -0700 Subject: [PATCH 176/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index d6fc8c77ce..d97eed3ef2 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1111,8 +1111,8 @@ interval (for cumulative fields such as accumulated precipitation), and the name provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals, e.g. if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the obs are added to obtain the 3-hour values. - In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, - where ``{accum_intvl}`` is the accumulation interval in hours (e.g. 
``01``, ``03``, ``06``, etc) + In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. From 678b23eb7e3fa2bff7d8122e6744cb67ef48c757 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:13:15 -0700 Subject: [PATCH 177/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index d97eed3ef2..972bc2a807 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1108,8 +1108,8 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_obs_CCPA` (``verify_pre.yaml``) - Set of tasks that generate NetCDF files containing observed APCP for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one - provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals, - e.g. if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, + provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals. + For example, if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the obs are added to obtain the 3-hour values. In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, where ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) From ed72caa4c7af1e709cb6dccae6b07f5030116198 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:13:38 -0700 Subject: [PATCH 178/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 972bc2a807..24f2041a02 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1100,7 +1100,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name - Checks for existence of staged :term:`NDAS` obs files at locations specified by ``NDAS_OBS_DIR`` and ``OBS_NDAS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included - in the workflow only if `'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. + in the workflow only if ``'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. 
* - :bolditalic:`task_run_MET_Pb2nc_obs_NDAS` (``verify_pre.yaml``) - Converts NDAS obs prepbufr files to NetCDF format. From ef6d7294e54d87a239bd4786f23558b8afc3b39c Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:14:00 -0700 Subject: [PATCH 179/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 24f2041a02..37cd2bc14d 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1066,7 +1066,7 @@ the taskgroup file that must be included in the user's ``config.yaml`` file unde in order for that (meta)task to be considered for inclusion in the workflow (see :numref:`Section %s ` for details). As described in :numref:`Section %s `, metatasks define a set of tasks in the workflow based on multiple values of one or more parameters such as the ensemble member index, the accumulation -interval (for cumulative fields such as accumulated precipitation), and the name of the verificaiton field group +interval (for cumulative fields such as accumulated precipitation), and the name of the verification field group (see description of ``VX_FIELD_GROUPS`` in :numref:`Section %s `). .. _VXWorkflowTasksTable: From 3f5d16cbeb6dd29ea33d08b5a3845511057d673a Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:14:41 -0700 Subject: [PATCH 180/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 37cd2bc14d..c588f82c35 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -854,9 +854,9 @@ that attempt is successful, the workflow will move on to subsequent tasks. Thus * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to manually stage the obs data because the ``get_obs_*`` tasks will retrieve the necessary obs and place - them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. The default - values of these variables are such that the files are placed under the experiment directory, but - users may change these if they want the retrieved files to be placed elsewhere. + them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. By default, + the files will be placed under the experiment directory, but + users may change the values of these variables if they want the retrieved files to be placed elsewhere. As an example, consider a case in which all four types of obs are needed for verification. 
Then ``*_OBS_DIR`` From 4b84cbc45b525489d8ca280766d2fae891a39930 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:15:09 -0700 Subject: [PATCH 181/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index c588f82c35..723241d157 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -840,7 +840,7 @@ given by If the obs files exist at the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``, then the ``get_obs_*`` tasks will succeed and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the ``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and -place them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. Assuming +place them in the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``. Assuming that attempt is successful, the workflow will move on to subsequent tasks. Thus: * Users who have the obs files already available (staged) on their system only need to set ``*_OBS_DIR`` From 91367a2e8346f48e8f70dec5f0a23d9998efa9e1 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:19:15 -0700 Subject: [PATCH 182/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 723241d157..4295b31c6a 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -838,7 +838,7 @@ given by {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]} If the obs files exist at the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``, then the ``get_obs_*`` tasks will -succeed and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the +succeed, and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the ``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and place them in the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``. Assuming that attempt is successful, the workflow will move on to subsequent tasks. 
Thus: From f140a776dc34502a98740fa3ec59c33156f9b913 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Tue, 12 Nov 2024 17:20:57 -0700 Subject: [PATCH 183/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 4295b31c6a..97fe959fc0 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -795,14 +795,12 @@ fields they include are given in :numref:`Table %s `. - NDAS - Various upper-air fields (e.g. at 800 mb, 500 mb, etc) -The list ``VX_FIELD_GROUPS`` in the ``verification:`` section of ``config.yaml`` specifies the VX field -groups for which to run verification. Thus, inclusion of a ``verify_*.yaml`` taskgroup file under the +The ``VX_FIELD_GROUPS`` list in the ``verification:`` section of ``config.yaml`` specifies the VX field +groups for which to run verification. In order to avoid unwanted computation, the Rocoto XML will include +only those (meta)tasks that operate on field groups or obs types associated with field groups in ``VX_FIELD_GROUPS``. +Thus, inclusion of a ``verify_*.yaml`` taskgroup file under the ``rocoto: tasks: taskgroups:`` section of ``config.yaml`` does not mean that all the (meta)tasks in that -file will necessarily be included in the workflow. This is because, in order to avoid unwanted computation, -only those (meta)tasks in ``verify_det.yaml`` and/or ``verify_ens.yaml`` that operate on field groups -included in ``VX_FIELD_GROUPS`` will appear in the Rocoto XML, and only those (meta)tasks in ``verify_pre.yaml`` -that operate on obs types associated with one of the field groups in ``VX_FIELD_GROUPS`` will appear in -the Rocoto XML. Thus, for example, setting +file will be included in the workflow. For example, setting: .. code-block:: console From 2504e0598c9b1ac4dc89a6aaa8c6c73ef6ac1f4f Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 07:55:46 -0700 Subject: [PATCH 184/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 97fe959fc0..1f6f9bd9f6 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1128,7 +1128,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_check_post_output_all_mems` (``verify_pre.yaml``) - Set of tasks that ensure that the post-processed forecast files required for verification exist in the locations specified by ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE``. - In rocoto, the tasks under this metatask are named ``check_post_output_mem{mem_indx}``, where ``{mem_indx}`` + In Rocoto, the tasks under this metatask are named ``check_post_output_mem{mem_indx}``, where ``{mem_indx}`` is the index of the ensemble forecast member. This takes on the values ``001``, ``002``, ... 
for an ensemble of forecasts or just ``000`` for a single deterministic forecast. This metatask is included in the workflow if at least one other verification task or metatask is included. From 8de3a0545caec2fe6b0c4faf38e5f05f7b36db03 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 07:56:15 -0700 Subject: [PATCH 185/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 1f6f9bd9f6..f6384c67cc 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1150,11 +1150,11 @@ interval (for cumulative fields such as accumulated precipitation), and the name - Set of tasks that generate NetCDF files containing forecast ASNOW for the accumulation intervals specified in ``VX_ASNOW_ACCUMS_HRS``. Files for accumulation intervals larger than the one provided in the forecasts are obtained by adding ASNOW values over multiple forecast accumulation - intervals, e.g. if the forecasts contain 1-hour accumulations and 6-hr accumulation is specified + intervals. For example, if the forecasts contain 1-hour accumulations, but 6-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, then groups of 6 successive 1-hour ASNOW values in the forecasts are - added to obtain 6-hour values. In rocoto, the tasks under this metatask are named + added to obtain 6-hour values. In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and - ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + ``{mem_indx}`` are the accumulation interval (in hours, e.g., ``06``, ``24``, etc.) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. From 5ccf4547905e520c7817e90d72931eef311203b3 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 07:56:43 -0700 Subject: [PATCH 186/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index f6384c67cc..0514421ea9 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1137,13 +1137,13 @@ interval (for cumulative fields such as accumulated precipitation), and the name - Set of tasks that generate NetCDF files containing forecast APCP for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one provided in the forecasts are obtained by adding APCP values over multiple forecast accumulation - intervals, e.g. if the forecasts contain 1-hour accumulations and 3-hr accumulation is specified + intervals. 
For example, if the forecasts contain 1-hour accumulations, but 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the forecasts are - added to obtain the 3-hour values. In rocoto, the tasks under this metatask are named + added to obtain the 3-hour values. In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and - ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) and + ``{mem_indx}`` are the accumulation interval (in hours, e.g., ``01``, ``03``, ``06``, etc.) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for - which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_all_mems` (``verify_pre.yaml``) From 75d2520faae60c57e8cfddff3b374d5cb571b76b Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:31:59 -0700 Subject: [PATCH 187/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 0514421ea9..bd7bb6a012 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1161,10 +1161,10 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GridStat_APCP_all_accums_all_mems` (``verify_det.yaml``) - Set of tasks that run grid-to-grid verification of accumulated precipitation (represented by the - verification field group ``APCP``) for the intervals specified in ``VX_APCP_ACCUMS_HRS``. In rocoto, + verification field group ``APCP``) for the intervals specified in ``VX_APCP_ACCUMS_HRS``. In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_mem{mem_indx}``, - where ``{accum_intvl}`` and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, - ``03``, ``06``, etc) and the ensemble forecast member index (or just ``000`` for a single deterministic + where ``{accum_intvl}`` and ``{mem_indx}`` are the accumulation interval in hours (e.g., ``01``, + ``03``, ``06``, etc.) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. From 1647133e306b9e65925af25fbb22bfc75bd452e2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 13 Nov 2024 09:57:48 -0700 Subject: [PATCH 188/208] Change wallclock time for GenEnsProd task for SFC and UPA field groups from 02:30:00 to 04:15:00 to account for unexpected increase in time needed to complete GenEnsProd for UPA fields in the WE2E test "MET_ensemble_verification_only_vx_time_lag". 
--- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 45cf5385be..d3601c14d8 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -157,7 +157,7 @@ metatask_GenEnsProd_EnsembleStat_SFC_UPA: ACCUM_HH: '01' FCST_LEVEL: 'all' FCST_THRESH: 'all' - walltime: 02:30:00 + walltime: 04:15:00 dependency: metataskdep_check_post_output: <<: *check_post_output From 62dad3a492d8b4530f4705a09dfcc35b458e0148 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:17:23 -0700 Subject: [PATCH 189/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index bd7bb6a012..3c5b6070b9 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1170,9 +1170,9 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_all_mems` (``verify_det.yaml``) - Set of tasks that run grid-to-grid verification of accumulated snowfall (represented by the verification - field group ``ASNOW``) for the intervals specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under + field group ``ASNOW``) for the intervals specified in ``VX_ASNOW_ACCUMS_HRS``. In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_mem{mem_indx}``, where ``{accum_intvl}`` - and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + and ``{mem_indx}`` are the accumulation interval in hours (e.g., ``06``, ``24``, etc.) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. From 06c14052b456e5d06a8e64eda9a3391aa026beb5 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:17:35 -0700 Subject: [PATCH 190/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 3c5b6070b9..9ff596d852 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1179,7 +1179,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GridStat_REFC_RETOP_all_mems` (``verify_det.yaml``) - Set of tasks that run grid-to-grid verification of :term:`composite reflectivity` (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the verification field - group ``RETOP``). In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_mem{mem_indx}``, + group ``RETOP``). 
In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_mem{mem_indx}``, where ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``REFC`` or ``RETOP``) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being run. The tasks for ``REFC`` are included in the workflow only if ``'REFC'`` is From 0724a01b4347b3b83fa5c5f2990da5335e795496 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:17:52 -0700 Subject: [PATCH 191/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 9ff596d852..e0a9e98e47 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1188,7 +1188,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_PointStat_SFC_UPA_all_mems` (``verify_det.yaml``) - Set of tasks that run grid-to-point verification of surface fields (represented by the verification field - group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``). In rocoto, + group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``). In Rocoto, the tasks under this metatask are named ``run_MET_PointStat_vx_{field_group}_mem{mem_indx}``, where ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``SFC`` or ``UPA``) and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task From cd6d762366eab87edcb6c29bdee3d1a640d9116b Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:18:15 -0700 Subject: [PATCH 192/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index e0a9e98e47..65926783d9 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1198,10 +1198,10 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_APCP_all_accums` (``verify_ens.yaml``) - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on APCP for the intervals - specified in ``VX_APCP_ACCUMS_HRS``. In rocoto, the tasks under this metatask that run ``GenEnsProd`` - are named ``run_MET_GenEnsProd_vx_APCP{accum_intvl}h``, and the ones that run `EnsembleStat`` are + specified in ``VX_APCP_ACCUMS_HRS``. In Rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_APCP{accum_intvl}h``, and the ones that run ``EnsembleStat`` are named ``run_MET_EnsembleStat_vx_APCP{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation - interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. 
This metatask + interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. From 790434cc5592c3db896bad94559b9aaaed6cf19e Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:18:34 -0700 Subject: [PATCH 193/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 65926783d9..ca29e79115 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1207,10 +1207,10 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums` (``verify_ens.yaml``) - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on ASNOW for the intervals - specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under this metatask that run ``GenEnsProd`` - are named ``run_MET_GenEnsProd_vx_ASNOW{accum_intvl}h`` and the ones that run `EnsembleStat`` are + specified in ``VX_ASNOW_ACCUMS_HRS``. In Rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_ASNOW{accum_intvl}h`` and the ones that run ``EnsembleStat`` are named ``run_MET_EnsembleStat_vx_ASNOW{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation - interval (in hours, e.g. ``06``, ``24``, etc) for which the tasks are being run. This metatask will be + interval in hours (e.g., ``06``, ``24``, etc.) for which the tasks are being run. This metatask will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. From c1d02d57397db216d99f7e2289d7bc34d92beb9a Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:18:59 -0700 Subject: [PATCH 194/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index ca29e79115..cb45ad572f 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1227,8 +1227,8 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_SFC_UPA` (``verify_ens.yaml``) - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on surface fields (represented by the verification field group ``SFC``) and upper-air fields (represented by the verification field group - ``UPA``). In rocoto, the tasks under this metatask that run ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, - and the ones that run `EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` + ``UPA``). 
In Rocoto, the tasks under this metatask that run ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, + and the ones that run ``EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` is the field group (in this case either ``SFC`` or ``UPA``) for which the tasks are being run. The tasks for ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to From c4592150af364a7dce0bae98e913242703a04971 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:19:23 -0700 Subject: [PATCH 195/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index cb45ad572f..097f1eab08 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1216,8 +1216,8 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_REFC_RETOP` (``verify_ens.yaml``) - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on REFC (:term:`composite - reflectivity`) and RETOP (:term:`echo top`). In rocoto, the tasks under this metatask that run - ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, and the ones that run `EnsembleStat`` + reflectivity`) and RETOP (:term:`echo top`). In Rocoto, the tasks under this metatask that run + ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, and the ones that run ``EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the tasks are being run. The tasks for ``REFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` From b0772f99161ec1f035d358d90e394e2846275fa0 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:19:40 -0700 Subject: [PATCH 196/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 097f1eab08..698ead9dd6 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1236,10 +1236,10 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GridStat_APCP_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic - verification of the ensemble of APCP forecasts as a whole. In rocoto, the tasks under this metatask for + verification of the ensemble of APCP forecasts as a whole. 
In Rocoto, the tasks under this metatask for ensemble mean verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensmean``, and the ones for ensemble probabilistic verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensprob``, where - ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. From 0309c6859342c08bbd08f9e7536429446a6c183a Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:19:58 -0700 Subject: [PATCH 197/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 698ead9dd6..6841e31346 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1245,10 +1245,10 @@ interval (for cumulative fields such as accumulated precipitation), and the name * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic - verification of the ensemble of ASNOW forecasts as a whole. In rocoto, the tasks under this metatask for + verification of the ensemble of ASNOW forecasts as a whole. In Rocoto, the tasks under this metatask for ensemble mean verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensmean``, and the ones for ensemble probabilistic verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensprob``, where - ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. From 6b88fff3dfa3284d193b21447becb91e76f8cbab Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:20:18 -0700 Subject: [PATCH 198/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 6841e31346..c76e7bb9a1 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1256,7 +1256,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group ``RETOP``). 
(Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) - In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_ensprob``, where + In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included From 9bff39bdfafe2de1f396b97b5e84af54f8619e0d Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:20:34 -0700 Subject: [PATCH 199/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index c76e7bb9a1..ea5634dbef 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -1266,7 +1266,7 @@ interval (for cumulative fields such as accumulated precipitation), and the name - Set of tasks that run grid-to-grid verification of the ensemble mean of surface fields (represented by the verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) as well as grid-to-grid probabilistic verification of the ensemble of the surface and upper-air field - forecasts as a whole. In rocoto, the tasks under this metatask for ensemble mean verification are named + forecasts as a whole. In Rocoto, the tasks under this metatask for ensemble mean verification are named ``run_MET_PointStat_vx_{field_group}_ensmean``, and the ones for ensemble probabilistic verification are named ``run_MET_PointStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the From d2f31cbd7bce8f49a2f797bce97a9a0c2c7e15d2 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:46:39 -0700 Subject: [PATCH 200/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index ea5634dbef..70fd7b2e49 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -846,7 +846,7 @@ that attempt is successful, the workflow will move on to subsequent tasks. Thus * Users who do not have the obs files available on their systems and do not have access to NOAA HPSS need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` files manually - from collections of publicly available data such as the ones listed `here `__. + from collections of publicly available data. Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those staging locations and file names. 
From f715f44af767bd9a08f38e1f2f8c04702a013b18 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 13 Nov 2024 17:09:37 -0700 Subject: [PATCH 201/208] Add new terms to glossary. --- doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst | 7 +++++++ doc/TechDocs/ush/get_obs.rst | 7 +++++++ doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst | 7 +++++++ doc/TechDocs/ush/set_leadhrs.rst | 7 +++++++ doc/UsersGuide/Reference/Glossary.rst | 9 +++++++++ 5 files changed, 37 insertions(+) create mode 100644 doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst create mode 100644 doc/TechDocs/ush/get_obs.rst create mode 100644 doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst create mode 100644 doc/TechDocs/ush/set_leadhrs.rst diff --git a/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst b/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst new file mode 100644 index 0000000000..7ccc52bb64 --- /dev/null +++ b/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst @@ -0,0 +1,7 @@ +eval\_metplus\_timestr\_tmpl module +=================================== + +.. automodule:: eval_metplus_timestr_tmpl + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/get_obs.rst b/doc/TechDocs/ush/get_obs.rst new file mode 100644 index 0000000000..6b4e2ac936 --- /dev/null +++ b/doc/TechDocs/ush/get_obs.rst @@ -0,0 +1,7 @@ +get\_obs module +=============== + +.. automodule:: get_obs + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst b/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst new file mode 100644 index 0000000000..13ec7b9b73 --- /dev/null +++ b/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst @@ -0,0 +1,7 @@ +set\_cycle\_and\_obs\_timeinfo module +===================================== + +.. automodule:: set_cycle_and_obs_timeinfo + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/set_leadhrs.rst b/doc/TechDocs/ush/set_leadhrs.rst new file mode 100644 index 0000000000..b0172264d3 --- /dev/null +++ b/doc/TechDocs/ush/set_leadhrs.rst @@ -0,0 +1,7 @@ +set\_leadhrs module +=================== + +.. automodule:: set_leadhrs + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index a22cd70b4e..5b60c5b40d 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -48,6 +48,9 @@ Glossary Component Repository A :term:`repository` that contains, at a minimum, source code for a single component. + composite reflectivity + `Composite reflectivity `__ is a display or mapping of the maximum radar reflectivity factor at any altitude as a function of position on the ground. + Container `Docker `__ describes a container as "a standard unit of software that packages up code and all its dependencies so the application runs quickly and reliably from one computing environment to another." @@ -70,6 +73,9 @@ Glossary data assimilation Data assimilation is the process of combining observations, model data, and error statistics to achieve the best estimate of the state of a system. One of the major sources of error in weather and climate forecasts is uncertainty related to the initial conditions that are used to generate future predictions. Even the most precise instruments have a small range of unavoidable measurement error, which means that tiny measurement errors (e.g., related to atmospheric conditions and instrument location) can compound over time. 
These small differences result in very similar forecasts in the short term (i.e., minutes, hours), but they cause widely divergent forecasts in the long term. Errors in weather and climate forecasts can also arise because models are imperfect representations of reality. Data assimilation systems seek to mitigate these problems by combining the most timely observational data with a "first guess" of the atmospheric state (usually a previous forecast) and other sources of data to provide a "best guess" analysis of the atmospheric state to start a weather or climate simulation. When combined with an "ensemble" of model runs (many forecasts with slightly different conditions), data assimilation helps predict a range of possible atmospheric states, giving an overall measure of uncertainty in a given forecast. + DTC + The `Developmental Testbed Center `__ is a distributed facility where the NWP community can test and evaluate new models and techniques for use in research and operations. + dycore dynamical core Global atmospheric model based on fluid dynamics principles, including Euler's equations of motion. @@ -152,6 +158,9 @@ Glossary MERRA2 The `Modern-Era Retrospective analysis for Research and Applications, Version 2 `__ provides satellite observation data back to 1980. According to NASA, "It was introduced to replace the original MERRA dataset because of the advances made in the assimilation system that enable assimilation of modern hyperspectral radiance and microwave observations, along with GPS-Radio Occultation datasets. It also uses NASA's ozone profile observations that began in late 2004. Additional advances in both the GEOS model and the GSI assimilation system are included in MERRA-2. Spatial resolution remains about the same (about 50 km in the latitudinal direction) as in MERRA." + MET + The `Model Evaluation Tools `__ is a highly-configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. + MPI MPI stands for Message Passing Interface. An MPI is a standardized communication system used in parallel programming. It establishes portable and efficient syntax for the exchange of messages and data between multiple processors that are used by a single computer program. An MPI is required for high-performance computing (HPC) systems. From 6b1cef9976deb57bde63c857f07b1c9659a1b857 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 14 Nov 2024 03:37:38 -0700 Subject: [PATCH 202/208] Remove rst file in TechDocs for a non-existent python script. --- doc/TechDocs/ush/set_cycle_dates.rst | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 doc/TechDocs/ush/set_cycle_dates.rst diff --git a/doc/TechDocs/ush/set_cycle_dates.rst b/doc/TechDocs/ush/set_cycle_dates.rst deleted file mode 100644 index 1af14392fd..0000000000 --- a/doc/TechDocs/ush/set_cycle_dates.rst +++ /dev/null @@ -1,7 +0,0 @@ -set\_cycle\_dates module -======================== - -.. automodule:: set_cycle_dates - :members: - :undoc-members: - :show-inheritance: From 8a3545b181c7cd58606c8528a0c1d2a902b2d596 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 14 Nov 2024 03:38:24 -0700 Subject: [PATCH 203/208] Update list of modules to be included in the tech docs (the rst file for each new module was added in a previous commit). 
--- doc/TechDocs/ush/modules.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/TechDocs/ush/modules.rst b/doc/TechDocs/ush/modules.rst index 2070e75ad0..6ac0346624 100644 --- a/doc/TechDocs/ush/modules.rst +++ b/doc/TechDocs/ush/modules.rst @@ -12,18 +12,21 @@ ush create_diag_table_file create_model_configure_file create_ufs_configure_file + eval_metplus_timestr_tmpl generate_FV3LAM_wflow get_crontab_contents + get_obs link_fix mrms_pull_topofhour python_utils retrieve_data run_srw_tests - set_cycle_dates + set_cycle_and_obs_timeinfo set_fv3nml_ens_stoch_seeds set_fv3nml_sfc_climo_filenames set_gridparams_ESGgrid set_gridparams_GFDLgrid + set_leadhrs set_predef_grid_params setup update_input_nml From c0927984ba45fa978b8c7ea78900e1b9808463c5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 14 Nov 2024 03:39:36 -0700 Subject: [PATCH 204/208] Bug fix: Change :ref: to :term: to reference the definition of DTC in the glossary. --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 48b84005a5..50d343f57e 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1602,7 +1602,7 @@ Non-default parameters for verification tasks are set in the ``verification:`` s .. note:: The verification tasks in the SRW App are based on the :ref:`METplus ` - verification software developed at the Developmental Testbed Center (:ref:`DTC`). + verification software developed at the Developmental Testbed Center (:term:`DTC`). :ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Full documentation for METplus is available on the `METplus website `__. From 3d0bd5cfd9b12a1ae4655d9117faae502da0a514 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 14 Nov 2024 03:44:18 -0700 Subject: [PATCH 205/208] Fix indentation for argument and return value lists in the docstrings of new python functions; add type for each argument and return value; use latex-style math formatting for equations; other minor formatting adjustments. --- ush/get_obs.py | 121 ++++++++++++--------- ush/set_cycle_and_obs_timeinfo.py | 168 +++++++++++++++--------------- 2 files changed, 154 insertions(+), 135 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index 361426c16c..da88856575 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -41,23 +41,23 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): archive hour to 24. Args: - obtype: - The observation type. A string. + obtype (str): + The observation type. - arcv_intvl_hrs: - Time interval (in hours) between archive files. An integer. For example, - if the obs files are bundled into 6-hourly archives, then this will be - set to 6. This must be between 1 and 24 and must divide evenly into 24. + arcv_intvl_hrs (int): + Time interval (in hours) between archive files. For example, if the obs + files are bundled into 6-hourly archives, then this will be set to 6. This + must be between 1 and 24 and must divide evenly into 24. - hod: - The hour of the day. An integer. This must be between 0 and 23. For - cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the - next day, i.e. as the 24th hour of the current day. + hod (int): + The hour of the day. This must be between 0 and 23. 
For cumulative fields + (CCPA and NOHRSC), hour 0 is treated as that of the next day, i.e. as the + 24th hour of the current day. Returns: - arcv_hr: - The hour since the start of day corresponding to the archive file containing - the obs file for the given hour of day. An integer. + arcv_hr (int): + The hour since the start of day corresponding to the archive file containing + the obs file for the given hour of day. """ valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] @@ -120,23 +120,24 @@ def get_obs(config, obtype, yyyymmdd_task): This script checks for the existence of obs files of the specified type at the locations specified by variables in the SRW App's configuration file. If one or more of these files do not exist, it retrieves them from - a data store (using the retrieve_data.py script and as specified by the - configuration file parm/data_locations.yml for that script) and places + a data store (using the ``retrieve_data.py`` script and as specified by the + configuration file ``parm/data_locations.yml`` for that script) and places them in the locations specified by the App's configuration variables, renaming them if necessary. Args: - config: - The final configuration dictionary (obtained from var_defns.yaml). + config (dict): + The final configuration dictionary (obtained from ``var_defns.yaml``). - obtype: - The observation type. A string. + obtype (str): + The observation type. - yyyymmdd_task: - The date for which obs may be needed. A datetime object. + yyyymmdd_task (datetime.datetime): + The date for which obs may be needed. Returns: - True if all goes well. + True (bool): + If all goes well. Detailed Description: @@ -158,15 +159,19 @@ def get_obs(config, obtype, yyyymmdd_task): obtain all available obs for the current day. - CCPA (Climatology-Calibrated Precipitation Analysis) precipitation - accumulation obs: - ---------- + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs + ----------------------------------------------------------------------------------- For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 6 files, so that the obs availability interval is - obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - = 1 hr/file + .. math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = (\\text{24 hrs})/[(\\text{4 archives}) \\times (\\text{6 files/archive})] \\hspace{50in} \\\\ + & = \\text{1 hr/file} + \\end{align*} i.e. there is one obs file for each hour of the day containing the accumulation over that one hour. The archive corresponding to hour 0 @@ -192,15 +197,19 @@ def get_obs(config, obtype, yyyymmdd_task): errors if getting CCPA obs at these times. - NOHRSC (National Operational Hydrologic Remote Sensing Center) snow - accumulation observations: - ---------- + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow accumulation observations + --------------------------------------------------------------------------------------------- For NOHRSC, the archive interval is 24 hours, i.e. the obs files are bundled into 24-hourly archives. The archives are organized such that each one contains 4 files, so that the obs availability interval is - obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] - = 6 hr/file + .. 
math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = (\\text{24 hrs})/[(\\text{1 archive}) \\times (\\text{4 files/archive})] \\hspace{50in} \\\\ + & = \\text{6 hr/file} + \\end{align*} i.e. there is one obs file for each 6-hour interval of the day containing the accumulation over those 6 hours. The 4 obs files within each archive @@ -218,8 +227,8 @@ def get_obs(config, obtype, yyyymmdd_task): In other cases, the sequence we loop over will be a subset of [0, 24]. - MRMS (Multi-Radar Multi-Sensor) radar observations: - ---------- + MRMS (Multi-Radar Multi-Sensor) radar observations + -------------------------------------------------- For MRMS, the archive interval is 24 hours, i.e. the obs files are bundled into 24-hourly archives. The archives are organized such that each contains gzipped grib2 files for that day that are usually only a @@ -229,16 +238,21 @@ def get_obs(config, obtype, yyyymmdd_task): This effectively sets the obs availability interval for MRMS to one hour, i.e. - obs_avail_intvl_hrs = 1 hr/file + .. math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = \\text{1 hr/file} \\hspace{50in} \\\\ + \\end{align*} i.e. there is one obs file for each hour of the day containing values at that hour (but only after filtering in time; also see notes for - MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the - obs at all hours of the day, we only need to extract files from one - archive. Thus, in the simplest case in which the observation retrieval - times include all hours of the current task's day at which obs files - are available and none of the obs files for this day already exist on - disk, the sequence of archive hours over which we loop will be just + ``MRMS_OBS_AVAIL_INTVL_HRS`` in ``config_defaults.yaml``). Thus, to + obtain the obs at all hours of the day, we only need to extract files + from one archive. Thus, in the simplest case in which the observation + retrieval times include all hours of the current task's day at which obs + files are available and none of the obs files for this day already exist + on disk, the sequence of archive hours over which we loop will be just [0]. Note that: * For cases in which MRMS data are not needed for all hours of the day, @@ -252,19 +266,19 @@ def get_obs(config, obtype, yyyymmdd_task): and extract two different archive files (one per field). - NDAS (NAM Data Assimilation System) conventional observations: - ---------- + NDAS (NAM Data Assimilation System) conventional observations + ------------------------------------------------------------- For NDAS, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 7 files (not say 6). The archive associated with time yyyymmddhh_arcv contains the hourly files at - yyyymmddhh_arcv - 6 hours - yyyymmddhh_arcv - 5 hours - ... - yyyymmddhh_arcv - 2 hours - yyyymmddhh_arcv - 1 hours - yyyymmddhh_arcv - 0 hours + | yyyymmddhh_arcv - 6 hours + | yyyymmddhh_arcv - 5 hours + | ... + | yyyymmddhh_arcv - 2 hours + | yyyymmddhh_arcv - 1 hours + | yyyymmddhh_arcv - 0 hours These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, respectively. Thus, the tm06 file from the current archive, say the @@ -277,8 +291,13 @@ def get_obs(config, obtype, yyyymmdd_task): the one at tm00, effectively resulting in 6 files per archive for NDAS obs. The obs availability interval is then - obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - = 1 hr/file + .. 
math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = (\\text{24 hrs})/[(\\text{4 archives}) \\times (\\text{6 files/archive})] \\hspace{50in} \\\\ + & = \\text{1 hr/file} + \\end{align*} i.e. there is one obs file for each hour of the day containing values at that hour. The archive corresponding to hour 0 of the current day diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 31f615d573..7ae764ad10 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -13,29 +13,29 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, This file defines a function that returns a list containing the starting times of all the cycles in the experiment. - If return_type is set to "string" (the default value), the returned list - contains strings in the format 'YYYYMMDDHH'. If it is set to "datetime", + If return_type is set to 'string' (the default value), the returned list + contains strings in the format 'YYYYMMDDHH'. If it is set to 'datetime', the returned list contains a set of datetime objects. Args: - start_time_first_cycl: - Starting time of first cycle; a datetime object. + start_time_first_cycl (datetime.datetime): + Starting time of first cycle. - start_time_last_cycl: - Starting time of last cycle; a datetime object. + start_time_last_cycl (datetime.datetime): + Starting time of last cycle. - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycl_intvl (datetime.timedelta): + Time interval between cycle start times. - return_type: - String that specifies the type of the returned list. + return_type (str): + Type of the returned list. Can be 'string' or 'datetime'. Returns: - all_cdates: - Either a list of strings in the format 'YYYYMMDDHH' or a list of datetime - objects containing the cycle starting times, where 'YYYY' is the four- - digit year, 'MM is the two-digit month, 'DD'' is the two-digit day-of- - month, and 'HH' is the two-digit hour-of-day. + all_cdates (list): + Either a list of strings in the format 'YYYYMMDDHH' or a list of datetime + objects containing the cycle starting times, where 'YYYY' is the four- + digit year, 'MM is the two-digit month, 'DD' is the two-digit day-of- + month, and 'HH' is the two-digit hour-of-day. """ print_input_args(locals()) @@ -58,7 +58,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, all_cdates.append(cdate) cdate += cycl_intvl - if return_type == "string": + if return_type == 'string': all_cdates = [datetime.strftime(cdate, "%Y%m%d%H") for cdate in all_cdates] return all_cdates @@ -107,27 +107,27 @@ def check_temporal_consistency_cumul_fields( to verify for that field. Args: - vx_config: - The verification configuration dictionary. + vx_config (dict): + The verification configuration dictionary. - cycle_start_times: - List containing the starting times of the cycles in the experiment; - each list element is a datetime object. + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. - fcst_len: - The length of each forecast; a timedelta object. + fcst_len (datetime.timedelta): + The length of each forecast; a timedelta object. - fcst_output_intvl: - Time interval between forecast output times; a timedelta object. + fcst_output_intvl (datetime.timedelta): + Time interval between forecast output times; a timedelta object. 
Returns: - vx_config: - An updated version of the verification configuration dictionary. + vx_config (dict): + An updated version of the verification configuration dictionary. - fcst_obs_matched_times_all_cycles_cumul: - Dictionary containing the times (in YYYYMMDDHH string format) at - which various field/accumlation combinations are output and at - which the corresponding obs type is also available. + fcst_obs_matched_times_all_cycles_cumul (dict): + Dictionary containing the times (in YYYYMMDDHH string format) at + which various field/accumlation combinations are output and at + which the corresponding obs type is also available. """ # Set dictionary containing all field groups that consist of cumulative @@ -357,27 +357,27 @@ def set_fcst_output_times_and_obs_days_all_cycles( accumulation interval smaller than this are obviously not allowed). Args: - cycle_start_times: - List containing the starting times of the cycles in the experiment; - each list element is a datetime object. + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. - fcst_len: - The length of each forecast; a timedelta object. + fcst_len (datetime.timedelta): + The length of each forecast. - fcst_output_intvl: - Time interval between forecast output times; a timedelta object. + fcst_output_intvl (datetime.timedelta): + Time interval between forecast output times. Returns: - fcst_output_times_all_cycles: - Dictionary containing a list of forecast output times over all cycles for - instantaneous fields and a second analogous list for cumulative fields. - Each element of these lists is a string of the form 'YYYYMMDDHH'. - - obs_days_all_cycles: - Dictionary containing a list of observation days (i.e. days on which - observations are needed to perform verification) over all cycles for - instantaneous fields and a second analogous list for cumulative fields. - Each element of these lists is a string of the form 'YYYYMMDD'. + fcst_output_times_all_cycles (dict): + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles (dict): + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. """ # Get the number of forecast output times per cycle/forecast. @@ -498,25 +498,25 @@ def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): in-between days on which obs are not needed. Args: - obs_days_all_cycles: - A list of strings of the form 'YYYYMMDD', with each string representing - a day on which observations are needed. Note that the list must be - sorted, i.e. the days must be increasing in time, but there may be - gaps between days. + obs_days_all_cycles (list): + A list of strings of the form 'YYYYMMDD', with each string representing + a day on which observations are needed. Note that the list must be sorted, + i.e. the days must be increasing in time, but there may be gaps between + days. 
Returns: - cycledefs_all_obs_days: - A list of strings, with each string being a ROCOTO-style cycledef of - the form - - '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' - - where {yyyymmdd_start} is the starting day of the first cycle in the - cycledef and {yyyymmdd_end} is the starting day of the last cycle (note - that the minutes and hours in these cycledef stirngs are always set to - '00'). For example, an element of the output list may be: - - '202404290000 202405010000 24:00:00' + cycledefs_all_obs_days (list): + A list of strings, with each string being a ROCOTO-style cycledef of the + form + + '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' + + where {yyyymmdd_start} is the starting day of the first cycle in the + cycledef and {yyyymmdd_end} is the starting day of the last cycle (note + that the minutes and hours in these cycledef stirngs are always set to + '00'). For example, an element of the output list may be: + + '202404290000 202405010000 24:00:00' """ # To enable arithmetic with dates, convert input sting list of observation @@ -585,31 +585,31 @@ def get_obs_retrieve_times_by_day( The elements of each list are formatted as 'YYYYMMDDHH'. Args: - vx_config: - The verification configuration dictionary. + vx_config (dict): + The verification configuration dictionary. - cycle_start_times: - List containing the starting times of the cycles in the experiment; - each list element is a datetime object. + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. - fcst_len: - The length of each forecast; a timedelta object. + fcst_len (datetime.timedelta): + The length of each forecast. - fcst_output_times_all_cycles: - Dictionary containing a list of forecast output times over all cycles for - instantaneous fields and a second analogous list for cumulative fields. - Each element of these lists is a string of the form 'YYYYMMDDHH'. + fcst_output_times_all_cycles (dict): + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. - obs_days_all_cycles: - Dictionary containing a list of observation days (i.e. days on which - observations are needed to perform verification) over all cycles for - instantaneous fields and a second analogous list for cumulative fields. - Each element of these lists is a string of the form 'YYYYMMDD'. + obs_days_all_cycles (dict): + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. Returns: - obs_retrieve_times_by_day: - Dictionary of dictionaries containing times at which each type of obs is - needed on each obs day. + obs_retrieve_times_by_day (dict): + Dictionary of dictionaries containing times at which each type of obs is + needed on each obs day. """ # Convert string contents of input dictionaries to datetime objects. From 7b8bc8d24299c47a47e0d0c90db6681ad4975db9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 14 Nov 2024 04:23:48 -0700 Subject: [PATCH 206/208] Define METPLUS_ROOT in the Makefile for the docs. 
--- doc/Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/Makefile b/doc/Makefile index 9663ba3996..33cb872124 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,10 +1,12 @@ # Makefile for Sphinx documentation -SPHINXOPTS = -a -n -W +#SPHINXOPTS = -a -n -W +SPHINXOPTS = -a -n SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = build LINKCHECKDIR = $(BUILDDIR)/linkcheck +METPLUSROOT = /scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/upp-addon-env/install/intel/2021.5.0/metplus-5.1.0-eznr7ze .PHONY: help Makefile linkcheck @@ -14,8 +16,8 @@ help: doc: make clean - $(MAKE) linkcheck - $(MAKE) html + export METPLUS_ROOT=$(METPLUSROOT) ; $(MAKE) linkcheck + export METPLUS_ROOT=$(METPLUSROOT) ; $(MAKE) html linkcheck: $(SPHINXBUILD) -b linkcheck $(SPHINXOPTS) $(SOURCEDIR) $(LINKCHECKDIR) @@ -24,4 +26,4 @@ linkcheck: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) -w $(BUILDDIR)/warnings.log \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) -w $(BUILDDIR)/warnings.log From 3ca5387dd7632cfb0dce1e6bff10dbe1be574ec2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 15 Nov 2024 09:20:06 -0700 Subject: [PATCH 207/208] Include "metplus" in autodoc_mock_imports list in the configuration file for sphinx (conf.py) (this will give sphinx access to METplus); remove definition of METPLUS_ROOT as an environment variable from the Makefile for the docs and instead define it in conf.py (as just a null string since it isn't actually used to load METplus). --- doc/Makefile | 8 +++----- doc/conf.py | 10 +++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/Makefile b/doc/Makefile index 33cb872124..a4fac61e1a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,12 +1,10 @@ # Makefile for Sphinx documentation -#SPHINXOPTS = -a -n -W -SPHINXOPTS = -a -n +SPHINXOPTS = -a -n -W SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = build LINKCHECKDIR = $(BUILDDIR)/linkcheck -METPLUSROOT = /scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/upp-addon-env/install/intel/2021.5.0/metplus-5.1.0-eznr7ze .PHONY: help Makefile linkcheck @@ -16,8 +14,8 @@ help: doc: make clean - export METPLUS_ROOT=$(METPLUSROOT) ; $(MAKE) linkcheck - export METPLUS_ROOT=$(METPLUSROOT) ; $(MAKE) html + $(MAKE) linkcheck + $(MAKE) html linkcheck: $(SPHINXBUILD) -b linkcheck $(SPHINXOPTS) $(SOURCEDIR) $(LINKCHECKDIR) diff --git a/doc/conf.py b/doc/conf.py index 2b5bf7b4d4..e8f15567d8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -253,7 +253,7 @@ def setup(app): # -- Options for autodoc extension --------------------------------------- autodoc_mock_imports = ["f90nml","cartopy","mpl_toolkits.basemap","fill_jinja_template", - "matplotlib","numpy","uwtools","mpl_toolkits", + "matplotlib","numpy","uwtools","mpl_toolkits","metplus", ] logger = logging.getLogger(__name__) @@ -310,3 +310,11 @@ def warn_undocumented_members(app, what, name, obj, options, lines): 'fire-ug': ('https://fire-behavior.readthedocs.io/en/latest/%s', '%s'), } +# Define environment variables that need to exist when running the top-level code in python +# files (outside of functions, classes, etc.). 
+# +# METPLUS_ROOT just needs to exist in the environment; its value is not important since for +# the purpose of building the documentation, METplus is loaded by including "metplus" in +# the autodoc_mock_imports list above, not via use of the METPLUS_ROOT environment variable. +os.environ["METPLUS_ROOT"] = "" + From 72de59d7c8bd681c6f43a1d780d877439af033e4 Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Mon, 18 Nov 2024 08:11:14 -0700 Subject: [PATCH 208/208] Update doc/UsersGuide/BuildingRunningTesting/RunSRW.rst Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- .../BuildingRunningTesting/RunSRW.rst | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 70fd7b2e49..b66b399652 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -817,24 +817,26 @@ summer period for which ``ASNOW`` is not relevant. Staging Observation Files `````````````````````````````````` -The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*`` --- where the ``*`` -represents any one of the supported obs types :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS` --- -that will first check for the existence of the obs files required for VX at the locations on disk specified +The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*``, where the ``*`` +represents any one of the supported obs types: :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`. These ``get_obs_*`` tasks +will first check on disk for the existence of the obs files required for VX using the locations specified by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section of -``config.yaml``. The ``*_OBS_DIR`` are the base directories in which the obs files are or should be +``config.yaml``. The ``*_OBS_DIR`` are the base directories in which the obs files are or should be staged, and the ``OBS_*_FN_TEMPLATES[1,3,...]`` are the file name templates (with METplus time strings -used for templating; see example below). The ``[1,3,...]`` in ``OBS_*_FN_TEMPLATES[1,3,...]`` refer -to the second, fourth, etc elements of ``OBS_*_FN_TEMPLATES`` and correspond to the various sets of files -that the obs type contains. (The first, third, etc elements, i.e. ``OBS_*_FN_TEMPLATES[0,2,...]``, -indicate the VX field groups for which the respective sets of obs files are used to verify and should -not be changed.) Note that ``OBS_*_FN_TEMPLATES[1,3,...]`` may include leading subdirectories and are -relative to the obs type's ``*_OBS_DIR``. Thus, the templates for the full paths to the obs files are -given by +used for templating; see example below). Thus, the templates for the full paths to the obs files are +given by: .. code-block:: console - + {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]} +The contents of the ``OBS_*_FN_TEMPLATES`` list come in pairs, where the first element +of each pair (with even-numbered indices ``[0,2,...]``) refers to the field group, +while the second element (with odd-numbered indices ``[1,3,...]``) refers to the +corresponding sets of files that the obs type contains. Note that ``OBS_*_FN_TEMPLATES +[1,3,...]`` may include leading subdirectories and are +relative to the obs type's ``*_OBS_DIR``. 
+ If the obs files exist at the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``, then the ``get_obs_*`` tasks will succeed, and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the ``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and