diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 60ed1bdc..99f5c52d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,7 +40,6 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel build 'twine<5.0.0' 'importlib_metadata<=7.0.1' 'setuptools<=72.2.0' 'numpy<2.0' - - name: Base installation run: | pip --verbose install . @@ -124,8 +123,7 @@ jobs: - name: Set environment variables run: | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - echo "PYWS_FORTRAN=true" >> $GITHUB_ENV - echo 'SETUPTOOLS_ENABLE_FEATURES="legacy-editable"' >> $GITHUB_ENV + echo "PYWS_FORTRAN=false" >> $GITHUB_ENV cat .mf6_ci_ref_remote >> $GITHUB_ENV - name: Enforce MF6 ref and remote merge to main @@ -150,8 +148,9 @@ jobs: run: .github/scripts/symlink_gfortran_mac.sh - name: Install Dependencies via Micromamba - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v1.9.0 with: + micromamba-version: '1.5.10-0' log-level: debug environment-file: environment.yml cache-environment: true @@ -191,7 +190,14 @@ jobs: - name: domainless - run tests not requiring domain data working-directory: autotest - run: pytest -m domainless -n=auto -vv + run: pytest + -m domainless + -n=auto + -vv + --durations=0 + --cov=pywatershed + --cov-report=xml + --junitxml=pytest_domainless.xml - name: sagehen_5yr_no_cascades - generate and manage test data domain, run PRMS and convert csv output to NetCDF working-directory: autotest diff --git a/.github/workflows/ci_examples.yaml b/.github/workflows/ci_examples.yaml index 2770a985..fa098b99 100644 --- a/.github/workflows/ci_examples.yaml +++ b/.github/workflows/ci_examples.yaml @@ -43,12 +43,30 @@ jobs: echo "SETUPTOOLS_ENABLE_FEATURES=legacy-editable" >> $GITHUB_ENV - name: Setup micromamba - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v1.9.0 with: + micromamba-version: '1.5.10-0' environment-file: environment.yml cache-environment: true cache-downloads: true + - name: Checkout MODFLOW 6 + uses: actions/checkout@v4 + with: + repository: MODFLOW-USGS/modflow6 + ref: develop + path: modflow6 + + - name: Update flopy MODFLOW 6 classes + working-directory: modflow6/autotest + run: | + python update_flopy.py + + - name: Install mf6 nightly build binaries + uses: modflowpy/install-modflow-action@v1 + with: + repo: modflow6-nightly-build + - name: Install error reporter run: | pip install pytest-github-actions-annotate-failures diff --git a/README.md b/README.md index 1f0a8483..951a85eb 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pywatershed/badges/version.svg)](https://anaconda.org/conda-forge/pywatershed) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pywatershed/badges/platforms.svg)](https://anaconda.org/conda-forge/pywatershed) +[![DOI:10.5066/P9AVWA7Z](https://img.shields.io/badge/DOI-10.5066/P9AVWA7Z-b4a9fe.svg)](https://doi.org/10.5066/P9AVWA7Z) + [![WholeTale](https://raw.githubusercontent.com/whole-tale/wt-design-docs/master/badges/wholetale-explore.svg)](https://dashboard.wholetale.org/run/64ae29e8a887f48b9f173678?tab=metadata) @@ -24,6 +26,7 @@ - [Installation](#installation) - [Getting started / Example notebooks](#getting-started--example-notebooks) - [Community engagement](#community-engagement) +- [How to Cite](#how-to-cite) - [Disclaimer](#disclaimer) @@ -149,6 +152,8 @@ guidelines. Thank you for your interest. +## How to Cite +McCreight, J., Langevin, C. D., & Hughes, J. D. 
(2023). pywatershed (Version 1.0.0) [Computer software]. [https://doi.org/10.5066/P9AVWA7Z](https://doi.org/10.5066/P9AVWA7Z) ## Disclaimer diff --git a/autotest/test_mmr_to_mf6_dfw.py b/autotest/test_mmr_to_mf6_dfw.py index b61711f6..3fa3e415 100644 --- a/autotest/test_mmr_to_mf6_dfw.py +++ b/autotest/test_mmr_to_mf6_dfw.py @@ -15,14 +15,14 @@ # Getting the gis files causes problems in parallel in CI. See regression test. # See below to check if these answers are still up-to-date with -# mf6/autotest/test_swf_dfw.py +# mf6/autotest/test_chf_dfw.py # Note the answers are from the binary FLW files in -# mf6/autotest/test_swf_dfw.py, if you switch to text files, the line should be +# mf6/autotest/test_chf_dfw.py, if you switch to text files, the line should be # flw_list = [ # (int(binary), 100), # ] # one-based cell numbers here if binary, zero-based if text -answers_swf_dfw = { +answers_chf_dfw = { "ia": np.array([0, 2, 5, 7]), "ja": np.array([0, 1, 1, 0, 2, 2, 1], dtype=np.int32), "stage": np.array([[[[1.00196123, 1.00003366, 1.0]]]]), @@ -52,9 +52,9 @@ @pytest.mark.skipif(mf6_bin_unavailable, reason="mf6 binary not available") @pytest.mark.domainless @pytest.mark.parametrize("binary_flw", [True, False]) -def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): +def test_mmr_to_mf6_chf_dfw(tmp_path, binary_flw): # The point of this test is to reproduce the - # modflow6/autotest/test_swf_dfw.py + # modflow6/autotest/test_chf_dfw.py # Here we supply "seg_mid_elevation" in the parameter data and # "stress_period_data" in chd options. This bypasses the calculation of @@ -68,8 +68,8 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): # stream segments, the other is about the units of volume/flow being in # cubicfeet. - name = "swf-dfw01" - output_dir = tmp_path / "test_swf_dfw01" + name = "chf-dfw01" + output_dir = tmp_path / "test_chf_dfw01" save_flows = True print_flows = True @@ -123,16 +123,16 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): # vertices could also be supplied by shapefiles vertices = [] vertices = [[j, j * dx, 0.0] for j in range(nreach + 1)] - cell2d = [] + cell1d = [] for j in range(nreach): - cell2d.append([j, 0.5, 2, j, j + 1]) - nodes = len(cell2d) + cell1d.append([j, 0.5, 2, j, j + 1]) + nodes = len(cell1d) nvert = len(vertices) disv1d_options = { "nodes": nodes, "nvert": nvert, "vertices": vertices, - "cell2d": cell2d, + "cell1d": cell1d, } # dfw @@ -191,9 +191,14 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): else: flw_vol = 0.0 + # < + time_coord_data = np.array([control.start_time]).astype( + "datetime64[ns]" + ) + _ = xr.Dataset( coords=dict( - time=np.array([control.start_time]), + time=time_coord_data, nsegment=params.parameters["seg_id"], ), data_vars={ @@ -246,18 +251,22 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): # Checks assert success - # one can verify the answers match the current mf6 results for test_swf_dfw - # by placing the path to its output here + # one can verify the answers match the current mf6 results for test_chf_dfw + # by placing the path to its output here. 
Run the following lines to + # generate the mf6 results: + # cd modflow6_for_pws_ci/autotest # or your mf6 repo location + # pytest -s -vv test_chf_dfw.py --keep=keepers # output_dir = pl.Path( - # "../../modflow6/autotest/keepers/test_mf6model[0-swf-dfw01]0" + # "../../modflow6_for_pws_ci/autotest/keepers/" + # "test_mf6model[0-chf-dfw01]0" # ) # check binary grid file grb = flopy.mf6.utils.MfGrdFile(output_dir / f"{name}.disv1d.grb") ia = grb.ia ja = grb.ja - assert (answers_swf_dfw["ia"] == ia).all() - assert (answers_swf_dfw["ja"] == ja).all() + assert (answers_chf_dfw["ia"] == ia).all() + assert (answers_chf_dfw["ja"] == ja).all() assert ia.shape[0] == grb.nodes + 1, "ia in grb file is not correct size" # check stage file @@ -265,7 +274,7 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): output_dir / f"{name}.stage", precision="double", text="STAGE" ) stage = qobj.get_alldata() - assert ((answers_swf_dfw["stage"] - stage) < 1.0e-7).all() + assert ((answers_chf_dfw["stage"] - stage) < 1.0e-7).all() # read the budget file budobj = flopy.utils.binaryfile.CellBudgetFile(output_dir / f"{name}.bud") @@ -275,17 +284,17 @@ def test_mmr_to_mf6_swf_dfw(tmp_path, binary_flw): qchd = np.array(budobj.get_data(text="CHD")[0].tolist()[0]) qresidual = np.zeros(grb.nodes)[0] - assert (answers_swf_dfw["flowja"] - flowja < 1.0e-7).all() - assert (answers_swf_dfw["qstorage"] - qstorage < 1.0e-7).all() - assert (answers_swf_dfw["qflw"] - qflw < 1.0e-7).all() - assert (answers_swf_dfw["qchd"] - qchd < 1.0e-7).all() - assert (answers_swf_dfw["qresidual"] - qresidual < 1.0e-7).all() + assert (answers_chf_dfw["flowja"] - flowja < 1.0e-7).all() + assert (answers_chf_dfw["qstorage"] - qstorage < 1.0e-7).all() + assert (answers_chf_dfw["qflw"] - qflw < 1.0e-7).all() + assert (answers_chf_dfw["qchd"] - qchd < 1.0e-7).all() + assert (answers_chf_dfw["qresidual"] - qresidual < 1.0e-7).all() # < answers_regression_means = { - "stage_all": 1.03667372881148, - "flow_all": 44.685014111989425, + "stage_all": 1.0372047024253908, + "flow_all": 44.70210250910929, } @@ -406,6 +415,7 @@ def test_mmr_to_mf6_dfw_regression(simulation, tmp_path): inflow_dir=inflow_dir, ) + dfw.write() success, buff = dfw.run(silent=False, report=True) assert success @@ -468,4 +478,5 @@ def get_outflow(itime): for kk, vv in answers_regression_means.items(): abs_diff = abs(locals()[kk].mean() - vv) + assert abs_diff < 1e-5, f"results for {kk} are not close" diff --git a/autotest/test_obsin_flow_node.py b/autotest/test_obsin_flow_node.py index b91a47c8..f613eb47 100644 --- a/autotest/test_obsin_flow_node.py +++ b/autotest/test_obsin_flow_node.py @@ -1,6 +1,6 @@ import numpy as np import pytest -from pyPRMS import Streamflow as PRMSStreamflowData +from pyPRMS import DataFile as PRMSStreamflowData from pywatershed import PRMSChannel from pywatershed.base.adapter import Adapter, AdapterNetcdf, adapter_factory @@ -8,7 +8,7 @@ from pywatershed.base.flow_graph import FlowGraph from pywatershed.base.parameters import Parameters from pywatershed.constants import nan, zero -from pywatershed.hydrology.obsin_node import ObsInNodeMaker +from pywatershed.hydrology.obsin_flow_node import ObsInFlowNodeMaker from pywatershed.hydrology.prms_channel_flow_graph import ( HruSegmentFlowAdapter, PRMSChannelFlowNodeMaker, @@ -68,7 +68,13 @@ def test_prms_channel_obsin_compare_prms( ) control_parameters = PrmsParameters.load(control_param_file) obsout_seg = control_parameters.parameters["obsout_segment"] - 1 - sf_data = PRMSStreamflowData(simulation["dir"] / 
"sf_data").data + sf_data = PRMSStreamflowData( + simulation["dir"] / "sf_data" + ).data_by_variable("runoff") + old_names = sf_data.columns.tolist() + new_names = [cc.split("_")[1] for cc in sf_data.columns.tolist()] + sf_data.rename(columns=dict(zip(old_names, new_names)), inplace=True) + poi_inds = obsout_seg[np.where(obsout_seg >= 0)].tolist() npoi = len(poi_inds) poi_ids = discretization_prms.parameters["poi_gage_id"][(poi_inds),] @@ -119,13 +125,14 @@ def advance(self) -> None: "prms_channel": PRMSChannelFlowNodeMaker( discretization_prms, parameters_prms ), - "obsin": ObsInNodeMaker(obsin_params, obsin_data), + "obsin": ObsInFlowNodeMaker(obsin_params, obsin_data), } nnodes = parameters_prms.dims["nsegment"] + npoi node_maker_name = ["prms_channel"] * nnodes node_maker_name[-npoi:] = ["obsin"] * npoi node_maker_index = np.arange(nnodes) node_maker_index[-npoi:] = np.arange(npoi) + node_maker_id = np.arange(nnodes) to_graph_index = np.zeros(nnodes, dtype=np.int64) dis_params = discretization_prms.parameters to_graph_index[0:-npoi] = dis_params["tosegment"] - 1 @@ -171,12 +178,14 @@ def advance(self) -> None: data_vars={ "node_maker_name": node_maker_name, "node_maker_index": node_maker_index, + "node_maker_id": node_maker_id, "to_graph_index": to_graph_index, }, metadata={ "nnodes": {"dims": ["nnodes"]}, "node_maker_name": {"dims": ["nnodes"]}, "node_maker_index": {"dims": ["nnodes"]}, + "node_maker_id": {"dims": ["nnodes"]}, "to_graph_index": {"dims": ["nnodes"]}, }, validate=True, diff --git a/autotest/test_pass_through_flow_graph.py b/autotest/test_pass_through_flow_graph.py index e1a5dc70..b966b2cf 100644 --- a/autotest/test_pass_through_flow_graph.py +++ b/autotest/test_pass_through_flow_graph.py @@ -10,7 +10,9 @@ from pywatershed.base.model import Model from pywatershed.base.parameters import Parameters from pywatershed.constants import nan, zero -from pywatershed.hydrology.pass_through_node import PassThroughNodeMaker +from pywatershed.hydrology.pass_through_flow_node import ( + PassThroughFlowNodeMaker, +) from pywatershed.hydrology.prms_channel_flow_graph import ( HruSegmentFlowAdapter, PRMSChannelFlowNodeMaker, @@ -63,8 +65,10 @@ def parameters_flow_graph(parameters_prms, discretization_prms): nnodes = parameters_prms.dims["nsegment"] + 1 node_maker_name = ["prms_channel"] * nnodes node_maker_name[-1] = "pass_throughs" + node_maker_name = np.array(node_maker_name, dtype="U") node_maker_index = np.arange(nnodes) node_maker_index[-1] = 0 + node_maker_id = np.arange(nnodes) to_graph_index = np.zeros(nnodes, dtype=np.int64) dis_params = discretization_prms.parameters to_graph_index[0:-1] = dis_params["tosegment"] - 1 @@ -77,11 +81,11 @@ def parameters_flow_graph(parameters_prms, discretization_prms): ) # have to map to the graph from an index found in prms_channel wh_intervene_above_graph = np.where( - (np.array(node_maker_name) == "prms_channel") + (node_maker_name == "prms_channel") & (node_maker_index == wh_intervene_above_nhm[0][0]) ) wh_intervene_below_graph = np.where( - (np.array(node_maker_name) == "prms_channel") + (node_maker_name == "prms_channel") & np.isin(node_maker_index, wh_intervene_below_nhm) ) @@ -98,12 +102,14 @@ def parameters_flow_graph(parameters_prms, discretization_prms): data_vars={ "node_maker_name": node_maker_name, "node_maker_index": node_maker_index, + "node_maker_id": node_maker_id, "to_graph_index": to_graph_index, }, metadata={ "node_coord": {"dims": ["nnodes"]}, "node_maker_name": {"dims": ["nnodes"]}, "node_maker_index": {"dims": ["nnodes"]}, 
+ "node_maker_id": {"dims": ["nnodes"]}, "to_graph_index": {"dims": ["nnodes"]}, }, validate=True, @@ -117,7 +123,7 @@ def node_maker_dict(parameters_prms, discretization_prms): "prms_channel": PRMSChannelFlowNodeMaker( discretization_prms, parameters_prms ), - "pass_throughs": PassThroughNodeMaker(), + "pass_throughs": PassThroughFlowNodeMaker(), } diff --git a/autotest/test_prms_channel_flow_graph.py b/autotest/test_prms_channel_flow_graph.py index 2c968b14..d1e5c294 100644 --- a/autotest/test_prms_channel_flow_graph.py +++ b/autotest/test_prms_channel_flow_graph.py @@ -3,7 +3,14 @@ import xarray as xr from utils_compare import compare_in_memory -from pywatershed import PRMSChannel +from pywatershed import ( + PassThroughFlowNodeMaker, + PRMSChannel, + PRMSGroundwater, + PRMSRunoff, + PRMSSoilzone, + prms_channel_flow_graph_to_model_dict, +) from pywatershed.base.adapter import AdapterNetcdf, adapter_factory from pywatershed.base.control import Control from pywatershed.base.flow_graph import FlowGraph, inflow_exchange_factory @@ -106,14 +113,16 @@ def test_prms_channel_flow_graph_compare_prms( "node_coord": np.arange(nnodes), }, data_vars={ - "node_maker_name": ["prms_channel"] * nnodes, + "node_maker_name": np.array(["prms_channel"] * nnodes, dtype="U"), "node_maker_index": np.arange(nnodes), + "node_maker_id": np.arange(nnodes), "to_graph_index": discretization.parameters["tosegment"] - 1, }, metadata={ "node_coord": {"dims": ["nnodes"]}, "node_maker_name": {"dims": ["nnodes"]}, "node_maker_index": {"dims": ["nnodes"]}, + "node_maker_id": {"dims": ["nnodes"]}, "to_graph_index": {"dims": ["nnodes"]}, }, validate=True, @@ -209,7 +218,8 @@ def test_hru_segment_flow_exchange( if exchange_type == "hrusegmentflowexchange": Exchange = HruSegmentFlowExchange else: - + # else we implement the exchange by-hand here. + # this is also implemented in channel_flow_graph_to_model_dict def calculation(self) -> None: _hru_segment = self.hru_segment - 1 s_per_time = self.control.time_step_seconds @@ -280,12 +290,14 @@ def calculation(self) -> None: data_vars={ "node_maker_name": ["prms_channel"] * nnodes, "node_maker_index": np.arange(nnodes), + "node_maker_id": np.arange(nnodes), "to_graph_index": discretization.parameters["tosegment"] - 1, }, metadata={ "node_coord": {"dims": ["nnodes"]}, "node_maker_name": {"dims": ["nnodes"]}, "node_maker_index": {"dims": ["nnodes"]}, + "node_maker_id": {"dims": ["nnodes"]}, "to_graph_index": {"dims": ["nnodes"]}, }, validate=True, @@ -381,3 +393,117 @@ def calculation(self) -> None: ) model.finalize() + + +def test_prms_channel_flow_graph_to_model_dict( + simulation, control, discretization, parameters, tmp_path +): + # This also tests the netcdf output contains the correct coordinates, at + # the end. 
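+    # prms_channel_flow_graph_to_model_dict (demonstrated in
+    # examples/06_flow_graph_starfit.ipynb) appends a FlowGraph named
+    # "prms_channel_flow_graph", and an "inflow_exchange" process that
+    # routes the other processes' fluxes into the graph, to this model_dict.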
+ domain_dir = simulation["dir"] + input_dir = simulation["output_dir"] + run_dir = tmp_path + control.options = control.options | { + "input_dir": input_dir, + "budget_type": "error", + "calc_method": "numba", + "netcdf_output_dir": run_dir, + "netcdf_output_var_names": [ + "node_outflows", + "node_upstream_inflows", + "node_storages", + ], + } + + nhm_processes = [ + # PRMSSnow, # snow introduces significant error + PRMSRunoff, + PRMSSoilzone, + PRMSGroundwater, + ] + + model_dict = { + "control": control, + "dis_both": discretization, + "dis_hru": discretization, + "model_order": [], + } + + # As in notebook 01 + for proc in nhm_processes: + proc_name = proc.__name__ + proc_rename = "prms_" + proc_name[4:].lower() + model_dict["model_order"] += [proc_rename] + model_dict[proc_rename] = {} + proc_dict = model_dict[proc_rename] + proc_dict["class"] = proc + proc_param_file = domain_dir / f"parameters_{proc_name}.nc" + proc_dict["parameters"] = Parameters.from_netcdf(proc_param_file) + proc_dict["dis"] = "dis_hru" + + # randomly sample some seg ids + nsegs = len(discretization.parameters["nhm_seg"]) + rando = ((0, int(nsegs / 2), -1),) + random_seg_ids = discretization.parameters["nhm_seg"][rando] + n_new_nodes = len(random_seg_ids) + + check_names = ["pass"] * n_new_nodes + check_indices = list(range(n_new_nodes)) + check_ids = list(range(n_new_nodes)) + + model_dict = prms_channel_flow_graph_to_model_dict( + model_dict=model_dict, + prms_channel_dis=discretization, + prms_channel_dis_name="dis_both", + prms_channel_params=parameters, + new_nodes_maker_dict={"pass": PassThroughFlowNodeMaker()}, + new_nodes_maker_names=check_names, + new_nodes_maker_indices=check_indices, + new_nodes_maker_ids=check_ids, + new_nodes_flow_to_nhm_seg=random_seg_ids, + graph_budget_type="warn", # move to error + ) + model = Model(model_dict) + + # answers for the exchange + var = "seg_lateral_inflow" + var_pth = simulation["output_dir"] / f"{var}.nc" + lateral_inflow_answers = adapter_factory( + var_pth, variable_name=var, control=control + ) + + for tt in range(control.n_times): + model.advance() + model.calculate() + model.output() + + # check exchange + lateral_inflow_answers.advance() + np.testing.assert_allclose( + model.processes["inflow_exchange"]["inflows"][0:nsegs], + lateral_inflow_answers.current, + rtol=rtol, + atol=atol, + ) + + # < + model.finalize() + + ans_dir = simulation["output_dir"] + outflow_ans = xr.load_dataarray(ans_dir / "seg_outflow.nc") + outflow_act = xr.load_dataarray(run_dir / "node_outflows.nc") + outflow_act_compare = outflow_act[:, 0:(nsegs)] + for tt in range(control.n_times): + np.testing.assert_allclose( + outflow_ans.values[tt, :], + outflow_act_compare.values[tt, :], + rtol=rtol, + atol=atol, + ) + + # check that the coordinates match what was provided + for vv in control.options["netcdf_output_var_names"]: + da = xr.load_dataarray(tmp_path / f"{vv}.nc") + assert da.node_maker_index[-3:].values.tolist() == check_indices + assert da.node_maker_name[-3:].values.tolist() == check_names + assert da.node_maker_id[-3:].values.tolist() == check_ids diff --git a/autotest/test_starfit_flow_graph.py b/autotest/test_starfit_flow_graph.py index 567cce8d..b556899a 100644 --- a/autotest/test_starfit_flow_graph.py +++ b/autotest/test_starfit_flow_graph.py @@ -5,19 +5,31 @@ import xarray as xr from utils_compare import compare_in_memory -from pywatershed import PRMSChannel +from pywatershed import ( + Model, + PRMSChannel, + PRMSGroundwater, + PRMSRunoff, + PRMSSoilzone, +) from 
pywatershed.base.adapter import adapter_factory
 from pywatershed.base.control import Control
 from pywatershed.base.parameters import Parameters
 from pywatershed.constants import nan, zero
-from pywatershed.hydrology.pass_through_node import PassThroughNodeMaker
+from pywatershed.hydrology.pass_through_flow_node import (
+    PassThroughFlowNodeMaker,
+)
 from pywatershed.hydrology.prms_channel_flow_graph import (
     prms_channel_flow_graph_postprocess,
+    prms_channel_flow_graph_to_model_dict,
 )
 from pywatershed.hydrology.starfit import StarfitFlowNodeMaker
 from pywatershed.parameters import PrmsParameters
 
-# Purpose: TODO
+# Purpose: Test that STARFIT in a PRMSChannel FlowGraph gives correct results.
+# Below the inserted reservoir, we don't have answers, but we'll check
+# that the PRMSChannel results are not altered elsewhere on the graph.
+# Tests below use "postprocess" and "to_model_dict" helper functions
 
 # Todo: add the model_dict run of flowgraph
 
@@ -80,10 +92,13 @@ def big_sandy_parameters():
     return Parameters.from_ds(params)
 
 
-# TODO fixture for daily/hourly
-@pytest.mark.parametrize(
-    "compute_daily", [True, False], ids=("daily", "hourly")
+@pytest.fixture(
+    params=[True, False], ids=("daily", "hourly"), scope="function"
 )
+def compute_daily(request):
+    return request.param
+
+
 def test_starfit_flow_graph_postprocess(
     simulation,
     control,
@@ -95,6 +110,43 @@ def test_starfit_flow_graph_postprocess(
 ):
     input_dir = simulation["output_dir"]
 
+    # We'll test adding multiple new nodes in-series into a FlowGraph. Above
+    # and below the starfit we'll add pass-through nodes and check these
+    # match.
+
+    # We add a random passthrough node with no upstream node, to test if that
+    # works.
+
+    # in this test we'll cover the case of disconnected nodes and allowing
+    # them or not
+    def add_disconnected_node_data(ds: xr.Dataset) -> xr.Dataset:
+        # This just takes data from the first segment and repeats it
+        # at the end of the array. Editing the values is done outside this
+        # function.
+        seg_data = {}
+        for vv in list(ds.variables):
+            if "nsegment" in ds[vv].dims:
+                data = ds[vv].values
+                seg_data[vv] = (["nsegment"], np.append(data, data[0]))
+                del ds[vv]
+
+        nhm_seg = seg_data.pop("nhm_seg")
+        ds_seg = xr.Dataset(
+            data_vars=seg_data,
+            coords={"nhm_seg": nhm_seg},
+        )
+        return xr.merge([ds, ds_seg])
+
+    dis_ds = add_disconnected_node_data(discretization.to_xr_ds())
+    params_ds = add_disconnected_node_data(parameters.to_xr_ds())
+    dis_ds.tosegment[-1] = 0
+    dis_ds.tosegment_nhm[-1] = 99999
+    dis_ds.nhm_seg[-1] = 99998
+    params_ds.nhm_seg[-1] = 99998
+
+    discretization = Parameters.from_ds(dis_ds)
+    parameters = Parameters.from_ds(params_ds)
+
     # are we testing flowgraph netcdf output elsewhere?
     # i dont think so, so do it here. 
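+    # (the node_maker_* coordinate assertions at the end of this test
+    # exercise that output)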
# cant really test answers per above note
@@ -108,29 +160,49 @@
     control.options["input_dir"] = input_dir
     control.options["budget_type"] = "error"
     control.options["verbosity"] = True
     # control.options["netcdf_output_dir"] = tmp_path  # TODO
     control.options["netcdf_output_var_names"] = [
         "node_outflows",
         "node_upstream_inflows",
         "node_storages",
     ]
 
-    # Currently this is the same as notebook 06
-    flow_graph = prms_channel_flow_graph_postprocess(
-        control=control,
-        input_dir=input_dir,
-        prms_channel_dis=discretization,
-        prms_channel_params=parameters,
-        new_nodes_maker_dict={
-            "starfit": StarfitFlowNodeMaker(
-                None,
-                big_sandy_parameters,
-                budget_type="error",
-                compute_daily=compute_daily,
-            ),
-            "pass_through": PassThroughNodeMaker(),
-        },
-        new_nodes_maker_names=["starfit", "pass_through"],
-        new_nodes_maker_indices=[0, 0],
-        new_nodes_flow_to_nhm_seg=[
-            44426,
-            44435,
-        ],
-    )
-
+    new_nodes_maker_names = ["starfit"] + ["pass_through"] * 3
+    new_nodes_maker_indices = [0, 0, 1, 2]
+    new_nodes_maker_ids = [-2, -1, -100, -1000]
+    # The starfit node flows to the third passthrough node, in index 3.
+    # The first passthrough node flows to some random nhm_seg, not connected to
+    # the other new nodes.
+    # The second passthrough flows to the starfit node in index 0.
+    # The last passthrough node flows to the seg above which the reservoir
+    # is placed.
+    new_nodes_flow_to_nhm_seg = [-3, 44409, 0, 44426]
+
+    # the first in the list is for the disconnected node
+    check_names = ["prms_channel"] + new_nodes_maker_names
+    check_indices = [dis_ds.sizes["nsegment"] - 1] + new_nodes_maker_indices
+    check_ids = [dis_ds.nhm_seg[-1].values.tolist()] + new_nodes_maker_ids
+
+    # This warning should say: TODO
+    with pytest.warns(
+        UserWarning, match="Disconnected nodes present in FlowGraph."
+    ):
+        flow_graph = prms_channel_flow_graph_postprocess(
+            control=control,
+            input_dir=input_dir,
+            prms_channel_dis=discretization,
+            prms_channel_params=parameters,
+            new_nodes_maker_dict={
+                "starfit": StarfitFlowNodeMaker(
+                    None,
+                    big_sandy_parameters,
+                    budget_type="error",
+                    compute_daily=compute_daily,
+                ),
+                "pass_through": PassThroughFlowNodeMaker(),
+            },
+            new_nodes_maker_names=new_nodes_maker_names,
+            new_nodes_maker_indices=new_nodes_maker_indices,
+            new_nodes_maker_ids=new_nodes_maker_ids,
+            new_nodes_flow_to_nhm_seg=new_nodes_flow_to_nhm_seg,
+            addtl_output_vars=["spill", "release"],
+            allow_disconnected_nodes=True,
+        )
+
+    # <
     # get the segments un affected by flow, where the PRMS solutions should
     # match
     wh_44426 = np.where(discretization.parameters["nhm_seg"] == 44426)[0][0]
@@ -149,6 +221,7 @@
     answers = {}
     for var in PRMSChannel.get_variables():
         if var not in rename_vars.keys():
+            # this drops the inflow-vars and renames the rest
             continue
         new_var = rename_vars[var]
         var_pth = input_dir / f"{var}.nc"
@@ -162,11 +235,212 @@
         flow_graph.calculate(1.0)
         flow_graph.output()
 
-    for var in answers.values():
-        var.advance()
+    # use the results/actual as answers on wh_ignore and on the new nodes
+    if do_compare_in_memory:
+        for var in answers.values():
+            var.advance()
+
+        answers_conv_vol = {}
+        # The -5 is not -4 because of the synthetic disconnected node also
+        # present on the FlowGraph (which is added via parameters, not
+        # using prms_channel_flow_graph_postprocess)
+        for key, val in answers.items():
+            if key in convert_to_vol:
+                current = val.current / (24 * 60 * 60)
+            else:
+                current = val.current
+
+            # <
+            # Fill the first node, the fake disconnected node, with
+            # whatever value is in the graph
+            # current = np.append(current, flow_graph[key][-3:-2])
+            # Fill on the downstream reaches of PRMS, where we "ignore"
+            # wh_ignore is on the 
extended dis/params but shouldn't make
+            # a difference with the original since the original data are in
+            # the same indices, with the new, disconnected node at the end.
+            current[(wh_ignore,)] = flow_graph[key][(wh_ignore,)]
+            # Fill in the last two nodes
+            answers_conv_vol[key] = np.concatenate(
+                [current, flow_graph[key][-5:]]
+            )
+
+        # <<
+        # there are no expected sources or sinks in this test
+        answers_conv_vol["node_sink_source"] = np.concatenate(
+            [val.current * zero, flow_graph["node_sink_source"][-5:]]
+        )
+        answers_conv_vol["node_negative_sink_source"] = np.concatenate(
+            [
+                val.current * zero,
+                flow_graph["node_negative_sink_source"][-5:],
+            ]
+        )
+
+        answers_conv_vol["node_storages"] = np.concatenate(
+            [val.current * nan, flow_graph["node_storages"][-5:]]
+        )
+
+        compare_in_memory(
+            flow_graph,
+            answers_conv_vol,
+            atol=atol,
+            rtol=rtol,
+            skip_missing_ans=False,
+            fail_after_all_vars=False,
+        )
+
+    # this checks that the budget was actually active for the starfit node
+    assert flow_graph._nodes[-4].budget is not None
+
+    flow_graph.finalize()
+    for vv in control.options["netcdf_output_var_names"]:
+        da = xr.load_dataarray(tmp_path / f"{vv}.nc", concat_characters=True)
+        assert da.node_maker_index[-5:].values.tolist() == check_indices
+        assert da.node_maker_name[-5:].values.tolist() == check_names
+        assert da.node_maker_id[-5:].values.tolist() == check_ids
+
+    # test additional output files match and are working
+    da_no = xr.load_dataarray(tmp_path / "node_outflows.nc")
+    da_in = xr.load_dataarray(tmp_path / "node_upstream_inflows.nc")
+
+    # full time check of the passthrough nodes (which is probably gratuitous
+    # given all the above checks already passed).
+    assert (abs(da_no[:, -1] - da_no[:, -4]) < 1e-12).all()
+    assert (abs(da_in[:, -2] - da_in[:, -4]) < 1e-12).all()
+
+    da_lr = xr.load_dataarray(tmp_path / "release.nc")
+    da_ls = xr.load_dataarray(tmp_path / "spill.nc")
+
+    da_no = da_no.where(da_no.node_maker_name == "starfit", drop=True)
+    da_lr = da_lr.where(da_lr.node_maker_name == "starfit", drop=True)
+    da_ls = da_ls.where(da_ls.node_maker_name == "starfit", drop=True)
+
+    assert (da_no == da_lr + da_ls).all()
+
+
+def test_starfit_flow_graph_model_dict(
+    simulation,
+    control,
+    discretization,
+    parameters,
+    big_sandy_parameters,
+    compute_daily,
+    tmp_path,
+):
+    domain_dir = simulation["dir"]
+    input_dir = simulation["output_dir"]
+
+    # are we testing flowgraph netcdf output elsewhere?
+    # I don't think so, so do it here. 
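+    # (the single-file FlowGraph.nc checks at the end of this test cover
+    # that output)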
+    # can't really test answers per above note
+    control.options["input_dir"] = input_dir
+    control.options["budget_type"] = "error"
+    control.options["verbosity"] = True
+    # control.options["netcdf_output_dir"] = tmp_path  # TODO
+    control.options["netcdf_output_var_names"] = [
+        "node_outflows",
+        "node_upstream_inflows",
+        "node_storages",
+    ]
+
+    nhm_processes = [
+        # PRMSSnow,  # snow introduces significant error
+        PRMSRunoff,
+        PRMSSoilzone,
+        PRMSGroundwater,
+    ]
+
+    model_dict = {
+        "control": control,
+        "dis_both": discretization,
+        "dis_hru": discretization,
+        "model_order": [],
+    }
+
+    # As in notebook 01
+    for proc in nhm_processes:
+        proc_name = proc.__name__
+        proc_rename = "prms_" + proc_name[4:].lower()
+        model_dict["model_order"] += [proc_rename]
+        model_dict[proc_rename] = {}
+        proc_dict = model_dict[proc_rename]
+        proc_dict["class"] = proc
+        proc_param_file = domain_dir / f"parameters_{proc_name}.nc"
+        proc_dict["parameters"] = Parameters.from_netcdf(proc_param_file)
+        proc_dict["dis"] = "dis_hru"
+
+    # <
+    new_nodes_maker_dict = {
+        "starfit": StarfitFlowNodeMaker(
+            None,
+            big_sandy_parameters,
+            budget_type="error",
+            compute_daily=compute_daily,
+        ),
+        "pass_through": PassThroughFlowNodeMaker(),
+    }
+    new_nodes_maker_names = ["starfit", "pass_through"]
+    new_nodes_maker_indices = [0, 0]
+    new_nodes_maker_ids = [-2, -1]
+    new_nodes_flow_to_nhm_seg = [
+        44426,
+        44435,
+    ]
+
+    model_dict = prms_channel_flow_graph_to_model_dict(
+        model_dict=model_dict,
+        prms_channel_dis=discretization,
+        prms_channel_dis_name="dis_both",
+        prms_channel_params=parameters,
+        new_nodes_maker_dict=new_nodes_maker_dict,
+        new_nodes_maker_names=new_nodes_maker_names,
+        new_nodes_maker_indices=new_nodes_maker_indices,
+        new_nodes_maker_ids=new_nodes_maker_ids,
+        new_nodes_flow_to_nhm_seg=new_nodes_flow_to_nhm_seg,
+        graph_budget_type="error",
+        addtl_output_vars=["spill", "release"],
+    )
+    model = Model(model_dict)
+
+    # on the segments unaffected by flow, the PRMS solutions should match
+    # so "ignore" the affected segments; we'll use the results as answers later
+    wh_44426 = np.where(discretization.parameters["nhm_seg"] == 44426)[0][0]
+    upind = wh_44426
+    wh_ignore = []
+    toseg = discretization.parameters["tosegment"] - 1
+    while upind >= 0:
+        downind = toseg[upind]
+        wh_ignore += [upind]
+        upind = downind
+
+    if do_compare_output_files:
+        # not really feasible as noted by NB section at top
+        model.initialize_netcdf(tmp_path, separate_files=False)
+
+    if do_compare_in_memory:
+        answers = {}
+        for var in PRMSChannel.get_variables():
+            if var not in rename_vars.keys():
+                continue
+            new_var = rename_vars[var]
+            var_pth = input_dir / f"{var}.nc"
+            answers[new_var] = adapter_factory(
+                var_pth, variable_name=var, control=control
+            )
+
+    # shortcut
+    flow_graph = model.processes["prms_channel_flow_graph"]
+
+    for istep in range(control.n_times):
+        model.advance()
+        model.calculate()
+        model.output()
 
     # use the results as answers on wh_ingore and on the new nodes
     if do_compare_in_memory:
+        for var in answers.values():
+            var.advance()
+
         answers_conv_vol = {}
         for key, val in answers.items():
             if key in convert_to_vol:
@@ -205,9 +479,17 @@
             rtol=rtol,
             skip_missing_ans=False,
             fail_after_all_vars=False,
+            verbose=False,
         )
 
     # this checks that the budget was actually active for the starfit node
     assert flow_graph._nodes[-2].budget is not None
 
     flow_graph.finalize()
+
+    # test single file output has extra coords and additional vars
+    ds = 
xr.open_dataset(tmp_path / "FlowGraph.nc") + ds_starfit = ds.where(ds.node_maker_name == "starfit", drop=True) + assert ( + ds_starfit.node_outflows == ds_starfit.release + ds_starfit.spill + ).all() diff --git a/autotest/test_starfit_flow_node.py b/autotest/test_starfit_flow_node.py index 4667006b..bf7fc17a 100644 --- a/autotest/test_starfit_flow_node.py +++ b/autotest/test_starfit_flow_node.py @@ -13,7 +13,10 @@ from pywatershed.parameters import Parameters, StarfitParameters # NB: -# Here we are comparing a daily starfit against an hourly StarfitNode. +# Here we are comparing a daily offline starfit against an hourly +# StarfitNode. The reference output is the mean value from offline runs run +# from 1995-2001 in the file +# ../test_data/starfit/starfit_mean_output_1995-2001.nc # We only advance the hourly StarfitNode one substepper day. It's # resulting flow rates are identical but the change in storage is 1/24 # of the daily value, so we check this. We have to track previous storage @@ -23,18 +26,18 @@ do_compare_in_memory = True rtol = atol = 1.0e-7 -end_time = (np.datetime64("2001-12-31 00:00:00"),) +end_time = np.datetime64("2001-12-31 00:00:00") # This is 117 reservoirs where # (parameters_ds.start_time == np.datetime64("1995-01-01 00:00:00")) # & (parameters_ds.end_time >= np.datetime64("2001-12-31 00:00:00")) # fmt: off starfit_inds_test = [ - 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, - 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, - 31, 32, 33, 36, 37, 38, 40, 43, 44, 47, 48, 49, 51, - 52, 53, 55, 56, 59, 62, 63, 64, 65, 67, 68, 69, 70, - 71, 72, 74, 75, 76, 77, 86, 87, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, + 15, 16, 18, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, + 31, 32, 33, 36, 37, 38, 40, 43, 44, 47, 48, 49, 51, + 52, 53, 55, 56, 59, 62, 63, 64, 65, 67, 68, 69, 70, + 71, 72, 74, 75, 76, 77, 86, 87, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 122, 123, 130, 134, 137, 139, 140, 141, 145, 148, 149, 152, 154, 155, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166 @@ -156,7 +159,7 @@ def advance(self) -> None: # fill up timeseries arrays for ii, si in enumerate(starfit_inds_test): for var in results.keys(): - results[var][istep, ii] = nodes[ii][f"_{var}"] + results[var][istep, ii] = nodes[ii][f"_{var}"][0] for var in results.keys(): actual = results[var].mean(0) diff --git a/code.json b/code.json new file mode 100644 index 00000000..895dc256 --- /dev/null +++ b/code.json @@ -0,0 +1,39 @@ +[ + { + "status": "Preliminary", + "languages": [ + "Python", + "Fortran" + ], + "repositoryURL": "https://code.usgs.gov/wma-enterprisecapacity/pywatershed", + "disclaimerURL": "https://code.usgs.gov/wma-enterprisecapacity/pywatershed/-/blob/main/DISCLAIMER.md", + "tags": [ + "pywatershed", + "hydrologic model" + ], + "vcs": "git", + "name": "pywatershed", + "downloadURL": "https://code.usgs.gov/wma-enterprisecapacity/pywatershed/-/archive/1.0.0/pywatershed-1.0.0.zip", + "contact": { + "name": "James L. McCreight", + "email": "jmccreight@usgs.gov" + }, + "laborHours": -1, + "version": "1.0.0", + "date": { + "metadataLastUpdated": "2023-12-22" + }, + "organization": "U.S. 
Geological Survey", + "permissions": { + "licenses": [ + { + "URL": "https://code.usgs.gov/wma-enterprisecapacity/pywatershed/-/blob/main/LICENSE", + "name": "Public Domain, CC0-1.0" + } + ], + "usageType": "openSource" + }, + "homepageURL": "https://code.usgs.gov/wma-enterprisecapacity/pywatershed", + "description": "A hydrologic model in Python." + } +] diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7725a26e..1a64e8e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,7 +35,7 @@ New Features (DFW) routing from PRMS NHM input files and a few simple assumptions. The lateral (to-channel) fluxes from a PRMS are used as time varying boundary conditions. A new notebook runs the Delaware River Basin using MF6 DFW: - `examples/mmr_to_mf6_dfw.ipynb `__. + `examples/07_mmr_to_mf6_chf_dfw.ipynb `__. (:pull:`290`) By `James McCreight `_. - The depression storage option for PRMSRunoff is implemented and tested. (:pull:`279`) By `James McCreight `_. @@ -96,6 +96,8 @@ Internal changes PRMSGroundwater: 1.0e-8, PRMSGroundwaterNoDprst: 1.0e-8, PRMSChannel: 5.0e-7) for all test domains. (:pull:`288`) By `James McCreight `_. +- Migration to Numpy 2.0+. + (:pull:`310`) By `James McCreight `_. .. _whats-new.1.1.0: diff --git a/environment.yml b/environment.yml index 2a618212..11fd716f 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,7 @@ dependencies: - nbconvert - netCDF4 - networkx - - numpy<2.0.0 + - numpy>=2.0.0 - numba - pandas>=1.4.0 - pint @@ -44,10 +44,11 @@ dependencies: - sphinx-autosummary-accessors - sphinx-copybutton - tqdm - - xarray>=2023.05.0 + - xarray>=2024.06.0 - pip: - asv - click != 8.1.0 + - filelock - git+https://github.com/modflowpy/flopy.git - jupyter_black - modflow-devtools diff --git a/environment_w_jupyter.yml b/environment_w_jupyter.yml index a3c31b3f..92755151 100644 --- a/environment_w_jupyter.yml +++ b/environment_w_jupyter.yml @@ -26,7 +26,7 @@ dependencies: - nbconvert - netCDF4 - networkx - - numpy<2.0.0 + - numpy>=2.0.0 - numba - pandas>=1.4.0 - pint @@ -46,7 +46,7 @@ dependencies: - sphinx-autosummary-accessors - sphinx-copybutton - tqdm - - xarray>=2023.05.0 + - xarray>=2024.06.0 - pip: - asv - click != 8.1.0 diff --git a/evaluation/performance/prms_5.2.1_performance.ipynb b/evaluation/performance/prms_5.2.1_performance.ipynb index b8ca94ae..d566a33a 100644 --- a/evaluation/performance/prms_5.2.1_performance.ipynb +++ b/evaluation/performance/prms_5.2.1_performance.ipynb @@ -184,7 +184,7 @@ "outputs": [], "source": [ "results2 = {}\n", - "files = pl.Path('/Users/jamesmcc/usgs/data/pynhm/performance_runs/results/').glob('*.pkl')\n", + "files = pl.Path('../../../data/pynhm/performance_runs/results/').glob('*.pkl')\n", "for ff in files: \n", " print(ff)\n", " with open(ff, \"rb\") as input_file:\n", diff --git a/examples/01_multi-process_models.ipynb b/examples/01_multi-process_models.ipynb index eb04f9ed..ccf3665d 100644 --- a/examples/01_multi-process_models.ipynb +++ b/examples/01_multi-process_models.ipynb @@ -12,7 +12,7 @@ "source": [ "# Multi-process models in pywatershed\n", "\n", - "In notebook `00_processes.ipynb`, we looked at how and individual Process representations work and are designed. In this notebook we learn how to put multiple Processes together into composite models using the `Model` class. \n", + "In notebook `00_processes.ipynb`, we looked at how individual Process representations work and are designed. In this notebook we learn how to put multiple Processes together into composite models using the `Model` class. 
\n", "\n", "The starting point for the development of `pywatershed` was the National Hydrologic Model (NHM, Regan et al., 2018) configuration of the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015). In this notebook, we'll first construct a full NHM configuration. The spatial domain we'll use will again be the Delaware River Basin. Once we construct the full NHM, we'll look at how we can also construct sub-models of the NHM.\n", "\n", @@ -58,7 +58,7 @@ "source": [ "## Domain Plot to get to know the area\n", "\n", - "Before diving in to pywatershed models, let's use one of its built-in tools to get familiar with the application domain. We'll combine the GIS files for the HRUs and the Segments in this domain with their parameters to learn more about how the model represents quantities in pyhiscal space. Please zoom in and out and select different layers. We aim to add more functionality to this plot over time, stay tuned." + "Before diving in to pywatershed models, let's use one of its built-in tools to get familiar with the application domain. We'll combine the GIS files for the HRUs and the Segments in this domain with their parameters to learn more about how the model represents quantities in physical space. Please zoom in and out and select different layers. We aim to add more functionality to this plot over time, stay tuned." ] }, { @@ -1188,7 +1188,7 @@ "id": "c787a163-c4dd-4826-b0f2-73e1b006c081", "metadata": {}, "source": [ - "We'll, that saved us some time. The run is similar to before, just using fewer processes. \n", + "Well, that saved us some time. The run is similar to before, just using fewer processes. \n", "\n", "The final time is still in memory. We can take a look at, say, recharge. Before plotting, let's take a look at the data and the metadata for recharge a bit closer." ] diff --git a/examples/02_prms_legacy_models.ipynb b/examples/02_prms_legacy_models.ipynb index 9ea2aa23..a0cbcc87 100644 --- a/examples/02_prms_legacy_models.ipynb +++ b/examples/02_prms_legacy_models.ipynb @@ -14,7 +14,7 @@ "\n", "Because pywatershed has its roots in the Precipitation-Runoff Modeling System (PRMS, Regan et al., 2015), pywatershed supports PRMS-model instantation from legacy PRMS input files. The traditional PRMS input files are the control file, the parameter file, and the climate-by-hru (CBH) files (typically daily precipitation and maximum and minimum temperatures). While the CBH files need to be pre-processed to NetCDF format, native PRMS control and parameter files are supported. \n", "\n", - "Below we'll show how to preprocess the CBH files to NetCDF and how to instantiate a pywatershed `Model` using PRMS-native files. In this notebook we'll reproduce the basic results from the previous notebook (`01_multi-process_models.ipynb`) for the NHM full model and its submodel. As in the previous notebooks, run PRMS processes models on the Delaware River Basin (DRB) subdomain of the NHM for a 2 year period using `pywatershed`.\n", + "Below we'll show how to preprocess the CBH files to NetCDF and how to instantiate a pywatershed `Model` using PRMS-native files. In this notebook we'll reproduce the basic results from the previous notebook (`01_multi-process_models.ipynb`) for the NHM full model and its submodel. 
As in the previous notebooks, this example will run PRMS processes models on the Delaware River Basin (DRB) subdomain of the NHM for a 2 year period using `pywatershed`.\n", "\n", "## Prerequisites" ] @@ -195,7 +195,9 @@ "metadata": {}, "outputs": [], "source": [ - "control = pws.Control.load_prms(domain_dir / \"nhm.control\", warn_unused_options=False)\n", + "control = pws.Control.load_prms(\n", + " domain_dir / \"nhm.control\", warn_unused_options=False\n", + ")\n", "\n", "control" ] diff --git a/examples/04_preprocess_atm.ipynb b/examples/04_preprocess_atm.ipynb index bcf9f483..da552a98 100644 --- a/examples/04_preprocess_atm.ipynb +++ b/examples/04_preprocess_atm.ipynb @@ -119,7 +119,7 @@ "metadata": {}, "source": [ "## Write solar geometry files\n", - "Below we'll demonstrated using an active instance of `PRMSSolarGeom` and also using its static output to drive `PRMSAtmosphere`. Here we create the static output that we need for `PRMSSolarGeom` in the second case." + "Below we'll demonstrate using an active instance of `PRMSSolarGeom` and also using its static output to drive `PRMSAtmosphere`. Here we create the static output that we need for `PRMSSolarGeom` in the second case." ] }, { diff --git a/examples/06_flow_graph_starfit.ipynb b/examples/06_flow_graph_starfit.ipynb index 3b315c47..b44385fc 100644 --- a/examples/06_flow_graph_starfit.ipynb +++ b/examples/06_flow_graph_starfit.ipynb @@ -8,13 +8,15 @@ "# PRMSChannel FlowGraph with a STARFIT Reservoir: Big Sandy Reservoir\n", "\n", "This notebook demonstrates the capabilities of the `FlowGraph` class and its associated classes\n", - "`FlowNode` and `FlowNodeMaker` in a real-world example. This example starts from an existing\n", - "flow graph which is in `PRMSChannel` and adds in a single new node to represent a reservoir\n", - "within the `PRMSChannel` simulation. The `FlowGraph` is the class which is able to take different\n", - "flow methods and combine them in user-specified ways. In this case we combine nodes of class\n", - "`PRMSChannelFlowNode` with one node of class `StarfitFlowNode`. \n", + "`FlowNode` and `FlowNodeMaker` in a real-world example. This example starts from an existing graph of \n", + "flow, embedded in `PRMSChannel` and its parameters, and adds in a single new node to represent a reservoir\n", + "within the `PRMSChannel` simulation. \n", "\n", - "Please see these links to the documentation for more details on \n", + "The `FlowGraph` is the class which is able to take different flow methods and combine them in \n", + "user-specified ways. In this case we combine nodes of class `PRMSChannelFlowNode` (a re-expression\n", + "of `PRMSChannel` as a `FlowNode` to work with `FlowGraph`) with one node of class `StarfitFlowNode`. \n", + "\n", + "Please see these links to the documentation for more details on each:\n", "[`FlowGraph`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.FlowGraph.html), \n", "[`StarfitFlowNode`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.StarfitFlowNode.html), and \n", "[`PRMSChannelFlowNode`](https://pywatershed.readthedocs.io/en/latest/api/generated/pywatershed.PRMSChannelFlowNode.html)." @@ -82,6 +84,29 @@ "Let's get to know something about this domain and reservoir. We'll load the full Global Reservoir and Dam (GRanD) data set and pull out the row for Big Sandy. 
" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c60dd131-c4d9-49cf-a376-556db6c23935", + "metadata": {}, + "outputs": [], + "source": [ + "# Below in this cell is how one would get parameters for reservoirs in the ISTARF-CONUS database.\n", + "# We use pre-canned parameters and do not do this here because the GRanD file\n", + "# must be downloaded manually from\n", + "# https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013\n", + "# unpacked and placed in the location below, which can not be automated\n", + "# for testing this notebook.\n", + "# param_src_dir = nb_output_dir / \"param_sources\"\n", + "# param_src_dir.mkdir(exist_ok=True)\n", + "# grand_file = param_src_dir / \"GRanD_Version_1_3/GRanD_reservoirs_v1_3.dbf\"\n", + "# sf_params = pws.parameters.StarfitParameters.from_istarf_conus_grand(\n", + "# grand_file=grand_file,\n", + "# files_directory=param_src_dir,\n", + "# grand_ids=[419], # the id for Big Sandy\n", + "# )" + ] + }, { "cell_type": "code", "execution_count": null, @@ -147,9 +172,7 @@ "metadata": {}, "outputs": [], "source": [ - "# add GRanD shp file? or add to the object afterwards? option to get polygons\n", - "# for sf_params above? but how to show connectivity?\n", - "DomainPlot(\n", + "domain_plot = DomainPlot(\n", " hru_shp_file=shp_file_hru,\n", " segment_shp_file=shp_file_seg,\n", " hru_parameters=domain_dir / \"parameters_dis_hru.nc\",\n", @@ -178,7 +201,7 @@ "id": "1170d616-a760-455d-a88f-19ffc2205121", "metadata": {}, "source": [ - "From the above, by mousing over the segments we can see the reservoir should be inserted above nhm_seg 44426 and below nhm_segs 44434 and 44435. \n", + "From the above, by mousing over the segments we can see the reservoir would be inserted above nhm_seg 44426 and below nhm_segs 44434 and 44435. \n", "\n", "For more context, zooming out shows the full Flaming Gorge Domain on the Green River. The openstreetmap layers shows that Big Sandy Dike is located near Farson, WY. In the EsriSatellite layer, we observe this is a very dry, high plains region with farming downstream of the Big Sandy and Eden reservoirs around Farson. We can also see that the reservoir is fed by snowpack and seasonal runoff from the high Wind River Range to the Northeast. The photo of Arrowhead Lake below (taken by the author in August 2023) looks southeast at Temple Mountain, across the furthest upstream HRU of the Big Sandy Dike. \n", "![Arrowhead Lake, August 2023](static/arrowhead_lake.jpg)" @@ -190,8 +213,7 @@ "metadata": {}, "source": [ "## NHM Run on Flaming Gorge Domain: NO Big Sandy\n", - "The NHM does not represent any reservoirs. From above, we'll assume the outflows of Big Sandy are on segment 44426. We'll see how the NHM represents flow at Big Sandy.\n", - "We can run pywatershed using the \"legacy instantation\" as described in Notebook 02." + "The NHM does not represent any reservoirs. From the above plot, we'll assume the outflows of Big Sandy would be at segment 44426. Let's see how the NHM represents flow at this location, without any reservoir representation. We can run pywatershed using the \"legacy instantation\" as described in Notebook 02." 
]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -203,33 +225,11 @@
   "source": [
    "control = pws.Control.load_prms(control_file, warn_unused_options=False)\n",
    "control.edit_n_time_steps(ndays_run)\n",
+    "\n",
    "parameter_file = domain_dir / control.options[\"parameter_file\"]\n",
    "params = pws.parameters.PrmsParameters.load(parameter_file)"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "82ba984f-3402-416d-8800-0bee97158219",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# domain_dir = pl.Path(\"/Users/jmccreight/usgs/data/pynhm/fgr_2yr\")\n",
-    "# # run just once to convert CBH/day forcing files to pywatershed, NetCDF format\n",
-    "# cbh_nc_dir = domain_dir\n",
-    "# cbh_files = [\n",
-    "#     domain_dir / \"prcp_2yr.cbh\",\n",
-    "#     domain_dir / \"tmax_2yr.cbh\",\n",
-    "#     domain_dir / \"tmin_2yr.cbh\",\n",
-    "# ]\n",
-    "\n",
-    "# params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")\n",
-    "\n",
-    "# for cbh_file in cbh_files:\n",
-    "#     out_file = cbh_nc_dir / cbh_file.with_suffix(\".nc\").name\n",
-    "#     pws.utils.cbh_file_to_netcdf(cbh_file, params, out_file)"
-   ]
-  },
 {
   "cell_type": "code",
   "execution_count": null,
@@ -308,7 +308,7 @@
 {
   "cell_type": "code",
   "execution_count": null,
-   "id": "54058459-fed5-4cc3-bed2-8a3edfb22a13",
+   "id": "b5905f87-c4d7-47fe-ba52-cb51be05a4c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "outflow_obs = (\n",
    "    outflow.sel(nhm_seg=44426)\n",
    "    .rename(\"modeled\")\n",
    "    .to_dataframe()[\"modeled\"]\n",
    ")\n",
-    "obs_all = pyPRMS.Streamflow(domain_dir / \"sf_data\")\n",
+    "obs_all = pyPRMS.DataFile(domain_dir / \"sf_data\").data_by_variable(\"runoff\")\n",
    "wh_poi_obs = np.where(params.parameters[\"poi_gage_segment\"] == 184)\n",
    "gage_id = params.parameters[\"poi_gage_id\"][wh_poi_obs][0]\n",
-    "obs = obs_all.data[gage_id]\n",
+    "obs = obs_all[f\"runoff_{gage_id}\"]\n",
    "obs.rename(\"gage \" + obs.name, inplace=True)\n",
    "\n",
    "outflow_obs.hvplot() * obs[0 : (365 * 2)].hvplot()"
   ]
  },
@@ -334,7 +334,7 @@
   "source": [
    "## FlowGraph in Model: NHM with a STARFIT representation of Big Sandy\n",
    "\n",
-    "Because FlowGraph is not part of PRMS, we cant run FlowGraph with PRMS/NHM using the legacy instantiation (eg. notebook 02). We have to use a multi-process model, the pywatershed way (e.g. notebook 01). The next three cells build the multi-process model above the FlowGraph. We then use a helper function to insert the STARFIT resevoir into the PRMS/NHM Muskingum-Mann channel routing and append it to our multi-process model."
+    "Because FlowGraph is not part of PRMS, we can't run FlowGraph with PRMS/NHM using the legacy instantiation (e.g., notebook 02). We have to use a multi-process model, and set it up \"the pywatershed way\" (as described in notebook 01). The next three cells build the multi-process model which flows into the FlowGraph. We then use a helper function to insert the STARFIT reservoir into the `FlowNode` representation of the PRMS/NHM Muskingum-Mann channel routing and we append this `FlowGraph` to our multi-process model."
  },
@@ -433,9 +433,9 @@
   "id": "f229c613-56fe-49c0-b211-f0f047eddc57",
   "metadata": {},
   "source": [
-    "Now we have a model dictionary describing everything above the `PRMSChannel` (Musking-Mann). We have a very nice helper function, `prms_channel_flow_graph_to_model_dict`, we can use to add a `FlowGraph` to this model. The function takes the existing `model_dict`, the `PRMSChannel` information, plus additional user-supplied information, to construct a `FlowGraph` with a new `StarfitFlowNode` inserted in the `PRMSChannel` at the location above nhm segment 44426 (and below 44434 and 44435) to represent Big Sandy. This `FlowGraph` instance is added to the `model_dict` by name \"prms_channel_flow_graph\". \n",
+    "Now we have a model dictionary describing all the processes which flow into the `PRMSChannel` (Muskingum-Mann). We have a very nice helper function, `prms_channel_flow_graph_to_model_dict`, we can use to add a `FlowGraph` to this model. 
The function takes the existing `model_dict`, the `PRMSChannel` data, plus additional user-supplied information, to construct a `FlowGraph` with new nodes inserted into the `PRMSChannel`. In this case we'll add a single new node to the `PRMSChannel`: a `StarfitFlowNode` inserted at the location above nhm segment 44426 (and below 44434 and 44435) to represent the Big Sandy dike. This `FlowGraph` instance is finally added to the `model_dict` with the name \"prms_channel_flow_graph\". \n",
    "\n",
-    "The function will also add an `InflowExchange` instance to the `model_dict` named \"inflow_exchange\" which will manage getting the fluxes from PRMS to the FlowGraph. Zero lateral flows are supplied to the StarfitNode for Big Sandy in this case (though we could do otherwise)."
+    "We'll see that the `prms_channel_flow_graph_to_model_dict` helper function will also add an `InflowExchange` instance to the `model_dict` named \"inflow_exchange\". This `InflowExchange` will manage getting the fluxes from the other processes into the FlowGraph. Zero lateral flows are supplied to the StarfitNode for Big Sandy in this case (though we could do otherwise)."
] }, { @@ -460,6 +460,7 @@ " },\n", " new_nodes_maker_names=[\"starfit\"],\n", " new_nodes_maker_indices=[0],\n", + " new_nodes_maker_ids=[999],\n", " new_nodes_flow_to_nhm_seg=[44426],\n", " graph_budget_type=\"warn\", # move to error\n", ")" @@ -488,17 +489,9 @@ " run_dir.mkdir()\n", " model = pws.Model(model_dict)\n", " model.run()\n", - " model.finalize()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b8e0ec9-340c-4668-b483-9df90f8f5936", - "metadata": {}, - "outputs": [], - "source": [ - "model.processes[\"prms_channel_flow_graph\"].budget" + " model.finalize()\n", + "\n", + " model.processes[\"prms_channel_flow_graph\"].budget" ] }, { @@ -509,9 +502,8 @@ "outputs": [], "source": [ "wh_44426 = np.where(params.parameters[\"nhm_seg\"] == 44426)[0]\n", - "outflow_nodes = xr.open_dataarray(run_dir / \"node_outflows.nc\")[\n", - " :, wh_44426\n", - "].drop_vars(\"node_coord\")" + "outflow_nodes = xr.open_dataarray(run_dir / \"node_outflows.nc\")[:, wh_44426]\n", + "outflow_nodes = outflow_nodes.drop_vars(set(outflow_nodes.coords) - {\"time\"})" ] }, { @@ -537,9 +529,8 @@ "metadata": {}, "outputs": [], "source": [ - "storage_nodes = xr.open_dataarray(run_dir / \"node_storages.nc\")[\n", - " :, -1\n", - "].drop_vars(\"node_coord\")\n", + "storage_nodes = xr.open_dataarray(run_dir / \"node_storages.nc\")[:, -1]\n", + "storage_nodes = storage_nodes.drop_vars(set(storage_nodes.coords) - {\"time\"})\n", "storage_nodes.hvplot(width=plot_width, height=plot_height)" ] }, @@ -557,9 +548,11 @@ " \"node_storages\": \"Big Sandy Storage\",\n", " }\n", ").hvplot(\n", - " width=plot_width,\n", - " height=plot_height,\n", + " width=int(plot_width / 1.25),\n", + " height=int(plot_height / 1.3 / 1.2),\n", " ylabel=\"streamflow (cfs)\\nstorage (million cubic feet)\",\n", + ").opts(\n", + " legend_position=\"top_left\"\n", ")" ] }, @@ -569,11 +562,13 @@ "metadata": {}, "source": [ "## FlowGraph as a post-process: Drive FlowGraph with STARFIT representation of Big Sandy and Pass-Through using NHM output files\n", - "Above we ran the full NHM with a `StarfitNode` at Big Sandy. But pywatershed is flexible and in the NHM configuration no two process representations are two-way coupled. See [figure in the extended release notes](https://ec-usgs.github.io/pywatershed/assets/img/pywatershed_NHM_model_graph.png). (Note that some PRMS configurations in pywatershed can be two-way coupled between Runoff and Soilzone and/or Canopy and Snow.) In this case, the `PRMSChannel` is one-way coupled (forced) buy the rest of the model. So we could use the output of the first, NHM run above without any reservoir representation and use its outupts to drive just the `FlowGraph` in the run above. We might call running `FlowGraph` in this way a \"post-process\". If one were running the no-reservoir model and looking at hypotheses of what FlowGraphs give better flow representations, this is the method you'd want to follow.\n", + "Above we ran the equivalent of the full NHM but with a `StarfitNode` inserted at Big Sandy. Pywatershed is flexible and no two process representations are two-way coupled in the NHM configuration. This means that the `FlowGraph` in the model run above could be run as a post-process on the rest of the model chain.\n", + "\n", + "In fact, we can use the output of the first model run above, without any reservoir representation, to drive just the `FlowGraph` in the previous run. We call running `FlowGraph` in this way a \"post-process\". 
If one were running the no-reservoir model and investigating hypotheses of what FlowGraph designs give better flow representations, this is the method you'd want to follow instead of running all the model processes above `FlowGraph` every time.\n",
    "\n",
-    "So for this case we have a different helper function, `prms_channel_flow_graph_postprocess`, to which we supply most of the same information about the `FlowGraph`. However, we tell it about where it can find inputs from file rather than about an existing `model_dict` (as above).\n",
+    "For this post-process case we have a different helper function, `prms_channel_flow_graph_postprocess`, to which we supply most of the same information about the `FlowGraph`. However, we tell it where it can find inputs in files rather than handing it an existing `model_dict` (as in the previous model above).\n",
    "\n",
-    "For additional extra fun and illustration, we'll not only add the `StarfitNode` for Big Sandy, we'll demonstrate that we can add additional nodes to the `FlowGraph` by putting a random `PassThroughNode` elsewhere on the domain. This node has no effect on the flows by design, but adding it here shows how additional nodes can easily be added to a `FlowGraph`."
+    "For extra fun and illustration, we'll not only add the `StarfitNode` for Big Sandy, but we'll also demonstrate that we can add additional nodes to the `FlowGraph` by putting a random `PassThroughFlowNode` elsewhere on the domain. This node has no effect on the flows by design, but adding it here shows how additional nodes can easily be added to a `FlowGraph`."
   ]
  },
  {
@@ -642,14 +637,16 @@
     "    compute_daily=False,\n",
     "    budget_type=\"error\",\n",
     "    ),\n",
-    "    \"pass_through\": pws.hydrology.pass_through_node.PassThroughNodeMaker(),\n",
+    "    \"pass_through\": pws.hydrology.pass_through_flow_node.PassThroughFlowNodeMaker(),\n",
     "    },\n",
     "    new_nodes_maker_names=[\"starfit\", \"pass_through\"],\n",
     "    new_nodes_maker_indices=[0, 0],  # relative to the indvidual NodeMakers\n",
+    "    new_nodes_maker_ids=[999, 9999],  # relative to the individual NodeMakers\n",
     "    new_nodes_flow_to_nhm_seg=[\n",
     "        44426,\n",
     "        44435,\n",
     "    ],  # the second is a pass through above the first\n",
+    "    addtl_output_vars=[\"spill\", \"release\"],\n",
     ")"
   ]
  },
  {
@@ -670,7 +667,8 @@
     "    flow_graph.calculate(1.0)\n",
     "    flow_graph.output()\n",
     "\n",
-    "    flow_graph.finalize()"
+    "    flow_graph.finalize()\n",
+    "\n",
+    "    flow_graph.budget"
  ]
 },
 {
@@ -680,11 +678,16 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "wh_44426 = np.where(params.parameters[\"nhm_seg\"] == 44426)[0]\n",
-    "outflow_nodes_post = (\n",
-    "    xr.open_dataarray(run_dir / \"node_outflows.nc\")[:, wh_44426]\n",
-    "    .drop_vars(\"node_coord\")\n",
-    "    .rename(\"node_outflows_post\")\n",
+    "# wh_44426 = np.where(params.parameters[\"nhm_seg\"] == 44426)[0]\n",
+    "outflow_nodes_post = xr.open_dataarray(run_dir / \"node_outflows.nc\")\n",
+    "wh_big_sandy = (outflow_nodes_post.node_maker_id == 999) & (\n",
+    "    outflow_nodes_post.node_maker_name == \"starfit\"\n",
+    ")\n",
+    "outflow_nodes_post = outflow_nodes_post[:, wh_big_sandy].rename(\n",
+    "    \"node_outflows_post\"\n",
+    ")\n",
+    "outflow_nodes_post = outflow_nodes_post.drop_vars(\n",
+    "    set(outflow_nodes_post.coords) - {\"time\"}\n",
+    ")"
   ]
  },
  {
@@ -717,12 +720,15 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "storage_nodes_post = (\n",
-    "    xr.open_dataarray(run_dir / \"node_storages.nc\")[\n",
-    "        :, -2\n",
-    "    ]  # pass through is the last node this time\n",
-    "    .drop_vars(\"node_coord\")\n",
-    "    .rename(\"node_storages_post\")\n",
+    "storage_nodes_post = xr.open_dataarray(run_dir / \"node_storages.nc\")\n",
+    "wh_big_sandy = (storage_nodes_post.node_maker_id == 999) & (\n",
+    "    storage_nodes_post.node_maker_name == \"starfit\"\n",
+    ")\n",
+    "storage_nodes_post = storage_nodes_post[:, wh_big_sandy].rename(\n",
+    "    \"node_storages_post\"\n",
+    ")\n",
+    "storage_nodes_post = storage_nodes_post.drop_vars(\n",
+    "    set(storage_nodes_post.coords) - {\"time\"}\n",
     ")\n",
     "xr.merge(\n",
     "    [\n",
@@ -765,6 +771,54 @@
     "    ylabel=\"streamflow (cfs)\\nstorage (million cubic feet)\",\n",
     ")"
   ]
+ },
+ {
+  "cell_type": "markdown",
+  "id": "7c74f780-a421-441b-a7ee-f2ff24b759ee",
+  "metadata": {},
+  "source": [
+   "While the `FlowGraph` itself only looks at lateral inflows, upstream inflows, and total outflows at each node, there may be other variables of interest on a node for the user. Looking at the properties or attributes of an individual `FlowNode` reveals what other variables are available for each node of that type. Above, the argument `addtl_output_vars=[\"spill\", \"release\"]` was passed in the call to `pws.prms_channel_flow_graph_postprocess`. This requests that these variables be output to NetCDF files on nodes where they are available. Nodes where these variables are not available will contain missing (NaN) values. In the case of `StarfitFlowNode`s, the total outflow has two components, the spill and the release. From the node outflow variable, we cannot see the individual contributions of these terms. So we request that these variables be output, and we see that in the second summer (June 13-16, 1980) there is indeed a spill event on Big Sandy which contributes to the total outflow."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "cb9cf577-85e6-49ce-8a66-8862beb5aeca",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "spill = xr.open_dataarray(run_dir / \"spill.nc\")[:, wh_big_sandy]\n",
+   "release = xr.open_dataarray(run_dir / \"release.nc\")[:, wh_big_sandy]\n",
+   "drop_vars = set(spill.coords) - {\"time\"}\n",
+   "spill = spill.drop_vars(drop_vars)\n",
+   "release = release.drop_vars(drop_vars)"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "f6f20212-0e10-4b75-9533-62002324d4e5",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "xr.merge(\n",
+   "    [\n",
+   "        outflow_nodes_post,\n",
+   "        spill,\n",
+   "        release,\n",
+   "    ]\n",
+   ").rename(\n",
+   "    {\n",
+   "        \"node_outflows_post\": f\"Big Sandy Outflow CAP*{cap_mult}\",\n",
+   "        \"spill\": f\"Big Sandy Spill CAP*{cap_mult}\",\n",
+   "        \"release\": f\"Big Sandy Release CAP*{cap_mult}\",\n",
+   "    }\n",
+   ").hvplot(\n",
+   "    width=plot_width,\n",
+   "    height=plot_height,\n",
+   "    ylabel=\"streamflow (cfs)\",\n",
+   ")"
+  ]
+ }
 ],
 "metadata": {
diff --git a/examples/07_cascading_flow.ipynb b/examples/07_cascading_flow.ipynb
deleted file mode 100644
index 6cf36bd7..00000000
--- a/examples/07_cascading_flow.ipynb
+++ /dev/null
@@ -1,214 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b449f2f4-ca8f-4c8b-9960-40c5dfeaf6d4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pathlib as pl\n",
-    "import pywatershed as pws"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4fbe74df-01ff-4bb4-9daa-999364323cb8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "output_dir = pl.Path(\"./07_cascading_flow/output\")\n",
-    "domain_dir = pl.Path(\"../test_data/sagehen_5yr/\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "013de7f9-9acb-416a-910e-4e7dc59da2bd",
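The selection pattern used in these cells is worth calling out: because `node_maker_id` and `node_maker_name` are written as coordinates on the node dimension of the output files, a node can be located without knowing its position in the `FlowGraph`. A sketch, using the illustrative path and id values from this notebook:

```python
import xarray as xr

spill = xr.open_dataarray(run_dir / "spill.nc")
# locate Big Sandy by its maker name and its user-assigned id (999)
wh_big_sandy = (spill.node_maker_id == 999) & (
    spill.node_maker_name == "starfit"
)
spill = spill[:, wh_big_sandy]
# drop every non-time coordinate so differently-coordinated arrays merge cleanly
spill = spill.drop_vars(set(spill.coords) - {"time"})
```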
- "metadata": {}, - "outputs": [], - "source": [ - "control_file = domain_dir / \"sagehen_no_gw_cascades.control\"\n", - "control = pws.Control.load_prms(control_file)\n", - "control.options[\"parameter_file\"] = domain_dir / control.options[\"parameter_file\"][2:]\n", - "control.options[\"input_dir\"] = domain_dir / \"output\"\n", - "control.options[\"budget_type\"] = None\n", - "control.options['netcdf_output_dir'] = output_dir\n", - "control.options['netcdf_output_var_names'] = [\n", - " *pws.PRMSRunoffCascadesNoDprst.get_variables(),\n", - " *pws.PRMSSoilzoneCascadesNoDprst.get_variables(),\n", - " *pws.PRMSGroundwaterNoDprst.get_variables(),\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "822aa9a8-8ae1-4ee3-9edf-3cbf21ded426", - "metadata": {}, - "outputs": [], - "source": [ - "parameters = pws.parameters.PrmsParameters.load(control.options[\"parameter_file\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23017235-da67-4268-b72d-8ac661440ee2", - "metadata": {}, - "outputs": [], - "source": [ - "# add the cascading parameters via pre-processing\n", - "parameters = pws.utils.preprocess_cascades.preprocess_cascade_params(control, parameters)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f754d6c5-227e-4201-900d-9c04ec4b41a3", - "metadata": {}, - "outputs": [], - "source": [ - "test_data_dir = pl.Path(\"../test_data\")\n", - "domain_dir = test_data_dir / \"drb_2yr\"\n", - "dis_hru = None\n", - "\n", - "model_dict = {\n", - " \"control\": control,\n", - " \"dis_hru\": dis_hru,\n", - " \"model_order\": [\n", - " \"runoff\",\n", - " \"soilzone\",\n", - " \"groundwater\",\n", - " ],\n", - " \"runoff\": {\n", - " \"class\": pws.PRMSRunoffCascadesNoDprst,\n", - " \"parameters\": parameters,\n", - " \"dis\": \"dis_hru\",\n", - " },\n", - " \"soilzone\": {\n", - " \"class\": pws.PRMSSoilzoneCascadesNoDprst,\n", - " \"parameters\": parameters,\n", - " \"dis\": \"dis_hru\",\n", - " },\n", - " \"groundwater\": {\n", - " \"class\": pws.PRMSGroundwaterNoDprst,\n", - " \"parameters\": parameters,\n", - " \"dis\": \"dis_hru\",\n", - " },\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e2e3ca8-4316-494a-ada3-5d83273c9e30", - "metadata": {}, - "outputs": [], - "source": [ - "model = pws.Model(model_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a5029e51-2d20-4ca0-86ea-a664576d94fd", - "metadata": {}, - "outputs": [], - "source": [ - "# model.run(netcdf_dir=output_dir, finalize=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4683a9ad-856f-4bb3-b7c0-4d5dd5cf6ed6", - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "horton_casc = xr.open_dataarray(output_dir / \"hru_horton_cascflow.nc\")\n", - "sz_casc = xr.open_dataarray(output_dir / \"hru_sz_cascadeflow.nc\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c33c9d33-9622-49e1-b28e-fdbfc911e531", - "metadata": {}, - "outputs": [], - "source": [ - "proc_plot = pws.analysis.process_plot.ProcessPlot(\n", - " \"/Users/jmccreight/usgs/data/sagehen_pws_domain/GIS/\", \n", - " hru_shp_file_name = \"HRUs.shp\",\n", - " seg_shp_file_name = None,\n", - ")\n", - "proc_plot.plot_hru_var(\n", - " var_name=\"hru_horton_cascflow\",\n", - " process=pws.PRMSRunoffCascadesNoDprst,\n", - " data=horton_casc.mean(dim=\"time\"),\n", - " # data=horton_casc, # make this a move in the underlying code\n", - " 
data_units=horton_casc.attrs[\"units\"],\n",
-    "    nhm_id=horton_casc[\"nhm_id\"],\n",
-    "    clim = (0.0, 1.0e-2)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "33c9bea6-3f02-47f8-979e-91bee4375863",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "proc_plot.plot_hru_var(\n",
-    "    var_name=var,\n",
-    "    process=pws.PRMSRunoffCascadesNoDprst,\n",
-    "    # data=horton_casc.mean(dim=\"time\"),\n",
-    "    data=sz_casc[30, :],\n",
-    "    data_units=horton_casc.attrs[\"units\"],\n",
-    "    nhm_id=horton_casc[\"nhm_id\"],\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e2cc8cc3-d5c8-49ac-b312-6b5ca2c5ed11",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "proc_plot.plot_hru_var(\n",
-    "    var_name=var,\n",
-    "    process=pws.PRMSRunoffCascadesNoDprst,\n",
-    "    # data=horton_casc.mean(dim=\"time\"),\n",
-    "    data=sz_casc[135, :],\n",
-    "    data_units=horton_casc.attrs[\"units\"],\n",
-    "    nhm_id=horton_casc[\"nhm_id\"],\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7dd278ec-4e56-4154-8a5b-a6d304951404",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython"
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/examples/mmr_to_mf6_dfw.ipynb b/examples/07_mmr_to_mf6_chf_dfw.ipynb
similarity index 95%
rename from examples/mmr_to_mf6_dfw.ipynb
rename to examples/07_mmr_to_mf6_chf_dfw.ipynb
index 61495c25..df5322c6 100644
--- a/examples/mmr_to_mf6_dfw.ipynb
+++ b/examples/07_mmr_to_mf6_chf_dfw.ipynb
@@ -7,9 +7,9 @@
    "source": [
     "# MMR To MF6 DFW\n",
     "\n",
-    "This notebook performs MF6 diffussive wave routing (DFW) simulaton based on PRMS MMR (Muskingum-Mann Routing) inputs and boundary flows from the PRMS model. We'll compare and contrast the MF6 DFW and PRMS MMR simulations and delve into their differences at the largest gaged flows in the domain (that are not affected by tides). \n",
+    "This notebook performs 1-D diffusive wave (DFW) routing in MODFLOW 6 using the CHF (channel flow) model. The static and time-varying boundary conditions come from PRMS: specifically, PRMS's MMR (Muskingum-Mann Routing) parameters and its channel inflows calculated during a pywatershed run. We'll compare and contrast the MF6 CHF-DFW and PRMS MMR simulations and delve into their differences at the largest gaged flows in the domain (that are not affected by tides). \n",
     "\n",
-    "This notebook requries the develop branch of MF6 and a flopy upto date with this branch by running `python update_flopy.py` in modflow6/autotest. You will also need to add the MF6 executable to your path below. You will also need to have run \n",
+    "This notebook requires the develop branch of MF6 and a flopy up to date with this branch by running `python update_flopy.py` in modflow6/autotest. You will also need to add the MF6 executable to your path below. 
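The configuration cells below shorten the simulation unless the full period is requested. A sketch of that control logic, assuming `domain_dir` and `full_run_period` are set as in this notebook:

```python
import pywatershed as pws

control = pws.Control.load_prms(domain_dir / "nhm.control")
ndays_run = control.n_times  # the full run period
if not full_run_period:
    ndays_run = 45  # just the first 45 days
    control.edit_n_time_steps(ndays_run)
```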
\n", "\n", "## User configuration" ] @@ -21,16 +21,20 @@ "metadata": {}, "outputs": [], "source": [ - "# Set YOUR path to MF6 here\n", + "# Set YOUR path to MF6 in this block\n", "import pathlib as pl\n", "\n", "mf6_bin = pl.Path(\"../../modflow6/bin/mf6\")\n", + "# double check\n", "msg = \"A build of mf6/develop branch required for DFW simulation\"\n", "assert mf6_bin.exists, msg\n", "\n", "# Rerun MF6 model below or use an existing run?\n", "rerun_mf6 = True\n", - "rerun_prms = True" + "rerun_prms = True\n", + "\n", + "# Perform the full, 2 year run period or just the first 45 days?\n", + "full_run_period = False" ] }, { @@ -96,7 +100,7 @@ "id": "e6a36353-3a61-4b57-8db5-e48293ffc0d1", "metadata": {}, "source": [ - "## Run PRMS\n", + "## Run PRMS NHM configuration using pywatershed\n", "Running PRMS gives the boundary conditions for the MF6 DFW run and it also produces its own streamflow simulation with its Muskingum-Mann routing method. " ] }, @@ -107,11 +111,9 @@ "metadata": {}, "outputs": [], "source": [ - "%time\n", - "\n", - "prms_run_dir = repo_root_dir / \"examples/mmr_to_mf6_dfw/prms_run\"\n", + "prms_run_dir = repo_root_dir / \"examples/07_mmr_to_mf6_chf_dfw/prms_run\"\n", "\n", - "if rerun_prms:\n", + "if rerun_prms and prms_run_dir.exists():\n", " shutil.rmtree(prms_run_dir)\n", "\n", "if not prms_run_dir.exists():\n", @@ -186,12 +188,10 @@ "control_file = domain_dir / \"nhm.control\"\n", "control = pws.Control.load_prms(control_file)\n", "ndays_run = control.n_times\n", - "# Could shorten the run duration\n", - "# Subtract one from ndays_run becase end day/time would be included in the PRMS run\n", - "# ndays_run = 45\n", - "# control.edit_end_time(\n", - "# control.start_time + ((ndays_run - 1) * control.time_step)\n", - "# )" + "\n", + "if not full_run_period:\n", + " ndays_run = 45\n", + " control.edit_n_time_steps(ndays_run)" ] }, { @@ -312,6 +312,7 @@ " inflow_dir=inflow_dir,\n", " )\n", "\n", + " dfw.write()\n", " success, buff = dfw.run(silent=False, report=True)\n", " assert success" ] @@ -354,7 +355,9 @@ "n_substeps = int(ndays_run * 24 * 60 * 60 / tdis_perlen * tdis_nstp)\n", "substep_len = np.timedelta64(int(tdis_perlen / tdis_nstp), \"s\")\n", "sim_end_time = sim_start_time + n_substeps * substep_len\n", - "sim_times = np.arange(sim_start_time, sim_end_time, substep_len)\n", + "sim_times = np.arange(sim_start_time, sim_end_time, substep_len).astype(\n", + " \"datetime64[ns]\"\n", + ") # ns to avoid xarray warnings\n", "perioddata = tdis.perioddata.get_data()\n", "assert len(sim_times) == len(perioddata) * perioddata[0][1]" ] @@ -482,7 +485,10 @@ "metadata": {}, "outputs": [], "source": [ - "tt = 3000\n", + "if ndays_run < 3000:\n", + " tt = ndays_run\n", + "else:\n", + " tt = 3000\n", "zoom = 1.0\n", "figsize = (7 * zoom, 10 * zoom)\n", "dt = tdis_perlen / tdis_nstp\n", @@ -564,10 +570,11 @@ "outputs": [], "source": [ "# Subset to points of interest (poi), known flow gages\n", - "poi_id = np.chararray(\n", - " prms_mf6_ds[\"prms streamflow\"].nhm_seg.shape, unicode=True, itemsize=15\n", - ")\n", "empty_str = \" \" * 15\n", + "poi_id = np.full(\n", + " prms_mf6_ds[\"prms streamflow\"].nhm_seg.shape, empty_str, dtype=\"&1 | tee run.log\"\n", + " exe_command = f\"time ./{binary.name} nhm.control -MAXDATALNLEN 60000 2>&1 | tee run.log\"\n", " result = subprocess.run(\n", " exe_command,\n", " shell=True,\n", @@ -151,11 +136,11 @@ " cwd=run_dir,\n", " )\n", "\n", - " # these will be useful in what follows \n", + " # these will be useful in what follows\n", " params = 
pws.parameters.PrmsParameters.load(\n", " domain_dir / \"myparam.param\"\n", " ).parameters\n", - " \n", + "\n", " # convert to netcdf\n", " # could make these arguments\n", " chunking = {\n", @@ -179,6 +164,8 @@ " \"soil_moist\",\n", " \"hru_impervstor\",\n", " \"dprst_stor_hru\",\n", + " \"soil_lower\",\n", + " \"soil_rechr\",\n", " ]:\n", " data = xr.open_dataset(output_dir / f\"{vv}.nc\")[vv]\n", " prev_da = data.copy()\n", @@ -282,6 +269,7 @@ "outputs": [], "source": [ "prms_dbl_run_dir = nb_output_dir / f\"{domain_name}_prms_double_run\"\n", + "skip_if_exists_prms_double = True\n", "run_prms(\n", " bin_double, prms_dbl_run_dir, skip_if_exists=skip_if_exists_prms_double\n", ")" @@ -303,9 +291,8 @@ "outputs": [], "source": [ "process = [pws.PRMSRunoff]\n", - "\n", "pws_run_dir = nb_output_dir / f\"{domain_name}_pws_run\"\n", - "input_dir = pws_run_dir / \"pws_input\"" + "input_dir_cp = prms_dbl_run_dir / \"inputs\"" ] }, { @@ -320,14 +307,16 @@ }, "outputs": [], "source": [ - "control = pws.Control.load(domain_dir / \"control.test\")\n", + "skip_if_exists_pws = True\n", + "control = pws.Control.load_prms(domain_dir / \"nhm.control\")\n", "output_dir = pws_run_dir / \"output\"\n", "control.options = control.options | {\n", - " \"input_dir\": input_dir,\n", + " \"input_dir\": input_dir_cp,\n", " \"budget_type\": \"error\",\n", " \"calc_method\": \"numpy\",\n", " \"netcdf_output_dir\": output_dir,\n", "}\n", + "del control.options[\"netcdf_output_var_names\"]\n", "params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")" ] }, @@ -349,11 +338,11 @@ " )\n", "\n", "else:\n", - " input_dir.mkdir(exist_ok=True, parents=True)\n", + " input_dir_cp.mkdir(exist_ok=True, parents=True)\n", " for ff in prms_dbl_run_dir.glob(\"*.nc\"):\n", - " copy2(ff, input_dir / ff.name)\n", + " copy2(ff, input_dir_cp / ff.name)\n", " for ff in (prms_dbl_run_dir / \"output\").glob(\"*.nc\"):\n", - " copy2(ff, input_dir / ff.name)\n", + " copy2(ff, input_dir_cp / ff.name)\n", "\n", " submodel = pws.Model(\n", " process,\n", @@ -383,9 +372,9 @@ " print(vv)\n", " assert (output_dir / f\"{vv}.nc\").exists()\n", " try:\n", - " assert (input_dir / f\"{vv}.nc\").exists()\n", + " assert (input_dir_cp / f\"{vv}.nc\").exists()\n", " except:\n", - " print(f\"********** {vv} not in input_dir\")" + " print(f\"********** {vv} not in input_dir_cp\")" ] }, { @@ -468,7 +457,7 @@ " assert (pws_file).exists()\n", " pws_ds = xr.open_dataset(pws_file)[vv].rename(\"pws\")\n", "\n", - " prms_file = input_dir / f\"{vv}.nc\"\n", + " prms_file = input_dir_cp / f\"{vv}.nc\"\n", " assert (prms_file).exists()\n", " prms_ds = xr.open_dataset(prms_file)[vv].rename(\"prms\")\n", "\n", @@ -576,8 +565,7 @@ "source": [ "## Look at specific time and budget errors\n", "\n", - "> UserWarning: The flux unit balance not equal to the change in unit storageat time 1979-04-26T00:00:00 \n", - "> and at the following locations for PRMSRunoff: (array([341, 509]),)" + "Runoff mass balance errors have been solved." 
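Pulling the scattered hunks above together, the single-process comparison run is roughly the following sketch; the `pws.Model` keyword names are assumptions here, since the call is truncated in this diff.

```python
import pywatershed as pws

control = pws.Control.load_prms(domain_dir / "nhm.control")
control.options = control.options | {
    "input_dir": input_dir_cp,  # inputs copied from the double-precision PRMS run
    "budget_type": "error",  # fail loudly on any mass-balance error
    "calc_method": "numpy",
    "netcdf_output_dir": output_dir,
}
del control.options["netcdf_output_var_names"]  # output all variables
params = pws.parameters.PrmsParameters.load(domain_dir / "myparam.param")

submodel = pws.Model([pws.PRMSRunoff], control=control, parameters=params)
submodel.run(finalize=True)
```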
] }, { @@ -626,14 +614,14 @@ " print(\" \", vv)\n", "\n", " if term == \"inputs\":\n", - " pws_file = input_dir / f\"{vv}.nc\"\n", + " pws_file = input_dir_cp / f\"{vv}.nc\"\n", " else:\n", " pws_file = output_dir / f\"{vv}.nc\"\n", "\n", " assert (pws_file).exists()\n", " pws_ds = xr.open_dataset(pws_file)[vv].rename(\"pws\")\n", "\n", - " prms_file = input_dir / f\"{vv}.nc\"\n", + " prms_file = input_dir_cp / f\"{vv}.nc\"\n", " assert (prms_file).exists()\n", " prms_ds = xr.open_dataset(prms_file)[vv].rename(\"prms\")\n", "\n", @@ -673,31 +661,31 @@ "metadata": {}, "outputs": [], "source": [ - "ic(budget_location_inds)\n", - "ic(inputs.prms.values)\n", - "ic(outputs.prms.values)\n", - "ic(storage_changes.prms.values)\n", + "print(f\"{budget_location_inds=}\")\n", + "print(f\"{inputs.prms.values=}\")\n", + "print(f\"{outputs.prms.values=}\")\n", + "print(f\"{storage_changes.prms.values=}\")\n", "\n", - "ic(\"-----------\")\n", + "print(\"-----------\")\n", "\n", - "ic(bc[\"through_rain\"].pws.values)\n", + "print(f'{bc[\"through_rain\"].pws.values=}')\n", "\n", - "ic(bc[\"snow_evap\"].prms.values)\n", - "ic(bc[\"hru_impervstor_change\"].prms.values)\n", - "ic(bc[\"hru_impervstor_change\"].pws.values)\n", - "ic(bc[\"dprst_stor_hru_change\"].prms.values)\n", - "ic(bc[\"dprst_stor_hru_change\"].pws.values)\n", - "ic(balance.prms.values)\n", + "print(f'{bc[\"snow_evap\"].prms.values=}')\n", + "print(f'{bc[\"hru_impervstor_change\"].prms.values=}')\n", + "print(f'{bc[\"hru_impervstor_change\"].pws.values=}')\n", + "print(f'{bc[\"dprst_stor_hru_change\"].prms.values=}')\n", + "print(f'{bc[\"dprst_stor_hru_change\"].pws.values=}')\n", + "print(f\"{balance.prms.values=}\")\n", "\n", - "# ic(bc[\"hru_sroffi\"].prms.sum().values)\n", - "# ic(bc[\"hru_sroffp\"].prms.sum().values)\n", - "# ic(bc[\"dprst_sroff_hru\"].prms.sum().values)\n", - "# ic(bc[\"infil_hru\"].prms.sum().values)\n", - "# ic(bc[\"hru_impervevap\"].prms.sum().values)\n", - "# ic(bc[\"dprst_seep_hru\"].prms.sum().values)\n", - "# ic(bc[\"dprst_evap_hru\"].prms.sum().values)\n", + "# print(f\"{bc[\"hru_sroffi\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"hru_sroffp\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"dprst_sroff_hru\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"infil_hru\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"hru_impervevap\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"dprst_seep_hru\"].prms.sum().values=}\")\n", + "# print(f\"{bc[\"dprst_evap_hru\"].prms.sum().values=}\")\n", "\n", - "# ic(storage_changes.prms.values)" + "# print(f\"{storage_changes.prms.values=}\")" ] }, { @@ -707,8 +695,8 @@ "metadata": {}, "outputs": [], "source": [ - "ic((balance - bc[\"through_rain\"]).pws.values)\n", - "ic((balance - bc[\"through_rain\"]).prms.values)" + "print(f'{(balance - bc[\"through_rain\"]).pws.values=}')\n", + "print(f'{(balance - bc[\"through_rain\"]).prms.values=}')" ] }, { @@ -738,13 +726,15 @@ "metadata": {}, "outputs": [], "source": [ - "ic(bc[\"through_rain\"].pws.values)\n", - "ic(bc[\"net_rain\"].pws.values)\n", - "ic(bc[\"net_snow\"].pws.values)\n", - "ic(bc[\"net_ppt\"].pws.values)\n", - "ic(bc[\"pptmix_nopack\"].pws.values)\n", - "ic(bc[\"newsnow\"].pws.values)\n", - "ic((bc[\"pk_ice_prev\"].pws.values + bc[\"freeh2o_prev\"].pws.values) < epsilon32)" + "print(f'{bc[\"through_rain\"].pws.values=}')\n", + "print(f'{bc[\"net_rain\"].pws.values=}')\n", + "print(f'{bc[\"net_snow\"].pws.values=}')\n", + "print(f'{bc[\"net_ppt\"].pws.values=}')\n", + 
"print(f'{bc[\"pptmix_nopack\"].pws.values=}')\n", + "print(f'{bc[\"newsnow\"].pws.values=}')\n", + "print(\n", + " f'{(bc[\"pk_ice_prev\"].pws.values + bc[\"freeh2o_prev\"].pws.values) < epsilon32=}'\n", + ")" ] }, { diff --git a/examples/snow_errors.ipynb b/examples/snow_errors.ipynb index b8243a21..c01071b3 100644 --- a/examples/snow_errors.ipynb +++ b/examples/snow_errors.ipynb @@ -8,22 +8,6 @@ "# Snow mass balance errors\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "6936fb48-fb5c-463e-8642-11287a578ab1", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - } - }, - "outputs": [], - "source": [ - "# auto-format the code in this notebook\n", - "%load_ext jupyter_black" - ] - }, { "cell_type": "markdown", "id": "c7b84519-933d-4284-838d-f5a8d4ec01f7", @@ -49,11 +33,13 @@ "from shutil import rmtree, copy2\n", "\n", "import hvplot.xarray # noqa\n", - "from icecream import ic\n", + "import jupyter_black\n", "from IPython.display import display\n", "import numpy as np\n", "import pywatershed as pws\n", - "import xarray as xr" + "import xarray as xr\n", + "\n", + "jupyter_black.load()" ] }, { @@ -132,7 +118,7 @@ " run_dir.mkdir() # must not exist, on user to delete\n", " copy2(binary, run_dir / binary.name)\n", " for ff in [\n", - " \"control.test\",\n", + " \"nhm.control\",\n", " \"myparam.param\",\n", " \"tmax.cbh\",\n", " \"tmin.cbh\",\n", @@ -144,7 +130,7 @@ " output_dir = run_dir / \"output\"\n", " output_dir.mkdir()\n", "\n", - " exe_command = f\"time ./{binary.name} control.test -MAXDATALNLEN 60000 2>&1 | tee run.log\"\n", + " exe_command = f\"time ./{binary.name} nhm.control -MAXDATALNLEN 60000 2>&1 | tee run.log\"\n", " result = subprocess.run(\n", " exe_command,\n", " shell=True,\n", @@ -309,7 +295,7 @@ }, "outputs": [], "source": [ - "control = pws.Control.load(domain_dir / \"control.test\")\n", + "control = pws.Control.load_prms(domain_dir / \"nhm.control\")\n", "output_dir = pws_run_dir / \"output\"\n", "control.options = control.options | {\n", " \"input_dir\": input_dir,\n", @@ -317,6 +303,7 @@ " \"calc_method\": \"numpy\",\n", " \"netcdf_output_dir\": output_dir,\n", "}\n", + "del control.options[\"netcdf_output_var_names\"]\n", "params = pws.parameters.PrmsParameters.load(domain_dir / \"myparam.param\")" ] }, @@ -348,16 +335,6 @@ " submodel.run(finalize=True)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a056b6a-d31a-492b-a761-3a33f42ba08c", - "metadata": {}, - "outputs": [], - "source": [ - "%debug" - ] - }, { "cell_type": "code", "execution_count": null, @@ -427,7 +404,7 @@ " # 'pk_precip',\n", " \"pk_temp\",\n", " # 'pksv',\n", - " \"pkwater_ante\",\n", + " # \"pkwater_ante\", # not actually prognostic, removed from pywatershed\n", " \"pkwater_equiv\",\n", " # 'pkwater_equiv_change',\n", " \"pptmix_nopack\",\n", @@ -556,7 +533,7 @@ "metadata": {}, "outputs": [], "source": [ - "if False:\n", + "if True:\n", " for var_name in comparisons.keys():\n", " var_close(var_name)" ] @@ -566,7 +543,8 @@ "id": "da70848a-3187-4ae7-b045-125c83e451c9", "metadata": {}, "source": [ - "## Mass-balance errors\n" + "## Mass-balance errors\n", + "These are currently resolved in the UCB_2yr domain for pywatershed." 
] }, { @@ -699,9 +677,9 @@ "metadata": {}, "outputs": [], "source": [ - "ic(pk_ice.values[wh_imbalance])\n", - "ic(pk_ice_prev.values[wh_imbalance])\n", - "ic(pk_ice_change.values[wh_imbalance])" + "print(f\"{pk_ice.values[wh_imbalance]=}\")\n", + "print(f\"{pk_ice_prev.values[wh_imbalance]=}\")\n", + "print(f\"{pk_ice_change.values[wh_imbalance]=}\")" ] }, { @@ -711,9 +689,9 @@ "metadata": {}, "outputs": [], "source": [ - "ic(freeh2o.values[wh_imbalance])\n", - "ic(freeh2o_prev.values[wh_imbalance])\n", - "ic(freeh2o_change.values[wh_imbalance])" + "print(f\"{freeh2o.values[wh_imbalance]=}\")\n", + "print(f\"{freeh2o_prev.values[wh_imbalance]=}\")\n", + "print(f\"{freeh2o_change.values[wh_imbalance]=}\")" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 83db4609..92e9374e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = [ - "setuptools >=61, <=72.2.0", - "numpy >=1.15.0,<2.0.0", + "setuptools>64", + "numpy>=2.0", ] build-backend = "setuptools.build_meta" @@ -29,7 +29,7 @@ classifiers = [ requires-python = ">=3.10,<3.12" dependencies = [ "contextily", - "numpy >=1.15.0,<2.0.0", + "numpy>=2.0.0", "matplotlib >=1.4.0", "epiweeks", "flopy", diff --git a/pywatershed/__init__.py b/pywatershed/__init__.py index 522a8a42..83853731 100644 --- a/pywatershed/__init__.py +++ b/pywatershed/__init__.py @@ -11,8 +11,11 @@ from .base.parameters import Parameters from .base.process import Process from .base.timeseries import TimeseriesArray -from .hydrology.obsin_node import ObsInNode, ObsInNodeMaker -from .hydrology.pass_through_node import PassThroughNode, PassThroughNodeMaker +from .hydrology.obsin_flow_node import ObsInFlowNode, ObsInFlowNodeMaker +from .hydrology.pass_through_flow_node import ( + PassThroughFlowNode, + PassThroughFlowNodeMaker, +) from .hydrology.prms_canopy import PRMSCanopy from .hydrology.prms_channel import PRMSChannel from .hydrology.prms_channel_flow_graph import ( @@ -74,10 +77,10 @@ "Parameters", "Process", "TimeseriesArray", - "ObsInNode", - "ObsInNodeMaker", - "PassThroughNode", - "PassThroughNodeMaker", + "ObsInFlowNode", + "ObsInFlowNodeMaker", + "PassThroughFlowNode", + "PassThroughFlowNodeMaker", "StarfitFlowNode", "StarfitFlowNodeMaker", "PRMSCanopy", diff --git a/pywatershed/analysis/process_plot.py b/pywatershed/analysis/process_plot.py index a542c0f6..76b3568b 100644 --- a/pywatershed/analysis/process_plot.py +++ b/pywatershed/analysis/process_plot.py @@ -61,21 +61,19 @@ def __init__( raise ValueError(msg) # segment one-time setup - if self.seg_shapefile is not None: - self.seg_gdf = gpd.read_file(self.seg_shapefile) - - # if (self.__seg_poly.crs.name - # == "USA_Contiguous_Albers_Equal_Area_Conic_USGS_version"): - # print("Overriding USGS aea crs with EPSG:5070") - self.seg_gdf.crs = "EPSG:5070" - - self.seg_geoms_exploded = ( - self.seg_gdf.explode(index_parts=True) - .reset_index(level=1, drop=True) - .drop("model_idx", axis=1) - .rename(columns={"nsegment_v": "nhm_seg"}) - .set_index("nhm_seg") - ) + self.seg_gdf = gpd.read_file(self.seg_shapefile) + # if (self.__seg_poly.crs.name + # == "USA_Contiguous_Albers_Equal_Area_Conic_USGS_version"): + # print("Overriding USGS aea crs with EPSG:5070") + self.seg_gdf.set_crs("EPSG:5070") + + self.seg_geoms_exploded = ( + self.seg_gdf.explode(index_parts=True) + .reset_index(level=1, drop=True) + .drop("model_idx", axis=1) + .rename(columns={"nsegment_v": "nhm_seg"}) + .set_index("nhm_seg") + ) return diff --git a/pywatershed/atmosphere/prms_atmosphere.py 
b/pywatershed/atmosphere/prms_atmosphere.py index 2ab34dfd..d57315f5 100644 --- a/pywatershed/atmosphere/prms_atmosphere.py +++ b/pywatershed/atmosphere/prms_atmosphere.py @@ -293,7 +293,7 @@ def get_init_values() -> dict: "tmaxc": nan, "tavgc": nan, "tminc": nan, - "pptmix": nan, + "pptmix": -9999, "orad_hru": nan, } diff --git a/pywatershed/base/adapter.py b/pywatershed/base/adapter.py index 35d00d1a..1de66163 100644 --- a/pywatershed/base/adapter.py +++ b/pywatershed/base/adapter.py @@ -90,7 +90,13 @@ def __init__( _ = nc_shape.pop(nc_dims.index(time_dim)) self.time = self._nc_read.times - self._current_value = np.full(nc_shape, np.nan, nc_type) + + if "int" in str(nc_type): + fill_value = -9999 + else: + fill_value = np.nan + + self._current_value = np.full(nc_shape, fill_value, nc_type) return diff --git a/pywatershed/base/budget.py b/pywatershed/base/budget.py index 4201197f..0941f230 100644 --- a/pywatershed/base/budget.py +++ b/pywatershed/base/budget.py @@ -662,6 +662,7 @@ def initialize_netcdf( self, params: Parameters, output_dir: str, + extra_coords: dict = None, write_sum_vars: Union[list, bool] = True, write_individual_vars: bool = False, ) -> None: @@ -675,6 +676,8 @@ def initialize_netcdf( """ self._output_netcdf = True + if extra_coords is None: + extra_coords = {} # make working directory output_dir = pl.Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) @@ -737,6 +740,7 @@ def initialize_netcdf( coordinates, self._netcdf_output_var_dict, meta, + extra_coords=extra_coords, global_attrs=global_attrs, ) diff --git a/pywatershed/base/conservative_process.py b/pywatershed/base/conservative_process.py index 79ed6847..adbad36c 100644 --- a/pywatershed/base/conservative_process.py +++ b/pywatershed/base/conservative_process.py @@ -218,6 +218,8 @@ def initialize_netcdf( separate_files: bool = None, budget_args: dict = None, output_vars: list = None, + extra_coords: dict = None, + addtl_output_vars: list = None, ) -> None: if self._netcdf_initialized: msg = ( @@ -231,6 +233,8 @@ def initialize_netcdf( output_dir=output_dir, separate_files=separate_files, output_vars=output_vars, + extra_coords=extra_coords, + addtl_output_vars=addtl_output_vars, ) if self.budget is not None: diff --git a/pywatershed/base/data_model.py b/pywatershed/base/data_model.py index 47b75a0b..219e9177 100644 --- a/pywatershed/base/data_model.py +++ b/pywatershed/base/data_model.py @@ -765,6 +765,10 @@ def xr_ds_to_dd(file_or_ds, schema_only=False, encoding=True) -> dict: dd = xr_ds.to_dict(data=data_arg, encoding=encoding) + # before = xr_ds.time.values.dtype + # after = dd["coords"]["time"]["data"].dtype + # assert before == after + dd = xr_dd_to_dd(dd) return dd @@ -772,6 +776,7 @@ def xr_ds_to_dd(file_or_ds, schema_only=False, encoding=True) -> dict: def xr_dd_to_dd(xr_dd: dict) -> dict: dd = deepcopy(xr_dd) + # asdf # Move the global encoding to a global key of itself dd["encoding"] = {"global": dd.get("encoding", {})} @@ -817,18 +822,27 @@ def dd_to_xr_dd(dd: dict) -> dict: dd["encoding"] = encoding.pop("global", {}) for key, val in meta.items(): + # coordinate or variable? 
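The `Adapter` change above picks a fill value compatible with the variable's dtype, since `np.nan` cannot be represented in integer arrays (the same reason `pptmix` and `intcp_form` move to `-9999` elsewhere in this diff). A minimal sketch of the rule:

```python
import numpy as np

def fill_value_for(nc_type) -> float | int:
    # integer arrays cannot hold NaN, so they get a -9999 sentinel instead
    return -9999 if "int" in str(nc_type) else np.nan

print(np.full(3, fill_value_for("int32"), "int32"))      # [-9999 -9999 -9999]
print(np.full(3, fill_value_for("float64"), "float64"))  # [nan nan nan]
```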
cv = None if key in dd["data_vars"].keys(): cv = "data_vars" elif key in dd["coords"].keys(): cv = "coords" + data_vals = dd[cv][key] + + if np.issubdtype(data_vals.dtype, np.datetime64): + # conversion to datetime64[ns] silences xr warnings + # but should be able to be removed in the future + data_vals = data_vals.astype("datetime64[ns]") + key_enc = {} if key in encoding.keys(): key_enc = encoding[key] + dd[cv][key] = { **val, - "data": dd[cv][key], + "data": data_vals, "encoding": key_enc, } @@ -1123,4 +1137,10 @@ def dd_to_nc4_ds(dd, nc_file): def open_datasetdict(nc_file: fileish, use_xr=True): + """Convenience method for opening a DatasetDict. + + Args: + nc_file: the file containing the DatasetDict. + use_xr: Use xarray or NetCDF4 for opening the NetCDF file? + """ return DatasetDict.from_netcdf(nc_file, use_xr=use_xr) diff --git a/pywatershed/base/flow_graph.py b/pywatershed/base/flow_graph.py index ab538552..572c38e0 100644 --- a/pywatershed/base/flow_graph.py +++ b/pywatershed/base/flow_graph.py @@ -1,4 +1,6 @@ +import pathlib as pl from typing import Literal +from warnings import warn import networkx as nx import numpy as np @@ -20,6 +22,11 @@ class FlowNode(Accessor): A FlowNode is instantiated with its own (optional) data and calculates outflow, storage_change, and sink_source properties on subtimesteps. + A FlowNode may have additional public variables provided by properties that + can be requested to be collected by :class:`FlowGraph` for output to + NetCDF files. These variable names should just not overwrite any existing + class attributes. + See :class:`FlowGraph` for related examples and discussion. """ @@ -59,27 +66,27 @@ def finalize_timestep(self): raise Exception("This must be overridden") @property - def outflow(self): + def outflow(self) -> np.float64: "The average outflow of the FlowNode over the current timestep." raise Exception("This must be overridden") @property - def outflow_substep(self): + def outflow_substep(self) -> np.float64: """The outflow of the FlowNode over the sub-timestep.""" raise Exception("This must be overridden") @property - def storage_change(self): + def storage_change(self) -> np.float64: "The storage change of the FlowNode at the current subtimestep." raise Exception("This must be overridden") @property - def storage(self): + def storage(self) -> np.float64: "The storage of the FlowNode at the current subtimestep." raise Exception("This must be overridden") @property - def sink_source(self): + def sink_source(self) -> np.float64: "The sink or source amount of the FlowNode at the current subtimestep." raise Exception("This must be overridden") @@ -116,8 +123,13 @@ def get_node(control: Control, index: int) -> FlowNode: raise Exception("This must be overridden") +def type_check(scalar: float): + assert isinstance(scalar, float) + return None + + class FlowGraph(ConservativeProcess): - """FlowGraph manages and computes FlowNodes given by FlowNodeMakers. + r"""FlowGraph manages and computes FlowNodes given by FlowNodeMakers. FlowGraph lets users combine :class:`FlowNode`\ s of different kinds into a single mathmetical graph of flow solution. FlowNodes provide explicit @@ -171,6 +183,10 @@ class FlowGraph(ConservativeProcess): :func:`prms_channel_flow_graph_to_model_dict` and :func:`prms_channel_flow_graph_postprocess`. + For developers looking to add new :class:`FlowNode`s, please read the + :class:`FlowNode` base class code and also the code for + :class:`FlowNodeMaker`. 
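To make the base-class contract described above concrete, here is a hypothetical minimal node. The overridden names are those the base class declares, but the `calculate_subtimestep` signature is an assumption; check `flow_graph.py` for the authoritative interface (note the float-valued properties are expected to return `np.float64`, which `FlowGraph` can now optionally verify via `type_check_nodes`).

```python
import numpy as np

from pywatershed.base.control import Control
from pywatershed.base.flow_graph import FlowNode
from pywatershed.constants import nan, zero


class TallyPassThroughNode(FlowNode):
    """Hypothetical node: passes flow through while tallying total outflow.

    total_outflow_tally is an extra public variable that FlowGraph could
    collect via addtl_output_vars, per the mechanism described above.
    """

    def __init__(self, control: Control):
        self.control = control
        self._outflow = zero
        self.total_outflow_tally = zero

    # signature assumed from the FlowNode base class; verify in your version
    def calculate_subtimestep(self, isubstep, inflow_upstream, inflow_lateral):
        self._outflow = inflow_upstream + inflow_lateral
        self.total_outflow_tally = self.total_outflow_tally + self._outflow

    def advance(self):
        pass

    def finalize_timestep(self):
        pass

    @property
    def outflow(self) -> np.float64:
        return self._outflow

    @property
    def outflow_substep(self) -> np.float64:
        return self._outflow

    @property
    def storage_change(self) -> np.float64:
        return zero  # nothing is stored

    @property
    def storage(self) -> np.float64:
        return nan  # storage is undefined for a pass-through

    @property
    def sink_source(self) -> np.float64:
        return zero
```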
+
 Examples:
 ---------
@@ -320,7 +336,11 @@ def __init__(
 parameters: Parameters,
 inflows: adaptable,
 node_maker_dict: dict,
+ addtl_output_vars: list[str] = None,
+ params_not_to_netcdf: list[str] = None,
 budget_type: Literal["defer", None, "warn", "error"] = "defer",
+ allow_disconnected_nodes: bool = False,
+ type_check_nodes: bool = False,
 verbose: bool = None,
 ):
 """Initialize a FlowGraph.
@@ -336,21 +356,40 @@
 node_maker_dict: A dictionary of FlowNodeMaker instances with
 keys/names supplied in the parameters, e.g.
 {key1: flow_node_maker_instance, ...}.
+ params_not_to_netcdf: A list of string names for parameters to NOT
+ write to NetCDF output files. By default all parameters are
+ included in each file written.
+ addtl_output_vars: A list of string names for variables to collect
+ for NetCDF output from FlowNodes. These variables do not have to
+ be available in all FlowNodes but must be present in at least
+ one.
 budget_type: one of ["defer", None, "warn", "error"] with "defer"
 being the default and defering to control.options["budget_type"]
 when available. When control.options["budget_type"] is not
 avaiable, budget_type is set to "warn".
+ allow_disconnected_nodes: If False, an error is thrown when
+ disconnected nodes are found in the graph. This happens often
+ in PRMS, so allowing is a convenience but bad practice.
+ type_check_nodes: Intended for debugging if FlowNodes are not
+ compliant with their required float return values, which can
+ cause a lot of warnings or errors.
+ verbose: Print extra diagnostic messages?

 The `parameters` argument is a :class:`Parameters` object which
 contains the following data:

 * node_maker_name: A list or np.array of the FlowNodeMaker name for
 each node.
- * node_maker_index: A list or np.array of the indices to ask for from
+ * node_maker_index: An np.array of the indices to ask for from
 the associated/collated FlowNodeMaker (above) for each node
- * to_graph_index: the index of the downstream index in the FlowGraph
- with -1 indicating an outflow node. This must specify a DAG.
+ * node_maker_id: An np.array of the integer ids used for each node by
+ its node maker. Not used internally but necessary or helpful to
+ users in post-processing to identify nodes. Ids may not be unique in
+ this list but should probably be unique to each node maker.
+ * to_graph_index: np.array of the index of the downstream node in the
+ FlowGraph with -1 indicating an outflow node. This must specify a
+ DAG.

 The inputs inflows, node_maker_name, node_maker_index, and
 to_graph_index are collated. The order of execution of the graph is not
@@ -390,6 +429,7 @@ def get_parameters() -> tuple:
 return (
 "node_maker_name",
 "node_maker_index",
+ "node_maker_id",
 "to_graph_index",
 )
@@ -399,6 +439,7 @@ def get_inputs() -> tuple:
 @staticmethod
 def get_init_values() -> dict:
+ """FlowNode initial values."""
 return {
 "outflows": nan,
 "node_upstream_inflows": nan,
@@ -465,19 +506,27 @@ def _init_graph(self) -> None:
 )

 # use networkx to calculate the Directed Acyclic Graph
- self._graph = nx.DiGraph()
- self._graph.add_edges_from(connectivity)
-
- # Make sure the user isnt suppling disconnected nodes
- assert len(self._graph) == self.nnodes, "Disconnected nodes present."
- # should we allow disconnected nodes? 
seems not - # these are handled in PRMSChannel if we change our minds - if self.nnodes > 1: + self._graph = nx.DiGraph() + self._graph.add_edges_from(connectivity) node_order = list(nx.topological_sort(self._graph)) else: node_order = [0] + # Check if the user is suppling disconnected nodes + disconnected_nodes_present = len(self._graph) != self.nnodes + + if disconnected_nodes_present: + if not self._allow_disconnected_nodes: + raise ValueError("Disconnected nodes present in FlowGraph.") + else: + warn("Disconnected nodes present in FlowGraph.") + wh_mask_set = set(np.where(self._outflow_mask)[0]) + node_ord_set = set(node_order) + mask_not_node_ord = list(wh_mask_set - node_ord_set) + if len(mask_not_node_ord): + node_order = mask_not_node_ord + node_order + self._node_order = np.array(node_order, dtype="int64") # any performance for doing a hash table up front? @@ -494,6 +543,80 @@ def _init_graph(self) -> None: ) ] + # < + # Deal with additional output variables requested + if self._addtl_output_vars is None: + self._addtl_output_vars = [] + + unique_makers = np.unique(params["node_maker_name"]) + + self._addtl_output_vars_wh_collect = {} + for vv in self._addtl_output_vars: + inds_to_collect = [] + for uu in unique_makers: + wh_uu = np.where(params["node_maker_name"] == uu) + if hasattr(self._nodes[wh_uu[0][0]], vv): + # do we need to get/set the type here? Would have to + # check the type over all nodes/node makers + # for now I'll just throw and error if it is not float64 + msg = "Only currently handling float64, new code required" + assert self._nodes[wh_uu[0][0]][vv].dtype == "float64", msg + inds_to_collect += wh_uu[0].tolist() + # << + if len(inds_to_collect): + self._addtl_output_vars_wh_collect[vv] = inds_to_collect + + msg = "Variable already set on FlowGraph." + for kk in self._addtl_output_vars_wh_collect.keys(): + assert not hasattr(self, kk), msg + self[kk] = np.full([self.nnodes], np.nan) + # TODO: find some other way of getting metadata here. + # it could come from the node itself, i suppose, or + # could come from arguments or static source. Node seems most + # elegant. I suppose there could be conflicts if multiple + # nodes have the same variable and different metadata. 
+ self.meta[kk] = {"dims": ("nnodes",), "type": "float64"} + + def initialize_netcdf( + self, + output_dir: [str, pl.Path] = None, + separate_files: bool = None, + budget_args: dict = None, + output_vars: list = None, + extra_coords: dict = None, + ) -> None: + if self._netcdf_initialized: + msg = ( + f"{self.name} class previously initialized netcdf output " + f"in {self._netcdf_output_dir}" + ) + warn(msg) + return + + params = self._params.parameters + + skip_params = self._params_not_to_netcdf + if skip_params is None: + skip_params = [] + + extra_coords = {"node_coord": {}} + for param_name in params.keys(): + if param_name in skip_params + ["node_coord"]: + continue + + extra_coords["node_coord"][param_name] = params[param_name] + + # this gets the budget initialization too + super().initialize_netcdf( + output_dir=output_dir, + separate_files=separate_files, + output_vars=output_vars, + extra_coords=extra_coords, + addtl_output_vars=list(self._addtl_output_vars_wh_collect.keys()), + ) + + return + def _advance_variables(self) -> None: for node in self._nodes: node.advance() @@ -524,6 +647,8 @@ def calculate(self, time_length: float, n_substeps: int = 24) -> None: self.inflows[inode], ) # Get the outflows back + if self._type_check_nodes: + type_check(self._nodes[inode].outflow_substep) self._node_outflow_substep[inode] = self._nodes[ inode ].outflow_substep @@ -545,11 +670,24 @@ def calculate(self, time_length: float, n_substeps: int = 24) -> None: ) for ii in range(self.nnodes): + if self._type_check_nodes: + type_check(self._nodes[ii].outflow) + type_check(self._nodes[ii].storage_change) + type_check(self._nodes[ii].storage) + type_check(self._nodes[ii].sink_source) + self.node_outflows[ii] = self._nodes[ii].outflow self.node_storage_changes[ii] = self._nodes[ii].storage_change self.node_storages[ii] = self._nodes[ii].storage self.node_sink_source[ii] = self._nodes[ii].sink_source + for ( + add_var_name, + add_var_inds, + ) in self._addtl_output_vars_wh_collect.items(): + for ii in add_var_inds: + self[add_var_name][ii] = self._nodes[ii][add_var_name] + self.node_negative_sink_source[:] = -1 * self.node_sink_source # global mass balance term diff --git a/pywatershed/base/process.py b/pywatershed/base/process.py index d9ab73ed..903a0808 100644 --- a/pywatershed/base/process.py +++ b/pywatershed/base/process.py @@ -471,6 +471,8 @@ def initialize_netcdf( output_dir: [str, pl.Path] = None, separate_files: bool = None, output_vars: list = None, + extra_coords: dict = None, + addtl_output_vars: list = None, ) -> None: """Initialize NetCDF output. 
@@ -529,23 +531,22 @@ def initialize_netcdf( self._netcdf_initialized = False return + if addtl_output_vars is not None: + self._netcdf_output_vars += addtl_output_vars + self._netcdf = {} if self._netcdf_separate: - # make working directory self._netcdf_output_dir.mkdir(parents=True, exist_ok=True) - for variable_name in self.variables: - if (self._netcdf_output_vars is not None) and ( - variable_name not in self._netcdf_output_vars - ): - continue + for variable_name in self._netcdf_output_vars: nc_path = self._netcdf_output_dir / f"{variable_name}.nc" self._netcdf[variable_name] = NetCdfWrite( - nc_path, - self._params.coords, - [variable_name], - {variable_name: self.meta[variable_name]}, - {"process class": self.name}, + name=nc_path, + coordinates=self._params.coords, + variables=[variable_name], + var_meta={variable_name: self.meta[variable_name]}, + extra_coords=extra_coords, + global_attrs={"process class": self.name}, ) else: @@ -557,11 +558,12 @@ def initialize_netcdf( initial_variable = the_out_vars[0] self._netcdf_output_dir.mkdir(parents=True, exist_ok=True) self._netcdf[initial_variable] = NetCdfWrite( - self._netcdf_output_dir / f"{self.name}.nc", - self._params.coords, - self._netcdf_output_vars, - self.meta, - {"process class": self.name}, + name=self._netcdf_output_dir / f"{self.name}.nc", + coordinates=self._params.coords, + variables=self._netcdf_output_vars, + var_meta=self.meta, + extra_coords=extra_coords, + global_attrs={"process class": self.name}, ) for variable in the_out_vars[1:]: self._netcdf[variable] = self._netcdf[initial_variable] @@ -577,11 +579,7 @@ def _output_netcdf(self) -> None: """ if self._netcdf_initialized: time_added = False - for variable in self.variables: - if (self._netcdf_output_vars is not None) and ( - variable not in self._netcdf_output_vars - ): - continue + for variable in self._netcdf_output_vars: if not time_added or self._netcdf_separate: time_added = True self._netcdf[variable].add_simulation_time( @@ -602,7 +600,7 @@ def _finalize_netcdf(self) -> None: None """ if self._netcdf_initialized: - for idx, variable in enumerate(self.variables): + for idx, variable in enumerate(self._netcdf_output_vars): if (self._netcdf_output_vars is not None) and ( variable not in self._netcdf_output_vars ): diff --git a/pywatershed/hydrology/obsin_node.py b/pywatershed/hydrology/obsin_flow_node.py similarity index 91% rename from pywatershed/hydrology/obsin_node.py rename to pywatershed/hydrology/obsin_flow_node.py index 9dfa44e2..b9e3eabe 100644 --- a/pywatershed/hydrology/obsin_node.py +++ b/pywatershed/hydrology/obsin_flow_node.py @@ -6,7 +6,7 @@ from pywatershed.constants import nan, zero -class ObsInNode(FlowNode): +class ObsInFlowNode(FlowNode): """A FlowNode that takes inflows but returns observed/specified flows. This FlowNode replicates the obsin and obsout seg functionality in PRMS but @@ -22,7 +22,7 @@ def __init__( control: Control, node_obs_data: pd.Series, ): - """Initialize an ObsInNode. + """Initialize an ObsInFlowNode. Args: control: a Control object. @@ -91,8 +91,8 @@ def sink_source(self): return self._sink_source -class ObsInNodeMaker(FlowNodeMaker): - """A FlowNodeMaker for ObsInNode. +class ObsInFlowNodeMaker(FlowNodeMaker): + """A FlowNodeMaker for ObsInFlowNode. See :class:`FlowGraph` for related examples and discussion. """ @@ -102,17 +102,17 @@ def __init__( parameters: Parameters, obs_data: pd.DataFrame, ) -> None: - """Initialize a ObsinNodeMaker. + """Initialize a ObsInFlowNodeMaker. 
Args:
 parameters: A pywatershed Parameters object.
 obs_data: A pandas DataFrame of observations given by
 pyPRMS.Streamflow.
 """
- self.name = "PassThroughNodeMaker"
+ self.name = "ObsInFlowNodeMaker"
 self._parameters = parameters
 self._obs_data = obs_data

 def get_node(self, control: Control, index: int):
 node_poi_id = self._parameters.parameters["poi_gage_id"][index]
- return ObsInNode(control, self._obs_data[node_poi_id])
+ return ObsInFlowNode(control, self._obs_data[node_poi_id])
diff --git a/pywatershed/hydrology/pass_through_node.py b/pywatershed/hydrology/pass_through_flow_node.py
similarity index 81%
rename from pywatershed/hydrology/pass_through_node.py
rename to pywatershed/hydrology/pass_through_flow_node.py
index a104b03a..c91e592e 100644
--- a/pywatershed/hydrology/pass_through_node.py
+++ b/pywatershed/hydrology/pass_through_flow_node.py
@@ -3,14 +3,14 @@
 from ..constants import nan, zero

-class PassThroughNode(FlowNode):
+class PassThroughFlowNode(FlowNode):
 """A FlowNode instance that gives what it takes and dosent store.

- See :class:`FlowGraph` for a worked example using PassThroughNode.
+ See :class:`FlowGraph` for a worked example using PassThroughFlowNode.
 """

 def __init__(self, control: Control):
- """Initialize a PassThroughNode.
+ """Initialize a PassThroughFlowNode.

 Args:
 control: A control object.
@@ -62,17 +62,17 @@ def sink_source(self):
 return zero

-class PassThroughNodeMaker(FlowNodeMaker):
- """A FlowNodeMaker of PassThroughNodes.
+class PassThroughFlowNodeMaker(FlowNodeMaker):
+ """A FlowNodeMaker of PassThroughFlowNodes.

- See :class:`FlowGraph` for a worked example using PassThroughNode.
+ See :class:`FlowGraph` for a worked example using PassThroughFlowNode.
 """

 def __init__(
 self,
 ) -> None:
- """Initialize a PassThroughNodeMaker."""
- self.name = "PassThroughNodeMaker"
+ """Initialize a PassThroughFlowNodeMaker."""
+ self.name = "PassThroughFlowNodeMaker"

 def get_node(self, control: Control, index: int):
- return PassThroughNode(control)
+ return PassThroughFlowNode(control)
diff --git a/pywatershed/hydrology/prms_canopy.py b/pywatershed/hydrology/prms_canopy.py
index 696bb0f2..91a9bd38 100644
--- a/pywatershed/hydrology/prms_canopy.py
+++ b/pywatershed/hydrology/prms_canopy.py
@@ -11,7 +11,6 @@
 CovType,
 HruType,
 dnearzero,
- nan,
 nearzero,
 numba_num_threads,
 zero,
@@ -144,7 +143,7 @@ def get_init_values() -> dict:
 "net_snow": zero,
 "intcp_changeover": zero,
 "intcp_evap": zero,
- "intcp_form": nan,  # = RAIN bool would have to match RAIN/SNOW
+ "intcp_form": 0,  # = RAIN bool would have to match RAIN/SNOW
 "intcp_stor": zero,
 "intcp_transp_on": 0,  # could make boolean
 "hru_intcpevap": zero,
@@ -476,7 +475,7 @@
 # Keep the f90 call signature consistent with the args in
 # python/numba.
- intcp_form = np.full_like(hru_rain, np.nan, dtype="int32") + intcp_form = np.full_like(hru_rain, -9999, dtype="int32") for aa in prange(nactive_hrus): i = active_hrus[aa] diff --git a/pywatershed/hydrology/prms_channel_flow_graph.py b/pywatershed/hydrology/prms_channel_flow_graph.py index 75487492..8d153bfd 100644 --- a/pywatershed/hydrology/prms_channel_flow_graph.py +++ b/pywatershed/hydrology/prms_channel_flow_graph.py @@ -1,4 +1,5 @@ import pathlib as pl +from copy import deepcopy from typing import Literal, Union import numba as nb @@ -623,8 +624,12 @@ def prms_channel_flow_graph_postprocess( new_nodes_maker_dict: dict, new_nodes_maker_names: list, new_nodes_maker_indices: list, + new_nodes_maker_ids: list, new_nodes_flow_to_nhm_seg: list, + addtl_output_vars: list[str] = None, + allow_disconnected_nodes: bool = False, budget_type: Literal["defer", None, "warn", "error"] = "defer", + type_check_nodes: bool = False, ) -> FlowGraph: """Add nodes to a PRMSChannel-based FlowGraph to run from known inputs. @@ -649,14 +654,19 @@ def prms_channel_flow_graph_postprocess( control: The control object for the run input_dir: the directory where the input files are found prms_channel_dis: the PRMSChannel discretization object - prms_channel_params: the PRMSChannel parameters object + prms_channel_params: the PRMSChannel parameters object. new_nodes_maker_dict: a dictionary of key/name and and instantiated - NodeMakers - new_nodes_maker_names: collated list of what node makers to use + NodeMakers. + new_nodes_maker_names: collated list of what node makers to use. new_nodes_maker_indices: collated list of indices relative to each - NodeMaker - new_nodes_flow_to_nhm_seg: collated list describing the nhm_seg to - which the node will flow. + NodeMaker. + new_nodes_maker_ids: Collated list of ids relative to each NodeMaker. + new_nodes_flow_to_nhm_seg: collated list describing the nhm_segs to + which the new nodes will flow. Use of non-positive entries specifies + the zero-based index for flowing to nodes specified in these + collated parameters, allowing these new nodes to be added in + groups, in series to the existing NHM FlowGraph. Note that a new + node may not be placed below any outflow point of the domain. budget_type: one of ["defer", None, "warn", "error"] with "defer" being the default and defering to control.options["budget_type"] when available. When control.options["budget_type"] is not avaiable, @@ -678,6 +688,7 @@ def prms_channel_flow_graph_postprocess( new_nodes_maker_dict, new_nodes_maker_names, new_nodes_maker_indices, + new_nodes_maker_ids, new_nodes_flow_to_nhm_seg, ) @@ -722,7 +733,10 @@ def advance(self) -> None: parameters=params_flow_graph, inflows=inflows_graph, node_maker_dict=node_maker_dict, + addtl_output_vars=addtl_output_vars, budget_type=budget_type, + type_check_nodes=type_check_nodes, + allow_disconnected_nodes=allow_disconnected_nodes, ) return flow_graph @@ -735,8 +749,12 @@ def prms_channel_flow_graph_to_model_dict( new_nodes_maker_dict: dict, new_nodes_maker_names: list, new_nodes_maker_indices: list, + new_nodes_maker_ids: list, new_nodes_flow_to_nhm_seg: list, + addtl_output_vars: list[str] = None, graph_budget_type: Literal["defer", None, "warn", "error"] = "defer", + allow_disconnected_nodes: bool = False, + type_check_nodes: bool = False, ) -> dict: """Add nodes to a PRMSChannel-based FlowGraph within a Model's model_dict. 
@@ -767,8 +785,13 @@ def prms_channel_flow_graph_to_model_dict(
 new_nodes_maker_names: collated list of what node makers to use
 new_nodes_maker_indices: collated list of indices relative to each
 NodeMaker
- new_nodes_flow_to_nhm_seg: collated list describing the nhm_seg to
- which the node will flow.
+ new_nodes_maker_ids: Collated list of ids relative to each NodeMaker.
+ new_nodes_flow_to_nhm_seg: collated list describing the nhm_segs to
+ which the new nodes will flow. Use of non-positive entries specifies
+ the zero-based index for flowing to nodes specified in these
+ collated parameters, allowing these new nodes to be added in
+ groups, in series to the existing NHM FlowGraph. Note that a new
+ node may not be placed below any outflow point of the domain.
 graph_budget_type: one of ["defer", None, "warn", "error"] with
 "defer" being the default and defering to
 control.options["budget_type"] when available. When
@@ -787,6 +810,7 @@
 new_nodes_maker_dict,
 new_nodes_maker_names,
 new_nodes_maker_indices,
+ new_nodes_maker_ids,
 new_nodes_flow_to_nhm_seg,
 )
@@ -798,8 +822,8 @@ def exchange_calculation(self) -> None:
 / s_per_time
 )
- # This zero in the last index means zero inflows to the pass through
- # node
+ # These zeros at the end mean zero inflows and sinks in the added
+ # nodes
 self.inflows[:] = zero
 # sinks is an HRU variable, its accounting in budget is fine because
 # global collapses it to a scalar before summing over variables
@@ -859,6 +883,8 @@
 "parameters": params_flow_graph,
 "dis": None,
 "budget_type": graph_budget_type,
+ "addtl_output_vars": addtl_output_vars,
+ "allow_disconnected_nodes": allow_disconnected_nodes,
 },
 }
@@ -873,6 +899,7 @@ def _build_flow_graph_inputs(
 new_nodes_maker_dict: dict,
 new_nodes_maker_names: list,
 new_nodes_maker_indices: list,
+ new_nodes_maker_ids: list,
 new_nodes_flow_to_nhm_seg: list,
 ):
 prms_channel_flow_makers = [
@@ -882,46 +909,88 @@
 ]
 assert len(prms_channel_flow_makers) == 0

- assert len(new_nodes_maker_names) == len(new_nodes_maker_indices), "nono"
- assert len(new_nodes_maker_names) == len(new_nodes_flow_to_nhm_seg), "NONO"
+ msg = "Inconsistency in collated inputs"
+ assert len(new_nodes_maker_names) == len(new_nodes_maker_indices), msg
+ assert len(new_nodes_maker_names) == len(new_nodes_flow_to_nhm_seg), msg
 # I think this is the only condition to check with
 # new_nodes_flow_to_nhm_seg
 assert len(new_nodes_flow_to_nhm_seg) == len(
 np.unique(new_nodes_flow_to_nhm_seg)
- ), "OHNO"
+ ), "Can't have more than one new node flowing to an existing or new node." 
nseg = prms_channel_params.dims["nsegment"] - nnodes = nseg + len(new_nodes_maker_names) + nnew = len(new_nodes_maker_names) + nnodes = nseg + nnew node_maker_name = ["prms_channel"] * nseg + new_nodes_maker_names + maxlen = np.array([len(nn) for nn in node_maker_name]).max() + # need this to be unicode U# for keys and searching below + node_maker_name = np.array(node_maker_name, dtype=f"|U{maxlen}") node_maker_index = np.array( np.arange(nseg).tolist() + new_nodes_maker_indices ) + node_maker_id = np.append( + prms_channel_dis.coords["nhm_seg"], np.array(new_nodes_maker_ids) + ) to_graph_index = np.zeros(nnodes, dtype=np.int64) dis_params = prms_channel_dis.parameters tosegment = dis_params["tosegment"] - 1 # fortran to python indexing to_graph_index[0:nseg] = tosegment + # The new nodes which flow to other new nodes have to be added after + # the nodes flowing to existing nodes with nhm_seg ids. + to_new_nodes_inds_in_added = {} + added_new_nodes_inds_in_graph = {} for ii, nhm_seg in enumerate(new_nodes_flow_to_nhm_seg): + if nhm_seg < 1: + # non-positive entries are negated indices into the collated + # input lists, for new nodes connected in series. + to_new_nodes_inds_in_added[ii] = -1 * nhm_seg + continue + wh_intervene_above_nhm = np.where(dis_params["nhm_seg"] == nhm_seg) wh_intervene_below_nhm = np.where( tosegment == wh_intervene_above_nhm[0][0] ) # have to map to the graph from an index found in prms_channel wh_intervene_above_graph = np.where( - (np.array(node_maker_name) == "prms_channel") + (node_maker_name == "prms_channel") & (node_maker_index == wh_intervene_above_nhm[0][0]) ) wh_intervene_below_graph = np.where( - (np.array(node_maker_name) == "prms_channel") + (node_maker_name == "prms_channel") & np.isin(node_maker_index, wh_intervene_below_nhm) ) to_graph_index[nseg + ii] = wh_intervene_above_graph[0][0] to_graph_index[wh_intervene_below_graph] = nseg + ii + added_new_nodes_inds_in_graph[ii] = nseg + ii + + # < + to_new_nodes_inds_remaining = deepcopy(to_new_nodes_inds_in_added) + # worst case, we have to iterate once per item in this list when its + # items are in the wrong order + for itry in range(len(to_new_nodes_inds_remaining)): + for input_ind in list(to_new_nodes_inds_remaining.keys()): + to_ind_remain = to_new_nodes_inds_remaining[input_ind] + if to_ind_remain not in added_new_nodes_inds_in_graph.keys(): + continue + flows_to_ind = added_new_nodes_inds_in_graph[to_ind_remain] + flows_from_inds = np.where(to_graph_index == flows_to_ind) + + to_graph_index[nseg + input_ind] = flows_to_ind + to_graph_index[flows_from_inds] = nseg + input_ind + added_new_nodes_inds_in_graph[input_ind] = nseg + input_ind + del to_new_nodes_inds_remaining[input_ind] + + if len(to_new_nodes_inds_remaining): + msg = "Unable to connect some new nodes in-series."
+ raise ValueError(msg) - params_flow_graph = Parameters( + # < + param_dict = dict( dims={ "nnodes": nnodes, }, @@ -931,16 +1000,18 @@ data_vars={ "node_maker_name": node_maker_name, "node_maker_index": node_maker_index, + "node_maker_id": node_maker_id, "to_graph_index": to_graph_index, }, metadata={ "node_coord": {"dims": ["nnodes"]}, "node_maker_name": {"dims": ["nnodes"]}, "node_maker_index": {"dims": ["nnodes"]}, + "node_maker_id": {"dims": ["nnodes"]}, "to_graph_index": {"dims": ["nnodes"]}, }, - validate=True, ) + params_flow_graph = Parameters(**param_dict, validate=True) # make available at top level __init__ node_maker_dict = { diff --git a/pywatershed/hydrology/starfit.py b/pywatershed/hydrology/starfit.py index 18aa4738..5e5926b6 100644 --- a/pywatershed/hydrology/starfit.py +++ b/pywatershed/hydrology/starfit.py @@ -531,12 +531,18 @@ def min_nor( class StarfitFlowNode(FlowNode): - """STARFIT FlowNode: Storage Targets And Release Function Inference Tool + r"""STARFIT FlowNode: Storage Targets And Release Function Inference Tool This :class:`FlowNode` implementation allows STARFIT solutions to be computed in a :class:`FlowGraph`. The solution has the option for subtimestep or daily computations. + Daily computations use the same outflow on all substeps of a day, and + outflows and storages are calculated on the last subtimestep. On the + first subtimestep, the inflow of the first subtimestep is used as + representative of the previous day's mean inflow in order to calculate + an average outflow for the first timestep. + The STARFIT reference: Sean W.D. Turner, Jennie Clarice Steyaert, Laura Condon, Nathalie Voisin, @@ -546,8 +552,10 @@ https://github.com/IMMM-SFA/starfit - Adapted from STARFIT implementation in the MOSART-WM model: - https://github.com/IMMM-SFA/mosartwmpy/blob/main/mosartwmpy/reservoirs/istarf.py + Adapted from the STARFIT implementation in the [MOSART-WM model](https://github.com/IMMM-SFA/mosartwmpy/blob/main/mosartwmpy/reservoirs/istarf.py): + Thurber, T., Rexer, E., Vernon, C., Sun, N., Turner, S., Yoon, J., + Broman, D., & Voisin, N. (2022). mosartwmpy (Version 0.2.7) + [Computer software]. https://github.com/IMMM-SFA/mosartwmpy See :class:`FlowGraph` for discussion and a worked example. The notebook `examples/06_flow_graph_starfit.ipynb `__ @@ -669,6 +677,7 @@ def __init__( self._m3ps_to_MCM = nhrs_substep * 60 * 60 / 1.0e6 self._MCM_to_m3ps = 1.0 / self._m3ps_to_MCM + # These need to be numpy pointers for the Budget! def nan1d(): return np.zeros(1) * nan @@ -751,6 +760,7 @@ def nan1d(): initial_storage = np.where( wh_initial_storage_nan, nor_mean_cap, self._initial_storage ) + # print(f"{nor_mean_cap=}") # another way to get this info out?
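The "numpy pointers" comment above is the key constraint: the Budget holds references to these one-element arrays, so every update must be made in place. A standalone sketch of the idea (variable names invented for illustration):

    import numpy as np

    def nan1d():
        return np.zeros(1) * np.nan  # one-element array initialized to NaN

    lake_outflow = nan1d()
    budget_term = lake_outflow      # the Budget keeps this same array object
    lake_outflow[:] = 3.14          # in-place update, visible via budget_term
    assert budget_term[0] == 3.14
    lake_outflow = np.array([2.7])  # rebinding would silently break the link
    assert budget_term[0] == 3.14   # the Budget still sees the old array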
# set lake_storage from initial_storage when start is not available self._lake_storage_sub[:] = np.where( np.isnat(self._start_time), initial_storage, nan ) @@ -844,23 +854,33 @@ def advance(self): return @property - def outflow(self): - return self._lake_outflow + def outflow(self) -> np.float64: + return self._lake_outflow[0] + + @property + def outflow_substep(self) -> np.float64: + return self._lake_outflow_sub[0] + + @property + def storage_change(self) -> np.float64: + return self._lake_storage_change_flow_units[0] @property - def outflow_substep(self): - return self._lake_outflow_sub + def storage(self) -> np.float64: + return self._lake_storage[0] @property - def storage_change(self): - return self._lake_storage_change_flow_units + def release(self) -> np.float64: + "The release component of the STARFIT outflow." + return self._lake_release[0] @property - def storage(self): - return self._lake_storage + def spill(self) -> np.float64: + "The spill component of the STARFIT outflow." + return self._lake_spill[0] @property - def sink_source(self): + def sink_source(self) -> np.float64: return zero def _calculate_subtimestep_daily( @@ -1111,7 +1131,7 @@ def _calculate_subtimestep_hourly( class StarfitFlowNodeMaker(FlowNodeMaker): - """STARFIT FlowNodeMaker: Storage Targets And Release Function Inference Tool. + r"""STARFIT FlowNodeMaker: Storage Targets And Release Function Inference Tool. This FlowNodeMaker instantiates :class:`StarfitFlowNode`\ s for :class:`FlowGraph`. diff --git a/pywatershed/parameters/prms_parameters.py b/pywatershed/parameters/prms_parameters.py index e09bc99e..40cce821 100644 --- a/pywatershed/parameters/prms_parameters.py +++ b/pywatershed/parameters/prms_parameters.py @@ -55,7 +55,9 @@ def default(self, obj): def _json_load(json_filename): - pars = json.load(open(json_filename)) + with open(json_filename) as ff: + pars = json.load(ff) + # need to convert lists to numpy arrays for k, v in pars.items(): if isinstance(v, list): @@ -107,12 +109,13 @@ def __init__( def parameters_to_json(self, json_filename) -> None: """write the parameters dictionary out to a json file""" - json.dump( - {**self.dims, **self.parameters}, - open(json_filename, "w"), - indent=4, - cls=JSONParameterEncoder, - ) + with open(json_filename, "w") as ff: + json.dump( + {**self.dims, **self.parameters}, + ff, + indent=4, + cls=JSONParameterEncoder, + ) return None @staticmethod diff --git a/pywatershed/parameters/starfit_parameters.py b/pywatershed/parameters/starfit_parameters.py index 6afa2fee..3ed2f34f 100644 --- a/pywatershed/parameters/starfit_parameters.py +++ b/pywatershed/parameters/starfit_parameters.py @@ -48,20 +48,39 @@ class StarfitParameters(Parameters): - """ - Starfit parameter class - - The GRanD data + """Starfit parameter class. + + This parameter class provides STARFIT parameters for modeling. This + class does NOT calculate the parameters from inputs (e.g. as ISTARF-CONUS + did using ResOpsUS), it simply provides the format for the model to get + the parameter data. + + The supplied data can come from any source. The method + `from_istarf_conus_grand` uses existing ISTARF-CONUS and GRanD data to + create a parameter object for the user. + + References: + + **ISTARF-CONUS (Inferred Storage Targets and Release Functions - Continental + US)**: Sean W.D.
Turner, Jennie Clarice Steyaert, Laura Condon, + Nathalie Voisin, Water storage and release policies for all large + reservoirs of conterminous United States, Journal of Hydrology, + Volume 603, Part A, 2021, 126843, ISSN 0022-1694, + https://doi.org/10.1016/j.jhydrol.2021.126843. + https://zenodo.org/records/4602277 + + **GRanD (Global Reservoir and Dam) database**: Lehner, Bernhard, Catherine + Reidy Liermann, Carmen Revenga, Charles + Vörösmarty, Balazs Fekete, Philippe Crouzet, Petra Döll et al. "High‐ + resolution mapping of the world's reservoirs and dams for sustainable + river‐flow management." Frontiers in Ecology and the Environment 9, no. 9 + (2011): 494-502. https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013 - The istarf data - https://zenodo.org/record/4602277#.ZCtYj-zMJqs - - The resops data - https://zenodo.org/record/5893641#.ZCtakuzMJqs - - # add citiatons. add this information to the starfit model too - # add a working example + **ResOpsUS**: Steyaert, Jennie C., Laura E. Condon, Sean WD Turner, and + Nathalie Voisin. "ResOpsUS, a dataset of historical reservoir operations + in the contiguous United States." Scientific Data 9, no. 1 (2022): 34. + https://zenodo.org/records/6612040 Parameters ---------- @@ -156,7 +175,7 @@ def from_netcdf( def from_istarf_conus_grand( grand_file: Union[pl.Path, str], istarf_file: Union[pl.Path, str] = None, - files_directory: Union[pl.Path, str] = None, + files_directory: Union[pl.Path, str] = pl.Path("."), grand_ids: list = None, ): """Build parameter object from istarf-conus and the GRanD v1.3 sources. @@ -175,14 +194,14 @@ Starfit. Args: + grand_file: a path to an existing dbf or shp file. If the file does not + exist, an error will be thrown and you must download it manually + at https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013 istarf_file: a path to an existing file. If the file does not exist or is None then the file will be downloaded to files_directory. You can download the file yourself here - https://zenodo.org/records/4602277/files/ISTARF-CONUS.csv?download=1 - grand_file: a path to an existing dbf or shp file. If the file does not - exist, an error will be thrown and you must download it manually - at https://ln.sync.com/dl/bd47eb6b0/anhxaikr-62pmrgtq-k44xf84f-pyz4atkm/view/default/447819520013 - + https://zenodo.org/records/4602277/files/ISTARF-CONUS.csv?download=1 + files_directory: A local directory to which to download the file. grand_ids: a subset of grand_ids to keep. Examples: @@ -222,7 +241,7 @@ ... ) ... ) - """ # noqa: 501 + """ # noqa: E501 grand_ds = _get_grand(grand_file) istarf_ds = _get_istarf_conus(istarf_file, files_directory) @@ -260,7 +279,7 @@ nreservoirs = len(ds.nreservoirs) for vv in ["start_time", "end_time"]: ds[vv] = xr.Variable( - "nreservoirs", np.array([nat] * nreservoirs, " None: """ def str2date(x): - return dt.datetime.strptime(x.decode("utf-8"), "%Y-%m-%d") + return dt.datetime.strptime(x, "%Y-%m-%d") all_data = [] ntimes = 0 diff --git a/pywatershed/utils/mmr_to_mf6_dfw.py b/pywatershed/utils/mmr_to_mf6_dfw.py index 7f14318a..8a8a8458 100644 --- a/pywatershed/utils/mmr_to_mf6_dfw.py +++ b/pywatershed/utils/mmr_to_mf6_dfw.py @@ -67,7 +67,7 @@ class MmrToMf6Dfw: Units of meters from metadata (these are converted to units requested for modflow, which we currently hardcode to meters and seconds.)
* seg_slope: - Passed directly to SWF DFW. Also used to calculate seg_mid_elevation if + Passed directly to CHF DFW. Also used to calculate seg_mid_elevation if not present in parameters. See its description below. Unitless slope. * hru_segment: Used to calculate seg_mid_elevation if not present in parameters. See @@ -81,7 +81,7 @@ class MmrToMf6Dfw: This is (apparently) the NHD bank-full width present in the PRMS parameter files but unused in PRMS. Units are in meters. * mann_n: - Mannings N coefficient for MMR. Passed directly to SWF DFW. Units in + Mannings N coefficient for MMR. Passed directly to CHF DFW. Units in seconds / meter ** (1/3) according to the metadata. The following optional parameters can be user-supplied but are NOT part of @@ -140,7 +140,7 @@ def __init__( # time_units="seconds", save_flows: bool = True, time_zone: str = "UTC", - write_on_init: bool = True, + write_on_init: bool = False, chd_options: dict = None, cxs_options: dict = None, disv1d_options: dict = None, @@ -246,7 +246,7 @@ def __init__( self._handle_control_parameters() self._set_sim() self._set_tdis() - self._set_swf() + self._set_chf() self._set_disv1d() self._set_flw() self._set_ims() @@ -389,8 +389,8 @@ def _set_tdis(self): perioddata=tdis_rc, ) - def _set_swf(self): - self._swf = flopy.mf6.ModflowSwf( + def _set_chf(self): + self._chf = flopy.mf6.ModflowChf( self._sim, modelname=self._sim_name, save_flows=self._save_flows ) @@ -405,16 +405,6 @@ def _set_disv1d(self): if "idomain" not in self._disv1d_options.keys(): self._disv1d_options["idomain"] = 1 - if "length" not in self._disv1d_options.keys(): - # meters - # unit-ed quantities - segment_units = self._units(meta.parameters["seg_length"]["units"]) - self._segment_length = parameters["seg_length"] - self._segment_length = self._segment_length * segment_units - self._disv1d_options["length"] = ( - self._segment_length.to_base_units().magnitude - ) - if "width" not in self._disv1d_options.keys(): # meters, per metadata self._disv1d_options["width"] = parameters["seg_width"] @@ -432,13 +422,13 @@ def _set_disv1d(self): # < self._disv1d_options["bottom"] = self._seg_mid_elevation - # < vertices and cell2d + # < vertices and cell1d if self._segment_shp_file is not None: opts_set = [ "nodes", "nvert", "vertices", - "cell2d", + "cell1d", ] for oo in opts_set: self._warn_option_overwrite(oo, opt_dict_name, method_name) @@ -486,9 +476,9 @@ def _set_disv1d(self): for vv in range(len(inds)): vertices += [[inds[vv], geoms[vv][0], geoms[vv][1]]] - # cell2d: for each reach, if it has a "to", get the to's first ind + # cell1d: for each reach, if it has a "to", get the to's first ind # as the last vertex ind for the reach - cell2d = [] + cell1d = [] for rr in range(nreach): reach_id = parameters["nhm_seg"][rr] icvert = reach_vert_inds[reach_id] @@ -498,7 +488,7 @@ def _set_disv1d(self): icvert += [to_reach_start_ind] # < ncvert = len(icvert) - cell2d += [[rr, 0.5, ncvert, *icvert]] + cell1d += [[rr, 0.5, ncvert, *icvert]] # < Just a check check_connectivity = True @@ -507,8 +497,8 @@ def _set_disv1d(self): reach_id = parameters["nhm_seg"][rr] reach_ind = rr - reach_cell2d_start_ind = cell2d[reach_ind][3] - reach_cell2d_end_ind = cell2d[reach_ind][-1] + reach_cell1d_start_ind = cell1d[reach_ind][3] + reach_cell1d_end_ind = cell1d[reach_ind][-1] wh_geom = np.where( segment_gdf.nsegment_v.values == reach_id @@ -524,9 +514,9 @@ def _set_disv1d(self): to_reach_ind = np.where( parameters["nhm_seg"] == to_reach_id )[0][0] - to_reach_cell2d_start_ind = 
cell2d[to_reach_ind][3] + to_reach_cell1d_start_ind = cell1d[to_reach_ind][3] assert ( - reach_cell2d_end_ind == to_reach_cell2d_start_ind + reach_cell1d_end_ind == to_reach_cell1d_start_ind ) # wh_down_geom = np.where( @@ -548,9 +538,9 @@ def _set_disv1d(self): from_reach_ind = np.where( parameters["nhm_seg"] == from_reach_id )[0][0] - from_reach_cell2d_end_ind = cell2d[from_reach_ind][-1] + from_reach_cell1d_end_ind = cell1d[from_reach_ind][-1] assert ( - from_reach_cell2d_end_ind == reach_cell2d_start_ind + from_reach_cell1d_end_ind == reach_cell1d_start_ind ) # wh_up_geom = np.where( @@ -561,7 +551,7 @@ def _set_disv1d(self): # ).tolist() # assert reach_up_geom[-1] == reach_geom[0] - nnodes = len(cell2d) + nnodes = len(cell1d) assert nnodes == self._nsegment nvert = len(vertices) @@ -571,11 +561,11 @@ def _set_disv1d(self): self._disv1d_options["nodes"] = self._nsegment self._disv1d_options["nvert"] = nvert self._disv1d_options["vertices"] = vertices - self._disv1d_options["cell2d"] = cell2d + self._disv1d_options["cell1d"] = cell1d # < - self._disv1d = flopy.mf6.ModflowSwfdisv1D( - self._swf, **self._disv1d_options + self._disv1d = flopy.mf6.ModflowChfdisv1D( + self._chf, **self._disv1d_options ) def _set_flw(self, **kwargs): @@ -586,7 +576,7 @@ def _set_flw(self, **kwargs): # aggregate inflows over the contributing fluxes # For non-binary data, this method has to be called after - # flopy.mf6.Swfdisl is set on self._swf + # flopy.mf6.Chfdisl is set on self._chf parameters = self.parameters.parameters @@ -706,7 +696,7 @@ def read_inflow(vv, start_time, end_time): self.control.start_time + ispd * self.control.time_step ) bin_name = ( - f"swf_flw_bc/" + f"chf_flw_bc/" f"flw_{flow_name}_{i_time_str.replace(':', '_')}.bin" ) bin_name_pl = self._output_dir / bin_name @@ -727,8 +717,8 @@ def read_inflow(vv, start_time, end_time): if key not in self._flw_options.keys(): self._flw_options[key] = True - _ = flopy.mf6.ModflowSwfflw( - self._swf, + _ = flopy.mf6.ModflowChfflw( + self._chf, save_flows=self._save_flows, stress_period_data=flw_spd, maxbound=self._nsegment, @@ -752,13 +742,13 @@ def _set_dfw(self): # seconds / meter ** (1/3) self._dfw_options["manningsn"] = parameters["mann_n"] - _ = flopy.mf6.ModflowSwfdfw(self._swf, **self._dfw_options) + _ = flopy.mf6.ModflowChfdfw(self._chf, **self._dfw_options) def _set_sto(self): if "save_flows" not in self._sto_options: self._sto_options["save_flows"] = self._save_flows - _ = flopy.mf6.ModflowSwfsto( - self._swf, + _ = flopy.mf6.ModflowChfsto( + self._chf, **self._sto_options, ) @@ -771,20 +761,20 @@ def _set_ic(self): "strt": self._seg_mid_elevation + self._ic_options["strt"] } - self._ic = flopy.mf6.ModflowSwfic(self._swf, **ic_options) + self._ic = flopy.mf6.ModflowChfic(self._chf, **ic_options) def _set_cxs(self): if self._cxs_options is None: pass else: - self._cxs = flopy.mf6.ModflowSwfcxs(self._swf, **self._cxs_options) + self._cxs = flopy.mf6.ModflowChfcxs(self._chf, **self._cxs_options) def _set_oc(self): if self._oc_options is None: self._oc_options = {} - self._oc = flopy.mf6.ModflowSwfoc( - self._swf, + self._oc = flopy.mf6.ModflowChfoc( + self._chf, budget_filerecord=f"{self._sim_name}.bud", stage_filerecord=f"{self._sim_name}.stage", # qoutflow_filerecord=f"{self._sim_name}.qoutflow", @@ -814,7 +804,7 @@ def _set_chd(self): self._chd_options["maxbound"] = len( self._chd_options["stress_period_data"] ) - self._chd = flopy.mf6.ModflowSwfchd(self._swf, **self._chd_options) + self._chd = flopy.mf6.ModflowChfchd(self._chf, 
**self._chd_options) def _calculate_seg_mid_elevations(self, check=False): nseg = self._nsegment diff --git a/pywatershed/utils/netcdf_utils.py b/pywatershed/utils/netcdf_utils.py index a6c8db84..301f6871 100644 --- a/pywatershed/utils/netcdf_utils.py +++ b/pywatershed/utils/netcdf_utils.py @@ -9,6 +9,7 @@ from ..base.accessor import Accessor from ..base.meta import meta_dimensions, meta_netcdf_type +from ..constants import np_type_to_netcdf_type_dict from ..utils.time_utils import datetime_doy fileish = Union[str, pl.Path] @@ -353,31 +354,36 @@ class NetCdfWrite(Accessor): - """Output the csv output data to a netcdf file - - Args: - name: path for netcdf output file - clobber: boolean indicating if an existing netcdf file should - be overwritten - zlib: boolean indicating if the data should be compressed - (default is True) - complevel: compression level (default is 4) - chunk_sizes: dictionary defining chunk sizes for the data - """ - def __init__( self, name: fileish, coordinates: dict, variables: listish, var_meta: dict, - global_attrs: dict = {}, + extra_coords: dict = None, + global_attrs: dict = None, time_units: str = "days since 1970-01-01 00:00:00", clobber: bool = True, zlib: bool = True, complevel: int = 4, chunk_sizes: dict = {"time": 1, "hruid": 0}, ): + """Output the csv output data to a netcdf file + + Args: + name: path for netcdf output file + extra_coords: A dictionary keyed by dimension with the values being + a dictionary of var_name: data pairs. Not for multi-dimensional + coordinates. + clobber: boolean indicating if an existing netcdf file should + be overwritten + zlib: boolean indicating if the data should be compressed + (default is True) + complevel: compression level (default is 4) + chunk_sizes: dictionary defining chunk sizes for the data + """ + # import after the docstring so the string above is still + # recognized as the docstring of __init__ + from netCDF4 import stringtochar + if isinstance(variables, dict): group_variables = [] for group, vars in variables.items(): @@ -398,6 +404,12 @@ def __init__( self.dataset = nc4.Dataset(name, "w", clobber=clobber) self.dataset.setncattr("Description", "pywatershed output data") + + if extra_coords is None: + extra_coords = {} + + if global_attrs is None: + global_attrs = {} for att_key, att_val in global_attrs.items(): self.dataset.setncattr(att_key, att_val) @@ -490,12 +502,12 @@ if nhru_coordinate: self.hruid = self.dataset.createVariable( - "nhm_id", "i4", ("nhm_id") + "nhm_id", "i4", ("nhm_id",) ) self.hruid[:] = np.array(self.hru_ids, dtype=int) if nsegment_coordinate: self.segid = self.dataset.createVariable( - "nhm_seg", "i4", ("nhm_seg") + "nhm_seg", "i4", ("nhm_seg",) ) self.segid[:] = np.array(self.segment_ids, dtype=int) if one_coordinate: @@ -503,15 +515,63 @@ self.oneid[:] = np.array(self.one_ids, dtype=int) if nreservoirs_coordinate: self.grandid = self.dataset.createVariable( - "grand_id", "i4", ("grand_id") + "grand_id", "i4", ("grand_id",) ) self.grandid[:] = coordinates["grand_id"] if nnodes_coordinate: self.node_coord = self.dataset.createVariable( - "node_coord", "i4", ("node_coord") + "node_coord", "i4", ("node_coord",) ) self.node_coord[:] = coordinates["node_coord"] + char_dims_created = [] + for x_dim, x_data_dict in extra_coords.items(): + for x_var_name, x_data in x_data_dict.items(): + np_dtype = x_data.dtype + type_str = str(np_dtype) + + dim = (x_dim,) + if "U" in type_str or "S" in type_str: + # https://unidata.github.io/netcdf4-python/#dealing-with-strings # noqa: E501 + # S1 gives "char" type in the file whereas another + # number gives "string" type.
The former is properly + handled by xarray + nc_type = "S1" + + # if it is a string array, convert it to a character array + # I don't understand the particulars here, may need more + # work + if "U" in type_str: + char_array = stringtochar(x_data.astype("S")) + else: + char_array = stringtochar(x_data) + + char_dim_len = char_array.shape[1] + dim_name = f"char{char_dim_len}" + dim = (x_dim, dim_name) + # create each character dimension only once, but always + # set dim so every string variable is created below + if dim_name not in char_dims_created: + _ = self.dataset.createDimension( + dimname=dim_name, size=char_dim_len + ) + char_dims_created += [dim_name] + + else: + nc_type = np_type_to_netcdf_type_dict[np_dtype] + + # < + self[x_var_name] = self.dataset.createVariable( + varname=x_var_name, datatype=nc_type, dimensions=dim + ) + if "S1" == nc_type: + self[x_var_name][:, :] = char_array + self[x_var_name]._Encoding = "utf-8" + + else: + self[x_var_name][:] = x_data + self.variables = {} for var_name, group_var_name in zip(variables, group_variables): variabletype = meta_netcdf_type(var_meta[var_name]) @@ -541,20 +601,32 @@ else: time_dim = "time" + var_dims = (time_dim, spatial_coordinate) self.variables[var_name] = self.dataset.createVariable( group_var_name, variabletype, - (time_dim, spatial_coordinate), + var_dims, fill_value=nc4.default_fillvals[variabletype], zlib=zlib, complevel=complevel, chunksizes=tuple(chunk_sizes.values()), ) + for key, val in var_meta[var_name].items(): if isinstance(val, dict): continue self.variables[var_name].setncattr(key, val) + # https://docs.xarray.dev/en/stable/user-guide/io.html#coordinates + var_encoding = [] + for x_dim, x_data_dict in extra_coords.items(): + if x_dim in var_dims: + var_encoding += x_data_dict.keys() + + if len(var_encoding): + var_encoding = " ".join(var_encoding) + self.variables[var_name].setncattr("coordinates", var_encoding) + return def __del__(self): diff --git a/pywatershed/utils/prms5_file_util.py b/pywatershed/utils/prms5_file_util.py index 9e015b78..28f8fc84 100644 --- a/pywatershed/utils/prms5_file_util.py +++ b/pywatershed/utils/prms5_file_util.py @@ -142,6 +142,9 @@ def _get_file_object( raise TypeError("file_path must be a file path") return + def _close_file_object(self) -> None: + self.file_object.close() + def _get_control_variables( self, ) -> dict: @@ -172,7 +175,7 @@ elif key in ("initial_deltat",): value = np.timedelta64(int(value[0]), "h") variable_dict[key] = value - self.file_object.close() + self._close_file_object() return variable_dict def _get_dimensions_parameters(self): @@ -236,6 +239,7 @@ for kk, vv in parameter_dimensions_full_dict.items() } + self._close_file_object() return parameters_full_dict, parameter_dimensions_full_dict def _get_parameters(self): @@ -323,7 +327,7 @@ def _parse_variable( for idx in range(num_values): arr[idx] = float(self._get_line().split()[0]) elif data_type == PrmsDataType.CHARACTER.value: - arr = np.zeros(num_values, dtype=np.chararray) + arr = np.zeros(num_values, dtype="O") for idx in range(num_values): arr[idx] = self._get_line().split()[0] else: @@ -393,7 +397,7 @@ def _parse_parameter( for idx in range(len_array): arr[idx] = float(self._get_line().split()[0]) elif data_type == PrmsDataType.CHARACTER.value: - arr = np.zeros(len_array, dtype=np.chararray) + arr = np.zeros(len_array, dtype="O") for idx in range(len_array): arr[idx] = self._get_line().split()[0] else: diff --git a/pywatershed/utils/time_utils.py b/pywatershed/utils/time_utils.py index 451d4205..0ad8c81a 100644 ---
a/pywatershed/utils/time_utils.py +++ b/pywatershed/utils/time_utils.py @@ -7,10 +7,19 @@ # https://github.com/numpy/numpy/pull/14276 -def dt64_to_dt(dt64: np.datetime64) -> datetime.datetime: +def dt64_to_dt(dt64: np.datetime64, dt_precision="s") -> datetime.datetime: """np.datetime64 to datetime.datetime""" # This is because I always forget this exists. - return dt64.astype(datetime.datetime) + val = dt64.astype(datetime.datetime) + # If the precision of dt64 is too fine (e.g. nanoseconds), the astype + # returns an integer rather than a datetime.datetime object, which + # breaks code downstream. Fall back to dt_precision ("s" by default) + # when this happens. + if isinstance(val, int): + dt = dt64.astype(f"datetime64[{dt_precision}]") + val = dt.astype(datetime.datetime) + # < + return val def datetime_year(dt64: np.datetime64) -> int: diff --git a/readthedocs.yml b/readthedocs.yml index ea496835..7b6da1fa 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -15,7 +15,7 @@ formats: all build: os: ubuntu-22.04 tools: - python: "3.9" + python: "3.11" python: install: diff --git a/test_data/generate/conus_2yr_subset.sh b/test_data/generate/conus_2yr_subset.sh index fcffdafc..32cfba0d 100755 --- a/test_data/generate/conus_2yr_subset.sh +++ b/test_data/generate/conus_2yr_subset.sh @@ -2,9 +2,11 @@ # Purpose: # Generate a 2 year test case of DRB from the full 40yr -source_dir=/home/jmccreight/pywatershed_data/20220209_gm_byHWobs_CONUS -target_dir=/home/jmccreight/pywatershed_data/conus_2yr -pywatershed_repo_dir=/home/jmccreight/pywatershed +home_dir=~ + +source_dir=$home_dir/pywatershed_data/20220209_gm_byHWobs_CONUS +target_dir=$home_dir/pywatershed_data/conus_2yr +pywatershed_repo_dir=$home_dir/pywatershed mkdir -p $target_dir diff --git a/test_data/generate/submit_domains_tg.sh b/test_data/generate/submit_domains_tg.sh index 1019eb20..d09b13a2 100644 --- a/test_data/generate/submit_domains_tg.sh +++ b/test_data/generate/submit_domains_tg.sh @@ -6,15 +6,16 @@ #SBATCH --account=impd #SBATCH --time=01:00:00 #SBATCH --mail-type=ALL -#SBATCH --mail-user=jmccreight@usgs.gov +## #SBATCH --mail-user=@usgs.gov #SBATCH -o conus_tg_job--%j.out # This is for TALLGRASS -# conda envs are documented in /home/jmccreight/conda_installs.sh +# conda envs are documented in ~/conda_installs.sh # make sure we have conda in the scheduler -source /home/jmccreight/.bashrc +home_dir=~ +source $home_dir/.bashrc # of gcc/gfort that PRMS 5.2.1 requires conda activate gnu_tg
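To illustrate the pitfall the dt64_to_dt change above guards against (values invented; this relies on numpy's documented datetime64-to-object casting behavior):

    import datetime
    import numpy as np

    ns = np.datetime64("2020-01-01T00:00:00.000000000")  # [ns] precision
    print(ns.astype(datetime.datetime))   # 1577836800000000000 -- an int!
    s = ns.astype("datetime64[s]")        # coarsen to [s] precision first
    print(s.astype(datetime.datetime))    # datetime.datetime(2020, 1, 1, 0, 0)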