From 0df4d761b725f74acc1b529a73531f9c0cbd965d Mon Sep 17 00:00:00 2001 From: ouyangwenyu Date: Wed, 27 Mar 2024 15:44:41 +0800 Subject: [PATCH] add param_range file so that you can set your own param range --- hydromodel/datasets/data_preprocess.py | 31 +++-- hydromodel/models/gr4j.py | 12 +- hydromodel/models/hymod.py | 14 +- hydromodel/models/model_config.py | 26 +++- hydromodel/models/param.yaml | 172 +++++++++++++++++++++++++ hydromodel/models/xaj.py | 14 +- hydromodel/trainers/calibrate_ga.py | 10 +- hydromodel/trainers/calibrate_sceua.py | 24 +++- scripts/calibrate_xaj.py | 12 +- 9 files changed, 275 insertions(+), 40 deletions(-) create mode 100644 hydromodel/models/param.yaml diff --git a/hydromodel/datasets/data_preprocess.py b/hydromodel/datasets/data_preprocess.py index 48df5aa..370d8a5 100644 --- a/hydromodel/datasets/data_preprocess.py +++ b/hydromodel/datasets/data_preprocess.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-26 21:20:18 +LastEditTime: 2024-03-27 14:30:15 LastEditors: Wenyu Ouyang Description: preprocess data for models in hydro-model-xaj FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py @@ -390,7 +390,23 @@ def cross_valid_data(ts_data, period, warmup, cv_fold, freq="1D"): return train_test_data -def get_basin_area(data_type, data_dir, basin_ids): +def get_basin_area(data_type, data_dir, basin_ids) -> xr.Dataset: + """_summary_ + + Parameters + ---------- + data_type : _type_ + _description_ + data_dir : _type_ + _description_ + basin_ids : _type_ + _description_ + + Returns + ------- + xr.Dataset + _description_ + """ area_name = remove_unit_from_name(AREA_NAME) if data_type == "camels": camels_data_dir = os.path.join( @@ -399,10 +415,9 @@ def get_basin_area(data_type, data_dir, basin_ids): camels = Camels(camels_data_dir) basin_area = camels.read_area(basin_ids) elif data_type == "owndata": - attr_data = xr.open_dataset( - os.path.join(os.path.dirname(data_dir), "attributes.nc") - ) - basin_area = attr_data[area_name].values + attr_data = xr.open_dataset(os.path.join(data_dir, "attributes.nc")) + # to guarantee the column name is same as the column name in the time series data + basin_area = attr_data[[area_name]].rename({"id": "basin"}) return basin_area @@ -451,9 +466,7 @@ def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids): ts_data = ts_data.rename({"PET": pet_name}) # ts_data = ts_data.drop_vars('streamflow') elif data_type == "owndata": - ts_data = xr.open_dataset( - os.path.join(os.path.dirname(data_dir), "timeseries.nc") - ) + ts_data = xr.open_dataset(os.path.join(data_dir, "timeseries.nc")) target_unit = ts_data[prcp_name].attrs.get("units", "unknown") qobs_ = ts_data[[flow_name]] r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit=target_unit) diff --git a/hydromodel/models/gr4j.py b/hydromodel/models/gr4j.py index 8fd492e..4032584 100644 --- a/hydromodel/models/gr4j.py +++ b/hydromodel/models/gr4j.py @@ -3,7 +3,7 @@ import numpy as np from numba import jit -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict from hydromodel.models.xaj import uh_conv @@ -204,10 +204,12 @@ def gr4j(p_and_e, parameters, warmup_length: int, return_state=False, **kwargs): Union[np.array, tuple] streamflow or (streamflow, states) """ - x1_scale = MODEL_PARAM_DICT["gr4j"]["param_range"]["x1"] - x2_sacle = MODEL_PARAM_DICT["gr4j"]["param_range"]["x2"] - x3_scale = MODEL_PARAM_DICT["gr4j"]["param_range"]["x3"] - x4_scale = MODEL_PARAM_DICT["gr4j"]["param_range"]["x4"] + pr_file = kwargs.get("param_range_file", None) + model_param_dict = read_model_param_dict(pr_file) + x1_scale = model_param_dict["gr4j"]["param_range"]["x1"] + x2_sacle = model_param_dict["gr4j"]["param_range"]["x2"] + x3_scale = model_param_dict["gr4j"]["param_range"]["x3"] + x4_scale = model_param_dict["gr4j"]["param_range"]["x4"] x1 = x1_scale[0] + parameters[:, 0] * (x1_scale[1] - x1_scale[0]) x2 = x2_sacle[0] + parameters[:, 1] * (x2_sacle[1] - x2_sacle[0]) x3 = x3_scale[0] + parameters[:, 2] * (x3_scale[1] - x3_scale[0]) diff --git a/hydromodel/models/hymod.py b/hydromodel/models/hymod.py index 580c1fd..72201b8 100644 --- a/hydromodel/models/hymod.py +++ b/hydromodel/models/hymod.py @@ -2,7 +2,7 @@ import numpy as np from numba import jit -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict def hymod(p_and_e, parameters, warmup_length=30, return_state=False, **kwargs): @@ -30,12 +30,14 @@ def hymod(p_and_e, parameters, warmup_length=30, return_state=False, **kwargs): Union[list, np.array] streamflow, x_slow, x_quick, x_loss or streamflow """ + pr_file = kwargs.get("param_range_file", None) + model_param_dict = read_model_param_dict(pr_file) # parameter, 2-dim variable: [parameter=1, basin] - cmax_scale = MODEL_PARAM_DICT["hymod"]["param_range"]["cmax"] - bexp_sacle = MODEL_PARAM_DICT["hymod"]["param_range"]["bexp"] - alpha_scale = MODEL_PARAM_DICT["hymod"]["param_range"]["alpha"] - ks_scale = MODEL_PARAM_DICT["hymod"]["param_range"]["ks"] - kq_scale = MODEL_PARAM_DICT["hymod"]["param_range"]["kq"] + cmax_scale = model_param_dict["hymod"]["param_range"]["cmax"] + bexp_sacle = model_param_dict["hymod"]["param_range"]["bexp"] + alpha_scale = model_param_dict["hymod"]["param_range"]["alpha"] + ks_scale = model_param_dict["hymod"]["param_range"]["ks"] + kq_scale = model_param_dict["hymod"]["param_range"]["kq"] cmax = cmax_scale[0] + parameters[:, 0] * (cmax_scale[1] - cmax_scale[0]) bexp = bexp_sacle[0] + parameters[:, 1] * (bexp_sacle[1] - bexp_sacle[0]) alpha = alpha_scale[0] + parameters[:, 2] * (alpha_scale[1] - alpha_scale[0]) diff --git a/hydromodel/models/model_config.py b/hydromodel/models/model_config.py index 0679534..eb301d1 100644 --- a/hydromodel/models/model_config.py +++ b/hydromodel/models/model_config.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-26 11:13:08 +LastEditTime: 2024-03-27 15:03:41 LastEditors: Wenyu Ouyang Description: some basic config for hydro-model-xaj models FilePath: \hydro-model-xaj\hydromodel\models\model_config.py @@ -10,7 +10,27 @@ from collections import OrderedDict -# NOTE: Don't change the parameter settings +import yaml + + +def read_model_param_dict(file_path="param.yaml"): + try: + with open(file_path, "r") as file: + data = yaml.safe_load(file) + + return { + model: { + "param_name": contents["param_name"], + "param_range": OrderedDict(contents["param_range"]), + } + for model, contents in data.items() + } + except FileNotFoundError: + print( + f"File not found: {file_path}, we directly use the default MODEL_PARAM_DICT." + ) + return MODEL_PARAM_DICT + MODEL_PARAM_DICT = { "xaj": { @@ -49,7 +69,7 @@ "KI": [0.0, 0.7], "KG": [0.0, 0.7], "CS": [0.0, 1.0], - "L": [1.0, 10.0], # unit is day + "L": [1.0, 10.0], # unit is same as your time step "CI": [0.0, 0.9], "CG": [0.98, 0.998], } diff --git a/hydromodel/models/param.yaml b/hydromodel/models/param.yaml new file mode 100644 index 0000000..4ba0b8d --- /dev/null +++ b/hydromodel/models/param.yaml @@ -0,0 +1,172 @@ +xaj: + param_name: + - K + - B + - IM + - UM + - LM + - DM + - C + - SM + - EX + - KI + - KG + - CS + - L + - CI + - CG + param_range: + K: + - 0.1 + - 1.0 + B: + - 0.1 + - 0.4 + IM: + - 0.01 + - 0.1 + UM: + - 0.0 + - 20.0 + LM: + - 60.0 + - 90.0 + DM: + - 60.0 + - 120.0 + C: + - 0.0 + - 0.2 + SM: + - 1 + - 100.0 + EX: + - 1.0 + - 1.5 + KI: + - 0.0 + - 0.7 + KG: + - 0.0 + - 0.7 + CS: + - 0.0 + - 1.0 + L: + - 1.0 + - 10.0 + CI: + - 0.0 + - 0.9 + CG: + - 0.98 + - 0.998 +xaj_mz: + param_name: + - K + - B + - IM + - UM + - LM + - DM + - C + - SM + - EX + - KI + - KG + - A + - THETA + - CI + - CG + - KERNEL + param_range: + K: + - 0.1 + - 1.0 + B: + - 0.1 + - 0.4 + IM: + - 0.01 + - 0.1 + UM: + - 0.0 + - 20.0 + LM: + - 60.0 + - 90.0 + DM: + - 60.0 + - 120.0 + C: + - 0.0 + - 0.2 + SM: + - 1.0 + - 100.0 + EX: + - 1.0 + - 1.5 + KI: + - 0.0 + - 0.7 + KG: + - 0.0 + - 0.7 + A: + - 0.0 + - 2.9 + THETA: + - 0.0 + - 6.5 + CI: + - 0.0 + - 0.9 + CG: + - 0.98 + - 0.998 + KERNEL: + - 1 + - 15 +gr4j: + param_name: + - x1 + - x2 + - x3 + - x4 + param_range: + x1: + - 100.0 + - 1200.0 + x2: + - -5.0 + - 3.0 + x3: + - 20.0 + - 300.0 + x4: + - 1.1 + - 2.9 +hymod: + param_name: + - cmax + - bexp + - alpha + - ks + - kq + param_range: + cmax: + - 1.0 + - 500.0 + bexp: + - 0.1 + - 2.0 + alpha: + - 0.1 + - 0.99 + ks: + - 0.001 + - 0.1 + kq: + - 0.1 + - 0.99 diff --git a/hydromodel/models/xaj.py b/hydromodel/models/xaj.py index 2a5965a..fa28007 100644 --- a/hydromodel/models/xaj.py +++ b/hydromodel/models/xaj.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2021-12-10 23:01:02 -LastEditTime: 2024-03-26 11:06:12 +LastEditTime: 2024-03-27 15:09:11 LastEditors: Wenyu Ouyang Description: Core code for XinAnJiang model FilePath: /hydro-model-xaj/hydromodel/models/xaj.py @@ -14,7 +14,7 @@ from numba import jit from scipy.special import gamma -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict PRECISION = 1e-5 @@ -741,11 +741,13 @@ def xaj( streamflow or (streamflow, states) """ # default values for some function parameters - model_name = kwargs["name"] if "name" in kwargs else "xaj" - source_type = kwargs["source_type"] if "source_type" in kwargs else "sources" - source_book = kwargs["source_book"] if "source_book" in kwargs else "HF" + model_name = kwargs.get("name", "xaj") + source_type = kwargs.get("source_type", "sources") + source_book = kwargs.get("source_book", "HF") + pr_file = kwargs.get("param_range_file", None) + model_param_dict = read_model_param_dict(pr_file) # params - param_ranges = MODEL_PARAM_DICT[model_name]["param_range"] + param_ranges = model_param_dict[model_name]["param_range"] if model_name == "xaj": route_method = "CSL" elif model_name == "xaj_mz": diff --git a/hydromodel/trainers/calibrate_ga.py b/hydromodel/trainers/calibrate_ga.py index 8886df9..800c18f 100644 --- a/hydromodel/trainers/calibrate_ga.py +++ b/hydromodel/trainers/calibrate_ga.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2021-12-10 23:01:02 -LastEditTime: 2024-03-26 15:22:19 +LastEditTime: 2024-03-27 15:11:04 LastEditors: Wenyu Ouyang Description: Calibrate XAJ model using DEAP FilePath: \hydro-model-xaj\hydromodel\trainers\calibrate_ga.py @@ -21,7 +21,7 @@ from hydromodel.datasets.data_postprocess import plot_sim_and_obs, plot_train_iteration -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict from hydromodel.models.model_dict import MODEL_DICT, rmse43darr @@ -97,7 +97,7 @@ def wrapper(*args, **kargs): def calibrate_by_ga( - input_data, observed_output, deap_dir, warmup_length=30, model=None, ga_param=None + input_data, observed_output, deap_dir, warmup_length=30, model=None, ga_param=None, **kwargs ): """ Use GA algorithm to find optimal parameters for hydrologic models @@ -141,8 +141,10 @@ def calibrate_by_ga( "mut_prob": 0.5, "save_freq": 1, } + pr_file = kwargs.get("param_range_file", None) + model_param_dict = read_model_param_dict(pr_file) np.random.seed(ga_param["random_seed"]) - param_num = len(MODEL_PARAM_DICT[model["name"]]["param_name"]) + param_num = len(model_param_dict[model["name"]]["param_name"]) creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("Individual", list, fitness=creator.FitnessMin) toolbox = base.Toolbox() diff --git a/hydromodel/trainers/calibrate_sceua.py b/hydromodel/trainers/calibrate_sceua.py index 6d1758a..907e31b 100644 --- a/hydromodel/trainers/calibrate_sceua.py +++ b/hydromodel/trainers/calibrate_sceua.py @@ -4,12 +4,14 @@ import spotpy import pandas as pd from spotpy.parameter import Uniform, ParameterSet -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict from hydromodel.models.model_dict import LOSS_DICT, MODEL_DICT class SpotSetup(object): - def __init__(self, p_and_e, qobs, warmup_length=365, model=None, loss=None): + def __init__( + self, p_and_e, qobs, warmup_length=365, model=None, param_file=None, loss=None + ): """ Set up for Spotpy NOTE: once for a basin in one sampler or @@ -25,8 +27,8 @@ def __init__(self, p_and_e, qobs, warmup_length=365, model=None, loss=None): GR4J model need warmup period model we support "gr4j", "hymod", and "xaj" - model_func_param - parameters of model function + param_range + parameters range of model loss loss configs including objective function, typically RMSE """ @@ -42,7 +44,9 @@ def __init__(self, p_and_e, qobs, warmup_length=365, model=None, loss=None): "obj_func": "rmse", "events": None, } - self.parameter_names = MODEL_PARAM_DICT[model["name"]]["param_name"] + self.param_range_file = {"param_range_file": param_file} + param_range = read_model_param_dict(param_file) + self.parameter_names = param_range[model["name"]]["param_name"] self.model = model self.params = [] self.params.extend( @@ -79,7 +83,11 @@ def simulation(self, x: ParameterSet) -> Union[list, np.array]: # xaj model's output include streamflow and evaporation now, # but now we only calibrate the model with streamflow sim, _ = MODEL_DICT[self.model["name"]]( - self.p_and_e, params, warmup_length=self.warmup_length, **self.model + self.p_and_e, + params, + warmup_length=self.warmup_length, + **self.model, + **self.param_range_file ) return sim @@ -156,6 +164,7 @@ def calibrate_by_sceua( model=None, algorithm=None, loss=None, + param_file=None, ): """ Function for calibrating model by SCE-UA @@ -183,6 +192,8 @@ def calibrate_by_sceua( loss configs for events calculation or just one long time-series calculation with an objective function, typically RMSE + param_file + the file of the parameter range, yaml file Returns ------- @@ -228,6 +239,7 @@ def calibrate_by_sceua( warmup_length=warmup_length, model=model, loss=loss, + param_file=param_file, ) if not os.path.exists(dbname): os.makedirs(dbname) diff --git a/scripts/calibrate_xaj.py b/scripts/calibrate_xaj.py index dd23170..a266015 100644 --- a/scripts/calibrate_xaj.py +++ b/scripts/calibrate_xaj.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-27 11:43:47 +LastEditTime: 2024-03-27 15:22:39 LastEditors: Wenyu Ouyang Description: the script to calibrate a model for CAMELS basin FilePath: \hydro-model-xaj\scripts\calibrate_xaj.py @@ -39,6 +39,7 @@ def calibrate(args): model_info = args.model algo_info = args.algorithm loss_info = args.loss + param_range_file = args.param_range_file where_save = Path(os.path.join(repo_path, "result", exp)) if os.path.exists(where_save) is False: @@ -68,6 +69,7 @@ def calibrate(args): model=model_info, algorithm=algo_info, loss=loss_info, + param_file=param_range_file, ) else: for i in range(cv_fold): @@ -177,6 +179,14 @@ def calibrate(args): }, type=json.loads, ) + parser.add_argument( + "--param_range_file", + dest="param_range_file", + help="The file of the parameter range", + # default=None, + default="C:\\Users\\wenyu\\OneDrive\\data\\biliuhe\\param_range.yaml", + type=str, + ) parser.add_argument( "--algorithm", dest="algorithm",