Skip to content

Commit

Permalink
Ajout d'une structure de modélisation flexible et de calage (#58)
Browse files Browse the repository at this point in the history
Proposition pour commencer à développer la structure de modélisation
hydrologique qui soit suffisamment flexible pour tous les besoins des
usagers.

L'idée est de fonctionner avec un objet 'model_config' (dict) qui
contient toute l'information requise pour rouler un modèle
hydrologique:
1. Un pre-processor prépare les données pour le modèle désiré, incluant
des checks sur les données manquantes;
2. Les données sont stockées dans l'objet model_config
3. Ce dernier est finalement passé au package de modélisation, qui, en
lisant le contenu de model_config, lance une simulation du bon modèle
hydrologique avec les données dans l'objet. On retourne Qsim par la
suite.

Ceci a plusieurs avantages:

1. Structure extrêmement flexible: Possibilité de stocker les données
directement, des path vers les netcdf au besoin, données distribuées
pour Hydrotel, etc.
2. Le calage utilise la même structure peu importe le modèle, car tout
est offloadé au système de modélisation pour le traitement du format des
données
3. Très simple à suivre pour les utilisateurs moins avancés (qu'un autre
genre d'objet dynamique/classe complexe instanciée automagiquement)
4. Très facile d'ajouter des modèles hydrologiques, peu importe leur
complexité, favorisant la contribution de la communauté
5. Efficace, car on passe uniquement les données requises ou les path
vers les données au besoin, donc moins de I/O.

Je démarre donc le bal avec cette proposition de structure, et suis très
intéressé à avoir votre avis/opinion. J'ai documenté le plus possible
pour que ce soit le plus clair possible, mais s'il y a des questions,
n'hésitez pas.

Aussi, il faudra installer quelques packages par rapport à la branche
master:

- hydroeval
- spotpy
- esmpy

Au plaisir d'avoir vos commentaires et suggestions!
  • Loading branch information
richardarsenault authored Feb 8, 2024
2 parents 6d214a7 + d9d3b85 commit edda069
Show file tree
Hide file tree
Showing 9 changed files with 1,841 additions and 0 deletions.
1 change: 1 addition & 0 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- numpy
- pooch >=1.8.0
- pydantic >=2.0,<2.5.3 # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3
- spotpy
- statsmodels
- xarray
- xclim >=0.47.0 # FIXME: Remove pin once our dependencies (xclim, xscen) support pandas 2.2.0
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- numpy
- pooch >=1.8.0
- pydantic >=2.0,<2.5.3 # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3
- spotpy
- statsmodels
- xarray
- xclim >=0.47.0 # FIXME: Remove pin once our dependencies (xclim, xscen) support pandas 2.2.0
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
"numpy",
"pooch>=1.8.0",
"pydantic>=2.0,<2.5.3",
"spotpy",
"statsmodels",
"xarray",
"xclim>=0.47.0",
Expand Down
138 changes: 138 additions & 0 deletions tests/test_calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""Test suite for the calibration algorithm in calibration.py."""

# Also tests the dummy model implementation.
import numpy as np
import pytest

from xhydro.modelling.calibration import perform_calibration
from xhydro.modelling.hydrological_modelling import _dummy_model
from xhydro.modelling.obj_funcs import get_objective_function, transform_flows


def test_spotpy_calibration():
    """Make sure the calibration works under possible test cases."""
    # Parameter search space shared by every calibration run below
    # (the Dummy model takes 3 parameters).
    bounds_low = np.array([0, 0, 0])
    bounds_high = np.array([10, 10, 10])

    # Minimal model_config dict for the "Dummy" model: forcing arrays,
    # observed flows and drainage area, keyed as perform_calibration expects.
    model_config = {
        "precip": np.array([10, 11, 12, 13, 14, 15]),
        "temperature": np.array([10, 3, -5, 1, 15, 0]),
        "qobs": np.array([120, 130, 140, 150, 160, 170]),
        "drainage_area": np.array([10]),
        "model_name": "Dummy",
    }

    # Binary mask: only the last two time steps enter the objective function.
    mask = np.array([0, 0, 0, 0, 1, 1])

    # Calibrate with DDS, minimizing MAE on the masked period.
    best_parameters, best_simulation, best_objfun = perform_calibration(
        model_config,
        "mae",
        bounds_low=bounds_low,
        bounds_high=bounds_high,
        evaluations=1000,
        algorithm="DDS",
        mask=mask,
        sampler_kwargs=dict(trials=1),
    )

    # Test that the results have the same size as expected (number of parameters)
    assert len(best_parameters) == len(bounds_high)

    # Test that the objective function is calculated correctly: recomputing
    # MAE on the returned best simulation must reproduce best_objfun exactly.
    objfun = get_objective_function(
        model_config["qobs"],
        best_simulation,
        obj_func="mae",
        mask=mask,
    )

    assert objfun == best_objfun

    # Test dummy model response: with parameters [5, 5, 5] the value at
    # index 3 is a fixed regression reference for the Dummy model formula.
    model_config["parameters"] = [5, 5, 5]
    qsim = _dummy_model(model_config)
    assert qsim["qsim"].values[3] == 3500.00

    # Also test to ensure SCEUA and take_minimize is required.
    best_parameters_sceua, best_simulation, best_objfun = perform_calibration(
        model_config,
        "mae",
        bounds_low=bounds_low,
        bounds_high=bounds_high,
        evaluations=10,
        algorithm="SCEUA",
    )

    assert len(best_parameters_sceua) == len(bounds_high)

    # Same with a maximized metric (NSE): SCEUA minimizes, so
    # perform_calibration must handle the sign flip internally.
    best_parameters_negative, best_simulation, best_objfun = perform_calibration(
        model_config,
        "nse",
        bounds_low=bounds_low,
        bounds_high=bounds_high,
        evaluations=10,
        algorithm="SCEUA",
    )
    assert len(best_parameters_negative) == len(bounds_high)

    # Test to see if transform works: calibrate on inverse-transformed flows.
    best_parameters_transform, best_simulation, best_objfun = perform_calibration(
        model_config,
        "nse",
        bounds_low=bounds_low,
        bounds_high=bounds_high,
        evaluations=10,
        algorithm="SCEUA",
        transform="inv",
        epsilon=0.01,
    )
    assert len(best_parameters_transform) == len(bounds_high)


def test_calibration_failure_mode_unknown_optimizer():
    """Calibration must raise NotImplementedError for an unknown algorithm.

    Uses the "OTHER" algorithm name, which no optimizer implements.
    """
    bounds_low = np.array([0, 0, 0])
    bounds_high = np.array([10, 10, 10])
    model_config = {
        "precip": np.array([10, 11, 12, 13, 14, 15]),
        "temperature": np.array([10, 3, -5, 1, 15, 0]),
        "qobs": np.array([120, 130, 140, 150, 160, 170]),
        "drainage_area": np.array([10]),
        "model_name": "Dummy",
    }
    # pytest.raises already fails the test if NotImplementedError is not
    # raised, so capturing the exception and re-asserting its type (as the
    # original did) was redundant; the return values were also unused.
    with pytest.raises(NotImplementedError):
        perform_calibration(
            model_config,
            "nse",
            bounds_low=bounds_low,
            bounds_high=bounds_high,
            evaluations=10,
            algorithm="OTHER",
        )


def test_transform():
    """Test the flow transformations applied before objective computation."""
    qsim = np.array([10, 10, 10])
    qobs = np.array([5, 5, 5])

    # Inverse transform; epsilon appears to be scaled by the mean observed
    # flow (1 / (10 + 0.01 * 5) == 0.0995024...) — confirm against
    # transform_flows' implementation.
    qsim_r, qobs_r = transform_flows(qsim, qobs, transform="inv", epsilon=0.01)
    np.testing.assert_array_almost_equal(qsim_r[1], 0.0995024, 6)
    np.testing.assert_array_almost_equal(qobs_r[1], 0.1980198, 6)

    # Square-root transform (no epsilon needed).
    qsim_r, qobs_r = transform_flows(qsim, qobs, transform="sqrt")
    np.testing.assert_array_almost_equal(qsim_r[1], 3.1622776, 6)
    np.testing.assert_array_almost_equal(qobs_r[1], 2.2360679, 6)

    # Natural-log transform with the same epsilon handling.
    qsim_r, qobs_r = transform_flows(qsim, qobs, transform="log", epsilon=0.01)
    np.testing.assert_array_almost_equal(qsim_r[1], 2.3075726, 6)
    np.testing.assert_array_almost_equal(qobs_r[1], 1.6193882, 6)

    # An unknown transform keyword must raise NotImplementedError.
    # (The original comment wrongly said this tested mismatched lengths, and
    # its `qobs_r, qobs_r = ...` double-assignment is fixed here.)
    with pytest.raises(NotImplementedError):
        transform_flows(qsim, qobs, transform="a", epsilon=0.01)
53 changes: 53 additions & 0 deletions tests/test_hydrological_modelling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Test suite for hydrological modelling in hydrological_modelling.py"""

import numpy as np
import pytest

from xhydro.modelling.hydrological_modelling import (
get_hydrological_model_inputs,
run_hydrological_model,
)


def test_hydrological_modelling():
    """Test the hydrological models as they become online"""
    # Test the dummy model: full model_config including the 3 parameters.
    model_config = {
        "precip": np.array([10, 11, 12, 13, 14, 15]),
        "temperature": np.array([10, 3, -5, 1, 15, 0]),
        "qobs": np.array([120, 130, 140, 150, 160, 170]),
        "drainage_area": np.array([10]),
        "model_name": "Dummy",
        "parameters": np.array([5, 5, 5]),
    }
    qsim = run_hydrological_model(model_config)
    # Regression reference: value at index 3 is fixed by the Dummy formula
    # and the inputs above.
    assert qsim["qsim"].values[3] == 3500.00

    # Test the exceptions for new models
    # "ADD_OTHER_HERE" appears to be a registered placeholder name that
    # returns 0 instead of raising, marking where future models get wired
    # in — confirm against hydrological_modelling.py.
    model_config.update(model_name="ADD_OTHER_HERE")
    qsim = run_hydrological_model(model_config)
    assert qsim == 0


def test_import_unknown_model():
    """Running a model with an unregistered name must raise NotImplementedError."""
    model_config = {"model_name": "fake_model"}
    # pytest.raises fails the test by itself if the exception is not raised;
    # the original's captured-exception type assert was redundant. The dict
    # construction is hoisted out so only the raising call sits in the block.
    with pytest.raises(NotImplementedError):
        run_hydrological_model(model_config)


def test_get_unknown_model_requirements():
    """Requesting inputs for an unknown model name must raise NotImplementedError."""
    # pytest.raises fails the test by itself if the exception is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(NotImplementedError):
        get_hydrological_model_inputs("fake_model")


def test_get_model_requirements():
    """Test for required inputs for models.

    The Dummy model must advertise exactly four required configuration keys
    (presumably precip, temperature, drainage_area and parameters — confirm
    against get_hydrological_model_inputs).
    """
    required_config = get_hydrological_model_inputs("Dummy")
    # Removed leftover debug print of the keys; len(dict) is the idiomatic
    # form of len(dict.keys()).
    assert len(required_config) == 4
160 changes: 160 additions & 0 deletions tests/test_objective_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""Test suite for the objective functions in obj_funcs.py."""

import numpy as np
import pytest

from xhydro.modelling.obj_funcs import (
_get_objfun_minimize_or_maximize,
get_objective_function,
)


def test_obj_funcs():
    """Check every objective function against precomputed reference values."""
    qobs = np.array([120, 130, 140, 150, 160, 170])
    qsim = np.array([120, 125, 145, 140, 140, 180])

    # Reference values computed once for the fixed (qobs, qsim) pair above;
    # insertion order matches the original call sequence.
    expected = {
        "abs_bias": 3.3333333333333335,
        "abs_pbias": 2.2988505747126435,
        "abs_volume_error": 0.022988505747126436,
        "agreement_index": 0.9171974522292994,
        "bias": -3.3333333333333335,
        "correlation_coeff": 0.8599102447336393,
        "kge": 0.8077187696552522,
        "kge_mod": 0.7888769531580001,
        "mae": 8.333333333333334,
        "mare": 0.05747126436781609,
        "mse": 108.33333333333333,
        "nse": 0.6285714285714286,
        "pbias": -2.2988505747126435,
        "r2": 0.7394456289978675,
        "rmse": 10.408329997330663,
        "rrmse": 0.07178158618848733,
        "rsr": 0.6094494002200439,
        "volume_error": -0.022988505747126436,
    }

    # Each metric must reproduce its reference to 8 decimals.
    for metric, reference in expected.items():
        objfun = get_objective_function(qobs, qsim, obj_func=metric)
        np.testing.assert_array_almost_equal(objfun, reference, 8)


def test_objective_function_failure_data_length():
    """Objective function must raise ValueError when qobs and qsim lengths differ."""
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        get_objective_function(
            np.array([100, 110]),
            np.array([100, 110, 120]),
            obj_func="mae",
        )


def test_objective_function_failure_mask_length():
    """Objective function must raise ValueError when the mask length differs from the data."""
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        get_objective_function(
            np.array([100, 100, 100]),
            np.array([100, 110, 120]),
            obj_func="mae",
            mask=np.array([0, 1, 0, 0]),
        )


def test_objective_function_failure_unknown_objfun():
    """Objective function must raise ValueError for an unknown metric name."""
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        get_objective_function(
            np.array([100, 100, 100]),
            np.array([100, 110, 120]),
            obj_func="fake",
        )


def test_objective_function_failure_mask_contents():
    """Objective function must raise ValueError when the mask holds values other than 0/1."""
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        get_objective_function(
            np.array([100, 100, 100]),
            np.array([100, 110, 120]),
            obj_func="mae",
            mask=np.array([0, 0.5, 1]),
        )


def test_maximizer_objfun_failure_modes_bias():
    """Maximize-minimize resolution must reject "bias".

    "bias" is unbounded, so it cannot serve as a calibration target and must
    raise ValueError.
    """
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        _get_objfun_minimize_or_maximize(obj_func="bias")


def test_maximizer_objfun_failure_modes_pbias():
    """Maximize-minimize resolution must reject "pbias".

    "pbias" is unbounded, so it cannot serve as a calibration target and must
    raise ValueError.
    """
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        _get_objfun_minimize_or_maximize(obj_func="pbias")


def test_maximizer_objfun_failure_modes_volume_error():
    """Maximize-minimize resolution must reject "volume_error".

    "volume_error" is unbounded, so it cannot serve as a calibration target
    and must raise ValueError.
    """
    # pytest.raises fails the test by itself if ValueError is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(ValueError):
        _get_objfun_minimize_or_maximize(obj_func="volume_error")


def test_maximizer_objfun_failure_modes_unknown_metric():
    """Maximize-minimize resolution must raise NotImplementedError for an unknown metric."""
    # pytest.raises fails the test by itself if the exception is not raised;
    # the original's captured-exception type assert was redundant.
    with pytest.raises(NotImplementedError):
        _get_objfun_minimize_or_maximize(obj_func="unknown_of")
Loading

0 comments on commit edda069

Please sign in to comment.