From b5cdf123625308bea6fe6bbe5c209423af789281 Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:37:56 +0000 Subject: [PATCH] Add schema updater (#568) Co-authored-by: Stefan Pfenninger --- CHANGELOG.md | 3 + docs/reference/api/schema.md | 1 + docs/user_defined_math/customise.md | 52 +++++++++++++++++ mkdocs.yml | 1 + src/calliope/util/schema.py | 66 +++++++++++++++++++-- tests/test_core_util.py | 90 +++++++++++++++++++++++++---- 6 files changed, 199 insertions(+), 14 deletions(-) create mode 100644 docs/reference/api/schema.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f17b7c6..1b6a18ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ Requires the [PyMdown tabbed extension](https://facelessuser.github.io/pymdown-e |new| Variables and global expressions can have a `default` value, which is used to fill missing array elements when doing math operations. These default values ensure that `NaN` doesn't creep into the built optimisation problem math and are set to values that lead to them having no impact on the optimal solution. +|new| Utility function `calliope.util.schema.update_model_schema(...)` to add user-defined parameters to the model schema / update existing parameters using YAML schema syntax. +`calliope.util.schema.reset()` can be used to clean the model schema and return to the original, pre-defined schema. + |fixed| Timeseries clustering file can be a non-ISO standard date format. Both the index and the values of the timeseries (both being date strings) should be in the user-defined `config.init.time_format`. 
diff --git a/docs/reference/api/schema.md b/docs/reference/api/schema.md new file mode 100644 index 00000000..a3f46cfc --- /dev/null +++ b/docs/reference/api/schema.md @@ -0,0 +1 @@ +::: calliope.util.schema \ No newline at end of file diff --git a/docs/user_defined_math/customise.md b/docs/user_defined_math/customise.md index 8b6f7bdb..a95e2ade 100644 --- a/docs/user_defined_math/customise.md +++ b/docs/user_defined_math/customise.md @@ -32,6 +32,58 @@ config: add_math: [my_new_math_1.yaml, storage_inter_cluster, my_new_math_2.md] ``` +## Adding your parameters to the YAML schema + +Our YAML schemas are used to validate user inputs. +The model definition schema includes metadata on all our pre-defined parameters, which you can find rendered in our [reference page][model-definition-schema]. + +When you add your own math you are likely to be adding new parameters to the model. +You can update the Calliope model definition schema to include your new entries using [`calliope.util.schema.update_model_schema(...)`][calliope.util.schema.update_model_schema]. +This ensures that your parameters have default values attached to them and if you choose to [write your own documentation](#writing-your-own-math-documentation), your parameters will have this metadata added to their descriptions. + +Entries in the schema look like this: + +```yaml +flow_cap_max: + $ref: "#/$defs/TechParamNullNumber" # (1)! + default: .inf + x-type: float + title: Maximum rated flow capacity. + description: >- + Limits `flow_cap` to a maximum. + x-unit: power. +``` + +1. This is a cross-reference to a much longer schema entry that says the parameter type is either `None`, a simple number, or an indexed parameter dictionary with the `data`, `index`, and `dims` keys. 
+ +When you add your own parameters to the schema, you will need to know the top-level key under which the parameter will be found in your YAML definition: [`nodes`](../creating/nodes.md), [`techs`](../creating/techs.md), or [`parameters`](../creating/parameters.md). +As a general rule, if it includes the `techs` dimension, put it under `techs`; if it includes `nodes` but _not_ `techs` then put it under `nodes`; if it includes neither dimension, put it under `parameters`. + +The dictionary you supply for each parameter can include the following: + +* title (str): Short description of the parameter. +* description (str): Long description of the parameter. +* type (str or array): expected type of entry. +We recommend you use the pre-defined cross-reference `$ref: "#/$defs/TechParamNullNumber"` instead of explicitly using this key, to allow the parameter to be either numeric or an indexed parameter. +If you are adding a cost, you can use the cross reference `$ref: "#/$defs/TechCostNullNumber"`. +If you want to allow non-numeric data (e.g., strings), you would set `type: string` instead of using the cross-reference. +* default (str): default value. +This will be used in generating the optimisation problem. +* x-type (str): type of the non-NaN array entries in the internal calliope representation of the parameter. +This is usually one of `float` or `str`. +* x-unit (str): Unit of the parameter to use in documentation. +* x-operate-param (bool): If True, this parameter's schema data will only be loaded into the optimisation problem if running in "operate" mode. + +!!! note + + Schema attributes which start with `x-` are Calliope-specific. + They are not used at all for YAML validation and instead get picked up by us using the utility function [calliope.util.schema.extract_from_schema][]. + +!!! warning + + The schema is updated in-place so your edits to it will remain active as long as you are running in the same session. 
+ You can reset your updates to the schema and return to the pre-defined schema by calling [`calliope.util.schema.reset()`][calliope.util.schema.reset]. + ## Writing your own math documentation You can write your model's mathematical formulation to view it in a rich-text format (as we do for our [pre-defined math](../pre_defined_math/index.md) in this documentation). diff --git a/mkdocs.yml b/mkdocs.yml index ba4c2cc7..1744e20f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -146,6 +146,7 @@ nav: - reference/api/backend_model.md - reference/api/helper_functions.md - reference/api/example_models.md + - reference/api/schema.md - reference/api/attrdict.md - reference/api/exceptions.md - reference/api/logging.md diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index c4c1b14d..9770aa8e 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -1,4 +1,13 @@ +# Copyright (C) since 2013 Calliope contributors listed in AUTHORS. +# Licensed under the Apache 2.0 License (see LICENSE file). + +""" +Load, update, and access attributes in the Calliope pre-defined YAML schemas +""" + +import importlib import re +import sys from copy import deepcopy from typing import Literal, Optional @@ -14,6 +23,11 @@ MATH_SCHEMA = load_config("math_schema.yaml") +def reset(): + """Reset all module-level schema to the pre-defined dictionaries.""" + importlib.reload(sys.modules[__name__]) + + def update_then_validate_config( config_key: str, config_dict: AttrDict, **update_kwargs ) -> AttrDict: @@ -27,6 +41,46 @@ def update_then_validate_config( return to_validate +def update_model_schema( + top_level_property: Literal["nodes", "techs", "parameters"], + new_entries: dict, + allow_override: bool = True, +): + """Update existing entries in the model schema or add a new parameter to the model schema. + + Available attributes: + + * title (str): Short description of the parameter. + * description (str): Long description of the parameter. 
+ * type (str): expected type of entry. Pre-defined entries tend to use `$ref: "#/$defs/TechParamNullNumber"` instead, to allow type to be either numeric or an indexed parameter. + * default (str): default value. This will be used in generating the optimisation problem. + * x-type (str): type of the non-NaN array entries in the internal calliope representation of the parameter. + * x-unit (str): Unit of the parameter to use in documentation. + * x-operate-param (bool): If True, this parameter's schema data will only be loaded into the optimisation problem if running in "operate" mode. + + Args: + top_level_property (Literal["nodes", "techs", "parameters"]): Top-level key under which parameters are to be updated/added. + new_entries (dict): Data to update the schema with. + allow_override (bool, optional): If True, allow existing entries in the schema to be overwritten. Defaults to True. + """ + new_schema = deepcopy(MODEL_SCHEMA) + to_update: AttrDict + if top_level_property == "parameters": + to_update = new_schema["properties"][top_level_property]["properties"] + else: + to_update = new_schema["properties"][top_level_property]["patternProperties"][ + "^[^_^\\d][\\w]*$" + ]["properties"] + + to_update.union(AttrDict(new_entries), allow_override=allow_override) + + validator = jsonschema.Draft202012Validator + validator.META_SCHEMA["unevaluatedProperties"] = False + validator.check_schema(new_schema) + + MODEL_SCHEMA.union(new_schema, allow_override=True) + + def validate_dict(to_validate: dict, schema: dict, dict_descriptor: str) -> None: """ Validate a dictionary under a given schema. @@ -37,8 +91,11 @@ def validate_dict(to_validate: dict, schema: dict, dict_descriptor: str) -> None dict_descriptor (str): Description of the dictionary to validate, to use if an error is raised. Raises: - jsonschema.SchemaError: If the schema itself is malformed, a SchemaError will be raised at the first issue. Other issues than that raised may still exist. 
- calliope.exceptions.ModelError: If the dictionary is not valid according to the schema, a list of the issues found will be collated and raised. + jsonschema.SchemaError: + If the schema itself is malformed, a SchemaError will be raised at the first issue. + Other issues than that raised may still exist. + calliope.exceptions.ModelError: + If the dictionary is not valid according to the schema, a list of the issues found will be collated and raised. """ errors = [] validator = jsonschema.Draft202012Validator @@ -95,8 +152,9 @@ def extract_from_schema( Defaults to None, i.e., all property branches are included. Returns: - dict: Flat dictionary of property name : keyword value. - Property trees are discarded since property names must be unique. + dict: + Flat dictionary of property name : keyword value. + Property trees are discarded since property names must be unique. """ extracted_keywords: dict = {} KeywordValidatingValidator = _extend_with_keyword( diff --git a/tests/test_core_util.py b/tests/test_core_util.py index 5cd8fdba..98f012bb 100644 --- a/tests/test_core_util.py +++ b/tests/test_core_util.py @@ -9,9 +9,9 @@ import numpy as np import pandas as pd import pytest +from calliope.util import schema from calliope.util.generate_runs import generate_runs from calliope.util.logging import log_time -from calliope.util.schema import extract_from_schema, validate_dict from .common.util import check_error_or_warning @@ -122,7 +122,7 @@ class TestValidateDict: reason="Checking the schema itself doesn't seem to be working properly; no clear idea of _why_ yet..." 
) @pytest.mark.parametrize( - ["schema", "expected_path"], + ["schema_dict", "expected_path"], [ ({"foo": 2}, ""), ({"properties": {"bar": {"foo": "string"}}}, " at `properties.bar`"), @@ -135,10 +135,10 @@ class TestValidateDict: ), ], ) - def test_malformed_schema(self, schema, expected_path): + def test_malformed_schema(self, schema_dict, expected_path): to_validate = {"bar": [1, 2, 3]} with pytest.raises(jsonschema.SchemaError) as err: - validate_dict(to_validate, schema, "foobar") + schema.validate_dict(to_validate, schema_dict, "foobar") assert check_error_or_warning( err, f"The foobar schema is malformed{expected_path}: Unevaluated properties are not allowed ('foo' was unexpected)", @@ -152,7 +152,7 @@ def test_malformed_schema(self, schema, expected_path): ], ) def test_invalid_dict(self, to_validate, expected_path): - schema = { + schema_dict = { "properties": { "valid": { "type": "object", @@ -163,7 +163,7 @@ def test_invalid_dict(self, to_validate, expected_path): "additionalProperties": False, } with pytest.raises(calliope.exceptions.ModelError) as err: - validate_dict(to_validate, schema, "foobar") + schema.validate_dict(to_validate, schema_dict, "foobar") assert check_error_or_warning( err, [ @@ -188,7 +188,7 @@ def test_validate_math(self, base_math, dict_path): to_validate = base_math.union( calliope.AttrDict.from_yaml(dict_path), allow_override=True ) - validate_dict(to_validate, math_schema, "") + schema.validate_dict(to_validate, math_schema, "") class TestExtractFromSchema: @@ -339,7 +339,7 @@ def test_extract_config_defaults( self, sample_config_schema, expected_config_defaults ): extracted_defaults = pd.Series( - extract_from_schema(sample_config_schema, "default") + schema.extract_from_schema(sample_config_schema, "default") ) pd.testing.assert_series_equal( extracted_defaults.sort_index(), expected_config_defaults @@ -349,7 +349,7 @@ def test_extract_model_def_defaults( self, sample_model_def_schema, expected_model_def_defaults ): 
extracted_defaults = pd.Series( - extract_from_schema(sample_model_def_schema, "default") + schema.extract_from_schema(sample_model_def_schema, "default") ) pd.testing.assert_series_equal( extracted_defaults.sort_index(), expected_model_def_defaults @@ -379,10 +379,80 @@ def test_extract_defaults_subset( prop_keys, ): extracted_defaults = pd.Series( - extract_from_schema(sample_model_def_schema, "default", schema_key) + schema.extract_from_schema(sample_model_def_schema, "default", schema_key) ) pd.testing.assert_series_equal( expected_model_def_defaults.loc[prop_keys].sort_index(), extracted_defaults.sort_index(), check_dtype=False, ) + + +class TestUpdateSchema: + + @pytest.mark.parametrize("top_level", ["parameters", "nodes", "techs"]) + def test_add_new_schema(self, top_level): + schema.update_model_schema( + top_level, + { + f"{top_level}_foo": { + "type": "number", + "description": "bar", + "default": 1, + } + }, + allow_override=False, + ) + + extracted_defaults = schema.extract_from_schema(schema.MODEL_SCHEMA, "default") + assert extracted_defaults[f"{top_level}_foo"] == 1 + extracted_descriptions = schema.extract_from_schema( + schema.MODEL_SCHEMA, "description" + ) + assert extracted_descriptions[f"{top_level}_foo"] == "bar" + + schema.reset() + + @pytest.mark.parametrize("top_level", ["parameters", "nodes", "techs"]) + def test_update_schema(self, top_level): + schema.update_model_schema( + top_level, {f"{top_level}_foo": {"default": 1}}, allow_override=False + ) + + extracted_defaults = schema.extract_from_schema(schema.MODEL_SCHEMA, "default") + assert extracted_defaults[f"{top_level}_foo"] == 1 + + schema.update_model_schema( + top_level, {f"{top_level}_foo": {"default": 2}}, allow_override=True + ) + + extracted_defaults = pd.Series( + schema.extract_from_schema(schema.MODEL_SCHEMA, "default") + ) + assert extracted_defaults[f"{top_level}_foo"] == 2 + + schema.reset() + + @pytest.mark.parametrize("top_level", ["parameters", "nodes", "techs"]) + def 
test_update_schema_malformed(self, top_level): + with pytest.raises(jsonschema.SchemaError): + schema.update_model_schema( + top_level, + {f"{top_level}_foo": {"type": "i_am_not_a_type"}}, + allow_override=True, + ) + schema.reset() + + def test_reset_schema(self): + schema.update_model_schema( + "techs", + {"foo": {"type": "number", "description": "bar", "default": 1}}, + allow_override=False, + ) + schema.reset() + assert ( + "foo" + not in schema.MODEL_SCHEMA["properties"]["techs"]["patternProperties"][ + "^[^_^\\d][\\w]*$" + ]["properties"] + )