From 46ae5a005f0d3c8443d3b14c1cd92cde5ae81f45 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:14:09 +0100 Subject: [PATCH] Add data table schema w/ tests --- src/calliope/schemas/config_schema.py | 25 +------ src/calliope/schemas/data_table_schema.py | 84 +++++++++++++++++++++++ src/calliope/util/schema.py | 26 ++++++- tests/test_data_table_schema.py | 77 +++++++++++++++++++++ 4 files changed, 189 insertions(+), 23 deletions(-) create mode 100644 src/calliope/schemas/data_table_schema.py create mode 100644 tests/test_data_table_schema.py diff --git a/src/calliope/schemas/config_schema.py b/src/calliope/schemas/config_schema.py index 4e55beeb..e9113ef6 100644 --- a/src/calliope/schemas/config_schema.py +++ b/src/calliope/schemas/config_schema.py @@ -2,39 +2,20 @@ # Licensed under the Apache 2.0 License (see LICENSE file). """Implements the Calliope configuration class.""" -from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar +from typing import Literal, Self import jsonref -from pydantic import AfterValidator, BaseModel, Field, model_validator -from pydantic_core import PydanticCustomError +from pydantic import BaseModel, Field, model_validator from calliope.attrdict import AttrDict from calliope.util import tools +from calliope.util.schema import UniqueList MODES_T = Literal["plan", "operate", "spores"] CONFIG_T = Literal["init", "build", "solve"] -# == -# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 -T = TypeVar("T", bound=Hashable) - - -def _validate_unique_list(v: list[T]) -> list[T]: - if len(v) != len(set(v)): - raise PydanticCustomError("unique_list", "List must be unique") - return v - - -UniqueList = Annotated[ - list[T], - AfterValidator(_validate_unique_list), - Field(json_schema_extra={"uniqueItems": True}), -] -# == - def hide_from_schema(to_hide: 
list[str]): """Hide fields from the generated schema. diff --git a/src/calliope/schemas/data_table_schema.py b/src/calliope/schemas/data_table_schema.py new file mode 100644 index 00000000..c693150f --- /dev/null +++ b/src/calliope/schemas/data_table_schema.py @@ -0,0 +1,84 @@ +"""Implements the data table configuration class.""" + +from typing import Self + +from pydantic import BaseModel, model_validator + +from calliope.util.schema import AttrStr, UniqueList +from calliope.util.tools import listify + +# Get rid of pyright false positives (see https://github.com/microsoft/pylance-release/issues/5457) +# pyright: reportInvalidTypeForm=false + + +class DataTable(BaseModel): + """Data table validation model.""" + + data: str + """ + Absolute or relative filepath. + Relative paths are based on the model config file used to initialise the model. + """ + rows: None | AttrStr | UniqueList[AttrStr] = None + """ + Names of dimensions defined row-wise. + Each name should correspond to a column in your data that contains index items. + These columns must be to the left of the columns containing your data. + """ + columns: None | AttrStr | UniqueList[AttrStr] = None + """ + Names of dimensions defined column-wise. + Each name should correspond to a row in your data that contains index items. + These rows must be above the rows containing your data. + """ + select: None | dict[AttrStr, AttrStr | UniqueList[AttrStr]] = None + """ + Select one or more index items from a dimension. + Selection takes place before `drop` and `add_dims`, so you can select a single + value from a data dimension and then drop the dimension so it doesn't find its way + through to the final dataset. + """ + drop: None | AttrStr | UniqueList[AttrStr] = None + """ + Enables removing rows and/or columns that contain irrelevant data/metadata. + These could include comments on the source of the data, the data license, or the parameter units. 
+ You can also drop a dimension and then reintroduce it in `add_dims`, but with different index items. + """ + add_dims: None | dict[AttrStr, AttrStr] = None + """ + Data dimensions to add after loading in the array. + These allow you to use the same file to assign values to different parameters/dimension index items + (e.g., setting `flow_cap_min` and `flow_cap_max` to the same value), + or to add a dimension which would otherwise be a column containing the same information in each row + (e.g., assigning the cost class to monetary for a file containing cost data). + """ + rename_dims: None | dict[AttrStr, AttrStr] = None + """ + Mapping between dimension names in the data table being loaded to equivalent Calliope dimension names. + For instance, the "time" column in the data table would need to be mapped to "timesteps": `{"time": "timesteps"}`. + """ + template: None | AttrStr = None + """ + Reference to a template from which to inherit common configuration options. + """ + + @model_validator(mode="after") + def check_row_and_columns(self) -> Self: + """Ensure users specify a valid data table shape.""" + rows = set(listify(self.rows)) + columns = set(listify(self.columns)) + if not rows and not columns: + raise ValueError("Either row or columns must be defined for data_table.") + elif rows & columns: + raise ValueError("Rows and columns must not overlap.") + + if self.add_dims: + if self.add_dims.keys() & (rows | columns): + raise ValueError("Added dimensions must not be in columns or rows.") + + if self.rename_dims: + if set(self.rename_dims.values()) - (rows | columns): + raise ValueError( + "Renamed dimensions must be in either rows or columns." 
+ ) + return self diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index 361cd9a9..f207c35a 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -5,10 +5,13 @@ import importlib import re import sys +from collections.abc import Hashable from copy import deepcopy -from typing import Literal +from typing import Annotated, Literal, TypeVar import jsonschema +from pydantic import AfterValidator, Field, constr +from pydantic_core import PydanticCustomError from calliope.attrdict import AttrDict from calliope.exceptions import print_warnings_and_raise_errors @@ -19,6 +22,27 @@ DATA_TABLE_SCHEMA = load_config("data_table_schema.yaml") MATH_SCHEMA = load_config("math_schema.yaml") +# Regular string pattern for most calliope attributes +FIELD_REGEX = r"^[^_^\d][\w]*$" +AttrStr = constr(pattern=FIELD_REGEX) +# == +# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 +T = TypeVar("T", bound=Hashable) + + +def _validate_unique_list(v: list[T]) -> list[T]: + if len(v) != len(set(v)): + raise PydanticCustomError("unique_list", "List must be unique") + return v + + +UniqueList = Annotated[ + list[T], + AfterValidator(_validate_unique_list), + Field(json_schema_extra={"uniqueItems": True}), +] +# == + def reset(): """Reset all module-level schema to the pre-defined dictionaries.""" diff --git a/tests/test_data_table_schema.py b/tests/test_data_table_schema.py new file mode 100644 index 00000000..4e7b5c81 --- /dev/null +++ b/tests/test_data_table_schema.py @@ -0,0 +1,77 @@ +"""Test data table schema validation.""" + +import pytest +from pydantic import ValidationError + +from calliope.attrdict import AttrDict +from calliope.schemas.data_table_schema import DataTable + +from .common.util import check_error_or_warning + +FULL_TABLE_CONFIG = """ +data: time_varying_df +rows: timesteps +columns: [comment, nodes, techs] +select: + nodes: [node1, node2] + techs: pv +drop: comment +add_dims: + parameters: 
something + costs: monetary +rename_dims: + location: nodes +template: some_template +""" + + +@pytest.mark.parametrize( + "data_table", + [{"rows": "timesteps"}, {"rows": "timesteps", "columns": ["techs", "nodes"]}], +) +def test_path_not_provided(data_table): + """Not providing the path should result in a failure.""" + with pytest.raises(ValidationError): + DataTable(**data_table) + + +@pytest.mark.parametrize("data_table", [{"data": "foo"}]) +def test_incomplete_column_or_row(data_table): + """Not providing either rows or columns is invalid.""" + with pytest.raises(ValidationError) as excinfo: + DataTable(**data_table) + assert check_error_or_warning( + excinfo, "Either row or columns must be defined for data_table." + ) + + +@pytest.mark.parametrize( + ("rows", "columns"), + [ + ("nodes", "nodes"), + (["nodes", "techs"], "techs"), + (["nodes", "techs", "params"], ["params", "costs"]), + ], +) +def test_row_column_overlap(rows, columns): + """Rows and columns must not share any values.""" + with pytest.raises(ValidationError) as excinfo: + DataTable(data="foobar", rows=rows, columns=columns) + assert check_error_or_warning(excinfo, "Rows and columns must not overlap.") + + +@pytest.mark.parametrize( + ("rows", "columns", "add_dims"), [("nodes", None, {"nodes": "MEX"})] +) +def test_add_dims_overlap(rows, columns, add_dims): + with pytest.raises(ValidationError) as excinfo: + DataTable(data="foo", rows=rows, columns=columns, add_dims=add_dims) + assert check_error_or_warning( + excinfo, "Added dimensions must not be in columns or rows." + ) + + +@pytest.mark.parametrize("data_table", [FULL_TABLE_CONFIG]) +def test_full_table_config(data_table): + """Test a fully fledged data table configuration.""" + DataTable(**AttrDict.from_yaml_string(data_table))