Skip to content

Commit

Permalink
Add data table schema w/ tests
Browse files Browse the repository at this point in the history
  • Loading branch information
irm-codebase committed Nov 19, 2024
1 parent 56e1126 commit 46ae5a0
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 23 deletions.
25 changes: 3 additions & 22 deletions src/calliope/schemas/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,20 @@
# Licensed under the Apache 2.0 License (see LICENSE file).
"""Implements the Calliope configuration class."""

from collections.abc import Hashable
from datetime import datetime
from pathlib import Path
from typing import Annotated, Literal, Self, TypeVar
from typing import Literal, Self

import jsonref
from pydantic import AfterValidator, BaseModel, Field, model_validator
from pydantic_core import PydanticCustomError
from pydantic import BaseModel, Field, model_validator

from calliope.attrdict import AttrDict
from calliope.util import tools
from calliope.util.schema import UniqueList

# Closed set of accepted mode names (presumably the model run modes — confirm against usage).
MODES_T = Literal["plan", "operate", "spores"]
# Closed set of accepted configuration names (presumably the top-level config sections — confirm against usage).
CONFIG_T = Literal["init", "build", "solve"]

# ==
# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909
# Item type of a unique list; bound to Hashable because uniqueness is checked by building a set.
T = TypeVar("T", bound=Hashable)


def _validate_unique_list(v: list[T]) -> list[T]:
    """Return ``v`` unchanged, provided none of its items is repeated.

    Raises:
        PydanticCustomError: with error type ``unique_list`` when at least one
            item occurs more than once.
    """
    # A set collapses duplicates, so it is shorter than the list iff items repeat.
    has_duplicates = len(set(v)) < len(v)
    if has_duplicates:
        raise PydanticCustomError("unique_list", "List must be unique")
    return v


# List type whose items must all be distinct: `_validate_unique_list` is attached
# as an after-validator, and the generated JSON schema is tagged `uniqueItems: true`.
UniqueList = Annotated[
    list[T],
    AfterValidator(_validate_unique_list),
    Field(json_schema_extra={"uniqueItems": True}),
]
# ==


def hide_from_schema(to_hide: list[str]):
"""Hide fields from the generated schema.
Expand Down
84 changes: 84 additions & 0 deletions src/calliope/schemas/data_table_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Implements the data table configuration class."""

from typing import Self

from pydantic import BaseModel, model_validator

from calliope.util.schema import AttrStr, UniqueList
from calliope.util.tools import listify

# Get rid of pyright false positives (see https://github.com/microsoft/pylance-release/issues/5457)
# pyright: reportInvalidTypeForm=false


class DataTable(BaseModel):
    """Pydantic model describing (and validating) a single data table definition."""

    data: str
    """
    Absolute or relative filepath.
    Relative paths are based on the model config file used to initialise the model.
    """
    rows: None | AttrStr | UniqueList[AttrStr] = None
    """
    Names of dimensions defined row-wise.
    Each name should correspond to a column in your data that contains index items.
    These columns must be to the left of the columns containing your data.
    """
    columns: None | AttrStr | UniqueList[AttrStr] = None
    """
    Names of dimensions defined column-wise.
    Each name should correspond to a row in your data that contains index items.
    These rows must be above the rows containing your data.
    """
    select: None | dict[AttrStr, AttrStr | UniqueList[AttrStr]] = None
    """
    Select one or more index item from a dimension.
    Selection takes place before `drop` and `add_dims`, so you can select a single
    value from a data dimension and then drop the dimension so it doesn't find its way
    through to the final dataset.
    """
    drop: None | AttrStr | UniqueList[AttrStr] = None
    """
    Enables removing rows and/or columns that contain irrelevant data/metadata.
    These could include comments on the source of the data, the data license, or the parameter units.
    You can also drop a dimension and then reintroduce it in `add_dims`, but with different index items.
    """
    add_dims: None | dict[AttrStr, AttrStr] = None
    """
    Data dimensions to add after loading in the array.
    These allow you to use the same file to assign values to different parameters/dimension index items
    (e.g., setting `flow_cap_min` and `flow_cap_max` to the same value),
    or to add a dimension which would otherwise be a column containing the same information in each row
    (e.g., assigning the cost class to monetary for a file containing cost data).
    """
    rename_dims: None | dict[AttrStr, AttrStr] = None
    """
    Mapping between dimension names in the data table being loaded to equivalent Calliope dimension names.
    For instance, the "time" column in the data table would need to be mapped to "timesteps": `{"time": "timesteps"}`.
    """
    template: None | AttrStr = None
    """
    Reference to a template from which to inherit common configuration options.
    """

    @model_validator(mode="after")
    def check_row_and_columns(self) -> Self:
        """Check that the row/column layout and the dimension options are coherent.

        Runs after field validation and returns the instance unchanged on success.

        Raises:
            ValueError: if neither `rows` nor `columns` is set, if a name appears
                in both, if a key of `add_dims` is already a row/column name, or
                if a value of `rename_dims` is not a row/column name.
        """
        row_dims = set(listify(self.rows))
        column_dims = set(listify(self.columns))

        if not (row_dims or column_dims):
            raise ValueError("Either row or columns must be defined for data_table.")
        if not row_dims.isdisjoint(column_dims):
            raise ValueError("Rows and columns must not overlap.")

        # Every dimension name the table itself declares.
        table_dims = row_dims.union(column_dims)

        # Iterating the dict yields its keys, i.e. the names of the added dimensions.
        if self.add_dims and not table_dims.isdisjoint(self.add_dims):
            raise ValueError("Added dimensions must not be in columns or rows.")

        # The values are the post-rename names; each must be declared in the table.
        if self.rename_dims and not table_dims.issuperset(self.rename_dims.values()):
            raise ValueError("Renamed dimensions must be in either rows or columns.")

        return self
26 changes: 25 additions & 1 deletion src/calliope/util/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
import importlib
import re
import sys
from collections.abc import Hashable
from copy import deepcopy
from typing import Literal
from typing import Annotated, Literal, TypeVar

import jsonschema
from pydantic import AfterValidator, Field, constr
from pydantic_core import PydanticCustomError

from calliope.attrdict import AttrDict
from calliope.exceptions import print_warnings_and_raise_errors
Expand All @@ -19,6 +22,27 @@
DATA_TABLE_SCHEMA = load_config("data_table_schema.yaml")
MATH_SCHEMA = load_config("math_schema.yaml")

# Regular string pattern for most calliope attributes.
# The first character may not be `_`, `^` or a digit; every following character
# must be a word character (`\w`).
# NOTE(review): the leading class is negated, so any other first character
# (including non-word ones such as `-`) is accepted — confirm this is intended.
FIELD_REGEX = r"^[^_^\d][\w]*$"
# String type constrained to match FIELD_REGEX.
AttrStr = constr(pattern=FIELD_REGEX)
# ==
# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909
# Item type of a unique list; bound to Hashable because uniqueness is checked by building a set.
T = TypeVar("T", bound=Hashable)


def _validate_unique_list(v: list[T]) -> list[T]:
    """Return ``v`` unchanged, provided none of its items is repeated.

    Raises:
        PydanticCustomError: with error type ``unique_list`` when at least one
            item occurs more than once.
    """
    # A set collapses duplicates, so it is shorter than the list iff items repeat.
    has_duplicates = len(set(v)) < len(v)
    if has_duplicates:
        raise PydanticCustomError("unique_list", "List must be unique")
    return v


# List type whose items must all be distinct: `_validate_unique_list` is attached
# as an after-validator, and the generated JSON schema is tagged `uniqueItems: true`.
UniqueList = Annotated[
    list[T],
    AfterValidator(_validate_unique_list),
    Field(json_schema_extra={"uniqueItems": True}),
]
# ==


def reset():
"""Reset all module-level schema to the pre-defined dictionaries."""
Expand Down
77 changes: 77 additions & 0 deletions tests/test_data_table_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Test data table schema validation."""

import pytest
from pydantic import ValidationError

from calliope.attrdict import AttrDict
from calliope.schemas.data_table_schema import DataTable

from .common.util import check_error_or_warning

# YAML definition that sets every field of DataTable at once.
# The entries under `select`, `add_dims` and `rename_dims` are indented so that
# they parse as nested mappings; without the indentation those three keys are
# empty and their children become unrelated top-level keys.
FULL_TABLE_CONFIG = """
data: time_varying_df
rows: timesteps
columns: [comment, nodes, techs]
select:
  nodes: [node1, node2]
  techs: pv
drop: comment
add_dims:
  parameters: something
  costs: monetary
rename_dims:
  location: nodes
template: some_template
"""


@pytest.mark.parametrize(
    "data_table",
    [
        {"rows": "timesteps"},
        {"rows": "timesteps", "columns": ["techs", "nodes"]},
    ],
)
def test_path_not_provided(data_table):
    """A definition lacking the mandatory `data` entry must be rejected."""
    with pytest.raises(ValidationError):
        DataTable(**data_table)


@pytest.mark.parametrize("data_table", [{"data": "foo"}])
def test_incomplete_column_or_row(data_table):
    """A definition with neither rows nor columns must be rejected."""
    expected_message = "Either row or columns must be defined for data_table."
    with pytest.raises(ValidationError) as raised:
        DataTable(**data_table)
    assert check_error_or_warning(raised, expected_message)


@pytest.mark.parametrize(
    ("rows", "columns"),
    [
        ("nodes", "nodes"),
        (["nodes", "techs"], "techs"),
        (["nodes", "techs", "params"], ["params", "costs"]),
    ],
)
def test_row_column_overlap(rows, columns):
    """A dimension name listed in both rows and columns must be rejected."""
    expected_message = "Rows and columns must not overlap."
    with pytest.raises(ValidationError) as raised:
        DataTable(data="foobar", rows=rows, columns=columns)
    assert check_error_or_warning(raised, expected_message)


@pytest.mark.parametrize(
    ("rows", "columns", "add_dims"),
    [("nodes", None, {"nodes": "MEX"})],
)
def test_add_dims_overlap(rows, columns, add_dims):
    """Adding a dimension that is already a row or column must be rejected."""
    expected_message = "Added dimensions must not be in columns or rows."
    with pytest.raises(ValidationError) as raised:
        DataTable(data="foo", rows=rows, columns=columns, add_dims=add_dims)
    assert check_error_or_warning(raised, expected_message)


@pytest.mark.parametrize("data_table", [FULL_TABLE_CONFIG])
def test_full_table_config(data_table):
    """A definition that sets every field should validate without raising."""
    parsed_config = AttrDict.from_yaml_string(data_table)
    DataTable(**parsed_config)

0 comments on commit 46ae5a0

Please sign in to comment.