Skip to content

Commit

Permalink
more documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Mar 28, 2024
1 parent 5ff91f4 commit 163055a
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 35 deletions.
59 changes: 35 additions & 24 deletions anemoi/datasets/data/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,23 +207,34 @@ def _open(a, zarr_root):


def _auto_adjust(datasets, kwargs):
"""Adjust the datasets for concatenation or joining based on parameters set to
'matching'."""

if kwargs.get("adjust") == "matching":
kwargs.pop("adjust")
for p in ("select", "frequency", "start", "end"):
kwargs[p] = "matching"
if "adjust" not in kwargs:
return datasets, kwargs

if kwargs.get("dates") == "matching":
kwargs.pop("dates")
for p in ("frequency", "start", "end"):
kwargs[p] = "matching"
adjust_list = kwargs.pop("adjust")
if not isinstance(adjust_list, (tuple, list)):
adjust_list = [adjust_list]

adjust = [{} for _ in datasets]
ALIASES = {
"all": ["select", "frequency", "start", "end"],
"dates": ["start", "end", "frequency"],
"variables": ["select"],
}

adjust_set = set()

for a in adjust_list:
adjust_set.update(ALIASES.get(a, [a]))

extra = set(adjust_set) - set(ALIASES["all"])
if extra:
raise ValueError(f"Invalid adjust keys: {extra}")

subset_kwargs = [{} for _ in datasets]

if "select" in adjust_set:
assert "select" not in kwargs, "Cannot use 'select' in adjust and kwargs"

if kwargs.get("select") == "matching":
kwargs.pop("select")
variables = None

for d in datasets:
Expand All @@ -237,30 +248,30 @@ def _auto_adjust(datasets, kwargs):

for i, d in enumerate(datasets):
if set(d.variables) != variables:
adjust[i]["select"] = sorted(variables)
subset_kwargs[i]["select"] = sorted(variables)

if kwargs.get("start") == "matching":
kwargs.pop("start")
if "start" in adjust_set:
assert "start" not in kwargs, "Cannot use 'start' in adjust and kwargs"
start = max(d.dates[0] for d in datasets).astype(object)
for i, d in enumerate(datasets):
if start != d.dates[0]:
adjust[i]["start"] = start
subset_kwargs[i]["start"] = start

if kwargs.get("end") == "matching":
kwargs.pop("end")
if "end" in adjust_set:
assert "end" not in kwargs, "Cannot use 'end' in adjust and kwargs"
end = min(d.dates[-1] for d in datasets).astype(object)
for i, d in enumerate(datasets):
if end != d.dates[-1]:
adjust[i]["end"] = end
subset_kwargs[i]["end"] = end

if kwargs.get("frequency") == "matching":
kwargs.pop("frequency")
if "frequency" in adjust_set:
assert "frequency" not in kwargs, "Cannot use 'frequency' in adjust and kwargs"
frequency = max(d.frequency for d in datasets)
for i, d in enumerate(datasets):
if d.frequency != frequency:
adjust[i]["frequency"] = frequency
subset_kwargs[i]["frequency"] = frequency

datasets = [d._subset(**adjust[i]) for i, d in enumerate(datasets)]
datasets = [d._subset(**subset_kwargs[i]) for i, d in enumerate(datasets)]

return datasets, kwargs

Expand Down
7 changes: 3 additions & 4 deletions docs/using/matching.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ the attributes of the input datasets must match, such as the list of
variables for `concat` or the `dates` and `frequency` for `join`.

You can let the package automatically adjust the attributes of the input
datasets using the `matching` value:
datasets using the `adjust` keyword:

.. literalinclude:: matching1_.py
:language: python
Expand All @@ -21,13 +21,12 @@ to:
.. literalinclude:: matching2_.py
:language: python

This use the common set of variables, use :ref:`select` with the
`matching` value:
To use the common set of variables, use:

.. literalinclude:: matching3_.py
:language: python

To match all the attributes, use the `adjust` keyword:
To match all the attributes:

.. literalinclude:: matching4_.py
:language: python
6 changes: 2 additions & 4 deletions docs/using/matching1_.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
ds = open_dataset(
join=[dataset1, dataset2, ...],
begin="matching",
end="matching",
frequency="matching",
join=[dataset1, dataset2],
adjust=["start", "end", "frequency"],
)
2 changes: 1 addition & 1 deletion docs/using/matching2_.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ds = open_dataset(join=[dataset1, dataset2, ...], dates="matching")
ds = open_dataset(join=[dataset1, dataset2], adjust=["dates"])
2 changes: 1 addition & 1 deletion docs/using/matching3_.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ds = open_dataset(concat=[dataset1, dataset2, ...], select="matching")
ds = open_dataset(concat=[dataset1, dataset2], adjust="variables")
6 changes: 5 additions & 1 deletion docs/using/matching4_.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
ds = open_dataset(grids=[dataset1, dataset2], mode="cutout", adjust="matching")
ds = open_dataset(
grids=[dataset1, dataset2],
mode="cutout",
adjust="all",
)

0 comments on commit 163055a

Please sign in to comment.