Merge pull request #52 from dafyddstephenson/better_input_dataset_hash_handling

Better input dataset hash handling
dafyddstephenson authored Aug 29, 2024
2 parents 56ec0a2 + 251578d commit 1693c8c
Showing 2 changed files with 26 additions and 19 deletions.
13 changes: 10 additions & 3 deletions cstar/base/input_dataset.py
@@ -98,7 +98,7 @@ class InputDataset(ABC):
         The base model with which this input dataset is associated
     source: DataSource
         Describes the location and type of the source data
-    file_hash: str
+    file_hash: str, default None
         The 256 bit SHA sum associated with the file for verifying downloads
     exists_locally: bool, default None
         True if the input dataset exists on the local machine, set by `check_exists_locally()` method if source is a URL
@@ -117,7 +117,7 @@ def __init__(
         self,
         base_model: "BaseModel",
         source: DataSource,
-        file_hash: str,
+        file_hash: Optional[str] = None,
         start_date: Optional[str | dt.datetime] = None,
         end_date: Optional[str | dt.datetime] = None,
     ):
@@ -138,7 +138,14 @@ def __init__(
         self.base_model: "BaseModel" = base_model

         self.source: DataSource = source
-        self.file_hash: str = file_hash
+        self.file_hash: Optional[str] = file_hash
+
+        if (self.file_hash is None) and (self.source.location_type == "url"):
+            raise ValueError(
+                f"Cannot create InputDataset for \n {self.source.location}:\n "
+                + "InputDataset.file_hash cannot be None if InputDataset.source.location_type is 'url'.\n"
+                + "A file hash is required to verify files downloaded from remote sources."
+            )

         self.exists_locally: Optional[bool] = None
         self.local_path: Optional[str] = None
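
In practice, the new guard means a URL-backed dataset must be constructed with a hash, while a locally sourced one may omit it. The snippet below is a minimal illustrative sketch, not code from this commit: the import paths, the `roms_base_model` placeholder, and the assumption that `DataSource` infers `location_type` ("url" vs. a local path) from the location string are unverified here.

# Illustrative sketch only (not part of this commit). Assumes DataSource infers
# location_type from the location string and that ROMSModelGrid reuses
# InputDataset.__init__ unchanged; import paths and roms_base_model are placeholders.
from cstar.base.datasource import DataSource      # assumed import path
from cstar.roms import ROMSModelGrid              # assumed import path

remote_source = DataSource("https://example.org/roms_grid.nc")  # location_type expected to be "url"
try:
    ROMSModelGrid(base_model=roms_base_model, source=remote_source)  # file_hash defaults to None
except ValueError as err:
    print(err)  # the new guard rejects URL sources with no hash to verify downloads against

local_source = DataSource("/scratch/roms_grid.nc")  # a local path needs no download verification
grid = ROMSModelGrid(base_model=roms_base_model, source=local_source)  # file_hash=None is now allowed
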
32 changes: 16 additions & 16 deletions cstar/case.py
@@ -381,8 +381,8 @@ def from_blueprint(
             model_grid = [
                 ROMSModelGrid(
                     base_model=base_model,
-                    source=DataSource(f["source"]),
-                    file_hash=f["hash"],
+                    source=DataSource(f.get("source")),
+                    file_hash=f.get("hash", None),
                 )
                 for f in input_dataset_info["model_grid"]["files"]
             ]
@@ -397,10 +397,10 @@ def from_blueprint(
             initial_conditions = [
                 ROMSInitialConditions(
                     base_model=base_model,
-                    source=DataSource(f["source"]),
-                    file_hash=f["hash"],
-                    start_date=f["start_date"],
-                    end_date=f["end_date"],
+                    source=DataSource(f.get("source")),
+                    file_hash=f.get("hash", None),
+                    start_date=f.get("start_date", None),
+                    end_date=f.get("end_date", None),
                 )
                 for f in input_dataset_info["initial_conditions"]["files"]
             ]
@@ -417,8 +417,8 @@ def from_blueprint(
             tidal_forcing = [
                 ROMSTidalForcing(
                     base_model=base_model,
-                    source=DataSource(f["source"]),
-                    file_hash=f["hash"],
+                    source=DataSource(f.get("source")),
+                    file_hash=f.get("hash", None),
                 )
                 for f in input_dataset_info["tidal_forcing"]["files"]
             ]
@@ -435,10 +435,10 @@ def from_blueprint(
             boundary_forcing = [
                 ROMSBoundaryForcing(
                     base_model=base_model,
-                    source=DataSource(f["source"]),
-                    file_hash=f["hash"],
-                    start_date=f["start_date"],
-                    end_date=f["end_date"],
+                    source=DataSource(f.get("source")),
+                    file_hash=f.get("hash", None),
+                    start_date=f.get("start_date", None),
+                    end_date=f.get("end_date", None),
                 )
                 for f in input_dataset_info["boundary_forcing"]["files"]
             ]
@@ -455,10 +455,10 @@ def from_blueprint(
             surface_forcing = [
                 ROMSSurfaceForcing(
                     base_model=base_model,
-                    source=DataSource(f["source"]),
-                    file_hash=f["hash"],
-                    start_date=f["start_date"],
-                    end_date=f["end_date"],
+                    source=DataSource(f.get("source")),
+                    file_hash=f.get("hash", None),
+                    start_date=f.get("start_date", None),
+                    end_date=f.get("end_date", None),
                 )
                 for f in input_dataset_info["surface_forcing"]["files"]
             ]
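
The `case.py` side of the change is the same in every block: `f["source"]`, `f["hash"]`, `f["start_date"]`, and `f["end_date"]` become `dict.get` lookups, so a blueprint file entry may omit optional keys without `from_blueprint` raising `KeyError`; whether a missing hash is acceptable is now decided in `InputDataset.__init__`. A small sketch of that behaviour with a hypothetical blueprint entry (the dict contents below are made up for illustration):

# Hypothetical blueprint file entry with no "hash" or "end_date" key:
f = {"source": "/local/data/roms_ini.nc", "start_date": "2012-01-03 12:00:00"}

# Old behaviour: direct subscripting fails as soon as any key is missing,
# even for a local file that needs no hash.
try:
    f["hash"]
except KeyError:
    print("KeyError before the dataset object is even built")

# New behaviour: missing keys fall back to None, and the URL-requires-hash
# rule is enforced later, inside InputDataset.__init__.
file_hash = f.get("hash", None)      # -> None
end_date = f.get("end_date", None)   # -> None
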
