Commit 08c7d2c
sveinugu committed Sep 2, 2024
1 parent dacc21d commit 08c7d2c
Showing 21 changed files with 491 additions and 267 deletions.
4 changes: 3 additions & 1 deletion src/omnipy/__init__.py
@@ -137,6 +137,7 @@
from omnipy.modules.tables.tasks import (remove_columns,
rename_col_names,
transpose_columns_with_data_files)
from omnipy.util.contexts import print_exception

# if typing.TYPE_CHECKING:

@@ -288,5 +289,6 @@
'union_all',
'remove_columns',
'rename_col_names',
'transpose_columns_with_data_files'
'transpose_columns_with_data_files',
'print_exception',
]
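
The diff above adds print_exception from omnipy.util.contexts to the top-level exports. Its implementation is not shown in this commit; below is a minimal sketch of the kind of context manager the name suggests, with the signature and output format as assumptions rather than confirmed omnipy API:

    from contextlib import contextmanager

    @contextmanager
    def print_exception():
        # Assumed behavior (not confirmed by this diff): catch any exception
        # raised in the block and print a one-line summary instead of re-raising
        try:
            yield
        except Exception as exc:
            print(f'{type(exc).__name__}: {exc}')

    with print_exception():
        1 / 0  # prints "ZeroDivisionError: division by zero"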
2 changes: 1 addition & 1 deletion src/omnipy/compute/mixins/serialize.py
@@ -199,7 +199,7 @@ def _generate_datetime_str(self):
def _all_job_output_file_paths_in_reverse_order_for_last_run(
persist_data_dir_path: Path, job_name: str) -> Generator[Path, None, None]:

sorted_date_dirs = iter(sorted(os.listdir(persist_data_dir_path)))
sorted_date_dirs = iter(reversed(sorted(os.listdir(persist_data_dir_path))))

try:
last_dir = next(sorted_date_dirs)
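The one-line change above wraps sorted() in reversed(), so the generator visits date directories newest-first and next() actually yields the directory of the last run. A worked sketch of the difference, assuming directory names sort chronologically (e.g. ISO dates):

    date_dirs = ['2024-08-30', '2024-09-01', '2024-09-02']

    # Old behavior: the oldest directory came first
    print(next(iter(sorted(date_dirs))))            # 2024-08-30

    # Fixed behavior: the newest directory comes first
    print(next(iter(reversed(sorted(date_dirs)))))  # 2024-09-02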
121 changes: 59 additions & 62 deletions src/omnipy/data/dataset.py
@@ -12,10 +12,8 @@
get_args,
get_origin,
Iterator,
Optional,
Type,
TypeAlias,
TypeVar)
TypeAlias)
from urllib.parse import ParseResult, urlparse

import humanize
@@ -26,6 +24,7 @@
from pydantic.generics import GenericModel
from pydantic.main import ModelMetaclass
from pydantic.utils import lenient_isinstance, lenient_issubclass
from typing_extensions import TypeVar

from omnipy.data.data_class_creator import DataClassBase, DataClassBaseMeta
from omnipy.data.model import (_cleanup_name_qualname_and_module,
@@ -39,15 +38,13 @@
from omnipy.util.helpers import (get_calling_module_name,
get_default_if_typevar,
is_iterable,
is_optional,
is_strict_subclass,
is_union,
remove_annotated_plus_optional_if_present,
remove_forward_ref_notation)
from omnipy.util.tabulate import tabulate
from omnipy.util.web import download_file_to_memory

ModelT = TypeVar('ModelT', bound=Model)
ModelT = TypeVar('ModelT', bound=Model, default=Model[object])
_DatasetT = TypeVar('_DatasetT')

DATA_KEY = 'data'
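
The switch above from typing.TypeVar to the typing_extensions backport is what makes the PEP 696 default=Model[object] argument legal; the stdlib TypeVar only gained default support in Python 3.13. A self-contained sketch of the mechanism (the Box class is hypothetical, not omnipy code):

    from typing import Generic

    from typing_extensions import TypeVar

    T = TypeVar('T', default=int)

    class Box(Generic[T]):
        def __init__(self, value: T) -> None:
            self.value = value

    # With a default declared, type checkers resolve the bare `Box` as Box[int]
    box: Box = Box(42)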
@@ -113,45 +110,50 @@ class MyDataset(Dataset[MyToplevelDict]):
"""
class Config:
validate_assignment = True

# TODO: Use json serializer package from the pydantic config instead of 'json'

# json_loads = orjson.loads
# json_dumps = orjson_dumps

data: dict[str, ModelT] = Field(default={})

def __class_getitem__(cls, model: ModelT) -> ModelT:
def __class_getitem__(
cls,
params: type[ModelT] | tuple[type[ModelT]] | tuple[type[ModelT], Any] | TypeVar
| tuple[TypeVar, ...],
) -> 'type[Dataset[type[ModelT]]]':
# TODO: change model type to params: Type[Any] | tuple[Type[Any], ...]
# as in GenericModel.

# For now, only singular model types are allowed. These lines are needed for
# interoperability with pydantic GenericModel, which internally stores the model
# as a tuple:
if isinstance(model, tuple) and len(model) == 1:
model = model[0]
# These lines are needed for interoperability with pydantic GenericModel, which internally
# stores the model as a len(1) tuple
model = params[0] if isinstance(params, tuple) and len(params) == 1 else params

orig_model = model

model = cls._origmodel_if_annotated_optional(model)
args = get_args(model)
if cls == Dataset:
# model = cls._origmodel_if_annotated_optional(model)

if is_union(model) and len(args) == 2 and lenient_issubclass(args[1], DataWithParams):
model_to_check = args[0]
else:
model_to_check = model
args = get_args(model)

if not isinstance(model_to_check, TypeVar) \
and not lenient_issubclass(model_to_check, Model) \
and not is_strict_subclass(cls, Dataset):
raise TypeError('Invalid model: {}! '.format(model_to_check)
+ 'omnipy Dataset models must be a specialization of the omnipy '
'Model class.')
if is_union(model) and len(args) == 2 and lenient_issubclass(args[1], DataWithParams):
model_to_check = args[0]
else:
model_to_check = model

if cls == Dataset and not is_optional(model): # TODO: Handle MultiModelDataset??
model = Annotated[Optional[model], 'Fake Optional from Dataset']
if not isinstance(model_to_check, TypeVar) \
and not lenient_issubclass(model_to_check, Model):
raise TypeError('Invalid model: {}! '.format(model_to_check)
+ 'omnipy Dataset models must be a specialization of the omnipy '
'Model class.')

if isinstance(model, TypeVar):
model = get_default_if_typevar(model)
created_dataset = super().__class_getitem__(model)
else:
if isinstance(model, TypeVar):
params = get_default_if_typevar(model)

created_dataset = super().__class_getitem__(model)
created_dataset = super().__class_getitem__(params)

_cleanup_name_qualname_and_module(cls, created_dataset, orig_model)

@@ -185,7 +187,7 @@ def __init__( # noqa: C901
if value != Undefined:
assert data == Undefined, \
'Not allowed to combine positional and "data" keyword argument'
assert len(kwargs) == 0 or self.get_model_class().is_param_model(), \
assert len(kwargs) == 0, \
'Not allowed to combine positional and keyword arguments'
super_kwargs[DATA_KEY] = value

@@ -194,18 +196,18 @@
"Not allowed to combine 'data' with other keyword arguments"
super_kwargs[DATA_KEY] = data

model_cls = self.get_model_class()
# model_cls = self.get_model_class()
if kwargs:
if DATA_KEY not in super_kwargs:
assert isinstance(model_cls, TypeVar) or not model_cls.is_param_model(), \
('If any keyword arguments are defined, parametrized datasets require at least '
'one positional argument in the __init__ method (typically providing the data '
'in the form of a dict from name to content for each data file).')

# assert isinstance(model_cls, TypeVar) or not model_cls.is_param_model(), \
# ('If any keyword arguments are defined, parametrized datasets require at least '
# 'one positional argument in the __init__ method (typically providing the data '
# 'in the form of a dict from name to content for each data file).')
#
super_kwargs[DATA_KEY] = kwargs
kwargs = {}

if model_cls == ModelT:
if self.get_model_class() == ModelT:
self._raise_no_model_exception()

dataset_as_input = DATA_KEY in super_kwargs \
@@ -216,10 +218,10 @@
self._init(super_kwargs, **kwargs)

try:
GenericModel.__init__(self, **super_kwargs)
super().__init__(**super_kwargs)
except ValidationError:
if dataset_as_input:
GenericModel.__init__(self)
super().__init__()
self.from_data(super_kwargs[DATA_KEY])
else:
raise
@@ -258,7 +260,8 @@ def get_model_class(cls) -> Type[Model]:
:return: The concrete Model class used for all data files in the dataset
"""
model_type = cls._get_data_field().type_
return cls._origmodel_if_annotated_optional(model_type)
# return cls._origmodel_if_annotated_optional(model_type)
return model_type

@classmethod
def _origmodel_if_annotated_optional(cls, model):
@@ -268,24 +271,18 @@ def _origmodel_if_annotated_optional(cls, model):
model = get_args(model)[0]
return model

# TODO: Update _raise_no_model_exception() text. Model is now a requirement
@staticmethod
def _raise_no_model_exception() -> None:
raise TypeError(
'Note: The Dataset class requires a concrete model to be specified as '
'Note: The Dataset class requires a Model class (or a subclass) to be specified as '
'a type hierarchy within brackets either directly, e.g.:\n\n'
'\tmodel = Dataset[list[int]]()\n\n'
'\tmodel = Dataset[Model[list[int]]]()\n\n'
'or indirectly in a subclass definition, e.g.:\n\n'
'\tclass MyNumberListDataset(Dataset[list[int]]): ...\n\n'
'In both cases, the use of the Model class or a subclass is encouraged if anything '
'other than the simplest cases, e.g.:\n\n'
'\tclass MyNumberListDataset(Dataset[Model[list[int]]]): ...\n\n'
'For anything other than the simplest cases, the definition of Model and Dataset '
'subclasses is encouraged, e.g.:\n\n'
'\tclass MyNumberListModel(Model[list[int]]): ...\n'
'\tclass MyDataset(Dataset[MyNumberListModel]): ...\n\n'
'Usage of Dataset without a type specification results in this exception. '
'Similar use of the Model class do not currently result in an exception, only '
'a warning message the first time this is done. However, this is just a '
'"poor man\'s exception" due to complex technicalities in that class. Please '
'explicitly specify types in both cases. ')
'\tclass MyDataset(Dataset[MyNumberListModel]): ...\n\n')

def _set_standard_field_description(self) -> None:
self.__fields__[DATA_KEY].field_info.description = self._get_standard_field_description()
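
The rewritten message above matches the stricter rule now enforced in __class_getitem__: a parameter that is neither a TypeVar nor a Model subclass fails the lenient_issubclass(model_to_check, Model) check. A usage sketch (assuming Dataset and Model are importable from the top-level omnipy package):

    from omnipy import Dataset, Model

    class MyNumberListModel(Model[list[int]]):
        ...

    class MyDataset(Dataset[MyNumberListModel]):
        ...

    dataset = MyDataset(my_file=[1, 2, 3])  # validated against MyNumberListModel

    try:
        Dataset[list[int]]  # plain list[int] is not a Model specialization
    except TypeError as exc:
        print(exc)  # Invalid model: list[int]! omnipy Dataset models must be ...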
@@ -587,7 +584,7 @@ def _table_repr(self) -> str:
((i,
k,
type(v).__name__,
v.__len__() if hasattr(v, '__len__') else 'N/A',
len(v) if hasattr(v, '__len__') else 'N/A',
humanize.naturalsize(objsize.get_deep_size(v)))
for i, (k, v) in enumerate(self.items())),
('#', 'Data file name', 'Type', 'Length', 'Size (in memory)'),
@@ -597,20 +594,20 @@
return ret


# TODO: Use json serializer package from the pydantic config instead of 'json'
ModelNewT = TypeVar('ModelNewT', bound=Model, default=Model[object])


class MultiModelDataset(Dataset[ModelT], Generic[ModelT]):
class MultiModelDataset(Dataset[ModelNewT], Generic[ModelNewT]):
"""
Variant of Dataset that allows custom models to be set on individual data files.
Note that the general model still needs to hold for all data files, in addition to any
custom models.
"""

_custom_field_models: dict[str, ModelT] = PrivateAttr(default={})
_custom_field_models: dict[str, ModelNewT] = PrivateAttr(default={})

def set_model(self, data_file: str, model: ModelT) -> None:
def set_model(self, data_file: str, model: ModelNewT) -> None:
try:
self._custom_field_models[data_file] = model
if data_file in self.data:
@@ -621,7 +618,7 @@ def set_model(self, data_file: str, model: ModelT) -> None:
del self._custom_field_models[data_file]
raise

def get_model(self, data_file: str) -> ModelT:
def get_model(self, data_file: str) -> ModelNewT:
if data_file in self._custom_field_models:
return self._custom_field_models[data_file]
else:
@@ -635,7 +632,7 @@ def _validate(self, data_file: str) -> None:
data_obj = self._to_data_if_model(self.data[data_file])
parsed_data = self._to_data_if_model(model(data_obj))
self.data[data_file] = parsed_data
super()._validate(data_file) # validates all data according to ModelT
super()._validate(data_file) # validates all data according to ModelNewT

@staticmethod
def _to_data_if_model(data_obj: Any):
@@ -644,9 +641,9 @@ def _to_data_if_model(data_obj: Any):
return data_obj


_KwargValT = TypeVar('_KwargValT', bound=object)
_ParamModelT = TypeVar('_ParamModelT', bound=ParamModel)
_ListOfParamModelT = TypeVar('_ListOfParamModelT', bound=ListOfParamModel)
_KwargValT = TypeVar('_KwargValT', bound=object, default=object)
_ParamModelT = TypeVar('_ParamModelT', bound=ParamModel, default=ParamModel)
_ListOfParamModelT = TypeVar('_ListOfParamModelT', bound=ListOfParamModel, default=ListOfParamModel)

ParamModelSuperKwargsType: TypeAlias = \
dict[str, dict[str, _ParamModelT | DataWithParams[_ParamModelT, _KwargValT]]]
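A short sketch of the MultiModelDataset contract described in its docstring: every data file must satisfy the general model, while individual files can additionally be bound to stricter custom models via set_model() (the import path and item-assignment style are assumptions based on the Dataset API above):

    from omnipy import Model, MultiModelDataset

    class StrModel(Model[str]):
        ...

    dataset = MultiModelDataset[Model[object]]()
    dataset['file_1'] = 'some text'        # validated against Model[object]
    dataset.set_model('file_1', StrModel)  # file_1 is now also parsed as StrModel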
20 changes: 19 additions & 1 deletion src/omnipy/data/helpers.py
@@ -11,6 +11,22 @@ def __init__(self, t: T) -> None:
raise ValueError()


class TypeVarStore1(TypeVarStore[T], Generic[T]):
...


class TypeVarStore2(TypeVarStore[T], Generic[T]):
...


class TypeVarStore3(TypeVarStore[T], Generic[T]):
...


class TypeVarStore4(TypeVarStore[T], Generic[T]):
...
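
The numbered TypeVarStore1 through TypeVarStore4 subclasses added above look like markers for threading several distinct type variables through annotations; since TypeVarStore.__init__ raises ValueError, none of them are meant to be instantiated. A speculative sketch of the pattern (the Pair alias is hypothetical, not omnipy code):

    from typing import Annotated, Generic, TypeVar

    T = TypeVar('T')
    U = TypeVar('U')

    class TypeVarStore(Generic[T]):
        def __init__(self, t: T) -> None:
            raise ValueError()

    class TypeVarStore1(TypeVarStore[T], Generic[T]):
        ...

    class TypeVarStore2(TypeVarStore[T], Generic[T]):
        ...

    # The stores can carry extra type variables inside Annotated metadata,
    # where only the first argument is interpreted as the actual type:
    Pair = Annotated[dict[str, U], TypeVarStore1[T], TypeVarStore2[U]]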


class YesNoMaybe(IntEnum):
NO = 0
YES = 1
@@ -26,7 +42,7 @@ class MethodInfo(NamedTuple):
# (https://docs.python.org/3.10/reference/datamodel.html)
_SPECIAL_METHODS_INFO_DICT: dict[str, MethodInfo] = {
# 3.3.1. Basic customization ############################################
'__bool__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.NO),
# '__bool__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.NO),
# 3.3.7. Emulating container types ######################################
'__len__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.NO),
'__length_hint__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.NO),
@@ -98,6 +114,8 @@ class MethodInfo(NamedTuple):
'__trunc__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.MAYBE),
'__floor__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.MAYBE),
'__ceil__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.MAYBE),
# - Hash and other standard methods ----------------------------------
'__hash__': MethodInfo(state_changing=False, returns_same_type=YesNoMaybe.NO),
}


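The new '__hash__' entry registers hashing with state_changing=False and returns_same_type=YesNoMaybe.NO, i.e. calling hash() on a model must not mutate it and should return a plain int rather than a re-wrapped model. A sketch of the intended effect (assuming Model delegates registered special methods to its content):

    from omnipy import Model

    model = Model[int](42)

    # returns_same_type=NO: the result stays a plain int, not a Model[int]
    assert isinstance(hash(model), int)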