# TODO: Add a parameter to allow for "async for" iteration over data items
def _check_job_func_parameters(job_func: Callable) -> None:
    params = list(inspect.signature(job_func).parameters.values())

    if len(params) == 0 or \
            params[0].default != Parameter.empty or \
            params[0].kind not in [Parameter.POSITIONAL_ONLY,
                                   Parameter.POSITIONAL_OR_KEYWORD]:
        raise ValueError('Parameter "iterate_over_data_files" is set to True, '
                         'but the job function has no arguments without default values. '
                         'Such a first argument will be replaced with a corresponding '
                         'Dataset arg to be iterated over')
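# Example of the rule enforced above (hypothetical signatures): a job function like
#     def process(data: str, factor: int = 2) -> str: ...
# passes the check, since its first parameter is positional and has no default value,
# while
#     def process(*, data: str) -> str: ...
# fails, since a keyword-only first parameter cannot be replaced by the Dataset argument.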
def _create_dataset_cls(data_file_type: InputTypeT) -> type[IsDataset]:
    if is_model_subclass(data_file_type):
        return Dataset[data_file_type]  # type: ignore[return-value, valid-type]
    else:
        return Dataset[Model[data_file_type]]  # type: ignore[return-value, valid-type]
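# For illustration (hypothetical calls, assuming omnipy's Dataset and Model generics):
#     _create_dataset_cls(Model[int])  ->  Dataset[Model[int]]   (already a Model subclass)
#     _create_dataset_cls(str)         ->  Dataset[Model[str]]   (plain type wrapped in Model)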
# Classes

# TODO: Data files -> data items throughout, e.g. iterate_over_data_items??
# TODO: Add a parameter to allow for "async for" iteration over data items, awaiting each
#       call of the underlying asynchronous function instead of running all calls concurrently,
#       as is currently implemented. This would be useful for cases where the processing of
#       each data item needs to be done sequentially, e.g. when the processing involves
#       modifying a shared resource that cannot be accessed concurrently. This would still
#       allow asynchronous processing of the job as a whole, e.g. in context of other jobs.


class IterateFuncJobBaseMixin:
    def __init__(  # noqa: C901
            self,
            *,
            iterate_over_data_files: bool = False,
            output_dataset_param: str | None = None,
            output_dataset_cls: type[IsDataset] | None = None,
    ):
        self_as_plain_func_arg_job_base = cast(IsPlainFuncArgJobBase, self)
        self._iterate_over_data_files = iterate_over_data_files
        self._input_dataset_type: type | None = None
        self._output_dataset_param = output_dataset_param
        self._output_dataset_cls = output_dataset_cls
        self._output_dataset_param_in_func: inspect.Parameter | None = None

        if not isinstance(self.iterate_over_data_files, bool):
            raise ValueError(
                'Value of "iterate_over_data_files" parameter must be bool (True/False), '
                f'not "{iterate_over_data_files}"')

        if not iterate_over_data_files:
            if output_dataset_param is not None:
                raise ValueError('Output dataset parameter can only be set when '
                                 '"iterate_over_data_files" is True')
            if output_dataset_cls is not None:
                raise ValueError(
                    'Output dataset class can only be set when "iterate_over_data_files" is True')

        if iterate_over_data_files:
            job_func = self_as_plain_func_arg_job_base._job_func
            if job_func.__name__ != '_omnipy_iterate_func':
                _check_job_func_parameters(job_func)
                self._generate_new_signature_for_iteration(job_func)

                def _sync_iterate_over_data_files_decorator(call_func: Callable):
                    def _omnipy_iterate_func(
                            dataset: InputDatasetT,
                            *args: object,
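The excerpt cuts off inside the signature of `_omnipy_iterate_func`. As a rough sketch of the pattern such a wrapper follows — calling the underlying function once per data item and collecting the results into an output dataset — here is a simplified, hypothetical version that uses a plain dict in place of omnipy's `Dataset` (illustrative only, not the actual omnipy code):

```python
from typing import Any, Callable


def _sketch_sync_iterate_decorator(call_func: Callable[..., Any]) -> Callable[..., Any]:
    # Hypothetical stand-in: 'dataset' is treated as a mapping of item titles
    # to data items, similar to how an omnipy Dataset behaves.
    def _iterate_func(dataset: dict[str, Any], *args: object, **kwargs: object) -> dict[str, Any]:
        output: dict[str, Any] = {}
        for title, item in dataset.items():
            # One call of the wrapped per-item function per data item
            output[title] = call_func(item, *args, **kwargs)
        return output

    return _iterate_func
```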
Add a parameter to allow for "async for" iteration over data items, awaiting each call of the underlying asynchronous function instead of running all calls concurrently, as is currently implemented. This would be useful for cases where the processing of each data item needs to be done sequentially, e.g. when the processing involves modifying a shared resource that cannot be accessed concurrently. This would still allow asynchronous processing of the job as a whole, e.g. in the context of other jobs.
omnipy/src/omnipy/compute/mixins/iterate.py, line 45 in ae3ed8d
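To illustrate the proposal: today the iteration layer can start all per-item coroutines at once (e.g. via `asyncio.gather`), whereas the proposed parameter would await each call before starting the next. A minimal sketch of the two modes, using a hypothetical `process_item` coroutine and plain dicts in place of omnipy Datasets (names are illustrative, not omnipy API):

```python
import asyncio


async def process_item(title: str, item: str) -> str:
    # Hypothetical per-item coroutine standing in for the user's job function
    await asyncio.sleep(0.1)
    return item.upper()


async def run_concurrently(dataset: dict[str, str]) -> dict[str, str]:
    # Current behavior: all per-item calls are started at once and awaited together
    results = await asyncio.gather(
        *(process_item(title, item) for title, item in dataset.items()))
    return dict(zip(dataset.keys(), results))


async def run_sequentially(dataset: dict[str, str]) -> dict[str, str]:
    # Proposed behavior: each call is awaited before the next one starts, so
    # work on a shared resource never overlaps between items
    return {title: await process_item(title, item)
            for title, item in dataset.items()}


print(asyncio.run(run_concurrently({'a': 'foo', 'b': 'bar'})))
print(asyncio.run(run_sequentially({'a': 'foo', 'b': 'bar'})))
```

Either way, the job as a whole stays awaitable, so it can still be scheduled alongside other jobs.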