From 160f857302431d11a781f2846ccdfc3e31865690 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 16:48:03 -0400 Subject: [PATCH 01/23] fix(io): increase default compression level for posterior samples Signed-off-by: Cameron Smith --- src/pyrovelocity/io/compressedpickle.py | 48 +++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/pyrovelocity/io/compressedpickle.py b/src/pyrovelocity/io/compressedpickle.py index e36eb671d..2767408e5 100644 --- a/src/pyrovelocity/io/compressedpickle.py +++ b/src/pyrovelocity/io/compressedpickle.py @@ -1,11 +1,17 @@ +import os import pickle from os import PathLike from pathlib import Path import numpy as np -import zstandard as zstd +from beartype import beartype from beartype.typing import Any, Dict from sparse import COO +from zstandard import ( + ZstdCompressionParameters, + ZstdCompressor, + ZstdDecompressor, +) from pyrovelocity.io.sparsity import densify_arrays, sparsify_arrays from pyrovelocity.logging import configure_logging @@ -15,6 +21,35 @@ logger = configure_logging(__name__) +@beartype +def get_cpu_count() -> int: + """ + Safely determine the number of CPUs in the system. + Falls back to a default value if it can't be determined. + """ + try: + return os.cpu_count() or 1 + except NotImplementedError: + return 1 + + +@beartype +def get_optimal_thread_count(cpu_count: int) -> int: + """ + Determine the optimal number of threads based on CPU count. 
+ """ + if cpu_count <= 2: + return cpu_count + elif cpu_count <= 8: + return cpu_count - 1 + else: + return cpu_count - 2 + + +CPU_COUNT = get_cpu_count() +COMPRESSION_THREADS = get_optimal_thread_count(CPU_COUNT) + + # TODO: Handle sparsification when values are not exclusively arrays class CompressedPickle: """ @@ -67,8 +102,15 @@ def save( """ ) + compression_params = ZstdCompressionParameters( + compression_level=9, + threads=COMPRESSION_THREADS, + ) + with file_path.open("wb") as f: - compression_context = zstd.ZstdCompressor(level=3) + compression_context = ZstdCompressor( + compression_params=compression_params + ) with compression_context.stream_writer(f) as compressor: pickle.dump(obj, compressor) @@ -99,7 +141,7 @@ def load( True """ with open(file_path, "rb") as f: - decompression_context = zstd.ZstdDecompressor() + decompression_context = ZstdDecompressor() with decompression_context.stream_reader(f) as decompressor: obj = pickle.load(decompressor) From e20f2ea2f7363d5d18215e92a492946cd89750e9 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 16:59:50 -0400 Subject: [PATCH 02/23] nit(io): update larry single lineage docstrings Signed-off-by: Cameron Smith --- src/pyrovelocity/io/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyrovelocity/io/datasets.py b/src/pyrovelocity/io/datasets.py index f4520b6ad..887246b3c 100644 --- a/src/pyrovelocity/io/datasets.py +++ b/src/pyrovelocity/io/datasets.py @@ -84,7 +84,7 @@ def larry_neu( In vitro hematopoiesis LARRY dataset. Subset of Data from `Weinreb et al. (2020) ' - consisting of unipotent neutrophils. + consisting of unipotent neutrophil precursors and neutrophils. https://figshare.com/ndownloader/files/37028575 @@ -104,7 +104,7 @@ def larry_mono( """ In vitro hematopoiesis LARRY dataset Subset of Data from `Weinreb et al. (2020) ' - consisting of unipotent monocytes. + consisting of unipotent monocyte precursors and monocytes. 
https://figshare.com/ndownloader/files/37028572 From 055d1e09ed53eb9f3c09eca0fe6b4f5654e61970 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 17:05:20 -0400 Subject: [PATCH 03/23] feat(workflows): add larry neutrophil precursor configuration Signed-off-by: Cameron Smith --- .../workflows/main_configuration.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/pyrovelocity/workflows/main_configuration.py b/src/pyrovelocity/workflows/main_configuration.py index d535e337a..6a4f86b51 100644 --- a/src/pyrovelocity/workflows/main_configuration.py +++ b/src/pyrovelocity/workflows/main_configuration.py @@ -383,6 +383,47 @@ class CombinedMetricsOutputs(DataClassJSONMixin): summarizing_resources_limits=default_resource_limits, ) +larry_neu_dataset_args = DownloadDatasetInterface( + data_set_name="larry_neu", +) +larry_neu_preprocess_data_args = PreprocessDataInterface( + data_set_name=f"{larry_neu_dataset_args.data_set_name}", + adata=f"{larry_neu_dataset_args.data_external_path}/{larry_neu_dataset_args.data_set_name}.h5ad", + use_obs_subset=SUBSET_OBS, + use_vars_subset=SUBSET_VARS, + cell_state="state_info", + vector_field_basis="emb", +) +larry_neu_train_model1_args = PyroVelocityTrainInterface( + adata=f"{larry_neu_preprocess_data_args.data_processed_path}/{larry_neu_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_neu_dataset_args.data_set_name}", + model_identifier="model1", + guide_type="auto_t0_constraint", + offset=False, + max_epochs=MAX_EPOCHS, +) +larry_neu_train_model2_args = PyroVelocityTrainInterface( + adata=f"{larry_neu_preprocess_data_args.data_processed_path}/{larry_neu_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_neu_dataset_args.data_set_name}", + model_identifier="model2", + max_epochs=MAX_EPOCHS, +) +larry_neu_postprocess_configuration = PostprocessConfiguration( + number_posterior_samples=NUMBER_POSTERIOR_SAMPLES, +) +larry_neu_configuration = 
WorkflowConfiguration( + download_dataset=larry_neu_dataset_args, + preprocess_data=larry_neu_preprocess_data_args, + training_configuration_1=larry_neu_train_model1_args, + training_configuration_2=larry_neu_train_model2_args, + postprocess_configuration=larry_neu_postprocess_configuration, + training_resources_requests=default_training_resource_requests, + training_resources_limits=default_training_resource_limits, + postprocessing_resources_requests=medium_resource_requests, + postprocessing_resources_limits=medium_resource_limits, + summarizing_resources_requests=default_resource_requests, + summarizing_resources_limits=default_resource_limits, +) larry_dataset_args = DownloadDatasetInterface( data_set_name="larry", From 441d9e9984ab94b281c1331c36e104a82e2c8064 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 17:13:18 -0400 Subject: [PATCH 04/23] feat(workflows): enable execution for larry neutrophil precursor data Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index fcc3c818d..a6a062f82 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -49,6 +49,7 @@ default_training_resource_limits, default_training_resource_requests, larry_configuration, + larry_neu_configuration, pancreas_configuration, pbmc68k_configuration, pons_configuration, @@ -568,6 +569,7 @@ def training_workflow( pbmc68k_configuration: WorkflowConfiguration = pbmc68k_configuration, pons_configuration: WorkflowConfiguration = pons_configuration, larry_configuration: WorkflowConfiguration = larry_configuration, + larry_neu_configuration: WorkflowConfiguration = larry_neu_configuration, ) -> list[list[SummarizeOutputs]]: """ Apply the primary workflow to a collection of configurations. 
@@ -584,6 +586,7 @@ def training_workflow( # (pbmc68k_configuration, "pbmc68k"), # (pons_configuration, "pons"), # (larry_configuration, "larry"), + (larry_neu_configuration, "larry_neu"), ] for config, _ in configurations: From 560e31f9d6da0db03be3defd89b88e754e82789b Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 19:49:53 -0400 Subject: [PATCH 05/23] fix(io): write data sets modified on download Signed-off-by: Cameron Smith --- src/pyrovelocity/io/datasets.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pyrovelocity/io/datasets.py b/src/pyrovelocity/io/datasets.py index 887246b3c..39418a254 100644 --- a/src/pyrovelocity/io/datasets.py +++ b/src/pyrovelocity/io/datasets.py @@ -94,6 +94,7 @@ def larry_neu( url = "https://figshare.com/ndownloader/files/37028575" adata = sc.read(file_path, backup_url=url, sparse=True, cache=True) adata = adata[adata.obs.state_info != "Centroid", :] + adata.write(file_path) return adata @@ -114,6 +115,7 @@ def larry_mono( url = "https://figshare.com/ndownloader/files/37028572" adata = sc.read(file_path, backup_url=url, sparse=True, cache=True) adata = adata[adata.obs.state_info != "Centroid", :] + adata.write(file_path) return adata @@ -187,6 +189,7 @@ def larry_tips( adata = larry() adata = adata[adata.obs["time_info"] == 6.0] adata = adata[adata.obs["state_info"] != "Undifferentiated"] + adata.write(file_path) return adata @@ -205,6 +208,7 @@ def larry_multilineage( adata_larry_mono = larry_mono() adata_larry_neu = larry_neu() adata = adata_larry_mono.concatenate(adata_larry_neu) + adata.write(file_path) return adata @@ -273,4 +277,5 @@ def pbmc68k( adata = scv.datasets.pbmc68k(file_path=file_path) scv.pp.remove_duplicate_cells(adata) adata.obsm["X_tsne"][:, 0] *= -1 + adata.write(file_path) return adata From 168af3e08b7d3b158bbbc17511b38c1df0c91bf1 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 19:56:06 -0400 Subject: [PATCH 06/23] feat(workflows): add larry monocyte precursor 
configuration Signed-off-by: Cameron Smith --- .../workflows/main_configuration.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/pyrovelocity/workflows/main_configuration.py b/src/pyrovelocity/workflows/main_configuration.py index 6a4f86b51..2f202a2bf 100644 --- a/src/pyrovelocity/workflows/main_configuration.py +++ b/src/pyrovelocity/workflows/main_configuration.py @@ -425,6 +425,48 @@ class CombinedMetricsOutputs(DataClassJSONMixin): summarizing_resources_limits=default_resource_limits, ) +larry_mono_dataset_args = DownloadDatasetInterface( + data_set_name="larry_mono", +) +larry_mono_preprocess_data_args = PreprocessDataInterface( + data_set_name=f"{larry_mono_dataset_args.data_set_name}", + adata=f"{larry_mono_dataset_args.data_external_path}/{larry_mono_dataset_args.data_set_name}.h5ad", + use_obs_subset=SUBSET_OBS, + use_vars_subset=SUBSET_VARS, + cell_state="state_info", + vector_field_basis="emb", +) +larry_mono_train_model1_args = PyroVelocityTrainInterface( + adata=f"{larry_mono_preprocess_data_args.data_processed_path}/{larry_mono_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_mono_dataset_args.data_set_name}", + model_identifier="model1", + guide_type="auto_t0_constraint", + offset=False, + max_epochs=MAX_EPOCHS, +) +larry_mono_train_model2_args = PyroVelocityTrainInterface( + adata=f"{larry_mono_preprocess_data_args.data_processed_path}/{larry_mono_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_mono_dataset_args.data_set_name}", + model_identifier="model2", + max_epochs=MAX_EPOCHS, +) +larry_mono_postprocess_configuration = PostprocessConfiguration( + number_posterior_samples=NUMBER_POSTERIOR_SAMPLES, +) +larry_mono_configuration = WorkflowConfiguration( + download_dataset=larry_mono_dataset_args, + preprocess_data=larry_mono_preprocess_data_args, + training_configuration_1=larry_mono_train_model1_args, + training_configuration_2=larry_mono_train_model2_args, + 
postprocess_configuration=larry_mono_postprocess_configuration, + training_resources_requests=default_training_resource_requests, + training_resources_limits=default_training_resource_limits, + postprocessing_resources_requests=medium_resource_requests, + postprocessing_resources_limits=medium_resource_limits, + summarizing_resources_requests=default_resource_requests, + summarizing_resources_limits=default_resource_limits, +) + larry_dataset_args = DownloadDatasetInterface( data_set_name="larry", ) From 8464429942771de1393bcb43546a60e49ba5ca07 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:08:34 -0400 Subject: [PATCH 07/23] feat(workflows): enable execution for larry monocyte precursor data Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index a6a062f82..72803912c 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -49,6 +49,7 @@ default_training_resource_limits, default_training_resource_requests, larry_configuration, + larry_mono_configuration, larry_neu_configuration, pancreas_configuration, pbmc68k_configuration, @@ -570,6 +571,7 @@ def training_workflow( pons_configuration: WorkflowConfiguration = pons_configuration, larry_configuration: WorkflowConfiguration = larry_configuration, larry_neu_configuration: WorkflowConfiguration = larry_neu_configuration, + larry_mono_configuration: WorkflowConfiguration = larry_mono_configuration, ) -> list[list[SummarizeOutputs]]: """ Apply the primary workflow to a collection of configurations. 
@@ -587,6 +589,7 @@ def training_workflow( # (pons_configuration, "pons"), # (larry_configuration, "larry"), (larry_neu_configuration, "larry_neu"), + (larry_mono_configuration, "larry_mono"), ] for config, _ in configurations: From 145a5ec8c6edf52c48fad308458669eaadc76261 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:24:00 -0400 Subject: [PATCH 08/23] refactor(workflows): move all constants to constants Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 92c298cc4..73e68b855 100644 --- a/src/pyrovelocity/workflows/constants.py +++ b/src/pyrovelocity/workflows/constants.py @@ -3,9 +3,23 @@ from dulwich.repo import NotGitRepository, Repo from pyrovelocity.logging import configure_logging +from pyrovelocity.utils import str_to_bool logger = configure_logging("pyrovelocity.workflows.constants") +PYROVELOCITY_TESTING_FLAG = str_to_bool( + os.getenv("PYROVELOCITY_TESTING_FLAG", "False") +) +PYROVELOCITY_DATA_SUBSET = str_to_bool( + os.getenv("PYROVELOCITY_DATA_SUBSET", "False") +) +PYROVELOCITY_UPLOAD_RESULTS = str_to_bool( + os.getenv("PYROVELOCITY_UPLOAD_RESULTS", "True") +) +PYROVELOCITY_OVERWRITE_CACHE = str_to_bool( + os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "False") +) + def get_git_repo_root(path="."): try: From 610d7cce5806425eea6265914bfc5ba4f2d875fc Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:24:34 -0400 Subject: [PATCH 09/23] fix(workflows): use overwrite cache variable to set default value Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/cli/execute.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pyrovelocity/workflows/cli/execute.py b/src/pyrovelocity/workflows/cli/execute.py index 40e76f5ea..a8671cab4 100644 --- a/src/pyrovelocity/workflows/cli/execute.py +++ 
b/src/pyrovelocity/workflows/cli/execute.py @@ -45,6 +45,7 @@ ) from pyrovelocity.workflows.constants import ( LOCAL_CLUSTER_CONFIG_FILE_PATH, + PYROVELOCITY_OVERWRITE_CACHE, REMOTE_CLUSTER_CONFIG_FILE_PATH, ) @@ -83,7 +84,7 @@ class ExecutionContext(DataClassJsonMixin): project: str = "pyrovelocity" domain: str = "development" wait: bool = True - overwrite_cache: bool = False + overwrite_cache: bool = PYROVELOCITY_OVERWRITE_CACHE def handle_local_execution(exec_mode, execution_context, entity, entity_config): From 074133007319c7f06ae17c610c395a3c93f26a2e Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:26:25 -0400 Subject: [PATCH 10/23] fix(workflows): import constants in configuration Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_configuration.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/pyrovelocity/workflows/main_configuration.py b/src/pyrovelocity/workflows/main_configuration.py index 2f202a2bf..84c6517e7 100644 --- a/src/pyrovelocity/workflows/main_configuration.py +++ b/src/pyrovelocity/workflows/main_configuration.py @@ -12,7 +12,10 @@ PyroVelocityTrainInterface, ) from pyrovelocity.logging import configure_logging -from pyrovelocity.utils import str_to_bool +from pyrovelocity.workflows.constants import ( + PYROVELOCITY_TESTING_FLAG, + PYROVELOCITY_UPLOAD_RESULTS, +) __all__ = [ "ResourcesJSON", @@ -28,17 +31,6 @@ logger = configure_logging(__name__) -PYROVELOCITY_TESTING_FLAG = str_to_bool( - os.getenv("PYROVELOCITY_TESTING_FLAG", "False") -) -PYROVELOCITY_DATA_SUBSET = str_to_bool( - os.getenv("PYROVELOCITY_DATA_SUBSET", "False") -) -PYROVELOCITY_UPLOAD_RESULTS = str_to_bool( - os.getenv("PYROVELOCITY_UPLOAD_RESULTS", "True") -) - - if PYROVELOCITY_TESTING_FLAG: NUMBER_POSTERIOR_SAMPLES: int = 4 MAX_EPOCHS: int = 300 From 121381c7fda5f2791bfd2bd2c9c80c4faaa968ca Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:26:56 -0400 Subject: [PATCH 11/23] 
fix(workflows): import constants in workflow Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index 72803912c..21de1123c 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -34,8 +34,8 @@ from pyrovelocity.tasks.summarize import summarize_dataset from pyrovelocity.tasks.train import train_dataset from pyrovelocity.utils import str_to_bool +from pyrovelocity.workflows.constants import PYROVELOCITY_DATA_SUBSET from pyrovelocity.workflows.main_configuration import ( - PYROVELOCITY_DATA_SUBSET, CombinedMetricsOutputs, PostprocessConfiguration, PostprocessOutputs, From 0232207d604cf650f2925044e0672b3ff36f5dab Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:27:20 -0400 Subject: [PATCH 12/23] feat(workflows): add larry multilineage configuration Signed-off-by: Cameron Smith --- .../workflows/main_configuration.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/pyrovelocity/workflows/main_configuration.py b/src/pyrovelocity/workflows/main_configuration.py index 84c6517e7..36115bbfc 100644 --- a/src/pyrovelocity/workflows/main_configuration.py +++ b/src/pyrovelocity/workflows/main_configuration.py @@ -459,6 +459,48 @@ class CombinedMetricsOutputs(DataClassJSONMixin): summarizing_resources_limits=default_resource_limits, ) +larry_multilineage_dataset_args = DownloadDatasetInterface( + data_set_name="larry_multilineage", +) +larry_multilineage_preprocess_data_args = PreprocessDataInterface( + data_set_name=f"{larry_multilineage_dataset_args.data_set_name}", + adata=f"{larry_multilineage_dataset_args.data_external_path}/{larry_multilineage_dataset_args.data_set_name}.h5ad", + use_obs_subset=SUBSET_OBS, + use_vars_subset=SUBSET_VARS, + cell_state="state_info", + vector_field_basis="emb", +) 
+larry_multilineage_train_model1_args = PyroVelocityTrainInterface( + adata=f"{larry_multilineage_preprocess_data_args.data_processed_path}/{larry_multilineage_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_multilineage_dataset_args.data_set_name}", + model_identifier="model1", + guide_type="auto_t0_constraint", + offset=False, + max_epochs=MAX_EPOCHS, +) +larry_multilineage_train_model2_args = PyroVelocityTrainInterface( + adata=f"{larry_multilineage_preprocess_data_args.data_processed_path}/{larry_multilineage_dataset_args.data_set_name}_processed.h5ad", + data_set_name=f"{larry_multilineage_dataset_args.data_set_name}", + model_identifier="model2", + max_epochs=MAX_EPOCHS, +) +larry_multilineage_postprocess_configuration = PostprocessConfiguration( + number_posterior_samples=NUMBER_POSTERIOR_SAMPLES, +) +larry_multilineage_configuration = WorkflowConfiguration( + download_dataset=larry_multilineage_dataset_args, + preprocess_data=larry_multilineage_preprocess_data_args, + training_configuration_1=larry_multilineage_train_model1_args, + training_configuration_2=larry_multilineage_train_model2_args, + postprocess_configuration=larry_multilineage_postprocess_configuration, + training_resources_requests=default_training_resource_requests, + training_resources_limits=default_training_resource_limits, + postprocessing_resources_requests=medium_resource_requests, + postprocessing_resources_limits=medium_resource_limits, + summarizing_resources_requests=default_resource_requests, + summarizing_resources_limits=default_resource_limits, +) + larry_dataset_args = DownloadDatasetInterface( data_set_name="larry", ) From d0321ca83665c8b3c7d3e403f259f37e9ee29448 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:27:43 -0400 Subject: [PATCH 13/23] feat(workflows): enable execution for larry multilineage data Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 
deletions(-) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index 21de1123c..d34826162 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -50,6 +50,7 @@ default_training_resource_requests, larry_configuration, larry_mono_configuration, + larry_multilineage_configuration, larry_neu_configuration, pancreas_configuration, pbmc68k_configuration, @@ -572,6 +573,7 @@ def training_workflow( larry_configuration: WorkflowConfiguration = larry_configuration, larry_neu_configuration: WorkflowConfiguration = larry_neu_configuration, larry_mono_configuration: WorkflowConfiguration = larry_mono_configuration, + larry_multilineage_configuration: WorkflowConfiguration = larry_multilineage_configuration, ) -> list[list[SummarizeOutputs]]: """ Apply the primary workflow to a collection of configurations. @@ -579,17 +581,18 @@ def training_workflow( """ results = [] configurations = [ - (simulated_configuration, "simulated"), + # (simulated_configuration, "simulated"), ] if not PYROVELOCITY_DATA_SUBSET: configurations += [ - (pancreas_configuration, "pancreas"), + # (pancreas_configuration, "pancreas"), # (pbmc68k_configuration, "pbmc68k"), # (pons_configuration, "pons"), # (larry_configuration, "larry"), (larry_neu_configuration, "larry_neu"), (larry_mono_configuration, "larry_mono"), + (larry_multilineage_configuration, "larry_multilineage"), ] for config, _ in configurations: From 7f0883b369e320f390335b6e6be9846b03813b3d Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:30:04 -0400 Subject: [PATCH 14/23] test(constants): enable cache overwrite Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 73e68b855..58093d2a9 100644 --- a/src/pyrovelocity/workflows/constants.py +++ 
b/src/pyrovelocity/workflows/constants.py @@ -17,7 +17,7 @@ os.getenv("PYROVELOCITY_UPLOAD_RESULTS", "True") ) PYROVELOCITY_OVERWRITE_CACHE = str_to_bool( - os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "False") + os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "True") ) From 13a53134912d4bb60064f63e7a86b65d103566cc Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:33:05 -0400 Subject: [PATCH 15/23] fix(workflows): load dotenv in constants Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 58093d2a9..0e9c192e2 100644 --- a/src/pyrovelocity/workflows/constants.py +++ b/src/pyrovelocity/workflows/constants.py @@ -1,5 +1,6 @@ import os +from dotenv import load_dotenv from dulwich.repo import NotGitRepository, Repo from pyrovelocity.logging import configure_logging @@ -7,6 +8,8 @@ logger = configure_logging("pyrovelocity.workflows.constants") +load_dotenv() + PYROVELOCITY_TESTING_FLAG = str_to_bool( os.getenv("PYROVELOCITY_TESTING_FLAG", "False") ) From d2ca2fc8a49cd5f8cad29f095bd1ae44be521c5a Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:40:05 -0400 Subject: [PATCH 16/23] fix(workflows): log env from constants Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 0e9c192e2..022bf06b2 100644 --- a/src/pyrovelocity/workflows/constants.py +++ b/src/pyrovelocity/workflows/constants.py @@ -16,11 +16,22 @@ PYROVELOCITY_DATA_SUBSET = str_to_bool( os.getenv("PYROVELOCITY_DATA_SUBSET", "False") ) +PYROVELOCITY_OVERWRITE_CACHE = str_to_bool( + os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "True") +) +PYROVELOCITY_CACHE_FLAG = str_to_bool( + os.getenv("PYROVELOCITY_CACHE_FLAG", "True") +) 
PYROVELOCITY_UPLOAD_RESULTS = str_to_bool( os.getenv("PYROVELOCITY_UPLOAD_RESULTS", "True") ) -PYROVELOCITY_OVERWRITE_CACHE = str_to_bool( - os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "True") + +logger.info( + f"\nPYROVELOCITY_TESTING_FLAG: {PYROVELOCITY_TESTING_FLAG}\n" + f"PYROVELOCITY_DATA_SUBSET: {PYROVELOCITY_DATA_SUBSET}\n" + f"PYROVELOCITY_OVERWRITE_CACHE: {PYROVELOCITY_OVERWRITE_CACHE}\n" + f"PYROVELOCITY_CACHE_FLAG: {PYROVELOCITY_CACHE_FLAG}\n" + f"PYROVELOCITY_UPLOAD_RESULTS: {PYROVELOCITY_UPLOAD_RESULTS}\n\n" ) From 784f2760a10c2868ae676e1e0fe0d1daae4814e4 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 20:40:38 -0400 Subject: [PATCH 17/23] fix(workflows): import cache flag from constants Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index d34826162..c4e4c614b 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -34,7 +34,10 @@ from pyrovelocity.tasks.summarize import summarize_dataset from pyrovelocity.tasks.train import train_dataset from pyrovelocity.utils import str_to_bool -from pyrovelocity.workflows.constants import PYROVELOCITY_DATA_SUBSET +from pyrovelocity.workflows.constants import ( + PYROVELOCITY_CACHE_FLAG, + PYROVELOCITY_DATA_SUBSET, +) from pyrovelocity.workflows.main_configuration import ( CombinedMetricsOutputs, PostprocessConfiguration, @@ -79,9 +82,6 @@ SUMMARIZE_CACHE_VERSION = f"{CACHE_VERSION}.0" UPLOAD_CACHE_VERSION = f"{CACHE_VERSION}.0" COMBINE_METRICS_CACHE_VERSION = f"{CACHE_VERSION}.0" -PYROVELOCITY_CACHE_FLAG = str_to_bool( - os.getenv("PYROVELOCITY_CACHE_FLAG", "True") -) L4 = GPUAccelerator("nvidia-l4") ACCELERATOR_TYPE: GPUAccelerator = T4 From 32e400f2d8b7853cd285ca7ce1d30203efc959ef Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 
21:16:36 -0400 Subject: [PATCH 18/23] fix(cid): set hydra outputs artifact name from set-variables outputs Signed-off-by: Cameron Smith --- .github/workflows/cid.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/cid.yaml b/.github/workflows/cid.yaml index 27292790e..4ce86c092 100644 --- a/.github/workflows/cid.yaml +++ b/.github/workflows/cid.yaml @@ -413,7 +413,7 @@ jobs: - name: Create config tarball id: save-hydra-outputs run: | - TAR_FILENAME="hydra_outputs_${GITHUB_SHA_SHORT}.tar.gz" + TAR_FILENAME="hydra_outputs_${{ needs.set-variables.outputs.checkout_rev }}.tar.gz" tar -czf $TAR_FILENAME ./outputs/ tar -tzf $TAR_FILENAME @@ -422,7 +422,6 @@ jobs: uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4 with: name: hydra-outputs - path: ${{ env.HYDRA_OUTPUTS_TAR }} release: From e40270f98e142bc1383cb261d953757bb44f9198 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 21:22:48 -0400 Subject: [PATCH 19/23] fix(workflows): log warnings when cluster config files not found Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 022bf06b2..76830c28f 100644 --- a/src/pyrovelocity/workflows/constants.py +++ b/src/pyrovelocity/workflows/constants.py @@ -66,7 +66,6 @@ def get_git_repo_root(path="."): ) logger.warning(remote_cluster_config_file_not_found_message) REMOTE_CLUSTER_CONFIG_FILE_PATH = LOCAL_CLUSTER_CONFIG_FILE_PATH - # raise FileNotFoundError(remote_cluster_config_file_not_found_message) if not os.path.isfile(LOCAL_CLUSTER_CONFIG_FILE_PATH): local_cluster_config_file_not_found_message = ( @@ -74,7 +73,7 @@ def get_git_repo_root(path="."): f"{LOCAL_CLUSTER_CONFIG_FILE_PATH}\n\n" f"Check that you have not deleted this file from the repository.\n\n" ) - raise 
FileNotFoundError(local_cluster_config_file_not_found_message) + logger.warning(local_cluster_config_file_not_found_message) logger.debug( f"Remote cluster config file path: {REMOTE_CLUSTER_CONFIG_FILE_PATH}" From 7e1dad771d1a457d5c2b19360816ca72f4df3ac5 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 22:32:10 -0400 Subject: [PATCH 20/23] test(constants): disable cache overwrite Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyrovelocity/workflows/constants.py b/src/pyrovelocity/workflows/constants.py index 76830c28f..da008e8cd 100644 --- a/src/pyrovelocity/workflows/constants.py +++ b/src/pyrovelocity/workflows/constants.py @@ -17,7 +17,7 @@ os.getenv("PYROVELOCITY_DATA_SUBSET", "False") ) PYROVELOCITY_OVERWRITE_CACHE = str_to_bool( - os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "True") + os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "False") ) PYROVELOCITY_CACHE_FLAG = str_to_bool( os.getenv("PYROVELOCITY_CACHE_FLAG", "True") From cc5918294ea6bf8500d51ced63f1f073b8662df5 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 22:32:46 -0400 Subject: [PATCH 21/23] fix(workflows): reenable simulated and pancreas data Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index c4e4c614b..f4a2d0bc8 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -581,12 +581,12 @@ def training_workflow( """ results = [] configurations = [ - # (simulated_configuration, "simulated"), + (simulated_configuration, "simulated"), ] if not PYROVELOCITY_DATA_SUBSET: configurations += [ - # (pancreas_configuration, "pancreas"), + (pancreas_configuration, "pancreas"), # (pbmc68k_configuration, "pbmc68k"), # (pons_configuration, 
"pons"), # (larry_configuration, "larry"), From 561ba5b051bb8f4f9f5cf5b8697e0ca1fe4a6979 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 22:54:49 -0400 Subject: [PATCH 22/23] chore(workflows): upload and metrics cache versions `2024.8.15.1` Signed-off-by: Cameron Smith --- src/pyrovelocity/workflows/main_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyrovelocity/workflows/main_workflow.py b/src/pyrovelocity/workflows/main_workflow.py index f4a2d0bc8..6ebbc3e57 100644 --- a/src/pyrovelocity/workflows/main_workflow.py +++ b/src/pyrovelocity/workflows/main_workflow.py @@ -80,8 +80,8 @@ TRAIN_CACHE_VERSION = f"{CACHE_VERSION}.0" POSTPROCESS_CACHE_VERSION = f"{CACHE_VERSION}.0" SUMMARIZE_CACHE_VERSION = f"{CACHE_VERSION}.0" -UPLOAD_CACHE_VERSION = f"{CACHE_VERSION}.0" -COMBINE_METRICS_CACHE_VERSION = f"{CACHE_VERSION}.0" +UPLOAD_CACHE_VERSION = f"{CACHE_VERSION}.1" +COMBINE_METRICS_CACHE_VERSION = f"{CACHE_VERSION}.1" L4 = GPUAccelerator("nvidia-l4") ACCELERATOR_TYPE: GPUAccelerator = T4 From 8a79997170d72b27517be1b4982c9aeb9ed1d070 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Fri, 16 Aug 2024 23:04:22 -0400 Subject: [PATCH 23/23] chore(version): `0.4.0b1` Signed-off-by: Cameron Smith --- MODULE.bazel | 2 +- Makefile | 4 ++-- conda/colab/construct.yaml | 2 +- containers/gpu.Dockerfile | 2 +- containers/pkg.Dockerfile | 2 +- docs/source/notebooks/pyrovelocity_colab_template.ipynb | 2 +- pyproject.toml | 2 +- scripts/conda | 8 ++++---- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MODULE.bazel b/MODULE.bazel index a4172b329..c99b93222 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -4,7 +4,7 @@ pyrovelocity MODULE module( name = "pyrovelocity", - version = "0.3.0", + version = "0.4.0b1", compatibility_level = 1, ) diff --git a/Makefile b/Makefile index cdb06bc97..f3a84b06f 100644 --- a/Makefile +++ b/Makefile @@ -913,8 +913,8 @@ approve-prs: ## Approve github pull requests from bots: 
PR_ENTRIES="2-5 10 12-18 fi; \ done -PREVIOUS_VERSION := 0.3.0b7 -NEXT_VERSION := 0.3.0 +PREVIOUS_VERSION := 0.3.0 +NEXT_VERSION := 0.4.0b1 VERSION_FILES := \ pyproject.toml \ conda/colab/construct.yaml \ diff --git a/conda/colab/construct.yaml b/conda/colab/construct.yaml index 772ff1e43..d2e70c14b 100644 --- a/conda/colab/construct.yaml +++ b/conda/colab/construct.yaml @@ -1,5 +1,5 @@ name: pyrovelocity-colab -version: 0.3.0 +version: 0.4.0b1 channels: - pytorch diff --git a/containers/gpu.Dockerfile b/containers/gpu.Dockerfile index b271571d5..ab7111c6d 100644 --- a/containers/gpu.Dockerfile +++ b/containers/gpu.Dockerfile @@ -77,7 +77,7 @@ COPY . /root # development RUN pip install --no-deps -e . # distribution -# RUN pip install pyrovelocity==0.3.0 +# RUN pip install pyrovelocity==0.4.0b1 ARG tag ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/containers/pkg.Dockerfile b/containers/pkg.Dockerfile index cb7e071e2..c81f3c81d 100644 --- a/containers/pkg.Dockerfile +++ b/containers/pkg.Dockerfile @@ -34,7 +34,7 @@ COPY . /root # development RUN pip install --no-deps -e . 
# distribution -# RUN pip install pyrovelocity==0.3.0 +# RUN pip install pyrovelocity==0.4.0b1 ARG tag ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/docs/source/notebooks/pyrovelocity_colab_template.ipynb b/docs/source/notebooks/pyrovelocity_colab_template.ipynb index cde05e50f..5d41f8678 100644 --- a/docs/source/notebooks/pyrovelocity_colab_template.ipynb +++ b/docs/source/notebooks/pyrovelocity_colab_template.ipynb @@ -70,7 +70,7 @@ } ], "source": [ - "pyrovelocity_version = \"0.3.0\"\n", + "pyrovelocity_version = \"0.4.0b1\"\n", "pyrovelocity_colab_script_url = (\n", " \"https://storage.googleapis.com/pyrovelocity/data/scripts/\"\n", " + f\"pyrovelocity-colab-{pyrovelocity_version}-Linux-x86_64.sh\"\n", diff --git a/pyproject.toml b/pyproject.toml index 2eb53ff42..520193669 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pyrovelocity" -version = "0.3.0" +version = "0.4.0b1" packages = [{ include = "pyrovelocity", from = "src" }] description = "A multivariate RNA Velocity model to estimate future cell states with uncertainty using probabilistic modeling with pyro." authors = ["pyrovelocity team"] diff --git a/scripts/conda b/scripts/conda index 42d65e655..1e769b676 100755 --- a/scripts/conda +++ b/scripts/conda @@ -3,7 +3,7 @@ set -euo pipefail PACKAGE_NAME="pyrovelocity" -PACKAGE_VERSION="0.3.0" +PACKAGE_VERSION="0.4.0b1" CONDA_BUILD_STRING="pyhff70e4c" CONDA_BUILD_NUMBER="0" # CONDA_CHANNEL_LABEL="pyrovelocity_dev" @@ -32,7 +32,7 @@ Example: ./conda \\ --name pyrovelocity \\ - --version 0.3.0.dev1 \\ + --version 0.4.0b1.dev1 \\ --build-string pyhff70e4c \\ --build-number 0 \\ --label pyrovelocity_dev @@ -67,9 +67,9 @@ PACKAGE_SPEC="conda-forge/label/\ $CONDA_CHANNEL_LABEL::\ $PACKAGE_NAME=$PACKAGE_VERSION=$CONDA_BUILD_STRING"_"$CONDA_BUILD_NUMBER" -BLUE="\033[0;34;1m" +BLUE="\033[0;34;1m" BOLD="\033[1m" -NO_COLOR="\033[0m" +NO_COLOR="\033[0m" if [ "$USE_COLOR" = false ]; then BLUE="" BOLD=""