Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(workflows): add configuration for larry data subsets #630

Merged
merged 23 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
160f857
fix(io): increase default compression level for posterior samples
cameronraysmith Aug 16, 2024
e20f2ea
nit(io): update larry single lineage docstrings
cameronraysmith Aug 16, 2024
055d1e0
feat(workflows): add larry neutrophil precursor configuration
cameronraysmith Aug 16, 2024
441d9e9
feat(workflows): enable execution for larry neutrophil precursor data
cameronraysmith Aug 16, 2024
560e31f
fix(io): write data sets modified on download
cameronraysmith Aug 16, 2024
168af3e
feat(workflows): add larry monocyte precursor configuration
cameronraysmith Aug 16, 2024
8464429
feat(workflows): enable execution for larry monocyte precursor data
cameronraysmith Aug 17, 2024
145a5ec
refactor(workflows): move all constants to constants
cameronraysmith Aug 17, 2024
610d7cc
fix(workflows): use overwrite cache variable to set default value
cameronraysmith Aug 17, 2024
0741330
fix(workflows): import contants in configuration
cameronraysmith Aug 17, 2024
121381c
fix(workflows): import contants in workflow
cameronraysmith Aug 17, 2024
0232207
feat(workflows): add larry multilineage configuration
cameronraysmith Aug 17, 2024
d0321ca
feat(workflows): enable execution for larry multilineage data
cameronraysmith Aug 17, 2024
7f0883b
test(constants): enable cache overwrite
cameronraysmith Aug 17, 2024
13a5313
fix(workflows): load dotenv in constants
cameronraysmith Aug 17, 2024
d2ca2fc
fix(workflows): log env from constants
cameronraysmith Aug 17, 2024
784f276
fix(workflows): import cache flag from constants
cameronraysmith Aug 17, 2024
32e400f
fix(cid): set hydra outputs artifact name from set-variables outputs
cameronraysmith Aug 17, 2024
e40270f
fix(workflows): log warnings when cluster config files not found
cameronraysmith Aug 17, 2024
7e1dad7
test(constants): disable cache overwrite
cameronraysmith Aug 17, 2024
cc59182
fix(workflows): reenable simulated and pancreas data
cameronraysmith Aug 17, 2024
561ba5b
chore(workflows): upload and metrics cache versions `2024.8.15.1`
cameronraysmith Aug 17, 2024
8a79997
chore(version): `0.4.0b1`
cameronraysmith Aug 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/cid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ jobs:
- name: Create config tarball
id: save-hydra-outputs
run: |
TAR_FILENAME="hydra_outputs_${GITHUB_SHA_SHORT}.tar.gz"
TAR_FILENAME="hydra_outputs_${{ needs.set-variables.outputs.checkout_rev }}.tar.gz"

tar -czf $TAR_FILENAME ./outputs/
tar -tzf $TAR_FILENAME
Expand All @@ -422,7 +422,6 @@ jobs:
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4
with:
name: hydra-outputs

path: ${{ env.HYDRA_OUTPUTS_TAR }}

release:
Expand Down
2 changes: 1 addition & 1 deletion MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pyrovelocity MODULE

module(
name = "pyrovelocity",
version = "0.3.0",
version = "0.4.0b1",
compatibility_level = 1,
)

Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -913,8 +913,8 @@ approve-prs: ## Approve github pull requests from bots: PR_ENTRIES="2-5 10 12-18
fi; \
done

PREVIOUS_VERSION := 0.3.0b7
NEXT_VERSION := 0.3.0
PREVIOUS_VERSION := 0.3.0
NEXT_VERSION := 0.4.0b1
VERSION_FILES := \
pyproject.toml \
conda/colab/construct.yaml \
Expand Down
2 changes: 1 addition & 1 deletion conda/colab/construct.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: pyrovelocity-colab
version: 0.3.0
version: 0.4.0b1

channels:
- pytorch
Expand Down
2 changes: 1 addition & 1 deletion containers/gpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ COPY . /root
# development
RUN pip install --no-deps -e .
# distribution
# RUN pip install pyrovelocity==0.3.0
# RUN pip install pyrovelocity==0.4.0b1

ARG tag
ENV FLYTE_INTERNAL_IMAGE $tag
2 changes: 1 addition & 1 deletion containers/pkg.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ COPY . /root
# development
RUN pip install --no-deps -e .
# distribution
# RUN pip install pyrovelocity==0.3.0
# RUN pip install pyrovelocity==0.4.0b1

ARG tag
ENV FLYTE_INTERNAL_IMAGE $tag
2 changes: 1 addition & 1 deletion docs/source/notebooks/pyrovelocity_colab_template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
}
],
"source": [
"pyrovelocity_version = \"0.3.0\"\n",
"pyrovelocity_version = \"0.4.0b1\"\n",
"pyrovelocity_colab_script_url = (\n",
" \"https://storage.googleapis.com/pyrovelocity/data/scripts/\"\n",
" + f\"pyrovelocity-colab-{pyrovelocity_version}-Linux-x86_64.sh\"\n",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pyrovelocity"
version = "0.3.0"
version = "0.4.0b1"
packages = [{ include = "pyrovelocity", from = "src" }]
description = "A multivariate RNA Velocity model to estimate future cell states with uncertainty using probabilistic modeling with pyro."
authors = ["pyrovelocity team"]
Expand Down
8 changes: 4 additions & 4 deletions scripts/conda
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -euo pipefail

PACKAGE_NAME="pyrovelocity"
PACKAGE_VERSION="0.3.0"
PACKAGE_VERSION="0.4.0b1"
CONDA_BUILD_STRING="pyhff70e4c"
CONDA_BUILD_NUMBER="0"
# CONDA_CHANNEL_LABEL="pyrovelocity_dev"
Expand Down Expand Up @@ -32,7 +32,7 @@ Example:

./conda \\
--name pyrovelocity \\
--version 0.3.0.dev1 \\
--version 0.4.0b1.dev1 \\
--build-string pyhff70e4c \\
--build-number 0 \\
--label pyrovelocity_dev
Expand Down Expand Up @@ -67,9 +67,9 @@ PACKAGE_SPEC="conda-forge/label/\
$CONDA_CHANNEL_LABEL::\
$PACKAGE_NAME=$PACKAGE_VERSION=$CONDA_BUILD_STRING"_"$CONDA_BUILD_NUMBER"

BLUE="\033[0;34;1m"
BLUE="\033[0;34;1m"
BOLD="\033[1m"
NO_COLOR="\033[0m"
NO_COLOR="\033[0m"
if [ "$USE_COLOR" = false ]; then
BLUE=""
BOLD=""
Expand Down
48 changes: 45 additions & 3 deletions src/pyrovelocity/io/compressedpickle.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import os
import pickle
from os import PathLike
from pathlib import Path

import numpy as np
import zstandard as zstd
from beartype import beartype
from beartype.typing import Any, Dict
from sparse import COO
from zstandard import (
ZstdCompressionParameters,
ZstdCompressor,
ZstdDecompressor,
)

from pyrovelocity.io.sparsity import densify_arrays, sparsify_arrays
from pyrovelocity.logging import configure_logging
Expand All @@ -15,6 +21,35 @@
logger = configure_logging(__name__)


@beartype
def get_cpu_count() -> int:
    """
    Return the number of logical CPUs reported by the operating system.

    ``os.cpu_count()`` signals an undetermined count by returning ``None``
    rather than raising, so no exception handler is needed; any falsy
    result is mapped to the fallback value of 1.

    Returns:
        The logical CPU count, or 1 when it cannot be determined.
    """
    detected = os.cpu_count()
    return detected if detected else 1


@beartype
def get_optimal_thread_count(cpu_count: int) -> int:
    """
    Determine the number of worker threads to use for a given CPU count.

    Small machines (up to 2 CPUs) use every CPU, machines with up to 8 CPUs
    leave one CPU free, and larger machines leave two CPUs free.

    Args:
        cpu_count: Number of CPUs available. Zero or negative values are
            treated as a single CPU so the result is always usable as a
            thread count.

    Returns:
        A thread count that is always at least 1.
    """
    if cpu_count <= 2:
        # Nothing to spare on tiny machines; also guards against zero or
        # negative inputs producing a meaningless thread count.
        return max(1, cpu_count)
    reserved = 1 if cpu_count <= 8 else 2
    return cpu_count - reserved


# Evaluated once, when this module is imported. COMPRESSION_THREADS is the
# thread count derived from CPU_COUNT by get_optimal_thread_count.
CPU_COUNT = get_cpu_count()
COMPRESSION_THREADS = get_optimal_thread_count(CPU_COUNT)


# TODO: Handle sparsification when values are not exclusively arrays
class CompressedPickle:
"""
Expand Down Expand Up @@ -67,8 +102,15 @@ def save(
"""
)

compression_params = ZstdCompressionParameters(
compression_level=9,
threads=COMPRESSION_THREADS,
)

with file_path.open("wb") as f:
compression_context = zstd.ZstdCompressor(level=3)
compression_context = ZstdCompressor(
compression_params=compression_params
)
with compression_context.stream_writer(f) as compressor:
pickle.dump(obj, compressor)

Expand Down Expand Up @@ -99,7 +141,7 @@ def load(
True
"""
with open(file_path, "rb") as f:
decompression_context = zstd.ZstdDecompressor()
decompression_context = ZstdDecompressor()
with decompression_context.stream_reader(f) as decompressor:
obj = pickle.load(decompressor)

Expand Down
9 changes: 7 additions & 2 deletions src/pyrovelocity/io/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def larry_neu(
In vitro hematopoiesis LARRY dataset.

Subset of Data from `Weinreb et al. (2020) <DOI: 10.1126/science.aaw3381>'
consisting of unipotent neutrophils.
consisting of unipotent neutrophil precursors and neutrophils.

https://figshare.com/ndownloader/files/37028575

Expand All @@ -94,6 +94,7 @@ def larry_neu(
url = "https://figshare.com/ndownloader/files/37028575"
adata = sc.read(file_path, backup_url=url, sparse=True, cache=True)
adata = adata[adata.obs.state_info != "Centroid", :]
adata.write(file_path)
return adata


Expand All @@ -104,7 +105,7 @@ def larry_mono(
"""
In vitro hematopoiesis LARRY dataset
Subset of Data from `Weinreb et al. (2020) <DOI: 10.1126/science.aaw3381>'
consisting of unipotent monocytes.
consisting of unipotent monocyte precursors and monocytes.

https://figshare.com/ndownloader/files/37028572

Expand All @@ -114,6 +115,7 @@ def larry_mono(
url = "https://figshare.com/ndownloader/files/37028572"
adata = sc.read(file_path, backup_url=url, sparse=True, cache=True)
adata = adata[adata.obs.state_info != "Centroid", :]
adata.write(file_path)
return adata


Expand Down Expand Up @@ -187,6 +189,7 @@ def larry_tips(
adata = larry()
adata = adata[adata.obs["time_info"] == 6.0]
adata = adata[adata.obs["state_info"] != "Undifferentiated"]
adata.write(file_path)
return adata


Expand All @@ -205,6 +208,7 @@ def larry_multilineage(
adata_larry_mono = larry_mono()
adata_larry_neu = larry_neu()
adata = adata_larry_mono.concatenate(adata_larry_neu)
adata.write(file_path)
return adata


Expand Down Expand Up @@ -273,4 +277,5 @@ def pbmc68k(
adata = scv.datasets.pbmc68k(file_path=file_path)
scv.pp.remove_duplicate_cells(adata)
adata.obsm["X_tsne"][:, 0] *= -1
adata.write(file_path)
return adata
3 changes: 2 additions & 1 deletion src/pyrovelocity/workflows/cli/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
)
from pyrovelocity.workflows.constants import (
LOCAL_CLUSTER_CONFIG_FILE_PATH,
PYROVELOCITY_OVERWRITE_CACHE,
REMOTE_CLUSTER_CONFIG_FILE_PATH,
)

Expand Down Expand Up @@ -83,7 +84,7 @@ class ExecutionContext(DataClassJsonMixin):
project: str = "pyrovelocity"
domain: str = "development"
wait: bool = True
overwrite_cache: bool = False
overwrite_cache: bool = PYROVELOCITY_OVERWRITE_CACHE


def handle_local_execution(exec_mode, execution_context, entity, entity_config):
Expand Down
31 changes: 29 additions & 2 deletions src/pyrovelocity/workflows/constants.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,39 @@
import os

from dotenv import load_dotenv
from dulwich.repo import NotGitRepository, Repo

from pyrovelocity.logging import configure_logging
from pyrovelocity.utils import str_to_bool

logger = configure_logging("pyrovelocity.workflows.constants")

# Module-level side effect: runs at import time, before the os.getenv lookups
# below. NOTE(review): presumably populates os.environ from a local .env
# file -- confirm against python-dotenv's default search behavior.
load_dotenv()

# Boolean flags read from the environment. Each raw string value (or the
# string default given here when the variable is unset) is converted with
# str_to_bool.
PYROVELOCITY_TESTING_FLAG = str_to_bool(
os.getenv("PYROVELOCITY_TESTING_FLAG", "False")
)
PYROVELOCITY_DATA_SUBSET = str_to_bool(
os.getenv("PYROVELOCITY_DATA_SUBSET", "False")
)
PYROVELOCITY_OVERWRITE_CACHE = str_to_bool(
os.getenv("PYROVELOCITY_OVERWRITE_CACHE", "False")
)
PYROVELOCITY_CACHE_FLAG = str_to_bool(
os.getenv("PYROVELOCITY_CACHE_FLAG", "True")
)
PYROVELOCITY_UPLOAD_RESULTS = str_to_bool(
os.getenv("PYROVELOCITY_UPLOAD_RESULTS", "True")
)

# Log the resolved flag values once, when this module is imported.
logger.info(
f"\nPYROVELOCITY_TESTING_FLAG: {PYROVELOCITY_TESTING_FLAG}\n"
f"PYROVELOCITY_DATA_SUBSET: {PYROVELOCITY_DATA_SUBSET}\n"
f"PYROVELOCITY_OVERWRITE_CACHE: {PYROVELOCITY_OVERWRITE_CACHE}\n"
f"PYROVELOCITY_CACHE_FLAG: {PYROVELOCITY_CACHE_FLAG}\n"
f"PYROVELOCITY_UPLOAD_RESULTS: {PYROVELOCITY_UPLOAD_RESULTS}\n\n"
)


def get_git_repo_root(path="."):
try:
Expand Down Expand Up @@ -38,15 +66,14 @@ def get_git_repo_root(path="."):
)
logger.warning(remote_cluster_config_file_not_found_message)
REMOTE_CLUSTER_CONFIG_FILE_PATH = LOCAL_CLUSTER_CONFIG_FILE_PATH
# raise FileNotFoundError(remote_cluster_config_file_not_found_message)

if not os.path.isfile(LOCAL_CLUSTER_CONFIG_FILE_PATH):
local_cluster_config_file_not_found_message = (
f"Local cluster config file not found at path:\n\n"
f"{LOCAL_CLUSTER_CONFIG_FILE_PATH}\n\n"
f"Check that you have not deleted this file from the repository.\n\n"
)
raise FileNotFoundError(local_cluster_config_file_not_found_message)
logger.warning(local_cluster_config_file_not_found_message)

logger.debug(
f"Remote cluster config file path: {REMOTE_CLUSTER_CONFIG_FILE_PATH}"
Expand Down
Loading
Loading