Skip to content

Commit

Permalink
train model 2 with sample-based enumeration (#370)
Browse files Browse the repository at this point in the history
* use Trace_ELBO

* pipeline script: optionally `-f` force retraining
* add `force_retraining` to toggle `pipeline.sh -f`
* add annoy to plotting dependencies
* gitignore posterior phase portraits for pancreas
* pipeline: track posterior phase portraits
* summarize pancreas models 1/2 lock
* add flags to force postprocessing and summarize
* increase postprocess and summarize 4 -> 6 threads
* update workflow to support forcing stage groups
* update config to expect compressed figureS2 output
* compress three gene data set
* pipeline: track gene subset data file
* update pipeline lock

* update constraints for tests and release workflows
* update dvc lock for dvc >3.0
* use dvc3 cml container
* update version constraints
  • Loading branch information
cameronraysmith authored Jul 7, 2023
1 parent 0e189a8 commit 8147c6d
Show file tree
Hide file tree
Showing 38 changed files with 1,151 additions and 1,319 deletions.
40 changes: 27 additions & 13 deletions .github/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@

set -x

### Parse force flags ###
# Sets the dvc command used for each stage group from the script arguments.
#
# Globals written:
#   DVC_COMMAND              command for the train stages
#   DVC_COMMAND_POSTPROCESS  command for the postprocess stages
#   DVC_COMMAND_SUMMARIZE    command for the summarize stages
#   FORCE_ALL                "true" when -f was given, otherwise empty
# Arguments:
#   -f   force every stage group
#   -ft  force training only
#   -fp  force postprocessing only
#   -fs  force summarize only
# Outputs:
#   "Invalid option <arg>" on stderr for any other argument (non-fatal).
#
# The flags are matched as whole words rather than with getopts: getopts
# only understands single-character options, so it would read "-ft" as
# "-f" followed by "-t" and force every stage group.
function parse_force_flags() {
    local -r forced="dvc repro -f -s"
    local arg

    # Defaults: plain repro, nothing forced.
    DVC_COMMAND="dvc repro"
    DVC_COMMAND_POSTPROCESS="dvc repro"
    DVC_COMMAND_SUMMARIZE="dvc repro"
    FORCE_ALL=""

    for arg in "$@"; do
        case "$arg" in
            -f)
                FORCE_ALL="true"
                DVC_COMMAND="$forced"
                DVC_COMMAND_POSTPROCESS="$forced"
                DVC_COMMAND_SUMMARIZE="$forced"
                ;;
            -ft)
                DVC_COMMAND="$forced"
                ;;
            -fp)
                DVC_COMMAND_POSTPROCESS="$forced"
                ;;
            -fs)
                DVC_COMMAND_SUMMARIZE="$forced"
                ;;
            *)
                printf 'Invalid option %s\n' "$arg" >&2
                ;;
        esac
    done
}

parse_force_flags "$@"

### Define parallel execution function ###
function run_parallel_pipeline() {
Expand All @@ -36,44 +50,44 @@ function run_parallel_pipeline() {
dvc repro preprocess

# manually execute training stages to distribute over four GPUs
dvc repro train@pancreas_model2 &
$DVC_COMMAND train@pancreas_model2 &
sleep 7
dvc repro train@pbmc68k_model2 &
$DVC_COMMAND train@pbmc68k_model2 &
sleep 7
dvc repro train@pons_model2 &
$DVC_COMMAND train@pons_model2 &
sleep 7
dvc repro train@larry_model2 &
$DVC_COMMAND train@larry_model2 &
wait

dvc repro train@larry_tips_model2 &
$DVC_COMMAND train@larry_tips_model2 &
sleep 7
dvc repro train@larry_mono_model2 &
$DVC_COMMAND train@larry_mono_model2 &
sleep 7
dvc repro train@larry_neu_model2 &
$DVC_COMMAND train@larry_neu_model2 &
sleep 7
dvc repro train@larry_multilineage_model2 &
$DVC_COMMAND train@larry_multilineage_model2 &
wait

dvc repro train@bonemarrow_model2 &
$DVC_COMMAND train@bonemarrow_model2 &
sleep 7
dvc repro train@pbmc10k_model2 &
$DVC_COMMAND train@pbmc10k_model2 &
sleep 7
dvc repro train@pbmc5k_model2 &
$DVC_COMMAND train@pbmc5k_model2 &

wait
dvc repro train

dvc stage list --name-only |\
grep -E "postprocess*" |\
/usr/bin/time -v \
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 15 + 5)); dvc repro "$@"' --
xargs -t -n 1 -P 6 bash -c 'sleep $((RANDOM % 15 + 5)); '"$DVC_COMMAND_POSTPROCESS"' "$@"' --
wait
dvc repro postprocess

dvc stage list --name-only |\
grep -E "summarize*" |\
/usr/bin/time -v \
xargs -t -n 1 -P 4 bash -c 'sleep $((RANDOM % 15 + 5)); dvc repro "$@"' --
xargs -t -n 1 -P 6 bash -c 'sleep $((RANDOM % 15 + 5)); '"$DVC_COMMAND_SUMMARIZE"' "$@"' --
wait
dvc repro summarize
}
Expand Down
40 changes: 39 additions & 1 deletion .github/workflows/cml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@ on:
required: true
type: boolean
default: false
# Boolean inputs that select which pipeline stage groups are forced to re-run.
# The "Run experiment and submit report" step reads them through
# github.event.inputs and maps them to pipeline.sh flags:
#   force_retraining -> -ft, force_postprocessing -> -fp,
#   force_summarize  -> -fs, force_all            -> -f
force_retraining:
description: "Force retraining in model execution"
required: true
type: boolean
default: false
force_postprocessing:
description: "Force postprocessing in model execution"
required: true
type: boolean
default: false
force_summarize:
description: "Force summarization in model execution"
required: true
type: boolean
default: false
force_all:
description: "Force all steps in model execution"
required: true
type: boolean
default: false
push:
branches:
- "exp*"
Expand Down Expand Up @@ -120,5 +140,23 @@ jobs:
- name: Run experiment and submit report
run: |
source .venv/bin/activate
./.github/pipeline.sh
# Build the pipeline.sh argument list from the force_* workflow inputs.
# Each input that equals "true" appends one flag; any other value adds nothing.
# NOTE(review): the flags are multi-letter (-ft/-fp/-fs), so pipeline.sh has to
# match them as whole arguments; a getopts-based parser would read "-ft" as
# "-f" plus "-t".
FLAGS=""
if [ "${{ github.event.inputs.force_retraining }}" = "true" ]; then
FLAGS="${FLAGS} -ft"
fi
if [ "${{ github.event.inputs.force_postprocessing }}" = "true" ]; then
FLAGS="${FLAGS} -fp"
fi
if [ "${{ github.event.inputs.force_summarize }}" = "true" ]; then
FLAGS="${FLAGS} -fs"
fi
if [ "${{ github.event.inputs.force_all }}" = "true" ]; then
FLAGS="${FLAGS} -f"
fi
# FLAGS is intentionally unquoted so it word-splits into separate arguments.
./.github/pipeline.sh ${FLAGS}
shell: bash
4 changes: 2 additions & 2 deletions .github/workflows/constraints.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pip==23.1.2
nox==2023.4.22
nox-poetry==1.0.2
poetry==1.4.2
virtualenv==20.21.0
poetry==1.5.1
virtualenv==20.23.1
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1
FROM ghcr.io/iterative/cml:0-dvc2-base1-gpu
FROM ghcr.io/iterative/cml:0-dvc3-base1-gpu
# FROM ghcr.io/iterative/cml@sha256:ad10a563de25311241f10d9d5509cecab6bc754b6b2c90b61e309e34fe80911e

WORKDIR ${CML_RUNNER_PATH}
Expand Down
Loading

0 comments on commit 8147c6d

Please sign in to comment.