# Workflow: CML (captured from GitHub Actions run "CML #63").
# The workflow file for this run follows.

# Name shown for this workflow in the GitHub Actions UI.
name: CML

# Triggers:
#   * workflow_dispatch -- manual runs, with boolean toggles for tmate
#     debugging sessions and for forcing individual pipeline stages.
#   * push -- commits to branches matching "exp*" that touch the package
#     sources or the figure-reproduction scripts/config listed below.
on:
  workflow_dispatch:
    inputs:
      # --- tmate.io debugging toggles (one per point in the workflow) ---
      deploy_debug_enabled:
        description: "Run the deployment with tmate.io debugging enabled"
        required: true
        type: boolean
        default: false
      model_debug_enabled:
        description: "Run the model with tmate.io debugging enabled"
        required: true
        type: boolean
        default: false
      end_debug_enabled:
        description: "Run tmate.io debugging at the end of the workflow"
        required: true
        type: boolean
        default: false
      # --- flags forwarded to the pipeline script in the `pipeline` job ---
      force_retraining:
        description: "Force retraining in model execution"
        required: true
        type: boolean
        default: false
      force_postprocessing:
        description: "Force postprocessing in model execution"
        required: true
        type: boolean
        default: false
      force_summarize:
        description: "Force summarization in model execution"
        required: true
        type: boolean
        default: false
      force_all:
        description: "Force all steps in model execution"
        required: true
        type: boolean
        default: false
  push:
    branches:
      - "exp*"
    paths:
      - "pyrovelocity/**"
      - "reproducibility/figures/*.py"
      - "reproducibility/figures/dvc.*"
      - "reproducibility/figures/config.yaml"
# Review/set repository secrets via the gh CLI:
#
# gh secret list --repo=$(GH_REPO)
# gh secret set PERSONAL_ACCESS_TOKEN --repo="$(GH_REPO)" --body="$(GH_PAT)"
#
# See also:
# https://cml.dev/doc/self-hosted-runners?tab=GitHub#personal-access-token
#
# Candidate machine types (cf. `--cloud-type` in the deploy job):
# test machine: n1-standard-32+nvidia-tesla-t4*1
# large machine: n1-standard-64+nvidia-tesla-t4*4
# large machine: a2-highgpu-2g+nvidia-tesla-a100*2
# Workflow-level environment: these variables are exported to every step of
# every job below. All values except TF_LOG_PROVIDER come from repository
# secrets.
env:
  # Personal access token stored in the PERSONAL_ACCESS_TOKEN secret.
  REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
  # Google Cloud credentials and the service account referenced by
  # `--cloud-permission-set` in the deploy job.
  GOOGLE_APPLICATION_CREDENTIALS_DATA: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS_DATA }}
  GCP_SERVICE_ACCOUNT: ${{ secrets.GCP_SERVICE_ACCOUNT }}
  # MLflow tracking server location and credentials.
  MLFLOW_TRACKING_PASSWORD: ${{ secrets.MLFLOW_TRACKING_PASSWORD }}
  MLFLOW_TRACKING_URI: ${{ secrets.MLFLOW_TRACKING_URI }}
  MLFLOW_TRACKING_USERNAME: ${{ secrets.MLFLOW_TRACKING_USERNAME }}
  # NOTE(review): presumably turns on verbose Terraform-provider logging for
  # `cml runner launch` -- confirm it is still wanted outside of debugging.
  TF_LOG_PROVIDER: DEBUG
jobs:
  # Launches (or reuses) a GPU-equipped self-hosted runner on GCP via CML.
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: iterative/setup-cml@v1
      # Optional interactive debugging before the runner is launched.
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ inputs.deploy_debug_enabled }}
      - name: Deploy cloud runner
        run: |
          # Zones to try, in order of preference.
          GCP_ZONES=(
            "us-central1-b" "us-central1-f"
            "us-central1-a" "us-central1-c"
            "us-east1-b"
            "us-west1-b" "us-west4-b"
          )
          launched=false
          # Up to three passes over all zones; stop at the first success.
          for trial in {1..3}; do
            for zone in "${GCP_ZONES[@]}"; do
              echo "Trial $trial in zone $zone"
              # The machine type is quoted so the shell never treats the
              # `*` in it as a glob pattern.
              if cml runner launch \
                --labels=cml-gpu \
                --reuse-idle \
                --cloud=gcp \
                --cloud-type='n1-highmem-96+nvidia-tesla-t4*4' \
                --cloud-gpu=tesla \
                --cloud-hdd-size=500 \
                --cloud-region="$zone" \
                --cloud-permission-set="${GCP_SERVICE_ACCOUNT},scopes=storage-rw"; then
                launched=true
                break 2
              fi
            done
          done
          # Fail the step explicitly when every zone/trial was exhausted.
          if [ "$launched" != "true" ]; then
            echo "::error::Unable to launch a CML runner in any configured zone"
            exit 1
          fi

  # Runs the experiment pipeline on the runner launched by `deploy`.
  pipeline:
    needs: deploy
    runs-on: [self-hosted, cml-gpu]
    # 1440 minutes = 24 hours.
    timeout-minutes: 1440
    container:
      image: ghcr.io/pinellolab/pyrovelocity:master
      options: --gpus all
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history rather than a shallow clone.
          fetch-depth: 0
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read it as the float 3.1.
          python-version: "3.10"
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          # Keeps the virtualenv at ./.venv, which the cache step and the
          # experiment step below both rely on.
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      # Optional interactive debugging before dependencies are installed.
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ inputs.model_debug_enabled }}
      # Dependencies are installed only on a cache miss; the project itself
      # is installed on every run.
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install -E dev -E plotting --no-interaction --no-root
      - name: Install project
        run: poetry install -E dev -E plotting --no-interaction
      - name: CML setup
        run: cml ci
      - name: Setup rclone
        env:
          # Passed through the environment instead of being pasted into the
          # script text, so quotes or other shell metacharacters inside the
          # secret cannot break or alter the script.
          RCLONE_CONF: ${{ secrets.RCLONE_CONF }}
        run: |
          sudo -v ; curl https://rclone.org/install.sh | sudo bash
          which rclone
          # -y: never wait on a confirmation prompt in a non-interactive job.
          sudo apt-get update && sudo apt-get install -y tree
          which tree
          mkdir -p "$HOME/.config/rclone/"
          printf '%s\n' "$RCLONE_CONF" > "$HOME/.config/rclone/rclone.conf"
      - name: Run experiment and submit report
        shell: bash
        env:
          # Manual-dispatch inputs; empty on push-triggered runs, in which
          # case no force flag is added.
          FORCE_RETRAINING: ${{ github.event.inputs.force_retraining }}
          FORCE_POSTPROCESSING: ${{ github.event.inputs.force_postprocessing }}
          FORCE_SUMMARIZE: ${{ github.event.inputs.force_summarize }}
          FORCE_ALL: ${{ github.event.inputs.force_all }}
        run: |
          source .venv/bin/activate
          # Translate the enabled inputs into pipeline.sh options.
          FLAGS=()
          if [ "${FORCE_RETRAINING}" = "true" ]; then
            FLAGS+=("-ft")
          fi
          if [ "${FORCE_POSTPROCESSING}" = "true" ]; then
            FLAGS+=("-fp")
          fi
          if [ "${FORCE_SUMMARIZE}" = "true" ]; then
            FLAGS+=("-fs")
          fi
          if [ "${FORCE_ALL}" = "true" ]; then
            FLAGS+=("-f")
          fi
          ./.github/pipeline.sh "${FLAGS[@]}"
      # Optional interactive debugging after the pipeline step. With no
      # status function in `if`, this runs only when earlier steps succeeded.
      - name: Setup tmate debug session
        uses: mxschmitt/action-tmate@v3
        if: ${{ inputs.end_debug_enabled }}