# Add data transformation to new_feature_template #1444

# Workflow that builds a Docker image containing the model code,
# pushes it to the GitHub Container Registry, and then optionally uses
# that container image to run the model using an AWS Batch job.
#
# Images are built on every commit to a PR or to the `master` branch in order
# to ensure that the build continues to work properly, but Batch jobs are gated
# behind a `deploy` environment that requires manual approval from a
# @ccao-data/core-team member.
name: build-and-run-model

# Events that start this workflow: pull request activity, manual dispatch,
# and pushes to `master`.
on:
  # Pull request lifecycle events. `closed` is included alongside the usual
  # open/update events.
  # NOTE(review): presumably `closed` lets the called workflow clean up
  # per-PR resources -- confirm against the reusable workflow.
  pull_request:
    types: [opened, reopened, synchronize, closed]

  # Manual runs. The job reads each input below as `inputs.<name>` and
  # forwards it to the container through an environment variable.
  workflow_dispatch:
    inputs:
      run_type:
        type: choice
        description: Run type or purpose
        options:
          - junk
          - rejected
          - test
          - baseline
          - candidate
          - final
        default: test
        required: true

      run_note:
        type: string
        description: Note to include with run
        required: true

      report_additional_pins:
        type: string
        description: Create reports for additional PINs
        # Default is two PINs separated by a single space
        default: '13253180150000 05174150240000'
        required: false

      upload_enable:
        type: boolean
        description: Upload results to S3
        default: true
        required: true

      cv_enable:
        type: boolean
        description: Run cross-validation
        default: false
        required: true

      comp_enable:
        type: boolean
        description: Run comparables finding
        default: false
        required: true

      shap_enable:
        type: boolean
        description: Calculate SHAP values
        default: false
        required: true

  # Every push to the `master` branch
  push:
    branches: [master]
jobs:
  # Single job that delegates all of its work to a reusable workflow in
  # ccao-data/actions; this file only supplies permissions, inputs, and
  # secrets.
  build-and-run-model:
    permissions:
      # contents:read and id-token:write permissions are needed to interact
      # with GitHub's OIDC Token endpoint so that we can authenticate with AWS
      contents: read
      id-token: write
      # While packages:write is usually not required for workflows, it is
      # required in order to allow the reusable called workflow to push to
      # GitHub Container Registry
      packages: write
    uses: ccao-data/actions/.github/workflows/build-and-run-batch-job.yaml@main
    with:
      # Compute settings for the job, passed as strings.
      # NOTE(review): units for `memory` are defined by the called workflow
      # -- confirm there before changing the value.
      backend: "ec2"
      vcpu: "40"
      memory: "158000"
      # Maximum pipeline runtime. This is slightly below 6 hours, which
      # is the maximum length of any single GitHub Actions job
      # (21000 s = 5 h 50 min)
      role-duration-seconds: 21000
      # Disable Batch job status polling since this workflow often takes
      # more than 6 hours
      poll_for_status: false
      # Set these env vars in the container, one NAME=value pair per line.
      # The values come from the `workflow_dispatch` inputs; on
      # `pull_request` and `push` events no such inputs exist, so these
      # expressions expand to empty strings.
      container_env_vars: |
        WORKFLOW_RUN_TYPE=${{ inputs.run_type }}
        WORKFLOW_RUN_NOTE=${{ inputs.run_note }}
        UPLOAD_ENABLE_OVERRIDE=${{ inputs.upload_enable }}
        CV_ENABLE_OVERRIDE=${{ inputs.cv_enable }}
        COMP_ENABLE_OVERRIDE=${{ inputs.comp_enable }}
        SHAP_ENABLE_OVERRIDE=${{ inputs.shap_enable }}
        REPORT_ADDITIONAL_PINS=${{ inputs.report_additional_pins }}
    secrets:
      AWS_IAM_ROLE_TO_ASSUME_ARN: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
      AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
      # Set these env vars as secrets so they get masked in the GitHub
      # Actions logs
      CONTAINER_ENV_VARS: |
        AWS_SNS_ARN_MODEL_STATUS=${{ secrets.AWS_SNS_ARN_MODEL_STATUS }}