# calculate-times-auto-2024-tract-15-weighted #282
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
---
# Workflow identity: a static name plus a per-run title assembled from the
# dispatch inputs (mode, year, geography, state and centroid type)
name: 'calculate-times'
run-name: 'calculate-times-${{ inputs.mode }}-${{ inputs.year }}-${{ inputs.geography }}-${{ inputs.state }}-${{ inputs.centroid_type }}'
# Manually dispatched workflow. Every input except override_chunks is a
# required choice; each choice default must be one of the listed options.
on:
  workflow_dispatch:
    inputs:
      # Input values match those in params.yaml
      mode:
        required: true
        description: Mode of travel in R5
        # Default must be one of the options below ('car' is not a valid mode)
        default: 'auto'
        type: choice
        options:
          - auto
          - bicycle
          - pedestrian

      year:
        required: true
        description: Census/OSM data year
        default: '2020'
        type: choice
        options:
          - '2020'
          - '2021'
          - '2022'
          - '2023'
          - '2024'

      geography:
        required: true
        description: Census data geography
        default: county
        type: choice
        options:
          - state
          - county
          - county_subdivision
          - tract
          - block_group
          - zcta

      # Codes are quoted so the leading zeros survive YAML parsing
      state:
        required: true
        description: Target Census state
        default: '01'
        type: choice
        options:
          - '01'
          - '02'
          - '04'
          - '05'
          - '06'
          - '08'
          - '09'
          - '10'
          - '11'
          - '12'
          - '13'
          - '15'
          - '16'
          - '17'
          - '18'
          - '19'
          - '20'
          - '21'
          - '22'
          - '23'
          - '24'
          - '25'
          - '26'
          - '27'
          - '28'
          - '29'
          - '30'
          - '31'
          - '32'
          - '33'
          - '34'
          - '35'
          - '36'
          - '37'
          - '38'
          - '39'
          - '40'
          - '41'
          - '42'
          - '44'
          - '45'
          - '46'
          - '47'
          - '48'
          - '49'
          - '50'
          - '51'
          - '53'
          - '54'
          - '55'
          - '56'

      centroid_type:
        required: true
        description: Whether or not to use population-weighted locations
        default: weighted
        type: choice
        options:
          - weighted
          - unweighted

      # Optional free-form string; validated against the computed chunk list
      # in the setup-jobs job before any chunk job is created
      override_chunks:
        required: false
        description: |
          Comma-separated chunks to run e.g. 0-5,6-11.
          Will run all chunks if null
        type: string
# Environment variables shared by every job in this workflow. All values are
# written as quoted strings, which is how the runner exposes them to steps.
env:
  AWS_DEFAULT_REGION: 'us-east-1'
  # See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
  AWS_EC2_METADATA_DISABLED: 'true'
  PYTHONUNBUFFERED: '1'
jobs:
  # Using the location data, split the origins into N jobs (max 256)
  setup-jobs:
    runs-on: ubuntu-24.04
    outputs:
      # JSON array of chunk identifiers consumed by the run-job matrix
      chunks: ${{ steps.create-job-chunks.outputs.chunks }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      # Record the runner's UID/GID in the job environment so later steps can
      # export them before calling docker compose
      - name: Fetch GitHub user and group ID
        shell: bash
        id: fetch-ids
        run: |
          echo "USER_ID=$(id -u)" >> "$GITHUB_ENV"
          echo "GROUP_ID=$(id -g)" >> "$GITHUB_ENV"

      - name: Build Dockerized dependencies
        uses: ./.github/actions/build-docker
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Load Docker image
        run: |
          docker load --input /tmp/opentimes.tar
          docker image ls -a

      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations

      # Compute the chunk list with split_origin.py, optionally replace it
      # with the validated override list, and publish it as the step output.
      # NOTE(review): the run-job steps invoke docker compose with
      # working-directory 'data'; confirm this step is meant to run from the
      # repository root.
      - name: Create job chunks
        id: create-job-chunks
        shell: bash
        env:
          # Free-form user input: pass it through the environment instead of
          # interpolating it into the script, to avoid shell injection
          OVERRIDE_CHUNKS: ${{ inputs.override_chunks }}
        run: |
          export USER_ID=${{ env.USER_ID }}
          export GROUP_ID=${{ env.GROUP_ID }}
          chunks=$(docker compose run --rm --quiet-pull \
            --entrypoint=python valhalla-run /data/src/split_origin.py \
            --year ${{ inputs.year }} --geography ${{ inputs.geography }} \
            --state ${{ inputs.state }})
          chunks_parsed=($(echo "$chunks" | jq -r '.[]'))

          # If override chunks are set, use those instead
          if [ -n "$OVERRIDE_CHUNKS" ]; then
            override_chunks_parsed=($(echo "$OVERRIDE_CHUNKS" | tr -d ' ' | tr ',' ' '))
            # Every requested chunk must be one of the computed chunks
            for chunk in "${override_chunks_parsed[@]}"; do
              if [[ ! " ${chunks_parsed[*]} " =~ " ${chunk} " ]]; then
                echo "Error: Override chunk ${chunk} is not in the chunks for this origin"
                echo "Chunks include: ${chunks_parsed[*]}"
                exit 1
              fi
            done
            # Re-encode the override list as a compact JSON array of strings
            chunks=$(printf '%s\n' "${override_chunks_parsed[@]}" | jq -c -R . | jq -c -s .)
            echo "Creating jobs for chunks: ${override_chunks_parsed[*]}"
          else
            echo "Creating jobs for chunks: ${chunks_parsed[*]}"
          fi

          # Write the output exactly once, appending rather than truncating
          echo "chunks=$chunks" >> "$GITHUB_OUTPUT"

  # Run one matrix job per chunk produced by setup-jobs
  run-job:
    runs-on: ubuntu-24.04
    needs: setup-jobs
    strategy:
      # Don't fail all chunks if one fails
      fail-fast: false
      matrix:
        chunk: ${{ fromJSON(needs.setup-jobs.outputs.chunks) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations

      # Don't fetch tile data in setup-jobs because they're very large and
      # will churn the Actions cache. We want to wait to fetch it until jobs
      # have actually been picked up
      - name: Fetch Valhalla tile data
        uses: ./.github/actions/fetch-valhalla-tiles
        with:
          year: ${{ inputs.year }}
          state: ${{ inputs.state }}

      - name: Fetch GitHub user and group ID
        shell: bash
        id: fetch-ids
        run: |
          echo "USER_ID=$(id -u)" >> "$GITHUB_ENV"
          echo "GROUP_ID=$(id -g)" >> "$GITHUB_ENV"

      # The artifact name is keyed on the hash of the Dockerfile and
      # pyproject.toml
      - name: Fetch Docker image
        uses: actions/download-artifact@v4
        with:
          name: opentimes-docker-${{ hashFiles('./data/Dockerfile', './pyproject.toml') }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load --input /tmp/opentimes.tar
          docker image ls -a

      # Hard-link the tile archive into ./build, unpack it there, then remove
      # the link
      - name: Extract tiles
        shell: bash
        working-directory: 'data'
        run: |
          tile_path="year=${{ inputs.year }}/geography=state/state=${{ inputs.state }}"
          ln ./intermediate/valhalla_tiles/"$tile_path"/valhalla_tiles.tar.zst ./build/
          tar -xf ./build/valhalla_tiles.tar.zst -C ./build
          rm -f ./build/valhalla_tiles.tar.zst

      - name: Run job chunk
        shell: bash
        working-directory: 'data'
        run: |
          export USER_ID=${{ env.USER_ID }}
          export GROUP_ID=${{ env.GROUP_ID }}
          docker compose run --rm --quiet-pull --entrypoint=python \
            valhalla-run /data/src/calculate_times.py \
            --mode ${{ inputs.mode }} --year ${{ inputs.year }} \
            --geography ${{ inputs.geography }} --state ${{ inputs.state }} \
            --centroid-type ${{ inputs.centroid_type }} \
            --chunk ${{ matrix.chunk }} --write-to-s3

      # Clear the cache if we're one of the last running jobs. Using this instead
      # of a separate workflow step because the steps often come last in the job
      # queue and then won't run when many workflows are queued at the same time
      - name: Clear workflow cache
        if: always()
        continue-on-error: true
        shell: bash
        run: |
          # Left unquoted on use so the trailing flags split into separate words
          endpoint='repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs --paginate -q'
          # With --paginate the jq filter runs once per page, so total_count is
          # printed once per page; keep a single value
          total_jobs=$(gh api $endpoint '.total_count' | tail -n 1)
          # Emit one id per matching job so wc -l counts jobs, not JSON lines
          complete_jobs=$(gh api $endpoint '.jobs[] | select(.status == "completed") | .id' | wc -l)
          in_progress_jobs=$(gh api $endpoint '.jobs[] | select(.status == "in_progress") | .id' | wc -l)
          n_remaining=$((total_jobs - complete_jobs))
          all_statuses=$((complete_jobs + in_progress_jobs))
          echo "Total number of jobs: $total_jobs"
          echo "Number of jobs complete: $complete_jobs"
          echo "Number of jobs remaining: $n_remaining"
          echo "Number of jobs run/running: $all_statuses"
          # Only clear once every job is either completed or in progress and
          # fewer than 2 are left unfinished
          if [ "$n_remaining" -lt 2 ] && [ "$total_jobs" -eq "$all_statuses" ]; then
            echo "Less than 2 jobs still running. Clearing workflow cache!"
            gh cache delete \
              valhalla-tiles-${{ inputs.year }}-${{ inputs.state }}-${{ hashFiles('./data/dvc.lock') }} || true
          fi
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}