# Workflow run title: calculate-times-auto-2024-tract-50-weighted (#398)
---
# Workflow: calculate-times
# Manually-dispatched workflow that computes travel times for a given
# mode/year/geography/state/centroid-type combination. Input values
# match those in params.yaml.
name: calculate-times
run-name: calculate-times-${{ inputs.mode }}-${{ inputs.year }}-${{ inputs.geography }}-${{ inputs.state }}-${{ inputs.centroid_type }}

on:
  workflow_dispatch:
    inputs:
      # Input values match those in params.yaml
      mode:
        required: true
        description: Mode of travel
        default: 'auto'
        type: choice
        options:
          - auto
          - bicycle
          - pedestrian
      year:
        required: true
        description: Census/OSM data year
        default: '2020'
        type: choice
        options:
          - '2020'
          - '2021'
          - '2022'
          - '2023'
          - '2024'
      geography:
        required: true
        description: Census data geography
        default: county
        type: choice
        options:
          - state
          - county
          - county_subdivision
          - tract
          - block_group
          - zcta
      state:
        required: true
        description: Target Census state
        default: '01'
        type: choice
        # Two-digit Census FIPS state codes (quoted to preserve leading zeros)
        options:
          - '01'
          - '02'
          - '04'
          - '05'
          - '06'
          - '08'
          - '09'
          - '10'
          - '11'
          - '12'
          - '13'
          - '15'
          - '16'
          - '17'
          - '18'
          - '19'
          - '20'
          - '21'
          - '22'
          - '23'
          - '24'
          - '25'
          - '26'
          - '27'
          - '28'
          - '29'
          - '30'
          - '31'
          - '32'
          - '33'
          - '34'
          - '35'
          - '36'
          - '37'
          - '38'
          - '39'
          - '40'
          - '41'
          - '42'
          - '44'
          - '45'
          - '46'
          - '47'
          - '48'
          - '49'
          - '50'
          - '51'
          - '53'
          - '54'
          - '55'
          - '56'
      centroid_type:
        required: true
        description: Whether or not to use population-weighted locations
        default: weighted
        type: choice
        options:
          - weighted
          - unweighted
      override_chunks:
        required: false
        description: |
          Comma-separated chunks to run e.g. 0-5,6-11.
          Will run all chunks if null
        type: string

env:
  AWS_DEFAULT_REGION: us-east-1
  # See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
  # Quoted so the consumer receives the string "true", not a YAML boolean
  AWS_EC2_METADATA_DISABLED: "true"
  PYTHONUNBUFFERED: "1"
  UV_SYSTEM_PYTHON: "1"
jobs:
  # Using the location data, split the origins into N jobs (max 256)
  setup-jobs:
    runs-on: ubuntu-24.04
    outputs:
      chunks: ${{ steps.create-job-chunks.outputs.chunks }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      # Docker compose needs the host UID/GID so container-written files
      # stay readable by later steps
      - name: Fetch GitHub user and group ID
        shell: bash
        id: fetch-ids
        run: |
          echo "USER_ID=$(id -u)" >> $GITHUB_ENV
          echo "GROUP_ID=$(id -g)" >> $GITHUB_ENV

      - name: Build Dockerized dependencies
        uses: ./.github/actions/build-docker
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Load Docker image
        run: |
          docker load --input /tmp/opentimes.tar
          docker image ls -a

      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations

      # Produces a JSON array of chunk identifiers consumed by the run-job
      # matrix. If override_chunks is provided, each override is validated
      # against the computed chunk set before being used instead.
      - name: Create job chunks
        id: create-job-chunks
        shell: bash
        run: |
          export USER_ID=${{ env.USER_ID }}
          export GROUP_ID=${{ env.GROUP_ID }}
          chunks=$(docker compose run --rm --quiet-pull \
            --entrypoint=python valhalla-run /data/src/split_origin.py \
            --year ${{ inputs.year }} --geography ${{ inputs.geography }} \
            --state ${{ inputs.state }})
          # If override chunks are set, validate them and use those instead
          chunks_parsed=($(echo "$chunks" | jq -r '.[]'))
          if [ -n "${{ inputs.override_chunks }}" ]; then
            override_chunks_parsed=($(echo "${{ inputs.override_chunks }}" | tr -d ' ' | tr ',' ' '))
            for chunk in "${override_chunks_parsed[@]}"; do
              if [[ ! " ${chunks_parsed[@]} " =~ " ${chunk} " ]]; then
                echo "Error: Override chunk ${chunk} is not in the chunks for this origin"
                echo "Chunks include: ${chunks_parsed[@]}"
                exit 1
              fi
            done
            chunks=$(printf '%s\n' "${override_chunks_parsed[@]}" | jq -c -R . | jq -c -s .)
            echo "Creating jobs for chunks: ${override_chunks_parsed[@]}"
          else
            echo "Creating jobs for chunks: ${chunks_parsed[@]}"
          fi
          # Write the output exactly once, always appending: the original
          # wrote early with >> and then conditionally truncated the whole
          # output file with >, which would clobber any other step outputs
          echo "chunks=$chunks" >> $GITHUB_OUTPUT
run-job: | |
runs-on: ubuntu-24.04 | |
needs: setup-jobs | |
strategy: | |
# Don't fail all chunks if one fails | |
fail-fast: false | |
matrix: | |
chunk: ${{ fromJSON(needs.setup-jobs.outputs.chunks) }} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v4 | |
- name: Setup Cloudflare credentials | |
uses: ./.github/actions/setup-cloudflare-s3 | |
with: | |
CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }} | |
CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }} | |
- name: Fetch locations data | |
uses: ./.github/actions/fetch-locations | |
# Don't fetch tile data in setup-jobs because they're very large and | |
# will churn the Actions cache. We want to wait to fetch it until jobs | |
# have actually been picked up | |
- name: Fetch Valhalla tile data | |
uses: ./.github/actions/fetch-valhalla-tiles | |
with: | |
year: ${{ inputs.year }} | |
state: ${{ inputs.state }} | |
- name: Fetch GitHub user and group ID | |
shell: bash | |
id: fetch-ids | |
run: | | |
echo "USER_ID=$(id -u)" >> $GITHUB_ENV | |
echo "GROUP_ID=$(id -g)" >> $GITHUB_ENV | |
- name: Fetch Docker image | |
uses: actions/download-artifact@v4 | |
with: | |
name: opentimes-docker-${{ hashFiles('./data/Dockerfile', './pyproject.toml') }} | |
path: /tmp | |
- name: Load Docker image | |
run: | | |
docker load --input /tmp/opentimes.tar | |
docker image ls -a | |
- name: Extract tiles | |
shell: bash | |
working-directory: 'data' | |
run: | | |
tile_path="year=${{ inputs.year }}/geography=state/state=${{ inputs.state }}" | |
ln ./intermediate/valhalla_tiles/"$tile_path"/valhalla_tiles.tar.zst ./build/ | |
tar -xf ./build/valhalla_tiles.tar.zst -C ./build | |
rm -f ./build/valhalla_tiles.tar.zst | |
- name: Run job chunk | |
shell: bash | |
working-directory: 'data' | |
run: | | |
export USER_ID=${{ env.USER_ID }} | |
export GROUP_ID=${{ env.GROUP_ID }} | |
docker compose run --rm --quiet-pull --entrypoint=python \ | |
valhalla-run /data/src/calculate_times.py \ | |
--mode ${{ inputs.mode }} --year ${{ inputs.year }} \ | |
--geography ${{ inputs.geography }} --state ${{ inputs.state }} \ | |
--centroid-type ${{ inputs.centroid_type }} \ | |
--chunk ${{ matrix.chunk }} --write-to-s3 | |
# Clear the cache if we're one of the last running jobs. Using this instead | |
# of a separate workflow step because the steps often come last in the job | |
# queue and then won't run when many workflows are queued at the same time | |
- name: Clear workflow cache | |
if: always() | |
continue-on-error: true | |
shell: bash | |
run: | | |
endpoint='repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs --paginate -q' | |
total_jobs=$(gh api $endpoint '.total_count') | |
complete_jobs=$(gh api $endpoint '.jobs[] | select(.status == "completed")' | wc -l) | |
in_progress_jobs=$(gh api $endpoint '.jobs[] | select(.status == "in_progress")' | wc -l) | |
n_remaining=$((total_jobs - complete_jobs)) | |
all_statuses=$((complete_jobs + in_progress_jobs)) | |
echo "Total number of jobs: $total_jobs" | |
echo "Number of jobs complete: $total_jobs" | |
echo "Number of jobs remaining: $n_remaining" | |
echo "Number of jobs run/running: $all_statuses" | |
if [ "$n_remaining" -lt 2 ] && [ "$total_jobs" -eq "$all_statuses" ]; then | |
echo "Less than 5 jobs still running. Clearing workflow cache!" | |
gh cache delete \ | |
valhalla-tiles-${{ inputs.year }}-${{ inputs.state }}-${{ hashFiles('./data/dvc.lock') }} || true | |
fi | |
env: | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} |