Scrape latest data #9269

Workflow file for this run

name: Scrape latest data
on:
  push:
  workflow_dispatch: # Allows manual triggering from the GitHub Actions UI
  schedule:
    - cron: "6,26,46 * * * *" # Scheduled to run 3 times/hour (at minutes 6, 26, and 46)
jobs:
  scheduled:
    runs-on: ubuntu-latest
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      # Fetch the run ID of the most recently uploaded artifact via the GitHub API
      # and jq, and save it as an environment variable for the download step below.
      # If your repository is private, an OAuth app token or personal access token
      # (classic) with repo scope is required.
      - name: Get latest artifact run id
        run: |
          ARTIFACT_RUN_ID=$(curl -s "https://api.github.com/repos/${{ github.repository }}/actions/artifacts?per_page=1" | jq '.artifacts[0].workflow_run.id')
          echo "artifact_run_id=$ARTIFACT_RUN_ID" >> $GITHUB_ENV
      # Download the artifact (our SQLite DB!) from the last run
      - name: Download artifact
        uses: actions/download-artifact@v4
        with:
          name: ca-fires-history-db
          path: ./data/
          run-id: ${{ env.artifact_run_id }} # Run ID of the artifact (SQLite DB) uploaded by the last run
          github-token: ${{ secrets.GH_PAT }} # REQUIRED. See https://github.com/actions/download-artifact?tab=readme-ov-file#download-artifacts-from-other-workflow-runs-or-repositories
        continue-on-error: false
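      # For debugging, the same cross-run download can be done by hand with the
      # gh CLI (a sketch, assuming gh is installed and authenticated):
      #   gh run download <run-id> -n ca-fires-history-db -D ./data/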
      - name: Display downloaded file
        run: ls data/
      - name: Run scrape.py
        run: python3 scrape.py
      - name: Upload updated artifact
        uses: actions/upload-artifact@v4
        with:
          name: ca-fires-history-db # Name of the artifact to upload; must match the name in the download step
          path: ./data/fires.db
          if-no-files-found: error
      - name: Install datasette
        run: |
          pipx install datasette
      - name: Deploy to Vercel
        env:
          VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }}
        run: |-
          datasette install datasette-publish-vercel
          datasette publish vercel data/fires.db \
            --project=cafireshistorydb \
            --install=datasette-hashed-urls \
            --install=datasette-cluster-map \
            --token="$VERCEL_TOKEN" \
            --metadata data/metadata.json \
            --setting allow_download off \
            --setting allow_csv_stream off \
            --extra-options "-i"
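      # To preview the published database locally before deploying (a sketch,
      # assuming datasette is installed and data/metadata.json exists as above):
      #   datasette data/fires.db --metadata data/metadata.json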