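# Fetch Travel Times & Weather Data #20
# NOTE(review): the text above the workflow looked like page-navigation residue
# from a web view (a "Skip to content" link and a repeated title); it is kept
# here only as a comment so the file parses as YAML.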
# Hourly (and manually triggerable) workflow that pulls the DVC-tracked data,
# runs the travel-time scraper and the weather fetcher, pushes the refreshed
# `data` directory to the DVC remote, and commits the updated `data.dvc`
# pointer file back to the triggering ref.
name: Fetch Travel Times & Weather Data

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    - cron: '0 * * * *' # every hour

jobs:
  fetch_raw_mbajk_data:
    runs-on: ubuntu-latest
    outputs:
      # Commit SHA of HEAD as recorded by the final step of this job.
      sha_new: ${{ steps.sha_new.outputs.SHA_NEW }}
    permissions:
      # Write access is needed because the job pushes a commit.
      contents: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          # 0 = fetch the complete history instead of a shallow clone.
          fetch-depth: 0

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.11'

      - name: Install dependencies
        # The extra is quoted so the shell cannot treat `[s3]` as a glob
        # pattern and expand it against files in the working directory.
        run: python -m pip install selenium requests pandas 'dvc[s3]'

      - name: Authenticate dvc remote
        # Secrets are handed over as environment variables rather than being
        # interpolated into the script text, so their contents are never
        # parsed by the shell as code.
        env:
          DVC_ACCESS_KEY_ID: ${{ secrets.DVC_ACCESS_KEY_ID }}
          DVC_SECRET_ACCESS_KEY: ${{ secrets.DVC_SECRET_ACCESS_KEY }}
        run: |
          dvc remote modify origin --local access_key_id "$DVC_ACCESS_KEY_ID"
          dvc remote modify origin --local secret_access_key "$DVC_SECRET_ACCESS_KEY"

      - name: Pull data from dvc
        run: dvc pull

      - name: Scrape travel times
        run: python src/data/scrapers/travel_time_scraper.py

      - name: Fetch weather data
        run: python src/data/weather/fetch_weather_data.py

      - name: Push data to dvc
        run: |
          dvc add data
          dvc push

      - name: Commit to git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"
          git add data.dvc
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the whole run whenever data.dvc is unchanged. Commit only
          # when the index actually differs from HEAD.
          if git diff --cached --quiet; then
            echo "data.dvc is unchanged; skipping commit"
          else
            git commit -m "Update raw mbajk data on $(date) with Github Actions"
          fi

      - name: Push to git
        # NOTE(review): `@master` is a mutable ref; consider pinning this
        # third-party action to a release tag or commit SHA.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}

      - name: Get commit SHA and store it in GITHUB_OUTPUT
        id: sha_new
        # Exposed to other jobs through the job-level `sha_new` output.
        run: echo "SHA_NEW=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"