# Fetch Travel Times & Weather Data #1672

name: Fetch Travel Times & Weather Data

# Triggers: manual runs (workflow_dispatch) and a cron schedule.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 * * * *'  # every hour
jobs:
  # Scrapes travel-time and vehicle-counter data, fetches the matching weather
  # data, processes both datasets, versions the result with DVC and commits the
  # updated data.dvc pointer back to the repository.
  #
  # NOTE(review): the job id mentions "mbajk" although every step deals with
  # travel times / vehicle counters - presumably inherited from another
  # workflow. It is kept unchanged because other jobs may reference it through
  # `needs:` or `needs.<id>.outputs`.
  fetch_raw_mbajk_data:
    runs-on: ubuntu-latest
    outputs:
      # SHA of HEAD after the data commit, written by the last step.
      sha_new: ${{ steps.sha_new.outputs.SHA_NEW }}
    permissions:
      # Required so the workflow token can push the data commit.
      contents: write
    env:
      MONGO_URI: ${{ secrets.MONGO_URI }}
      MONGO_DB_NAME: ${{ secrets.MONGO_DB_NAME }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # 0 = fetch the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Set up Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        # 'dvc[s3]' is quoted so the shell never treats [s3] as a glob pattern.
        run: |
          python -m pip install selenium requests pandas 'dvc[s3]' pymongo python-dotenv evidently

      - name: Authenticate dvc remote
        # Secrets are handed over as environment variables rather than being
        # interpolated into the script text, so special characters in a secret
        # cannot break or alter the shell command.
        env:
          DVC_ACCESS_KEY_ID: ${{ secrets.DVC_ACCESS_KEY_ID }}
          DVC_SECRET_ACCESS_KEY: ${{ secrets.DVC_SECRET_ACCESS_KEY }}
        run: |
          dvc remote modify origin --local access_key_id "$DVC_ACCESS_KEY_ID"
          dvc remote modify origin --local secret_access_key "$DVC_SECRET_ACCESS_KEY"

      - name: Pull data from dvc
        run: dvc pull

      - name: Scrape travel times
        # This scraper is launched from inside its own directory, unlike the
        # other scripts, which are invoked from the repository root.
        run: |
          cd src/data/scrapers
          python travel_time_scraper.py

      - name: Fetch weather data for travel times
        run: python src/data/weather/fetch_weather_data.py travel-times

      - name: Scrape vehicle counters data
        run: python src/data/scrapers/vehicle_counters_scraper.py

      - name: Fetch weather data for vehicle counters
        run: python src/data/weather/fetch_weather_data.py vehicle-counters

      - name: Process travel times data
        run: python src/data/process_travel_time_data.py

      - name: Process vehicle counters data
        run: python src/data/process_vehicle_counter_data.py

      # Validation and drift-test steps are currently disabled.
      # - name: Validate data
      #   run: |
      #     cd src/data/test
      #     python test_processed_data.py
      # - name: Test data drift
      #   run: |
      #     cd src/data/test
      #     python test_data_drift.py

      - name: Push data to dvc
        run: |
          dvc add data
          dvc push

      - name: Commit to git
        run: |
          git config --global user.name "GitHub Actions"
          git config --global user.email "actions@github.com"
          git add data.dvc
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the whole run; only commit when data.dvc actually changed.
          if git diff --cached --quiet; then
            echo "data.dvc unchanged - nothing to commit"
          else
            git commit -m "Update data $(date) with Github Actions"
          fi

      - name: Push to git
        # NOTE(review): @master tracks a mutable branch; consider pinning this
        # third-party action to a release tag or commit SHA.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}

      - name: Get commit SHA and store it in GITHUB_OUTPUT
        id: sha_new
        run: echo "SHA_NEW=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"