Skip to content

Commit

Permalink
feat(workflow): add build-wheel job for lxml package caching and buil…
Browse files Browse the repository at this point in the history
…d-and-push job for Docker image

- Added a new job `build-wheel` to build wheels for the `lxml` package, cache them based on the hashed version from `requirements.txt`, and avoid unnecessary builds if the wheel is already cached.
- Updated the existing job `build-and-push` to include a dependency on the `build-wheel` job, login to GitHub Container Registry and Docker Hub, build and push the Docker image to both registries, set up cosign for image signing, and sign the container image using cosign.
  • Loading branch information
obeone committed Apr 13, 2024
1 parent 8ffaf33 commit e27b222
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 58 deletions.
149 changes: 105 additions & 44 deletions .github/workflows/build-and-publish.yaml
Original file line number Diff line number Diff line change
@@ -1,60 +1,121 @@
name: Build and Push Docker image

# Trigger the workflow on push events to the main branch.
on:
  push:
    branches:
      - main

jobs:
# Job for building wheels for the lxml package.
  build-wheel:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Hash the lxml requirement line(s) from requirements.txt for cache key
      # generation. The value is written to $GITHUB_OUTPUT because the
      # `::set-output` workflow command is deprecated.
      - name: Extract hashed lxml version
        id: lxml-version
        run: echo "lxml_line=$(grep 'lxml' requirements.txt | sha256sum | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"

      # Cache lxml wheels based on the hashed version to speed up builds.
      - name: Cache lxml wheels
        id: cache-lxml
        uses: actions/cache@v4
        with:
          path: .wheels
          key: ${{ runner.arch }}-lxml-${{ steps.lxml-version.outputs.lxml_line }}

      # Check if the lxml wheel is already cached to avoid unnecessary builds.
      - name: Check if lxml wheel is cached
        id: check-wheel
        run: |
          # The glob is left unquoted so the shell expands it; a quoted
          # pattern inside `[ -f ... ]` is tested as a literal file name.
          if ls .wheels/lxml*.whl > /dev/null 2>&1; then
            echo "Wheel found in cache."
            echo "wheel_exists=true" >> "$GITHUB_OUTPUT"
          else
            echo "Wheel not found, need to build."
            echo "wheel_exists=false" >> "$GITHUB_OUTPUT"
          fi

      # Build lxml wheel if it is not found in the cache. The wheel is built
      # from the same requirement line(s) that feed the cache key, so the
      # cached wheel always matches the version pinned in requirements.txt.
      - name: Build lxml wheel if not cached
        if: steps.check-wheel.outputs.wheel_exists == 'false'
        run: |
          grep 'lxml' requirements.txt > "${RUNNER_TEMP}/lxml-requirements.txt"
          pip wheel -r "${RUNNER_TEMP}/lxml-requirements.txt" -w .wheels

# Job for building and pushing the Docker image to Docker registries.
  build-and-push:
    needs: build-wheel
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      id-token: write  # For cosign
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Register QEMU emulators so Buildx can execute RUN steps for the
      # non-native platforms listed in the build step below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Hash the lxml requirement line(s) from requirements.txt for cache key
      # generation. This must stay identical to the computation in the
      # build-wheel job so both jobs resolve the same cache entry.
      - name: Extract hashed lxml version
        id: lxml-version
        run: echo "lxml_line=$(grep 'lxml' requirements.txt | sha256sum | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"

      # Restore the lxml wheels cached by the build-wheel job into .wheels,
      # which the Dockerfile copies into the build.
      - name: Cache lxml wheels
        id: cache-lxml
        uses: actions/cache@v4
        with:
          path: .wheels
          key: ${{ runner.arch }}-lxml-${{ steps.lxml-version.outputs.lxml_line }}

      # Login to GitHub Container Registry.
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Login to Docker Hub.
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Build and push the Docker image to both GitHub Container Registry and
      # Docker Hub. The step id is referenced by the signing step for the
      # image digest, so it must be unique within the job.
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        id: build-and-push
        with:
          context: .
          file: ./Dockerfile
          push: true
          cache-from: type=gha
          cache-to: type=gha,mode=max
          tags: |
            ghcr.io/obeone/crawler-to-md:latest
            docker.io/obeoneorg/crawler-to-md:latest
          platforms: linux/amd64,linux/arm64,linux/i386,linux/armhf,linux/armel

      # Set up cosign for signing the container image.
      - name: Set up cosign for image signing
        uses: sigstore/cosign-installer@v3

      # Sign the pushed image in both registries by digest using cosign.
      - name: Sign the container image
        run: |
          cosign sign --yes ghcr.io/obeone/crawler-to-md@${DIGEST}
          cosign sign --yes docker.io/obeoneorg/crawler-to-md@${DIGEST}
        env:
          # Environment values are strings; quoted to avoid YAML boolean typing.
          COSIGN_EXPERIMENTAL: 'true'
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
Empty file added .wheels/.gitkeep
Empty file.
38 changes: 24 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,40 +1,48 @@
# syntax=docker/dockerfile:1.5
# The syntax directive above pins Dockerfile syntax version 1.5. Parser
# directives are only recognised before any other comment, blank line or
# instruction, so it has to remain the very first line of the file.

FROM python:3.12 AS builder

# Set non-interactive mode
ENV DEBIAN_FRONTEND=noninteractive

# Prevent docker from cleaning up the apt cache
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Define ARG for platform-specific cache separation
ARG TARGETPLATFORM

# Update and install dependencies with cache separated by architecture
RUN --mount=type=cache,target=/var/cache/apt,id=apt-cache-${TARGETPLATFORM} \
    --mount=type=cache,target=/var/lib/apt,id=apt-lib-${TARGETPLATFORM} \
    apt-get update && apt-get install -y \
        libxml2-dev \
        libxslt-dev

WORKDIR /app

COPY requirements.txt .

# Copy the wheel directory itself. `COPY .wheels /app/` would place the
# directory's contents directly in /app, where the check below never looks.
COPY .wheels /app/.wheels/

# Conditionally install lxml from the local wheel if it exists
RUN <<EOF
if [ $(ls /app/.wheels/lxml*.whl 2> /dev/null | wc -l) -gt 0 ]; then
    echo "Installing lxml from local wheel"
    # Fall back to PyPI when pip rejects the local wheel, e.g. because it was
    # built for a different platform than the one being built here.
    pip install /app/.wheels/lxml*.whl || pip install lxml
else
    echo "No local wheel for lxml found, installing from PyPI"
    pip install lxml
fi
EOF

# Use pip cache to speed up builds
# NOTE(review): this installs into ./packages with -t, separately from the
# lxml install above; confirm the pre-installed lxml is actually reused.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt -t packages


# Start from a slim Python 3.12 image for a small final image size
FROM python:3.12-slim AS final

# Set non-interactive mode
ENV DEBIAN_FRONTEND=noninteractive

# Prevent docker from cleaning up the apt cache in the final image, so the
# apt cache mounts used by the install step of this stage keep their contents
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Target platform of the build; used in this stage to give each architecture
# its own apt cache id
ARG TARGETPLATFORM

# Copy built packages from the previous stage
Expand All @@ -43,8 +51,10 @@ COPY --from=builder /app/packages /app/packages
# Update and install runtime dependencies if necessary, with cache separated by architecture
RUN --mount=type=cache,target=/var/cache/apt,id=apt-cache-${TARGETPLATFORM} \
    --mount=type=cache,target=/var/lib/apt,id=apt-lib-${TARGETPLATFORM} \
    apt-get update && apt-get install -y \
        libxml2 \
        libxslt1.1 \
        libtk8.6

WORKDIR /app

Expand All @@ -53,6 +63,6 @@ ENV PYTHONPATH=/app/packages:$PYTHONPATH
# Copy the rest of the application's source code into the working directory
COPY . .

# Declare /app/cache as a volume
VOLUME ["/app/cache"]

# Run main.py with the Python interpreter as the container entrypoint
ENTRYPOINT ["python", "main.py"]
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ coloredlogs==15.0.1
requests==2.31.0
tqdm==4.66.2
trafilatura==1.8.1
lxml==5.1.0

0 comments on commit e27b222

Please sign in to comment.