# Feature(MInference): add triton-based decoding in case flash_attn is … #10
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow will build and upload a Python package using Twine when a release is published.
# The conda-forge bot will pick up the new PyPI version and automatically create a new version.
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
name: Release

# Trigger: any pushed tag whose name starts with "v".
on:
  push:
    tags:
      - 'v*'

# Needed to create release and upload assets
permissions:
  contents: write
jobs:
  release:
    # Retrieve tag and create release
    name: Create Release
    runs-on: ubuntu-latest
    outputs:
      # Consumed by the "wheel" job when attaching built wheels to the release.
      upload_url: ${{ steps.create_release.outputs.upload_url }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Extract branch info
        shell: bash
        run: |
          # Strip the leading "refs/<kind>/" prefix so only the tag name remains.
          echo "release_tag=${GITHUB_REF#refs/*/}" >> "$GITHUB_ENV"

      - name: Create Release
        id: create_release
        uses: actions/github-script@v6
        env:
          RELEASE_TAG: ${{ env.release_tag }}
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          # The repository script is expected to publish the "upload_url"
          # step output referenced above -- TODO confirm in create_release.js.
          script: |
            const script = require('.github/workflows/scripts/create_release.js')
            await script(github, context, core)

  wheel:
    name: Build Wheel
    runs-on: ${{ matrix.os }}
    needs: release
    strategy:
      # Let the remaining matrix entries finish even if one combination fails.
      fail-fast: false
      matrix:
        # NOTE(review): the GitHub-hosted ubuntu-20.04 image is believed to be
        # retired; confirm, and check the glibc baseline the wheels need
        # before moving to a newer image.
        os: ['ubuntu-20.04']
        python-version: ['3.8', '3.9', '3.10', '3.11']
        pytorch-version: ['2.0.1', '2.1.2', '2.2.2', '2.3.0']
        cuda-version: ['11.8.0', '12.2.2']
        # Entries that name a version not listed above match no combination;
        # they only take effect if the matrix is widened later.
        exclude:
          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
          # Pytorch < 2.2 does not support Python 3.12
          - pytorch-version: '1.12.1'
            python-version: '3.12'
          - pytorch-version: '1.13.1'
            python-version: '3.12'
          - pytorch-version: '2.0.1'
            python-version: '3.12'
          - pytorch-version: '2.1.2'
            python-version: '3.12'
          # Pytorch <= 1.12 does not support Python 3.11
          - pytorch-version: '1.12.1'
            python-version: '3.11'
          # Pytorch >= 2.0 only supports Python >= 3.8
          - pytorch-version: '2.0.1'
            python-version: '3.7'
          - pytorch-version: '2.1.2'
            python-version: '3.7'
          - pytorch-version: '2.2.2'
            python-version: '3.7'
          - pytorch-version: '2.3.0'
            python-version: '3.7'
          - pytorch-version: '2.4.0.dev20240407'
            python-version: '3.7'
          # Pytorch <= 2.0 only supports CUDA <= 11.8
          - pytorch-version: '1.12.1'
            cuda-version: '12.2.2'
          - pytorch-version: '1.13.1'
            cuda-version: '12.2.2'
          - pytorch-version: '2.0.1'
            cuda-version: '12.2.2'
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          create-symlink: true
          # Include the PyTorch version so that builds against different torch
          # releases do not share (and thrash) a single cache key.
          key: ${{ github.job }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-${{ matrix.cuda-version }}

      - name: Set up Linux Env
        if: ${{ runner.os == 'Linux' }}
        run: |
          bash -x .github/workflows/scripts/env.sh

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set CUDA and PyTorch versions
        run: |
          # Condensed version strings, e.g. 11.8.0 -> 118, 2.3.0 -> 2.3, 3.10 -> 310.
          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F. '{print $1 $2}')" >> "$GITHUB_ENV"
          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F. '{print $1 "." $2}')" >> "$GITHUB_ENV"
          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F. '{print $1 $2}')" >> "$GITHUB_ENV"

      - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ matrix.cuda-version != 'cpu' }}
        uses: Jimver/cuda-toolkit@v0.2.14
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda-version }}
          linux-local-args: '["--toolkit"]'
          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
          method: 'network'
          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
          # not just nvcc
          # sub-packages: '["nvcc"]'

      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

      - name: Build wheel
        shell: bash
        env:
          CMAKE_BUILD_TYPE: Release  # do not compile with debug symbol to reduce wheel size
        run: |
          bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }} bdist_wheel
          # Export the built wheel's file name for the upload steps below.
          wheel_name=$(ls dist/*.whl | xargs -n 1 basename)
          asset_name=${wheel_name}
          echo "wheel_name=${wheel_name}" >> "$GITHUB_ENV"
          echo "asset_name=${asset_name}" >> "$GITHUB_ENV"

      - name: Upload Release Asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.release.outputs.upload_url }}
          asset_path: ./dist/${{ env.wheel_name }}
          asset_name: ${{ env.asset_name }}
          asset_content_type: application/*

      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.asset_name }}
          path: ./dist/${{ env.wheel_name }}

  publish_package:
    name: Publish Python 🐍 distribution 📦 to PyPI
    needs: [release, wheel]
    runs-on: ${{ matrix.os }}
    environment:
      name: pypi
      url: https://pypi.org/project/minference/
    permissions:
      # Job-level permissions replace the workflow-level block, so every scope
      # this job relies on has to be listed here.
      contents: read  # repository checkout
      # No API token is passed to the publish step, so it presumably relies on
      # an OIDC token (PyPI trusted publishing) -- TODO confirm.
      id-token: write
    strategy:
      fail-fast: false
      matrix:
        # NOTE(review): same runner-image caveat as in the "wheel" job.
        os: ['ubuntu-20.04']
        python-version: ['3.10']
        pytorch-version: ['2.3.0']  # Must be the most recent version that meets requirements-cuda.txt.
        cuda-version: ['12.2.2']
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          create-symlink: true
          # Same key layout as the "wheel" job (job name keeps the caches apart).
          key: ${{ github.job }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-${{ matrix.cuda-version }}

      - name: Set up Linux Env
        if: ${{ runner.os == 'Linux' }}
        run: |
          bash -x .github/workflows/scripts/env.sh

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set CUDA and PyTorch versions
        run: |
          # Condensed version strings, e.g. 12.2.2 -> 122, 2.3.0 -> 2.3, 3.10 -> 310.
          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F. '{print $1 $2}')" >> "$GITHUB_ENV"
          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F. '{print $1 "." $2}')" >> "$GITHUB_ENV"
          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F. '{print $1 $2}')" >> "$GITHUB_ENV"

      - name: Install CUDA ${{ matrix.cuda-version }}
        if: ${{ matrix.cuda-version != 'cpu' }}
        uses: Jimver/cuda-toolkit@v0.2.14
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda-version }}
          linux-local-args: '["--toolkit"]'
          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
          method: 'network'
          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
          # not just nvcc
          # sub-packages: '["nvcc"]'

      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

      - name: Build core package
        run: |
          bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }} sdist

      - name: Display structure of dist files
        run: ls -R dist/

      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          print-hash: true