mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 22:05:30 +00:00
[feature] add github action for pre compile
This commit is contained in:
parent
412055d450
commit
1db4a67dca
5 changed files with 396 additions and 4 deletions
252
.github/workflows/package_wheel_release.yml
vendored
Normal file
252
.github/workflows/package_wheel_release.yml
vendored
Normal file
|
@ -0,0 +1,252 @@
|
|||
name: Build Wheels
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release:
|
||||
description: 'Release? 1 = yes, 0 = no'
|
||||
default: '0'
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
build_wheels:
|
||||
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# Ubuntu
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
|
||||
# Windows
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: pwsh
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Free Disk Space
|
||||
uses: jlumbroso/free-disk-space@v1.3.1
|
||||
if: runner.os == 'Linux'
|
||||
with:
|
||||
tool-cache: true
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: false
|
||||
swap-storage: true
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.pyver }}
|
||||
|
||||
- name: check_space
|
||||
run: |
|
||||
if($IsLinux) {df -h}
|
||||
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Setup Mamba
|
||||
if: matrix.cuda != ''
|
||||
uses: conda-incubator/setup-miniconda@v2.3.0
|
||||
with:
|
||||
activate-environment: "ktransformers"
|
||||
python-version: ${{ matrix.pyver }}
|
||||
miniforge-variant: Mambaforge
|
||||
miniforge-version: latest
|
||||
use-mamba: true
|
||||
add-pip-as-python-dependency: true
|
||||
auto-activate-base: false
|
||||
|
||||
|
||||
|
||||
- name: build web
|
||||
run: |
|
||||
cd ktransformers/website/
|
||||
npm install
|
||||
npm run build
|
||||
cd ../../
|
||||
|
||||
- name: build for cuda
|
||||
if: matrix.cuda != ''
|
||||
run: |
|
||||
git submodule init
|
||||
git submodule update
|
||||
if($IsWindows){
|
||||
$originalPath = Get-Location
|
||||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||
$env:DISTUTILS_USE_SDK=1
|
||||
Set-Location $originalPath
|
||||
}
|
||||
$cudaVersion = '${{ matrix.cuda }}'
|
||||
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||
if ($IsLinux) {
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||
}
|
||||
}
|
||||
if ($IsWindows) {
|
||||
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||
$env:CUDA_HOME = $env:CUDA_PATH
|
||||
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||
|
||||
}
|
||||
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||
python -m build --no-isolation --verbose
|
||||
|
||||
|
||||
- name: create Rlease dir
|
||||
run: |
|
||||
if ($IsWindows) {
|
||||
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||
chmod 600 $Env:SSH_PATH
|
||||
}
|
||||
if ($IsLinux) {
|
||||
$env:date = $(date +%Y-%m-%d)
|
||||
mkdir -p ~/.ssh/
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||
chmod 600 ~/.ssh/id_rsa
|
||||
}
|
||||
|
||||
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
132
.github/workflows/package_wheel_test.yml
vendored
Normal file
132
.github/workflows/package_wheel_test.yml
vendored
Normal file
|
@ -0,0 +1,132 @@
|
|||
name: Build Wheels
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release:
|
||||
description: 'Release? 1 = yes, 0 = no'
|
||||
default: '0'
|
||||
required: true
|
||||
type: string
|
||||
jobs:
|
||||
build_wheels:
|
||||
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# Ubuntu
|
||||
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: pwsh
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Free Disk Space
|
||||
uses: jlumbroso/free-disk-space@v1.3.1
|
||||
if: runner.os == 'Linux'
|
||||
with:
|
||||
tool-cache: true
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: false
|
||||
swap-storage: true
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.pyver }}
|
||||
|
||||
- name: check_space
|
||||
run: |
|
||||
if($IsLinux) {df -h}
|
||||
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Setup Mamba
|
||||
if: matrix.cuda != ''
|
||||
uses: conda-incubator/setup-miniconda@v2.3.0
|
||||
with:
|
||||
activate-environment: "ktransformers"
|
||||
python-version: ${{ matrix.pyver }}
|
||||
miniforge-variant: Mambaforge
|
||||
miniforge-version: latest
|
||||
use-mamba: true
|
||||
add-pip-as-python-dependency: true
|
||||
auto-activate-base: false
|
||||
|
||||
|
||||
|
||||
- name: build web
|
||||
run: |
|
||||
cd ktransformers/website/
|
||||
npm install
|
||||
npm run build
|
||||
cd ../../
|
||||
|
||||
- name: build for cuda
|
||||
if: matrix.cuda != ''
|
||||
run: |
|
||||
git submodule init
|
||||
git submodule update
|
||||
if($IsWindows){
|
||||
$originalPath = Get-Location
|
||||
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||
$env:DISTUTILS_USE_SDK=1
|
||||
Set-Location $originalPath
|
||||
}
|
||||
$cudaVersion = '${{ matrix.cuda }}'
|
||||
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||
if ($IsLinux) {
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||
}
|
||||
}
|
||||
if ($IsWindows) {
|
||||
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||
$env:CUDA_HOME = $env:CUDA_PATH
|
||||
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||
|
||||
}
|
||||
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||
python -m build --no-isolation --verbose
|
||||
|
||||
|
||||
- name: create Rlease dir
|
||||
run: |
|
||||
if ($IsWindows) {
|
||||
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||
chmod 600 $Env:SSH_PATH
|
||||
}
|
||||
if ($IsLinux) {
|
||||
$env:date = $(date +%Y-%m-%d)
|
||||
mkdir -p ~/.ssh/
|
||||
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||
chmod 600 ~/.ssh/id_rsa
|
||||
}
|
||||
|
||||
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
|
@ -1 +1 @@
|
|||
__version__ = "0.1.1"
|
||||
__version__ = "0.1.2"
|
13
setup.py
13
setup.py
|
@ -6,7 +6,7 @@ Author : chenxl
|
|||
Date : 2024-07-27 16:15:27
|
||||
Version : 1.0.0
|
||||
LastEditors : chenxl
|
||||
LastEditTime : 2024-08-08 02:45:15
|
||||
LastEditTime : 2024-08-14 16:36:19
|
||||
Adapted from:
|
||||
https://github.com/Dao-AILab/flash-attention/blob/v2.6.3/setup.py
|
||||
Copyright (c) 2023, Tri Dao.
|
||||
|
@ -299,6 +299,15 @@ setup(
|
|||
'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
|
||||
'ktransformers/ktransformers_ext/cuda/binding.cpp',
|
||||
'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu'
|
||||
])
|
||||
],
|
||||
extra_compile_args={
|
||||
'cxx': ['-O3'],
|
||||
'nvcc': [
|
||||
'-O3',
|
||||
'--use_fast_math',
|
||||
'-Xcompiler', '-fPIC',
|
||||
]
|
||||
}
|
||||
)
|
||||
]
|
||||
)
|
||||
|
|
1
third_party/llamafile/sgemm.cpp
vendored
1
third_party/llamafile/sgemm.cpp
vendored
|
@ -94,7 +94,6 @@ static const struct GemmFuncs {
|
|||
#if defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)))
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__AVX512F__)
|
||||
printf("__AVX512F__\n");
|
||||
#if defined(__AVX512VL__) && defined(__AVX512BW__) && defined(__AVX512DQ__) && defined(__AVX512VNNI__) && defined(__AVX512BF16__)
|
||||
// AMD Zen4+ (2023-)
|
||||
sgemm = llamafile_sgemm_amd_zen4;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue