mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 15:29:39 +00:00
[feature] add github action for pre compile
This commit is contained in:
parent
412055d450
commit
1db4a67dca
5 changed files with 396 additions and 4 deletions
252
.github/workflows/package_wheel_release.yml
vendored
Normal file
252
.github/workflows/package_wheel_release.yml
vendored
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
name: Build Wheels
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
release:
|
||||||
|
description: 'Release? 1 = yes, 0 = no'
|
||||||
|
default: '0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
jobs:
|
||||||
|
build_wheels:
|
||||||
|
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
# Ubuntu
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
|
||||||
|
# Windows
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.12', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.4.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.5.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.4.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX512', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.10', cuda: '12.1.1', torch: '2.3.0', cudaarch: '8.0;8.6;8.7;8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '121'}
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Free Disk Space
|
||||||
|
uses: jlumbroso/free-disk-space@v1.3.1
|
||||||
|
if: runner.os == 'Linux'
|
||||||
|
with:
|
||||||
|
tool-cache: true
|
||||||
|
android: true
|
||||||
|
dotnet: true
|
||||||
|
haskell: true
|
||||||
|
large-packages: false
|
||||||
|
swap-storage: true
|
||||||
|
|
||||||
|
- uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
|
||||||
|
- name: check_space
|
||||||
|
run: |
|
||||||
|
if($IsLinux) {df -h}
|
||||||
|
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup Mamba
|
||||||
|
if: matrix.cuda != ''
|
||||||
|
uses: conda-incubator/setup-miniconda@v2.3.0
|
||||||
|
with:
|
||||||
|
activate-environment: "ktransformers"
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
miniforge-variant: Mambaforge
|
||||||
|
miniforge-version: latest
|
||||||
|
use-mamba: true
|
||||||
|
add-pip-as-python-dependency: true
|
||||||
|
auto-activate-base: false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
- name: build web
|
||||||
|
run: |
|
||||||
|
cd ktransformers/website/
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
cd ../../
|
||||||
|
|
||||||
|
- name: build for cuda
|
||||||
|
if: matrix.cuda != ''
|
||||||
|
run: |
|
||||||
|
git submodule init
|
||||||
|
git submodule update
|
||||||
|
if($IsWindows){
|
||||||
|
$originalPath = Get-Location
|
||||||
|
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||||
|
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||||
|
$env:DISTUTILS_USE_SDK=1
|
||||||
|
Set-Location $originalPath
|
||||||
|
}
|
||||||
|
$cudaVersion = '${{ matrix.cuda }}'
|
||||||
|
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||||
|
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||||
|
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||||
|
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||||
|
if ($IsLinux) {
|
||||||
|
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||||
|
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||||
|
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||||
|
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($IsWindows) {
|
||||||
|
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||||
|
$env:CUDA_HOME = $env:CUDA_PATH
|
||||||
|
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||||
|
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||||
|
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||||
|
|
||||||
|
}
|
||||||
|
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||||
|
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||||
|
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||||
|
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||||
|
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||||
|
python -m build --no-isolation --verbose
|
||||||
|
|
||||||
|
|
||||||
|
- name: create Rlease dir
|
||||||
|
run: |
|
||||||
|
if ($IsWindows) {
|
||||||
|
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||||
|
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||||
|
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||||
|
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||||
|
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||||
|
chmod 600 $Env:SSH_PATH
|
||||||
|
}
|
||||||
|
if ($IsLinux) {
|
||||||
|
$env:date = $(date +%Y-%m-%d)
|
||||||
|
mkdir -p ~/.ssh/
|
||||||
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||||
|
chmod 600 ~/.ssh/id_rsa
|
||||||
|
}
|
||||||
|
|
||||||
|
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||||
|
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
132
.github/workflows/package_wheel_test.yml
vendored
Normal file
132
.github/workflows/package_wheel_test.yml
vendored
Normal file
|
@ -0,0 +1,132 @@
|
||||||
|
name: Build Wheels
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
release:
|
||||||
|
description: 'Release? 1 = yes, 0 = no'
|
||||||
|
default: '0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
jobs:
|
||||||
|
build_wheels:
|
||||||
|
name: ${{ matrix.os }} Python=${{ matrix.pyver }} CUDA=${{ matrix.cuda }} CPU_INSTRUCT=${{ matrix.instruct }} Torch=${{ matrix.torch }}
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
# Ubuntu
|
||||||
|
- { os: ubuntu-20.04, pyver: '3.12', cuda: '12.2.2', torch: '2.3.0', cudaarch: '8.9;9.0+PTX', instruct: 'FANCY', torch_cu: '121'}
|
||||||
|
- { os: windows-2022, pyver: '3.11', cuda: '12.5.1', torch: '2.4.0', cudaarch: '8.9;9.0+PTX', instruct: 'AVX2', torch_cu: '124'}
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Free Disk Space
|
||||||
|
uses: jlumbroso/free-disk-space@v1.3.1
|
||||||
|
if: runner.os == 'Linux'
|
||||||
|
with:
|
||||||
|
tool-cache: true
|
||||||
|
android: true
|
||||||
|
dotnet: true
|
||||||
|
haskell: true
|
||||||
|
large-packages: false
|
||||||
|
swap-storage: true
|
||||||
|
|
||||||
|
- uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
|
||||||
|
- name: check_space
|
||||||
|
run: |
|
||||||
|
if($IsLinux) {df -h}
|
||||||
|
if($IsWindows) {Get-PSDrive -PSProvider 'FileSystem'}
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup Mamba
|
||||||
|
if: matrix.cuda != ''
|
||||||
|
uses: conda-incubator/setup-miniconda@v2.3.0
|
||||||
|
with:
|
||||||
|
activate-environment: "ktransformers"
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
miniforge-variant: Mambaforge
|
||||||
|
miniforge-version: latest
|
||||||
|
use-mamba: true
|
||||||
|
add-pip-as-python-dependency: true
|
||||||
|
auto-activate-base: false
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
- name: build web
|
||||||
|
run: |
|
||||||
|
cd ktransformers/website/
|
||||||
|
npm install
|
||||||
|
npm run build
|
||||||
|
cd ../../
|
||||||
|
|
||||||
|
- name: build for cuda
|
||||||
|
if: matrix.cuda != ''
|
||||||
|
run: |
|
||||||
|
git submodule init
|
||||||
|
git submodule update
|
||||||
|
if($IsWindows){
|
||||||
|
$originalPath = Get-Location
|
||||||
|
Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
|
||||||
|
Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64 -host_arch=x64'
|
||||||
|
$env:DISTUTILS_USE_SDK=1
|
||||||
|
Set-Location $originalPath
|
||||||
|
}
|
||||||
|
$cudaVersion = '${{ matrix.cuda }}'
|
||||||
|
$env:MAMBA_NO_LOW_SPEED_LIMIT = 1
|
||||||
|
mamba install -y -c nvidia/label/cuda-$cudaVersion cuda-toolkit cuda-runtime
|
||||||
|
$env:CUDA_PATH = $env:CONDA_PREFIX
|
||||||
|
$env:CUDA_HOME = $env:CONDA_PREFIX
|
||||||
|
if ($IsLinux) {
|
||||||
|
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
|
||||||
|
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib/python${{ matrix.pyver }}/site-packages/nvidia/nvjitlink/lib:' + $env:LD_LIBRARY_PATH
|
||||||
|
if (!(Test-Path $env:CUDA_HOME/lib64)) {
|
||||||
|
New-Item -ItemType SymbolicLink -Path $env:CUDA_HOME/lib64 -Target $env:CUDA_HOME/lib
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($IsWindows) {
|
||||||
|
$env:CUDA_PATH = "$env:CUDA_PATH/Library"
|
||||||
|
$env:CUDA_HOME = $env:CUDA_PATH
|
||||||
|
$env:PATH = "$env:CUDA_PATH/bin;" + $env:PATH
|
||||||
|
cp $env:CUDA_PATH/lib/*.lib $env:CUDA_PATH/lib/x64/
|
||||||
|
$env:INCLUDE =$env:CUDA_PATH + "/include/targets/x64;" + $env:INCLUDE
|
||||||
|
|
||||||
|
}
|
||||||
|
python -m pip install torch==${{ matrix.torch }} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu${{ matrix.torch_cu }}
|
||||||
|
python -m pip install cpufeature build wheel ninja packaging setuptools
|
||||||
|
$env:KTRANSFORMERS_FORCE_BUILD = "TRUE"
|
||||||
|
$env:CPU_INSTRUCT = '${{ matrix.instruct }}'
|
||||||
|
$env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
|
||||||
|
python -m build --no-isolation --verbose
|
||||||
|
|
||||||
|
|
||||||
|
- name: create Rlease dir
|
||||||
|
run: |
|
||||||
|
if ($IsWindows) {
|
||||||
|
$env:date = $(Get-Date -Format "yyyy-MM-dd")
|
||||||
|
New-Item -ItemType Directory -Force -Path "$Env:USERPROFILE\.ssh"
|
||||||
|
$Env:SSH_PATH = "$Env:USERPROFILE\.ssh\id_rsa"
|
||||||
|
Set-Content -Path $Env:SSH_PATH -Value "${{ secrets.SSH_PRIVATE_KEY }}"
|
||||||
|
(Get-Content -Path $Env:SSH_PATH).Replace("`r`n","`n") | Set-Content -Path $Env:SSH_PATH
|
||||||
|
chmod 600 $Env:SSH_PATH
|
||||||
|
}
|
||||||
|
if ($IsLinux) {
|
||||||
|
$env:date = $(date +%Y-%m-%d)
|
||||||
|
mkdir -p ~/.ssh/
|
||||||
|
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
|
||||||
|
chmod 600 ~/.ssh/id_rsa
|
||||||
|
}
|
||||||
|
|
||||||
|
ssh -p ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ secrets.SSH_SERVER }} "mkdir -p /mnt/data/release-$env:date"
|
||||||
|
scp -P ${{ secrets.SSH_PORT }} -o StrictHostKeyChecking=no dist/*.whl root@${{ secrets.SSH_SERVER }}:/mnt/data/release-$env:date/
|
|
@ -1 +1 @@
|
||||||
__version__ = "0.1.1"
|
__version__ = "0.1.2"
|
13
setup.py
13
setup.py
|
@ -6,7 +6,7 @@ Author : chenxl
|
||||||
Date : 2024-07-27 16:15:27
|
Date : 2024-07-27 16:15:27
|
||||||
Version : 1.0.0
|
Version : 1.0.0
|
||||||
LastEditors : chenxl
|
LastEditors : chenxl
|
||||||
LastEditTime : 2024-08-08 02:45:15
|
LastEditTime : 2024-08-14 16:36:19
|
||||||
Adapted from:
|
Adapted from:
|
||||||
https://github.com/Dao-AILab/flash-attention/blob/v2.6.3/setup.py
|
https://github.com/Dao-AILab/flash-attention/blob/v2.6.3/setup.py
|
||||||
Copyright (c) 2023, Tri Dao.
|
Copyright (c) 2023, Tri Dao.
|
||||||
|
@ -299,6 +299,15 @@ setup(
|
||||||
'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
|
'ktransformers/ktransformers_ext/cuda/custom_gguf/dequant.cu',
|
||||||
'ktransformers/ktransformers_ext/cuda/binding.cpp',
|
'ktransformers/ktransformers_ext/cuda/binding.cpp',
|
||||||
'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu'
|
'ktransformers/ktransformers_ext/cuda/gptq_marlin/gptq_marlin.cu'
|
||||||
])
|
],
|
||||||
|
extra_compile_args={
|
||||||
|
'cxx': ['-O3'],
|
||||||
|
'nvcc': [
|
||||||
|
'-O3',
|
||||||
|
'--use_fast_math',
|
||||||
|
'-Xcompiler', '-fPIC',
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
1
third_party/llamafile/sgemm.cpp
vendored
1
third_party/llamafile/sgemm.cpp
vendored
|
@ -94,7 +94,6 @@ static const struct GemmFuncs {
|
||||||
#if defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)))
|
#if defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)))
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#if defined(__AVX512F__)
|
#if defined(__AVX512F__)
|
||||||
printf("__AVX512F__\n");
|
|
||||||
#if defined(__AVX512VL__) && defined(__AVX512BW__) && defined(__AVX512DQ__) && defined(__AVX512VNNI__) && defined(__AVX512BF16__)
|
#if defined(__AVX512VL__) && defined(__AVX512BW__) && defined(__AVX512DQ__) && defined(__AVX512VNNI__) && defined(__AVX512BF16__)
|
||||||
// AMD Zen4+ (2023-)
|
// AMD Zen4+ (2023-)
|
||||||
sgemm = llamafile_sgemm_amd_zen4;
|
sgemm = llamafile_sgemm_amd_zen4;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue