diff --git a/.github/workflows/kcpp-build-release-win-full-cu12.yaml b/.github/workflows/kcpp-build-release-win-full-cu12.yaml index 1490996af..77e1a86ab 100644 --- a/.github/workflows/kcpp-build-release-win-full-cu12.yaml +++ b/.github/workflows/kcpp-build-release-win-full-cu12.yaml @@ -13,7 +13,7 @@ env: jobs: windows: - runs-on: windows-2019 + runs-on: windows-2022 steps: - name: Clone id: checkout @@ -35,6 +35,39 @@ jobs: python -m pip install --upgrade pip pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 + - name: Display full Visual Studio info Before + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + + - name: Visual Studio 2019 Reinstall + shell: cmd + run: | + @echo off + echo Preparing setup + curl -fLO https://download.visualstudio.microsoft.com/download/pr/1fbe074b-8ae1-4e9b-8e83-d1ce4200c9d1/61098e228df7ba3a6a8b4e920a415ad8878d386de6dd0f23f194fe1a55db189a/vs_Enterprise.exe + vs_Enterprise.exe --quiet --add Microsoft.VisualStudio.Workload.VCTools --add Microsoft.VisualStudio.Component.VC.CLI.Support --add Microsoft.VisualStudio.Component.Windows10SDK.19041 --add Microsoft.VisualStudio.Workload.UniversalBuildTools --add Microsoft.VisualStudio.Component.VC.CMake.Project + echo Waiting for VS2019 setup + set "ProcessName=setup.exe" + :CheckProcess + tasklist /FI "IMAGENAME eq %ProcessName%" | find /I "%ProcessName%" >nul + if %errorlevel%==0 ( + ping 127.0.0.1 /n 5 >nul + goto CheckProcess + ) + echo VS2019 Setup completed + exit /b 0 + + - name: Disable Visual Studio 2022 by Renaming + run: | + Rename-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise" "Enterprise_DISABLED" + shell: pwsh + + - name: Display full Visual Studio info After + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + - name: Download and install win64devkit run: | curl -L https://github.com/skeeto/w64devkit/releases/download/v1.22.0/w64devkit-1.22.0.zip --output w64devkit.zip diff --git a/.github/workflows/kcpp-build-release-win-full.yaml b/.github/workflows/kcpp-build-release-win-full.yaml index c5590a9ed..3a029f4c0 100644 --- a/.github/workflows/kcpp-build-release-win-full.yaml +++ b/.github/workflows/kcpp-build-release-win-full.yaml @@ -35,6 +35,39 @@ jobs: python -m pip install --upgrade pip pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 + - name: Display full Visual Studio info Before + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + + - name: Visual Studio 2019 Reinstall + shell: cmd + run: | + @echo off + echo Preparing setup + curl -fLO https://download.visualstudio.microsoft.com/download/pr/1fbe074b-8ae1-4e9b-8e83-d1ce4200c9d1/61098e228df7ba3a6a8b4e920a415ad8878d386de6dd0f23f194fe1a55db189a/vs_Enterprise.exe + vs_Enterprise.exe --quiet --add Microsoft.VisualStudio.Workload.VCTools --add Microsoft.VisualStudio.Component.VC.CLI.Support --add Microsoft.VisualStudio.Component.Windows10SDK.19041 --add Microsoft.VisualStudio.Workload.UniversalBuildTools --add Microsoft.VisualStudio.Component.VC.CMake.Project + echo Waiting for VS2019 setup + set "ProcessName=setup.exe" + :CheckProcess + tasklist /FI "IMAGENAME eq %ProcessName%" | find /I "%ProcessName%" >nul + if %errorlevel%==0 ( + ping 127.0.0.1 /n 5 >nul + goto CheckProcess + ) + echo VS2019 Setup completed + exit /b 0 + + - name: Disable Visual Studio 2022 by Renaming + run: | + Rename-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise" "Enterprise_DISABLED" + shell: pwsh + + - name: Display full Visual Studio info After + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + - name: Download and install win64devkit run: | curl -L https://github.com/skeeto/w64devkit/releases/download/v1.22.0/w64devkit-1.22.0.zip --output w64devkit.zip @@ -64,28 +97,12 @@ jobs: cuda: '11.4.4' use-github-cache: false - - name: Install Visual Studio 2019 - shell: cmd - run: | - echo off - echo Downloading Visual Studio 2019 Setup - curl -fLO https://download.visualstudio.microsoft.com/download/pr/1fbe074b-8ae1-4e9b-8e83-d1ce4200c9d1/61098e228df7ba3a6a8b4e920a415ad8878d386de6dd0f23f194fe1a55db189a/vs_Enterprise.exe - echo Running Visual Studio 2019 Setup - vs_Enterprise.exe --quiet --wait --norestart --nocache ^ - --add Microsoft.VisualStudio.Workload.VCTools ^ - --add Microsoft.VisualStudio.Component.VC.CLI.Support ^ - --add Microsoft.VisualStudio.Component.Windows10SDK.19041 ^ - --add Microsoft.VisualStudio.Workload.UniversalBuildTools ^ - --add Microsoft.VisualStudio.Component.VC.CMake.Project - - - name: Build CUDA with Visual Studio 2019 + - name: Build CUDA id: cmake_build - shell: cmd run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" mkdir build cd build - cmake .. -G "Visual Studio 16 2019" -A x64 -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" + cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" cmake --build . --config Release -j 2 cd .. diff --git a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml index e12771e63..2b0365024 100644 --- a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml +++ b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml @@ -13,7 +13,7 @@ env: jobs: windows: - runs-on: windows-2019 + runs-on: windows-2022 steps: - name: Clone id: checkout @@ -35,6 +35,39 @@ jobs: python -m pip install --upgrade pip pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 + - name: Display full Visual Studio info Before + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + + - name: Visual Studio 2019 Reinstall + shell: cmd + run: | + @echo off + echo Preparing setup + curl -fLO https://download.visualstudio.microsoft.com/download/pr/1fbe074b-8ae1-4e9b-8e83-d1ce4200c9d1/61098e228df7ba3a6a8b4e920a415ad8878d386de6dd0f23f194fe1a55db189a/vs_Enterprise.exe + vs_Enterprise.exe --quiet --add Microsoft.VisualStudio.Workload.VCTools --add Microsoft.VisualStudio.Component.VC.CLI.Support --add Microsoft.VisualStudio.Component.Windows10SDK.19041 --add Microsoft.VisualStudio.Workload.UniversalBuildTools --add Microsoft.VisualStudio.Component.VC.CMake.Project + echo Waiting for VS2019 setup + set "ProcessName=setup.exe" + :CheckProcess + tasklist /FI "IMAGENAME eq %ProcessName%" | find /I "%ProcessName%" >nul + if %errorlevel%==0 ( + ping 127.0.0.1 /n 5 >nul + goto CheckProcess + ) + echo VS2019 Setup completed + exit /b 0 + + - name: Disable Visual Studio 2022 by Renaming + run: | + Rename-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise" "Enterprise_DISABLED" + shell: pwsh + + - name: Display full Visual Studio info After + run: | + & "C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -all -products * -format json + shell: pwsh + - name: Download and install win64devkit run: | curl -L https://github.com/skeeto/w64devkit/releases/download/v1.22.0/w64devkit-1.22.0.zip --output w64devkit.zip diff --git a/CMakeLists.txt b/CMakeLists.txt index 614efc7ac..33f9a1876 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release") set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(LLAMA_STANDALONE ON) +set(BUILD_SHARED_LIBS_DEFAULT ON) set(LLAMA_STATIC OFF) set(LLAMA_NATIVE OFF) set(LLAMA_LTO OFF) @@ -79,6 +81,8 @@ set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_ if (LLAMA_CUBLAS) + cmake_minimum_required(VERSION 3.17) + find_package(CUDAToolkit) if (CUDAToolkit_FOUND) message(STATUS "cuBLAS found") @@ -212,6 +216,10 @@ endif() if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() endif() if (LLAMA_LTO) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 12850b111..aee587b60 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -122,7 +122,7 @@ constexpr int ggml_cuda_highest_compiled_arch(const int arch) { } #else static int ggml_cuda_highest_compiled_arch(const int arch) { - return arch; + return (arch>__CUDA_ARCH__?__CUDA_ARCH__:arch); } #endif // __CUDA_ARCH_LIST__ diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index 8d1f1a8f6..b809329b8 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -15,21 +15,26 @@ static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ggml_backend_cuda_con if constexpr (ncols2 <= 8) { if (Q->ne[1] <= 8/ncols2) { + printf("\nCase B: %d %d %d %d %d\n",DKQ,DV,8/ncols2,ncols2,Q->ne[1]); ggml_cuda_flash_attn_ext_mma_f16_case(ctx, dst); return; } } if (Q->ne[1] <= 16/ncols2) { + printf("\nCase C: %d %d %d %d %d\n",DKQ,DV,16/ncols2,ncols2,Q->ne[1]); ggml_cuda_flash_attn_ext_mma_f16_case(ctx, dst); return; } - if (ggml_cuda_highest_compiled_arch(cc) == GGML_CUDA_CC_TURING || Q->ne[1] <= 32/ncols2) { + if (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING || Q->ne[1] <= 32/ncols2) { + printf("\nCase D: %d %d %d %d %d\n",DKQ,DV,32/ncols2,ncols2,Q->ne[1]); ggml_cuda_flash_attn_ext_mma_f16_case(ctx, dst); return; } + printf("\nDBG: %d %d %d\n",ggml_cuda_highest_compiled_arch(cc),cc,GGML_CUDA_CC_TURING); + printf("\nCase E: %d %d %d %d %d\n",DKQ,DV,64/ncols2,ncols2,Q->ne[1]); ggml_cuda_flash_attn_ext_mma_f16_case(ctx, dst); } @@ -47,7 +52,7 @@ static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols2(ggml_backend_cuda_con GGML_ASSERT(Q->ne[2] % K->ne[2] == 0); const int gqa_ratio = Q->ne[2] / K->ne[2]; - + printf("\ngqa_ratio is %d\n",gqa_ratio); if (use_gqa_opt && gqa_ratio % 8 == 0) { ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); return; @@ -72,7 +77,7 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg const ggml_tensor * K = dst->src[1]; const ggml_tensor * V = dst->src[2]; const ggml_tensor * mask = dst->src[3]; - + printf("\nQ->ne[0] is %d\n",Q->ne[0]); switch (Q->ne[0]) { case 64: GGML_ASSERT(V->ne[0] == 64);