diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 1b86b3d4a..7ebb4ec02 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -51,6 +51,12 @@ The packages for FP32 and FP16 would have different accuracy and performance on ## News +- 2026.04 + + - Optimize mul_mat by reorder feature for data type: Q4_K, Q5_K, Q_K, Q8_0. + - Fused MoE. + - Upgrate CI and built package for oneAPI 2025.3.3, support Ubuntu 24.04 built package. + - 2026.03 - Support Flash-Attention: less memory usage, performance impact depends on LLM. @@ -349,6 +355,12 @@ Choose one of following methods to run. ./examples/sycl/test.sh ``` +- Run llama-server: + +```sh +./examples/sycl/start-svr.sh -m PATH/MODEL_FILE +``` + 2. Command line Launch inference @@ -637,10 +649,18 @@ Choose one of following methods to run. 1. Script +- Run test: + ``` examples\sycl\win-test.bat ``` +- Run llama-server: + +``` +examples\sycl\win-start-svr.bat -m PATH\MODEL_FILE +``` + 2. Command line Launch inference diff --git a/examples/sycl/start-svr.sh b/examples/sycl/start-svr.sh new file mode 100755 index 000000000..55cd0210f --- /dev/null +++ b/examples/sycl/start-svr.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +# MIT license +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: MIT + +Help() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +This script processes files with specified options. + +Options: + -h, --help Display this help message and exit. + -c, --context Set context length. Bigger need more memory. + -p, --promote Prompt to start generation with. + -m, --model Full model file path. + -mg,--main-gpu Set main GPU ID (0 - n) for single GPU mode. + -sm,--split-mode How to split the model across multiple GPUs, one of: + - none: use one GPU only + - layer (default): split layers and KV across GPUs + - row: split rows across GPUs + -ngl,--n-gpu-layers Max. number of layers to store in VRAM (default: -1) + -lv,--log-verbosity Set the verbosity threshold. Messages with a higher verbosity will be + ignored. Values: + - 0: generic output + - 1: error + - 2: warning + - 3: info + - 4: debug + + +EOF +} + +BIN_FILE=./build/bin/llama-server +SEED=0 +GPUS_SETTING="" + +MODEL_FILE=../models/Qwen3.5-4B-Q4_0.gguf +NGL=99 +CONTEXT=4096 +GGML_SYCL_DEVICE=-1 +SPLIT_MODE=layer +LOG_VERBOSE=3 +while [[ $# -gt 0 ]]; do + case "$1" in + -c|--context) + CONTEXT=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -m|--model) + MODEL_FILE="$2" + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -mg|--main-gpu) + GGML_SYCL_DEVICE=$2 + SPLIT_MODE=none + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -sm|--split-mode) + SPLIT_MODE=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -ngl|--n-gpu-layers) + NGL=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -lv|--log-verbosity) + LOG_VERBOSE=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -h|--help) + Help + exit 0 + ;; + *) + # Handle unknown options or stop processing options + echo "Invalid option: $1" + # Optional: exit script or shift to treat remaining as positional args + exit 1 + ;; + esac +done + + + +source /opt/intel/oneapi/setvars.sh + +#export GGML_SYCL_DEBUG=1 + +#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. + +#support malloc device memory more than 4GB. +export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 +echo "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=${UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS}" + +if [ $GGML_SYCL_DEVICE -ne -1 ]; then + echo "Use $GGML_SYCL_DEVICE as main GPU" + #use signle GPU only + GPUS_SETTING="-mg $GGML_SYCL_DEVICE -sm ${SPLIT_MODE}" + export ONEAPI_DEVICE_SELECTOR="level_zero:${$GGML_SYCL_DEVICE}" + echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}" +else + echo "Use all Intel GPUs, including iGPU & dGPU" + GPUS_SETTING="-sm ${SPLIT_MODE}" + fi + +echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap " +ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap --host 0.0.0.0 --port 8000 + + diff --git a/examples/sycl/test.sh b/examples/sycl/test.sh index 140c19146..14dcac56a 100755 --- a/examples/sycl/test.sh +++ b/examples/sycl/test.sh @@ -38,7 +38,7 @@ SEED=0 GPUS_SETTING="" INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:" -MODEL_FILE=models/llama-2-7b.Q4_0.gguf +MODEL_FILE=../models/llama-2-7b.Q4_0.gguf NGL=99 CONTEXT=4096 GGML_SYCL_DEVICE=-1 @@ -122,9 +122,10 @@ if [ $GGML_SYCL_DEVICE -ne -1 ]; then export ONEAPI_DEVICE_SELECTOR="level_zero:${$GGML_SYCL_DEVICE}" echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}" else - echo "Use all Intel GPUs, including iGPU & dGPU" + echo "Use all Intel GPUs, including iGPU & dGPU" + GPUS_SETTING="-sm ${SPLIT_MODE}" fi -echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap " -ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap +echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap " +ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap diff --git a/examples/sycl/win-start-svr.bat b/examples/sycl/win-start-svr.bat new file mode 100644 index 000000000..4d850cbaa --- /dev/null +++ b/examples/sycl/win-start-svr.bat @@ -0,0 +1,179 @@ +:: MIT license +:: Copyright (C) 2024 Intel Corporation +:: SPDX-License-Identifier: MIT + +@echo off +setlocal EnableExtensions EnableDelayedExpansion + +set "BIN_FILE=.\build\bin\llama-server.exe" +set "SEED=0" +set "GPUS_SETTING=" + +set "MODEL_FILE=..\models\Qwen3.5-4B-Q4_0.gguf" +set "NGL=99" +set "CONTEXT=4096" +set "GGML_SYCL_DEVICE=-1" +set "SPLIT_MODE=layer" +set "LOG_VERBOSE=3" + +if "%~1"=="" goto after_args + +:parse_args +if "%~1"=="" goto after_args + +if /I "%~1"=="-c" ( + if "%~2"=="" goto missing_value + set "CONTEXT=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--context" ( + if "%~2"=="" goto missing_value + set "CONTEXT=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-m" ( + if "%~2"=="" goto missing_value + set "MODEL_FILE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--model" ( + if "%~2"=="" goto missing_value + set "MODEL_FILE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-mg" ( + if "%~2"=="" goto missing_value + set "GGML_SYCL_DEVICE=%~2" + set "SPLIT_MODE=none" + shift + shift + goto parse_args +) +if /I "%~1"=="--main-gpu" ( + if "%~2"=="" goto missing_value + set "GGML_SYCL_DEVICE=%~2" + set "SPLIT_MODE=none" + shift + shift + goto parse_args +) + +if /I "%~1"=="-sm" ( + if "%~2"=="" goto missing_value + set "SPLIT_MODE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--split-mode" ( + if "%~2"=="" goto missing_value + set "SPLIT_MODE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-ngl" ( + if "%~2"=="" goto missing_value + set "NGL=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--n-gpu-layers" ( + if "%~2"=="" goto missing_value + set "NGL=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-lv" ( + if "%~2"=="" goto missing_value + set "LOG_VERBOSE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--log-verbosity" ( + if "%~2"=="" goto missing_value + set "LOG_VERBOSE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-h" goto help +if /I "%~1"=="--help" goto help + +echo Invalid option: %~1 +exit /b 1 + +:missing_value +echo Missing value for option: %~1 +exit /b 1 + +:help +echo Usage: %~n0 [OPTIONS] +echo. +echo This script processes files with specified options. +echo. +echo Options: +echo -h, --help Display this help message and exit. +echo -c, --context ^ Set context length. Bigger need more memory. +echo -m, --model ^ Full model file path. +echo -mg,--main-gpu ^ Set main GPU ID (0 - n) for single GPU mode. +echo -sm,--split-mode ^ How to split the model across multiple GPUs, one of: +echo - none: use one GPU only +echo - layer (default): split layers and KV across GPUs +echo - row: split rows across GPUs +echo -ngl,--n-gpu-layers ^ Max. number of layers to store in VRAM (default: -1) +echo -lv,--log-verbosity ^ Set the verbosity threshold. Messages with a higher verbosity will be +echo ignored. Values: +echo - 0: generic output +echo - 1: error +echo - 2: warning +echo - 3: info +echo - 4: debug +exit /b 0 + +:after_args + +REM In Windows CMD, source is not available; call oneAPI setvars if present. +if exist "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" ( + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" >nul +) else ( + echo Warning: oneAPI setvars.bat not found. Continuing without environment setup. +) + +REM Support malloc device memory more than 4GB. +set "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1" +echo UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=%UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS% + +if not "%GGML_SYCL_DEVICE%"=="-1" ( + echo Use %GGML_SYCL_DEVICE% as main GPU + REM Use single GPU only. + set "GPUS_SETTING=-mg %GGML_SYCL_DEVICE% -sm %SPLIT_MODE%" + set "ONEAPI_DEVICE_SELECTOR=level_zero:%GGML_SYCL_DEVICE%" + echo ONEAPI_DEVICE_SELECTOR=%ONEAPI_DEVICE_SELECTOR% +) else ( + echo Use all Intel GPUs, including iGPU ^& dGPU + set "GPUS_SETTING=-sm %SPLIT_MODE%" +) + +echo run cmd: ZES_ENABLE_SYSMAN=1 %BIN_FILE% -m "%MODEL_FILE%" -ngl %NGL% -s %SEED% -c %CONTEXT% %GPUS_SETTING% -lv %LOG_VERBOSE% --mmap --host 0.0.0.0 --port 8000 +set "ZES_ENABLE_SYSMAN=1" +%BIN_FILE% -m "%MODEL_FILE%" -ngl %NGL% -s %SEED% -c %CONTEXT% %GPUS_SETTING% -lv %LOG_VERBOSE% --mmap --host 0.0.0.0 --port 8000 + +endlocal + diff --git a/examples/sycl/win-test.bat b/examples/sycl/win-test.bat index 1f2dab8d0..781d17705 100644 --- a/examples/sycl/win-test.bat +++ b/examples/sycl/win-test.bat @@ -2,10 +2,200 @@ :: Copyright (C) 2024 Intel Corporation :: SPDX-License-Identifier: MIT -set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" -@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force -:: support malloc device memory more than 4GB. -set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 -set LOAD_MODE="--mmap" -.\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -no-cnv -p %INPUT2% -n 400 -e -ngl 99 -s 0 %LOAD_MODE% +@echo off +setlocal EnableExtensions EnableDelayedExpansion + +REM MIT license +REM Copyright (C) 2024 Intel Corporation +REM SPDX-License-Identifier: MIT + +set "BIN_FILE=.\build\bin\llama-completion.exe" +set "SEED=0" +set "GPUS_SETTING=" + +set "INPUT_PROMPT=Building a website can be done in 10 simple steps:^nStep 1:" +set "MODEL_FILE=..\models\llama-2-7b.Q4_0.gguf" +set "NGL=99" +set "CONTEXT=4096" +set "GGML_SYCL_DEVICE=-1" +set "SPLIT_MODE=layer" +set "LOG_VERBOSE=3" + +if "%~1"=="" goto after_args + +:parse_args +if "%~1"=="" goto after_args + +if /I "%~1"=="-c" ( + if "%~2"=="" goto missing_value + set "CONTEXT=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--context" ( + if "%~2"=="" goto missing_value + set "CONTEXT=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-p" ( + if "%~2"=="" goto missing_value + set "INPUT_PROMPT=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--promote" ( + if "%~2"=="" goto missing_value + set "INPUT_PROMPT=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-m" ( + if "%~2"=="" goto missing_value + set "MODEL_FILE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--model" ( + if "%~2"=="" goto missing_value + set "MODEL_FILE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-mg" ( + if "%~2"=="" goto missing_value + set "GGML_SYCL_DEVICE=%~2" + set "SPLIT_MODE=none" + shift + shift + goto parse_args +) +if /I "%~1"=="--main-gpu" ( + if "%~2"=="" goto missing_value + set "GGML_SYCL_DEVICE=%~2" + set "SPLIT_MODE=none" + shift + shift + goto parse_args +) + +if /I "%~1"=="-sm" ( + if "%~2"=="" goto missing_value + set "SPLIT_MODE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--split-mode" ( + if "%~2"=="" goto missing_value + set "SPLIT_MODE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-ngl" ( + if "%~2"=="" goto missing_value + set "NGL=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--n-gpu-layers" ( + if "%~2"=="" goto missing_value + set "NGL=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-lv" ( + if "%~2"=="" goto missing_value + set "LOG_VERBOSE=%~2" + shift + shift + goto parse_args +) +if /I "%~1"=="--log-verbosity" ( + if "%~2"=="" goto missing_value + set "LOG_VERBOSE=%~2" + shift + shift + goto parse_args +) + +if /I "%~1"=="-h" goto help +if /I "%~1"=="--help" goto help + +echo Invalid option: %~1 +exit /b 1 + +:missing_value +echo Missing value for option: %~1 +exit /b 1 + +:help +echo Usage: %~n0 [OPTIONS] +echo. +echo This script processes files with specified options. +echo. +echo Options: +echo -h, --help Display this help message and exit. +echo -c, --context ^ Set context length. Bigger need more memory. +echo -p, --promote ^ Prompt to start generation with. +echo -m, --model ^ Full model file path. +echo -mg,--main-gpu ^ Set main GPU ID (0 - n) for single GPU mode. +echo -sm,--split-mode ^ How to split the model across multiple GPUs, one of: +echo - none: use one GPU only +echo - layer (default): split layers and KV across GPUs +echo - row: split rows across GPUs +echo -ngl,--n-gpu-layers ^ Max. number of layers to store in VRAM (default: -1) +echo -lv,--log-verbosity ^ Set the verbosity threshold. Messages with a higher verbosity will be +echo ignored. Values: +echo - 0: generic output +echo - 1: error +echo - 2: warning +echo - 3: info +echo - 4: debug +exit /b 0 + +:after_args + +REM In Windows CMD, source is not available; call oneAPI setvars if present. +if exist "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" ( + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" >nul +) else ( + echo Warning: oneAPI setvars.bat not found. Continuing without environment setup. +) + +REM Support malloc device memory more than 4GB. +set "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1" +echo UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=%UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS% + +if not "%GGML_SYCL_DEVICE%"=="-1" ( + echo Use %GGML_SYCL_DEVICE% as main GPU + REM Use single GPU only. + set "GPUS_SETTING=-mg %GGML_SYCL_DEVICE% -sm %SPLIT_MODE%" + set "ONEAPI_DEVICE_SELECTOR=level_zero:%GGML_SYCL_DEVICE%" + echo ONEAPI_DEVICE_SELECTOR=%ONEAPI_DEVICE_SELECTOR% +) else ( + echo Use all Intel GPUs, including iGPU ^& dGPU + set "GPUS_SETTING=-sm %SPLIT_MODE%" +) + +echo run cmd: ZES_ENABLE_SYSMAN=1 %BIN_FILE% -m %MODEL_FILE% -no-cnv -p "%INPUT_PROMPT%" -n 200 -e -ngl %NGL% -s %SEED% -c %CONTEXT% %GPUS_SETTING% -lv %LOG_VERBOSE% --mmap +set "ZES_ENABLE_SYSMAN=1" +%BIN_FILE% -m "%MODEL_FILE%" -no-cnv -p "%INPUT_PROMPT%" -n 200 -e -ngl %NGL% -s %SEED% -c %CONTEXT% %GPUS_SETTING% -lv %LOG_VERBOSE% --mmap + +endlocal + diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 0101b2764..5abf22906 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -224,7 +224,7 @@ struct sycl_device_info { // cudaOccupancyMaxActiveBlocksPerMultiprocessor bool vmm; // virtual memory support size_t total_vram; - //sycl_hw_info hw_info; \\ device id and aarch, currently not used + sycl_hw_info hw_info; optimize_feature opt_feature; }; diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 36923160d..1eead625e 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -104,6 +104,7 @@ static ggml_sycl_device_info ggml_sycl_init() { info.max_work_group_sizes[i] = prop.get_max_work_group_size(); info.devices[i].max_wg_per_cu = info.max_work_group_sizes[i] / prop.get_max_compute_units(); + info.devices[i].hw_info = get_device_hw_info(&device); } @@ -3703,9 +3704,16 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor // Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization // is enabled takes precedence over DMMV, the current if-else implementation // requires disabling DMMV if both conditions are met + if (!g_ggml_sycl_prioritize_dmmv && ((should_reorder_tensor(ctx, dst) && ggml_sycl_supports_reorder_mmvq(src0->type)))) { - use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q; + // Arc770 get benefit with Q4_0 by skipping it. + if (!(ggml_sycl_info().devices[ctx.device].hw_info.arch == + gpu_arch::intel_gpu_acm_g10 && + src0->type == GGML_TYPE_Q4_0)) { + use_dequantize_mul_mat_vec = + use_dequantize_mul_mat_vec && !use_mul_mat_vec_q; + } } if (!split && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) { diff --git a/ggml/src/ggml-sycl/sycl_hw.cpp b/ggml/src/ggml-sycl/sycl_hw.cpp index 704114003..03b0c37a3 100644 --- a/ggml/src/ggml-sycl/sycl_hw.cpp +++ b/ggml/src/ggml-sycl/sycl_hw.cpp @@ -1,15 +1,67 @@ #include "sycl_hw.hpp" -// TODO: currently not used -/* -sycl_hw_info get_device_hw_info(sycl::device *device_ptr) { - sycl_hw_info res; - int32_t id = device_ptr->get_info(); - res.device_id = id; +using namespace std; - syclex::architecture arch = device_ptr->get_info(); - res.arch = arch; - - return res; -} +/*defined in +* /opt/intel/oneapi/compiler/latest/include/sycl/ext/oneapi/experimental/device_architecture.def */ +static map> arch2name = { + {gpu_arch::intel_gpu_bdw, {"intel_gpu_bdw", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_skl, {"intel_gpu_skl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_kbl, {"intel_gpu_kbl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_cfl, {"intel_gpu_cfl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_apl, {"intel_gpu_apl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_glk, {"intel_gpu_glk", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_whl, {"intel_gpu_whl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_aml, {"intel_gpu_aml", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_cml, {"intel_gpu_cml", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_icllp, {"intel_gpu_icllp", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_ehl, {"intel_gpu_ehl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_tgllp, {"intel_gpu_tgllp", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_rkl, {"intel_gpu_rkl", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_adl_s, {"intel_gpu_adl_s", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_adl_p, {"intel_gpu_adl_p", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_adl_n, {"intel_gpu_adl_n", GPU_FAMILY_IGPU_NON_XE}}, + {gpu_arch::intel_gpu_dg1, {"intel_gpu_dg1", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_acm_g10, {"intel_gpu_acm_g10", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_acm_g11, {"intel_gpu_acm_g11", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_acm_g12, {"intel_gpu_acm_g12", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_pvc, {"intel_gpu_pvc", GPU_FAMILY_DGPU_CLOUD}}, + {gpu_arch::intel_gpu_pvc_vg, {"intel_gpu_pvc_vg", GPU_FAMILY_DGPU_CLOUD}}, + {gpu_arch::intel_gpu_mtl_u, {"intel_gpu_mtl_u", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_mtl_h, {"intel_gpu_mtl_h", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_arl_h, {"intel_gpu_arl_h", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_bmg_g21, {"intel_gpu_bmg_g21", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_bmg_g31, {"intel_gpu_bmg_g31", GPU_FAMILY_DGPU_CLIENT_GAME}}, + {gpu_arch::intel_gpu_lnl_m, {"intel_gpu_lnl_m", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_ptl_h, {"intel_gpu_ptl_h", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_ptl_u, {"intel_gpu_ptl_u", GPU_FAMILY_IGPU_XE}}, + {gpu_arch::intel_gpu_wcl, {"intel_gpu_wcl", GPU_FAMILY_IGPU_XE}} +}; + + +sycl_hw_info get_device_hw_info(sycl::device* device_ptr) { + sycl_hw_info res; + int32_t id = + device_ptr->get_info(); + res.device_id = id; + + res.name = device_ptr->get_info(); + + syclex::architecture arch = + device_ptr->get_info(); + res.arch = arch; + + map>::iterator it = + arch2name.find(res.arch); + if (it != arch2name.end()) { + res.arch_name = it->second.first; + res.gpu_family = it->second.second; + } else { + res.arch_name = "unknown"; + res.gpu_family = GPU_FAMILY_UKNOWN; + } + + return res; +} diff --git a/ggml/src/ggml-sycl/sycl_hw.hpp b/ggml/src/ggml-sycl/sycl_hw.hpp index 36b140bf0..a5d204625 100644 --- a/ggml/src/ggml-sycl/sycl_hw.hpp +++ b/ggml/src/ggml-sycl/sycl_hw.hpp @@ -9,18 +9,30 @@ #include namespace syclex = sycl::ext::oneapi::experimental; +using gpu_arch = sycl::ext::oneapi::experimental::architecture; -// TODO: currently not used -/* -struct sycl_hw_info { - syclex::architecture arch; - int32_t device_id; +// It's used to mark the GPU computing capacity +// The value must flow the order of performance. +enum sycl_intel_gpu_family { + GPU_FAMILY_UKNOWN = -1, + // iGPU without Xe core, before Meteor Lake iGPU(Xe) + GPU_FAMILY_IGPU_NON_XE = 0, + // iGPU with Xe core, Meteor Lake iGPU or newer. + GPU_FAMILY_IGPU_XE = 1, + // dGPU for gaming in client/data center (DG1/FLex 140 or newer). + GPU_FAMILY_DGPU_CLIENT_GAME = 2, + // dGPU for AI in cloud, PVC or newer. + GPU_FAMILY_DGPU_CLOUD = 3 }; -bool is_in_vector(std::vector &vec, int item); +struct sycl_hw_info { + syclex::architecture arch; + const char* arch_name; + int32_t device_id; + std::string name; + sycl_intel_gpu_family gpu_family; +}; sycl_hw_info get_device_hw_info(sycl::device *device_ptr); -*/ - #endif // SYCL_HW_HPP