mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-27 00:14:49 +00:00
sycl : Level Zero detection in ggml_sycl_init (#23097)
* [SYCL] Centralize Level Zero detection in ggml_sycl_init
* Use the same wording
* Restore the warning
This commit is contained in:
parent
56f16f235c
commit
bcfd1989e9
2 changed files with 10 additions and 18 deletions
|
|
@ -238,6 +238,8 @@ struct ggml_sycl_device_info {
|
|||
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
|
||||
|
||||
int max_work_group_sizes[GGML_SYCL_MAX_DEVICES] = {0};
|
||||
|
||||
bool ext_oneapi_level_zero = true; // sycl::backend::ext_oneapi_level_zero used by all enumerated GPU devices
|
||||
};
|
||||
|
||||
const ggml_sycl_device_info & ggml_sycl_info();
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|||
for (int i = 0; i < info.device_count; ++i) {
|
||||
info.devices[i].vmm = 0;
|
||||
dpct::device_info prop;
|
||||
sycl::device device = dpct::dev_mgr::instance().get_device(i);
|
||||
auto & device = dpct::dev_mgr::instance().get_device(i);
|
||||
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||
prop, device)));
|
||||
|
|
@ -117,6 +117,12 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|||
info.devices[i].max_wg_per_cu = info.max_work_group_sizes[i] / prop.get_max_compute_units();
|
||||
info.devices[i].hw_info = get_device_hw_info(&device);
|
||||
|
||||
// Only check GPU devices; CPU devices use OpenCL and would otherwise
|
||||
// disable Level Zero for the GPUs on systems without ONEAPI_DEVICE_SELECTOR set.
|
||||
if (device.is_gpu() && device.default_queue().get_backend() != sycl::backend::ext_oneapi_level_zero) {
|
||||
GGML_LOG_WARN("SYCL GPU device %d does not use Level Zero backend, disabling Level Zero memory API\n", i);
|
||||
info.ext_oneapi_level_zero = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (int id = 0; id < info.device_count; ++id) {
|
||||
|
|
@ -230,26 +236,10 @@ static void ggml_check_sycl() try {
|
|||
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
||||
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
g_ggml_sycl_enable_level_zero = get_sycl_env("GGML_SYCL_ENABLE_LEVEL_ZERO", 1);
|
||||
g_ggml_sycl_enable_level_zero = get_sycl_env("GGML_SYCL_ENABLE_LEVEL_ZERO", ggml_sycl_info().ext_oneapi_level_zero);
|
||||
#else
|
||||
g_ggml_sycl_enable_level_zero = 0;
|
||||
#endif
|
||||
if (g_ggml_sycl_enable_level_zero) {
|
||||
// Verify all GPU devices use the Level Zero backend before enabling L0 APIs.
|
||||
// Only check GPU devices; CPU devices use OpenCL and would otherwise
|
||||
// disable Level Zero for the GPUs on systems without ONEAPI_DEVICE_SELECTOR set.
|
||||
for (unsigned int i = 0; i < dpct::dev_mgr::instance().device_count(); i++) {
|
||||
auto & q = dpct::dev_mgr::instance().get_device(i).default_queue();
|
||||
if (!q.get_device().is_gpu()) {
|
||||
continue;
|
||||
}
|
||||
if (q.get_backend() != sycl::backend::ext_oneapi_level_zero) {
|
||||
GGML_LOG_WARN("SYCL GPU device %d does not use Level Zero backend, disabling Level Zero memory API\n", i);
|
||||
g_ggml_sycl_enable_level_zero = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SYCL_FLASH_ATTN
|
||||
g_ggml_sycl_enable_flash_attention = get_sycl_env("GGML_SYCL_ENABLE_FLASH_ATTN", 1);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue