diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 6153ca69e..47567b43b 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -1812,6 +1812,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *

     cl_kernel* to_fp32_cl = ggml_get_to_fp32_cl(type);
     cl_kernel* dmmv = ggml_get_dequantize_mul_mat_vec_cl(type);
+    if(to_fp32_cl==nullptr)
+    {
+        printf("\nOpenCL: Unsupported Tensor Type Detected: %d\n",type);
+    }
     GGML_ASSERT(to_fp32_cl != nullptr);

     const size_t global_denom = ggml_cl_global_denom(type);
diff --git a/src/llama.cpp b/src/llama.cpp
index 784711598..dc9c4c0ea 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5607,12 +5607,14 @@ static bool llm_load_tensors(
     int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
     bool use_mmap_buffer = true;

+    #if defined(GGML_USE_CLBLAST)
     if(clblast_offload_fallback_mode)
     {
         printf("\nOpenCL GPU Offload Fallback...");
         clblast_offload_fallback_layers = n_gpu_layers;
         i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
     }
+    #endif

     // there is very little benefit to offloading the input layer, so always keep it on the CPU
     model.buft_input = llama_default_buffer_type_cpu(true);
diff --git a/src/unicode.cpp b/src/unicode.cpp
index 8692924b9..bacd0129f 100644
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -189,9 +189,19 @@ static std::unordered_map<uint8_t, std::string> unicode_utf8_to_byte_map() {
     return map;
 }

+static bool unicode_wstring_from_utf8_failed_once = false;
 static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
     std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
-    return conv.from_bytes(s);
+    try {
+        return conv.from_bytes(s);
+    } catch(const std::exception & e) {
+        if(!unicode_wstring_from_utf8_failed_once)
+        {
+            unicode_wstring_from_utf8_failed_once = true;
+            printf("\nunicode_wstring_from_utf8 failed: %s\n", e.what());
+        }
+        return L"";
+    }
 }

 static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {