mirror of https://github.com/LostRuins/koboldcpp.git
workaround for deepseek not working
parent 6b0756506b
commit 388a2aff00
3 changed files with 17 additions and 1 deletion
@@ -1812,6 +1812,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
     cl_kernel* to_fp32_cl = ggml_get_to_fp32_cl(type);
     cl_kernel* dmmv = ggml_get_dequantize_mul_mat_vec_cl(type);
+    if(to_fp32_cl==nullptr)
+    {
+        printf("\nOpenCL: Unsupported Tensor Type Detected: %d\n",type);
+    }
     GGML_ASSERT(to_fp32_cl != nullptr);
 
     const size_t global_denom = ggml_cl_global_denom(type);
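The four added lines print which tensor type has no OpenCL dequantization kernel before the existing GGML_ASSERT aborts, so an unsupported quantization shows up in the log instead of a bare assertion failure. A minimal standalone sketch of that check-then-assert pattern, with placeholder type ids and a dummy lookup instead of the real ggml_get_to_fp32_cl:

    #include <cassert>
    #include <cstdio>

    // placeholder type ids, not real ggml types
    enum tensor_type { TYPE_F16 = 1, TYPE_Q4_0 = 2, TYPE_UNKNOWN = 99 };

    static const int dummy_kernel = 0;

    // stand-in for ggml_get_to_fp32_cl: nullptr means no dequantization kernel exists
    static const void * get_to_fp32_kernel(int type) {
        switch (type) {
            case TYPE_F16:
            case TYPE_Q4_0: return &dummy_kernel;
            default:        return nullptr;
        }
    }

    int main() {
        int type = TYPE_UNKNOWN;
        const void * to_fp32 = get_to_fp32_kernel(type);
        if (to_fp32 == nullptr) {
            // report the offending type before the assert terminates the process
            printf("\nOpenCL: Unsupported Tensor Type Detected: %d\n", type);
        }
        assert(to_fp32 != nullptr);  // stand-in for GGML_ASSERT
        return 0;
    }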
@@ -5607,12 +5607,14 @@ static bool llm_load_tensors(
     int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
     bool use_mmap_buffer = true;
 
+    #if defined(GGML_USE_CLBLAST)
     if(clblast_offload_fallback_mode)
     {
         printf("\nOpenCL GPU Offload Fallback...");
         clblast_offload_fallback_layers = n_gpu_layers;
         i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
     }
+    #endif
 
     // there is very little benefit to offloading the input layer, so always keep it on the CPU
     model.buft_input = llama_default_buffer_type_cpu(true);
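The two added preprocessor lines compile the clblast_offload_fallback_mode branch only in CLBlast builds, so other backends never reference clblast_offload_fallback_layers. A small sketch of the same guard; GGML_USE_CLBLAST is normally passed by the build system and is defined inline here only so the snippet is self-contained, and compute_gpu_start is a hypothetical wrapper, not the llm_load_tensors code itself:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    #define GGML_USE_CLBLAST  // assumption: stands in for the build-system flag

    static bool clblast_offload_fallback_mode   = true;
    static int  clblast_offload_fallback_layers = 0;

    // hypothetical wrapper around the layer-split logic shown in the hunk above
    static int64_t compute_gpu_start(int64_t n_layer, int64_t n_gpu_layers) {
        int64_t i_gpu_start = std::max(n_layer - n_gpu_layers, (int64_t) 0);
    #if defined(GGML_USE_CLBLAST)
        if (clblast_offload_fallback_mode) {
            printf("\nOpenCL GPU Offload Fallback...");
            clblast_offload_fallback_layers = (int) n_gpu_layers;
            // raising i_gpu_start to n_layer appears to keep every layer on the CPU path
            i_gpu_start = std::max(n_layer, (int64_t) 0);
        }
    #endif
        return i_gpu_start;
    }

    int main() {
        printf("\ni_gpu_start = %lld (fallback layers: %d)\n",
               (long long) compute_gpu_start(32, 20), clblast_offload_fallback_layers);
        return 0;
    }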
@@ -189,9 +189,19 @@ static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
     return map;
 }
 
+static bool unicode_wstring_from_utf8_failed_once = false;
 static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
     std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
-    return conv.from_bytes(s);
+    try {
+        return conv.from_bytes(s);
+    } catch(const std::exception & e) {
+        if(!unicode_wstring_from_utf8_failed_once)
+        {
+            unicode_wstring_from_utf8_failed_once = true;
+            printf("\nunicode_wstring_from_utf8 failed: %s\n", e.what());
+        }
+        return L"";
+    }
 }
 
 static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {
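The rewritten unicode_wstring_from_utf8 wraps the conversion in try/catch because std::wstring_convert::from_bytes throws std::range_error on input it cannot convert; the catch warns once and returns an empty wide string instead of letting the exception escape. A self-contained sketch of that behaviour, with renamed helpers (note that codecvt_utf8 and wstring_convert are deprecated since C++17 but still available, matching what the patched file uses):

    #include <codecvt>
    #include <cstdio>
    #include <locale>
    #include <string>

    static bool warned_once = false;  // stand-in for unicode_wstring_from_utf8_failed_once

    static std::wstring wstring_from_utf8(const std::string & s) {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
        try {
            return conv.from_bytes(s);
        } catch (const std::exception & e) {  // from_bytes throws std::range_error on failure
            if (!warned_once) {
                warned_once = true;
                printf("\nwstring_from_utf8 failed: %s\n", e.what());
            }
            return L"";  // degrade to an empty string instead of crashing
        }
    }

    int main() {
        // 0xFF is never a valid UTF-8 byte, so this conversion fails and returns L""
        std::wstring w = wstring_from_utf8("\xFF invalid utf-8");
        printf("converted length: %zu\n", w.size());
        return 0;
    }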