mirror of https://github.com/LostRuins/koboldcpp.git
workaround for deepseek not working
parent 6b0756506b
commit 388a2aff00
3 changed files with 17 additions and 1 deletion
@@ -1812,6 +1812,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
     cl_kernel* to_fp32_cl = ggml_get_to_fp32_cl(type);
     cl_kernel* dmmv = ggml_get_dequantize_mul_mat_vec_cl(type);
+    if(to_fp32_cl==nullptr)
+    {
+        printf("\nOpenCL: Unsupported Tensor Type Detected: %d\n",type);
+    }
     GGML_ASSERT(to_fp32_cl != nullptr);
 
     const size_t global_denom = ggml_cl_global_denom(type);
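The four added lines print which tensor type has no OpenCL dequantization kernel before the existing GGML_ASSERT aborts, so an unsupported quantization shows up in the log instead of a bare assertion failure. A minimal standalone sketch of that check-then-assert pattern, with placeholder type ids and a dummy lookup instead of the real ggml_get_to_fp32_cl:

    #include <cassert>
    #include <cstdio>

    // placeholder type ids, not real ggml types
    enum tensor_type { TYPE_F16 = 1, TYPE_Q4_0 = 2, TYPE_UNKNOWN = 99 };

    static const int dummy_kernel = 0;

    // stand-in for ggml_get_to_fp32_cl: nullptr means no dequantization kernel exists
    static const void * get_to_fp32_kernel(int type) {
        switch (type) {
            case TYPE_F16:
            case TYPE_Q4_0: return &dummy_kernel;
            default:        return nullptr;
        }
    }

    int main() {
        int type = TYPE_UNKNOWN;
        const void * to_fp32 = get_to_fp32_kernel(type);
        if (to_fp32 == nullptr) {
            // report the offending type before the assert terminates the process
            printf("\nOpenCL: Unsupported Tensor Type Detected: %d\n", type);
        }
        assert(to_fp32 != nullptr);  // stand-in for GGML_ASSERT
        return 0;
    }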
@@ -5607,12 +5607,14 @@ static bool llm_load_tensors(
     int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
     bool use_mmap_buffer = true;
 
+    #if defined(GGML_USE_CLBLAST)
     if(clblast_offload_fallback_mode)
     {
         printf("\nOpenCL GPU Offload Fallback...");
         clblast_offload_fallback_layers = n_gpu_layers;
         i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
     }
+    #endif
 
     // there is very little benefit to offloading the input layer, so always keep it on the CPU
     model.buft_input = llama_default_buffer_type_cpu(true);
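The two added preprocessor lines compile the clblast_offload_fallback_mode branch only in CLBlast builds, so other backends never reference clblast_offload_fallback_layers. A small sketch of the same guard; GGML_USE_CLBLAST is normally passed by the build system and is defined inline here only so the snippet is self-contained, and compute_gpu_start is a hypothetical wrapper, not the llm_load_tensors code itself:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    #define GGML_USE_CLBLAST  // assumption: stands in for the build-system flag

    static bool clblast_offload_fallback_mode   = true;
    static int  clblast_offload_fallback_layers = 0;

    // hypothetical wrapper around the layer-split logic shown in the hunk above
    static int64_t compute_gpu_start(int64_t n_layer, int64_t n_gpu_layers) {
        int64_t i_gpu_start = std::max(n_layer - n_gpu_layers, (int64_t) 0);
    #if defined(GGML_USE_CLBLAST)
        if (clblast_offload_fallback_mode) {
            printf("\nOpenCL GPU Offload Fallback...");
            clblast_offload_fallback_layers = (int) n_gpu_layers;
            // raising i_gpu_start to n_layer appears to keep every layer on the CPU path
            i_gpu_start = std::max(n_layer, (int64_t) 0);
        }
    #endif
        return i_gpu_start;
    }

    int main() {
        printf("\ni_gpu_start = %lld (fallback layers: %d)\n",
               (long long) compute_gpu_start(32, 20), clblast_offload_fallback_layers);
        return 0;
    }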
@@ -189,9 +189,19 @@ static std::unordered_map<std::string, uint8_t> unicode_utf8_to_byte_map() {
     return map;
 }
 
+static bool unicode_wstring_from_utf8_failed_once = false;
 static inline std::wstring unicode_wstring_from_utf8(const std::string & s) {
     std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
-    return conv.from_bytes(s);
+    try {
+        return conv.from_bytes(s);
+    } catch(const std::exception & e) {
+        if(!unicode_wstring_from_utf8_failed_once)
+        {
+            unicode_wstring_from_utf8_failed_once = true;
+            printf("\nunicode_wstring_from_utf8 failed: %s\n", e.what());
+        }
+        return L"";
+    }
 }
 
 static std::vector<std::string> unicode_byte_encoding_process(const std::vector<std::string> & bpe_words) {
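The rewritten unicode_wstring_from_utf8 wraps the conversion in try/catch because std::wstring_convert::from_bytes throws std::range_error on input it cannot convert; the catch warns once and returns an empty wide string instead of letting the exception escape. A self-contained sketch of that behaviour, with renamed helpers (note that codecvt_utf8 and wstring_convert are deprecated since C++17 but still available, matching what the patched file uses):

    #include <codecvt>
    #include <cstdio>
    #include <locale>
    #include <string>

    static bool warned_once = false;  // stand-in for unicode_wstring_from_utf8_failed_once

    static std::wstring wstring_from_utf8(const std::string & s) {
        std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
        try {
            return conv.from_bytes(s);
        } catch (const std::exception & e) {  // from_bytes throws std::range_error on failure
            if (!warned_once) {
                warned_once = true;
                printf("\nwstring_from_utf8 failed: %s\n", e.what());
            }
            return L"";  // degrade to an empty string instead of crashing
        }
    }

    int main() {
        // 0xFF is never a valid UTF-8 byte, so this conversion fails and returns L""
        std::wstring w = wstring_from_utf8("\xFF invalid utf-8");
        printf("converted length: %zu\n", w.size());
        return 0;
    }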