Fix bugs in dequant_mul_mat code

This commit is contained in:
0cc4m 2023-05-14 21:14:05 +02:00
parent c77966524a
commit 8795403de3
2 changed files with 16 additions and 17 deletions

View file

@@ -1075,7 +1075,7 @@ static void llama_model_load_internal(
ggml_cl_transform_tensor(layer.w3); vram_total += ggml_nbytes(layer.w3);
}
if (n_gpu_layers > (int) hparams.n_layer) {
-        fprintf(stderr, "%s: [cublas] offloading output layer to GPU\n", __func__);
+        fprintf(stderr, "%s: [opencl] offloading output layer to GPU\n", __func__);
ggml_cl_transform_tensor(model.output); vram_total += ggml_nbytes(model.output);
}