mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Not working on CPU
This commit is contained in:
commit
7b4254bef9
2 changed files with 22 additions and 14 deletions
|
@ -351,7 +351,8 @@ struct clip_ctx {
|
|||
std::vector<ggml_backend_t> backend_ptrs;
|
||||
std::vector<ggml_backend_buffer_type_t> backend_buft;
|
||||
|
||||
ggml_backend_ptr backend;
|
||||
ggml_backend_t backend;
|
||||
|
||||
ggml_backend_buffer_ptr buf;
|
||||
|
||||
ggml_backend_sched_ptr sched;
|
||||
|
@ -363,31 +364,35 @@ struct clip_ctx {
|
|||
if(enable_gpu_clip)
|
||||
{
|
||||
#ifdef GGML_USE_CUDA
|
||||
backend = ggml_backend_ptr(ggml_backend_cuda_init(0));
|
||||
backend = ggml_backend_cuda_init(0);
|
||||
LOG_INF("%s: CLIP using CUDA backend\n", __func__);
|
||||
#endif
|
||||
#ifdef GGML_USE_METAL
|
||||
backend = ggml_backend_ptr(ggml_backend_metal_init());
|
||||
backend = ggml_backend_metal_init();
|
||||
LOG_INF("%s: CLIP using Metal backend\n", __func__);
|
||||
#endif
|
||||
#ifdef GGML_USE_VULKAN
|
||||
backend = ggml_backend_ptr(ggml_backend_vk_init(0));
|
||||
backend = ggml_backend_vk_init(0);
|
||||
LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!backend) {
|
||||
backend = ggml_backend_ptr(ggml_backend_cpu_init());
|
||||
backend = ggml_backend_cpu_init();
|
||||
LOG_INF("%s: CLIP using CPU backend\n", __func__);
|
||||
}
|
||||
|
||||
backend_ptrs.push_back(backend.get());
|
||||
backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));
|
||||
backend_ptrs.push_back(backend);
|
||||
backend_buft.push_back(ggml_backend_get_default_buffer_type(backend));
|
||||
|
||||
sched.reset(
|
||||
ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false)
|
||||
);
|
||||
}
|
||||
|
||||
~clip_ctx() {
|
||||
ggml_backend_free(backend);
|
||||
}
|
||||
};
|
||||
|
||||
static ggml_cgraph * clip_image_build_graph_siglip(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
|
||||
|
@ -1555,7 +1560,7 @@ struct clip_model_loader {
|
|||
}
|
||||
|
||||
// alloc memory and offload data
|
||||
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend.get());
|
||||
ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(ctx_clip.backend);
|
||||
ctx_clip.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(ctx_clip.ctx_data.get(), buft));
|
||||
ggml_backend_buffer_set_usage(ctx_clip.buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
for (auto & t : tensors_to_load) {
|
||||
|
@ -2950,8 +2955,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
if (window_mask) ggml_backend_tensor_set(window_mask, mask.data(), 0, ggml_nbytes(window_mask));
|
||||
}
|
||||
|
||||
if (ggml_backend_is_cpu(ctx->backend.get())) {
|
||||
ggml_backend_cpu_set_n_threads(ctx->backend.get(), n_threads);
|
||||
if (ggml_backend_is_cpu(ctx->backend)) {
|
||||
ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
|
||||
}
|
||||
|
||||
auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
|
||||
|
|
|
@ -201,6 +201,11 @@ void main() {
|
|||
uint32_t q_stride = p.gqa_ratio > 1 ? (p.nb02 / 4) : p.nb01;
|
||||
uint32_t k_stride = p.nb11;
|
||||
uint32_t v_stride = p.nb21;
|
||||
// When using grouped query attention, all rows use the same mask (stride 0).
|
||||
// "p.gqa_ratio >> 16" is just a roundabout way of writing zero
|
||||
// that prevents the compiler from folding the "&" through the select
|
||||
// and breaking the alignment detection.
|
||||
uint32_t m_stride = (p.gqa_ratio > 1) ? (p.gqa_ratio >> 16) : KV;
|
||||
// hint to the compiler that strides are aligned for the aligned variant of the shader
|
||||
if (Clamp != gl_CooperativeMatrixClampModeConstantNV)
|
||||
{
|
||||
|
@ -209,6 +214,7 @@ void main() {
|
|||
k_stride &= ~7;
|
||||
v_stride &= ~7;
|
||||
#endif
|
||||
m_stride &= ~7;
|
||||
}
|
||||
tensorLayoutQ = setTensorLayoutStrideNV(tensorLayoutQ, q_stride, 1);
|
||||
tensorLayoutK = setTensorLayoutStrideNV(tensorLayoutK, k_stride, 1);
|
||||
|
@ -261,10 +267,7 @@ void main() {
|
|||
if (p.mask != 0) {
|
||||
tensorLayoutNV<2, Clamp> tensorLayoutM = createTensorLayoutNV(2, Clamp);
|
||||
tensorLayoutM = setTensorLayoutDimensionNV(tensorLayoutM, p.nem1, KV);
|
||||
// When using grouped query attention, all rows use the same mask.
|
||||
if (p.gqa_ratio > 1) {
|
||||
tensorLayoutM = setTensorLayoutStrideNV(tensorLayoutM, 0, 1);
|
||||
}
|
||||
tensorLayoutM = setTensorLayoutStrideNV(tensorLayoutM, m_stride, 1);
|
||||
|
||||
coopmat<float16_t, gl_ScopeWorkgroup, Br, Bc, gl_MatrixUseAccumulator> mv;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue