Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
fixes for sdcpp

Commit 7e1289ade8 (parent a0ae187563)
7 changed files with 26 additions and 16 deletions
@@ -363,26 +363,26 @@ struct clip_ctx {
     if(enable_gpu_clip)
     {
 #ifdef GGML_USE_CUDA
-        backend = ggml_backend_cuda_init(0);
+        backend = ggml_backend_ptr(ggml_backend_cuda_init(0));
         LOG_INF("%s: CLIP using CUDA backend\n", __func__);
 #endif
 #ifdef GGML_USE_METAL
-        backend = ggml_backend_metal_init();
+        backend = ggml_backend_ptr(ggml_backend_metal_init());
         LOG_INF("%s: CLIP using Metal backend\n", __func__);
 #endif
 #ifdef GGML_USE_VULKAN
-        backend = ggml_backend_vk_init(0);
+        backend = ggml_backend_ptr(ggml_backend_vk_init(0));
         LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
 #endif
     }

     if (!backend) {
-        backend = ggml_backend_cpu_init();
+        backend = ggml_backend_ptr(ggml_backend_cpu_init());
         LOG_INF("%s: CLIP using CPU backend\n", __func__);
     }

-    backend_ptrs.push_back(backend);
-    backend_buft.push_back(ggml_backend_get_default_buffer_type(backend));
+    backend_ptrs.push_back(backend.get());
+    backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));

     sched.reset(
         ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false)

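The recurring change in this first hunk is ownership: the CLIP backend handles move from raw ggml_backend_t into ggml_backend_ptr, so C APIs now receive the borrowed handle via .get() and the backend is released automatically together with the context. A minimal sketch of that pattern, assuming ggml_backend_ptr is the usual std::unique_ptr alias with a ggml_backend_free deleter (as declared in ggml's ggml-cpp.h):

// Sketch only: mirrors the unique_ptr-based alias that ggml-cpp.h is assumed to provide.
#include <memory>
#include <vector>
#include "ggml-backend.h"   // ggml_backend_t, ggml_backend_cpu_init, ggml_backend_free, ...

struct ggml_backend_deleter {
    void operator()(ggml_backend_t b) const { ggml_backend_free(b); }
};
using ggml_backend_ptr = std::unique_ptr<struct ggml_backend, ggml_backend_deleter>;

int main() {
    // Take ownership of the freshly created backend handle.
    ggml_backend_ptr backend(ggml_backend_cpu_init());

    // C-style APIs keep operating on the borrowed raw handle via .get().
    std::vector<ggml_backend_t>             backend_ptrs;
    std::vector<ggml_backend_buffer_type_t> backend_buft;
    backend_ptrs.push_back(backend.get());
    backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));

    // No explicit ggml_backend_free(): the deleter runs when `backend` goes out of scope.
    return 0;
}

The push_back(backend.get()) lines in the hunk follow directly from this: the vectors only borrow the raw pointer, while the smart-pointer member keeps ownership.
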
@@ -1228,7 +1228,7 @@ struct clip_model_loader {
     // print gguf info
     try {
         std::string name;
         get_string(KEY_NAME, name, false);
         std::string description;

@@ -2950,8 +2950,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         if (window_mask) ggml_backend_tensor_set(window_mask, mask.data(), 0, ggml_nbytes(window_mask));
     }

-    if (ggml_backend_is_cpu(ctx->backend)) {
-        ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
+    if (ggml_backend_is_cpu(ctx->backend.get())) {
+        ggml_backend_cpu_set_n_threads(ctx->backend.get(), n_threads);
     }

     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);

@@ -56,7 +56,7 @@ public:
         // x: [N, channels, h, w]
         auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);

-        x = ggml_upscale(ctx, x, 2); // [N, channels, h*2, w*2]
+        x = ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
         x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
         return x;
     }

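This hunk and the later ones that touch ggml_upscale all track the same upstream ggml API change: ggml_upscale now takes an explicit interpolation mode, and passing GGML_SCALE_MODE_NEAREST (spelled ggml_scale_mode::GGML_SCALE_MODE_NEAREST in these C++ files) preserves the previous nearest-neighbour behaviour. A small sketch of the updated call, assuming the current four-argument signature ggml_upscale(ctx, tensor, scale_factor, mode):

// Hedged sketch of the new call shape; upsample_2x is a local helper name, not sd.cpp's.
#include "ggml.h"

static struct ggml_tensor * upsample_2x(struct ggml_context * ctx, struct ggml_tensor * x) {
    // Old form:  ggml_upscale(ctx, x, 2);
    // New form:  the extra argument selects the interpolation; NEAREST matches the old behaviour.
    return ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);
}
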
@@ -130,8 +130,8 @@ public:
         body_feat = conv_body->forward(ctx, body_feat);
         feat = ggml_add(ctx, feat, body_feat);
         // upsample
-        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2)));
-        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2)));
+        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
+        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
         auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
         return out;
     }

@@ -113,7 +113,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                         a->ne[0] * b->ne[0],
                         a->ne[1] * b->ne[1],
                         a->ne[2] * b->ne[2],
-                        a->ne[3] * b->ne[3]),
+                        a->ne[3] * b->ne[3],
+                        ggml_scale_mode::GGML_SCALE_MODE_NEAREST),
                     b);
 }

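For context, the ggml_kronecker helper that this hunk touches builds a Kronecker-style product: a is nearest-upscaled so each dimension becomes a->ne[i] * b->ne[i], and the result is then multiplied elementwise with b. Only the final argument of the extended upscale changes here. A hedged sketch of that overall expression, assuming ggml_upscale_ext(ctx, a, ne0, ne1, ne2, ne3, mode) is the entry point these lines feed into:

// Hypothetical reconstruction for illustration; the real helper lives in sd.cpp's ggml extensions.
#include "ggml.h"

static struct ggml_tensor * kronecker_sketch(struct ggml_context * ctx,
                                             struct ggml_tensor * a,
                                             struct ggml_tensor * b) {
    // Upscale a so that every dimension is a->ne[i] * b->ne[i], using nearest interpolation ...
    struct ggml_tensor * up = ggml_upscale_ext(ctx, a,
                                               a->ne[0] * b->ne[0],
                                               a->ne[1] * b->ne[1],
                                               a->ne[2] * b->ne[2],
                                               a->ne[3] * b->ne[3],
                                               GGML_SCALE_MODE_NEAREST);
    // ... then multiply elementwise with b (broadcast) to get the Kronecker-like layout.
    return ggml_mul(ctx, up, b);
}
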
@@ -1749,7 +1749,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
-        LOG_DEBUG("loading tensors from %s", file_path.c_str());
+        LOG_DEBUG("loading tensors from %s\n", file_path.c_str());

         std::ifstream file(file_path, std::ios::binary);
         if (!file.is_open()) {

@@ -1886,7 +1886,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 }
             }
             int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            ++tensor_count;
+            if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > processed_tensor_storages.size())
+            {
+                //throttle progress printing
+                pretty_progress(tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            }
             t1 = t2;
         }

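The model-loader change above replaces a per-tensor pretty_progress call with a throttled one: progress is printed for the first tensor, every fifth tensor, and roughly the last ten, which keeps the console output short on checkpoints with thousands of tensors. A self-contained sketch of the same condition (all names are local to the sketch, not sd.cpp's):

// Standalone illustration of the throttling condition used in the hunk above.
#include <cstdio>
#include <cstddef>

int main() {
    const size_t total = 40;                 // pretend the model has 40 tensors
    size_t tensor_count = 0;
    for (size_t i = 0; i < total; ++i) {
        ++tensor_count;
        // print for the first tensor, then every 5th, then the final stretch of ~10
        if (tensor_count < 2 || tensor_count % 5 == 0 || (tensor_count + 10) > total) {
            std::printf("progress: %zu/%zu\n", tensor_count, total);
        }
    }
    return 0;
}
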
@@ -160,6 +160,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
     {
         printf("With Custom Clip-G Model: %s\n",clipg_filename.c_str());
     }
+    if(inputs.quant)
+    {
+        printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");
+    }

     //duplicated from expose.cpp
     int cl_parseinfo = inputs.clblast_info; //first digit is whether configured, second is platform, third is devices

@@ -149,7 +149,7 @@ public:
             if (i == 1) {
                 h = ggml_relu_inplace(ctx, h);
             } else {
-                h = ggml_upscale(ctx, h, 2);
+                h = ggml_upscale(ctx, h, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);
             }
             continue;
         }