diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 5bd8159ce..ebadff907 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -363,26 +363,26 @@ struct clip_ctx {
     if(enable_gpu_clip) {
 #ifdef GGML_USE_CUDA
-        backend = ggml_backend_cuda_init(0);
+        backend = ggml_backend_ptr(ggml_backend_cuda_init(0));
         LOG_INF("%s: CLIP using CUDA backend\n", __func__);
 #endif
 
 #ifdef GGML_USE_METAL
-        backend = ggml_backend_metal_init();
+        backend = ggml_backend_ptr(ggml_backend_metal_init());
         LOG_INF("%s: CLIP using Metal backend\n", __func__);
 #endif
 
 #ifdef GGML_USE_VULKAN
-        backend = ggml_backend_vk_init(0);
+        backend = ggml_backend_ptr(ggml_backend_vk_init(0));
         LOG_INF("%s: CLIP using Vulkan backend\n", __func__);
 #endif
     }
 
     if (!backend) {
-        backend = ggml_backend_cpu_init();
+        backend = ggml_backend_ptr(ggml_backend_cpu_init());
         LOG_INF("%s: CLIP using CPU backend\n", __func__);
     }
 
-    backend_ptrs.push_back(backend);
-    backend_buft.push_back(ggml_backend_get_default_buffer_type(backend));
+    backend_ptrs.push_back(backend.get());
+    backend_buft.push_back(ggml_backend_get_default_buffer_type(backend.get()));
 
     sched.reset(
         ggml_backend_sched_new(backend_ptrs.data(), backend_buft.data(), backend_ptrs.size(), 8192, false)
@@ -1228,7 +1228,7 @@ struct clip_model_loader {
         // print gguf info
         try {
-
+            std::string name;
             get_string(KEY_NAME, name, false);
             std::string description;
@@ -2950,8 +2950,8 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
             if (window_mask) ggml_backend_tensor_set(window_mask, mask.data(), 0, ggml_nbytes(window_mask));
         }
 
-    if (ggml_backend_is_cpu(ctx->backend)) {
-        ggml_backend_cpu_set_n_threads(ctx->backend, n_threads);
+    if (ggml_backend_is_cpu(ctx->backend.get())) {
+        ggml_backend_cpu_set_n_threads(ctx->backend.get(), n_threads);
     }
 
     auto status = ggml_backend_sched_graph_compute(ctx->sched.get(), gf);
diff --git a/otherarch/sdcpp/common.hpp b/otherarch/sdcpp/common.hpp
index 337b4a0c4..32250d763 100644
--- a/otherarch/sdcpp/common.hpp
+++ b/otherarch/sdcpp/common.hpp
@@ -56,7 +56,7 @@ public:
         // x: [N, channels, h, w]
         auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
 
-        x = ggml_upscale(ctx, x, 2);  // [N, channels, h*2, w*2]
+        x = ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);  // [N, channels, h*2, w*2]
         x = conv->forward(ctx, x);  // [N, out_channels, h*2, w*2]
         return x;
     }
diff --git a/otherarch/sdcpp/esrgan.hpp b/otherarch/sdcpp/esrgan.hpp
index 989d15fee..5fbc6c509 100644
--- a/otherarch/sdcpp/esrgan.hpp
+++ b/otherarch/sdcpp/esrgan.hpp
@@ -130,8 +130,8 @@ public:
         body_feat = conv_body->forward(ctx, body_feat);
         feat = ggml_add(ctx, feat, body_feat);
         // upsample
-        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2)));
-        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2)));
+        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
+        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
         auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
         return out;
     }
diff --git a/otherarch/sdcpp/ggml_extend.hpp b/otherarch/sdcpp/ggml_extend.hpp
index 9b1fd13ec..5ff42122e 100644
--- a/otherarch/sdcpp/ggml_extend.hpp
+++ b/otherarch/sdcpp/ggml_extend.hpp
@@ -113,7 +113,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                         a->ne[0] * b->ne[0],
                         a->ne[1] * b->ne[1],
                         a->ne[2] * b->ne[2],
-                        a->ne[3] * b->ne[3]),
+                        a->ne[3] * b->ne[3],
+                        ggml_scale_mode::GGML_SCALE_MODE_NEAREST),
                     b);
 }
diff --git a/otherarch/sdcpp/model.cpp b/otherarch/sdcpp/model.cpp
index 496cab8e5..9e62bdbcc 100644
--- a/otherarch/sdcpp/model.cpp
+++ b/otherarch/sdcpp/model.cpp
@@ -1749,7 +1749,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
-        LOG_DEBUG("loading tensors from %s", file_path.c_str());
+        LOG_DEBUG("loading tensors from %s\n", file_path.c_str());
 
         std::ifstream file(file_path, std::ios::binary);
         if (!file.is_open()) {
@@ -1886,7 +1886,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 }
             }
             int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            ++tensor_count;
+            if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > processed_tensor_storages.size())
+            {
+                //throttle progress printing
+                pretty_progress(tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
+            }
             t1 = t2;
         }
diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp
index b74b64fd1..d74c2d27c 100644
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@@ -160,6 +160,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
     {
         printf("With Custom Clip-G Model: %s\n",clipg_filename.c_str());
     }
+    if(inputs.quant)
+    {
+        printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");
+    }
 
     //duplicated from expose.cpp
     int cl_parseinfo = inputs.clblast_info; //first digit is whether configured, second is platform, third is devices
diff --git a/otherarch/sdcpp/tae.hpp b/otherarch/sdcpp/tae.hpp
index c458b87d2..4c822eaf9 100644
--- a/otherarch/sdcpp/tae.hpp
+++ b/otherarch/sdcpp/tae.hpp
@@ -149,7 +149,7 @@ public:
             if (i == 1) {
                 h = ggml_relu_inplace(ctx, h);
            } else {
-                h = ggml_upscale(ctx, h, 2);
+                h = ggml_upscale(ctx, h, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);
             }
             continue;
         }
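
In the clip.cpp hunks, the context's backend handle moves from a raw ggml_backend_t to a ggml_backend_ptr, the RAII alias declared in ggml's ggml-cpp.h (a std::unique_ptr whose deleter calls ggml_backend_free). The backend is then released automatically when the clip_ctx is destroyed, and call sites that still need the raw C handle use .get(); the member declaration change itself sits outside the visible hunks. A minimal sketch of the ownership pattern, assuming the ggml-cpp.h wrapper; backend_holder is an illustrative stand-in for clip_ctx, not code from the patch:

    #include "ggml-cpp.h"   // ggml_backend_ptr: unique_ptr that frees via ggml_backend_free
    #include "ggml-cpu.h"   // ggml_backend_cpu_init (header layout varies across ggml versions)

    struct backend_holder {
        ggml_backend_ptr backend;  // owning handle; freed automatically on destruction

        backend_holder() {
            // wrap the C factory result so ownership is explicit
            backend = ggml_backend_ptr(ggml_backend_cpu_init());
            // C APIs still take the raw ggml_backend_t, hence .get()
            ggml_backend_buffer_type_t buft =
                ggml_backend_get_default_buffer_type(backend.get());
            (void) buft;
        }
    };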
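The recurring ggml_upscale edits (common.hpp, esrgan.hpp, tae.hpp) all track the same upstream ggml API change: ggml_upscale now takes an explicit ggml_scale_mode argument, and passing GGML_SCALE_MODE_NEAREST preserves the previously implicit nearest-neighbor behavior. A minimal before/after sketch of the call-site migration; the helper name upscale2x is illustrative, not from the patch:

    #include "ggml.h"

    // Old API: interpolation mode was implicit (always nearest-neighbor):
    //     x = ggml_upscale(ctx, x, 2);
    // New API: the mode is an explicit enum; GGML_SCALE_MODE_NEAREST keeps the
    // old behavior, GGML_SCALE_MODE_BILINEAR is the other defined mode.
    static ggml_tensor * upscale2x(ggml_context * ctx, ggml_tensor * x) {
        return ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);
    }

The ggml_extend.hpp hunk applies the same change in ggml_kronecker, where the target size is given per dimension (a->ne[i] * b->ne[i]) and the mode becomes the final argument.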
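The model.cpp progress change throttles pretty_progress instead of printing once per tensor: with the new condition, output is emitted for the first tensor (tensor_count < 2), every fifth tensor (tensor_count % 5 == 0), and each of the final ten (tensor_count + 10 > total). For a 100-tensor load that means prints at 1, 5, 10, ..., 90, then every tensor from 91 through 100, keeping the bar responsive near completion while cutting log spam in the middle.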