Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-11 01:24:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#   CMakePresets.json
#   ggml/src/ggml-cuda/CMakeLists.txt
#   tests/test-sampling.cpp
#   tools/mtmd/clip.cpp
commit 38b3bffcef
3 changed files with 1369 additions and 1316 deletions
@@ -3915,6 +3915,16 @@ class Gemma3VisionModel(VisionModel):
         # default values below are taken from HF transformers code
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
         self.gguf_writer.add_vision_use_gelu(True)
+        # calculate proj_scale_factor (used by tinygemma3 test model)
+        image_seq_length = self.preprocessor_config.get("image_seq_length", 256)
+        n_per_side = int(image_seq_length ** 0.5)
+        image_size = self.hparams["image_size"]
+        patch_size = self.hparams["patch_size"]
+        proj_scale_factor = (image_size // patch_size) // n_per_side
+        if proj_scale_factor > 0 and proj_scale_factor != 4:
+            # we only need to write this if it's not the default value
+            # in this case, we are converting a test model
+            self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)

     def tensor_force_quant(self, name, new_name, bid, n_dims):
         del bid, new_name, n_dims  # unused
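As a sanity check on the arithmetic in the hunk above, here is a minimal standalone sketch. The concrete numbers (image_size 896, patch_size 14, image_seq_length 256) are Gemma 3's stock vision settings and are assumed here; they do not appear in the diff itself:

image_seq_length = 256
n_per_side = int(image_seq_length ** 0.5)    # 16 output tokens per side
image_size, patch_size = 896, 14             # 896 // 14 = 64 patches per side
proj_scale_factor = (image_size // patch_size) // n_per_side   # 64 // 16 = 4
# 4 is the default, so the converter writes nothing for a stock model; a
# tinygemma3-style test model with a smaller patch grid yields a different
# factor, which then has to be recorded in the GGUF metadata.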
@@ -3928,6 +3938,9 @@ class Gemma3VisionModel(VisionModel):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused

+        if "vision_model.head." in name:
+            return []  # skip redundant tensors for tinygemma3
+
         if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
                 or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
             # process vision tensors
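A quick illustration of the new filter, with hypothetical tensor names (the names below are made up for the example; only the "vision_model.head." prefix check comes from the diff):

names = [
    "vision_model.head.attention.in_proj_weight",             # dropped as redundant
    "vision_model.encoder.layers.0.self_attn.q_proj.weight",  # kept
    "multi_modal_projector.mm_input_projection_weight",       # kept
]
kept = [n for n in names if "vision_model.head." not in n]
assert len(kept) == 2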
@@ -1750,27 +1750,35 @@ static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler *
 static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;

-    if (ctx->n < 0.0f) {
+    if (ctx->n <= 0.0f || cur_p->size <= 1) {
         return;
     }

     // find max logit and calculate mean
     float max = cur_p->data[0].logit;
     float logits_sum = 0;
+    size_t valid_count = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (cur_p->data[i].logit > max) {
-            max = cur_p->data[i].logit;
+        // Only count non-negative infinity values
+        if (cur_p->data[i].logit != -INFINITY) {
+            if (cur_p->data[i].logit > max) {
+                max = cur_p->data[i].logit;
+            }
+            logits_sum += cur_p->data[i].logit;
+            valid_count++;
         }
-        logits_sum += cur_p->data[i].logit;
     }
-    float mean = logits_sum/cur_p->size;
+    float mean = valid_count > 0 ? logits_sum/valid_count : 0;

     // calculate standard deviation
     float acc = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        acc += pow(cur_p->data[i].logit - mean, 2);
+        // Skip -infinity in std calculation
+        if (cur_p->data[i].logit != -INFINITY) {
+            acc += pow(cur_p->data[i].logit - mean, 2);
+        }
     }
-    float std = sqrt(acc/cur_p->size);
+    float std = valid_count > 0 ? sqrt(acc/valid_count) : 0;

     //apply mask
     for (size_t i = 0; i < cur_p->size; ++i) {
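The hunk cuts off before the mask step, but the intent of the fix is clear: the mean and standard deviation must be computed only over tokens whose logits are finite, otherwise a single -INFINITY entry (e.g. left behind by an earlier sampler in the chain) drags both statistics to -inf and the mask degenerates. A minimal Python sketch of the corrected logic, assuming the truncated mask step sets every logit below max - n*std to -inf:

import math

def top_n_sigma(logits: list[float], n: float) -> list[float]:
    # follows the C++ above: ignore -inf entries when computing max/mean/std
    if n <= 0.0 or len(logits) <= 1:
        return logits
    valid = [l for l in logits if l != float("-inf")]
    if not valid:
        return logits
    mx = max(valid)
    mean = sum(valid) / len(valid)
    std = math.sqrt(sum((l - mean) ** 2 for l in valid) / len(valid))
    # mask: keep only logits within n standard deviations of the max
    return [l if l >= mx - n * std else float("-inf") for l in logits]

For example, top_n_sigma([5.0, 4.9, 0.1, float("-inf")], 1.0) keeps the two near-max logits, masks 0.1, and leaves the already-masked entry untouched, which is exactly the behavior the valid_count change restores.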
tools/mtmd/clip.cpp: 2542 changed lines (file diff suppressed because it is too large)