Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	CMakePresets.json
#	ggml/src/ggml-cuda/CMakeLists.txt
#	tests/test-sampling.cpp
#	tools/mtmd/clip.cpp
Concedo 2025-05-07 19:47:44 +08:00
commit 38b3bffcef
3 changed files with 1369 additions and 1316 deletions

convert_hf_to_gguf.py

@@ -3915,6 +3915,16 @@ class Gemma3VisionModel(VisionModel):
         # default values below are taken from HF transformers code
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6))
         self.gguf_writer.add_vision_use_gelu(True)
+        # calculate proj_scale_factor (used by tinygemma3 test model)
+        image_seq_length = self.preprocessor_config.get("image_seq_length", 256)
+        n_per_side = int(image_seq_length ** 0.5)
+        image_size = self.hparams["image_size"]
+        patch_size = self.hparams["patch_size"]
+        proj_scale_factor = (image_size // patch_size) // n_per_side
+        if proj_scale_factor > 0 and proj_scale_factor != 4:
+            # we only need to write this if it's not the default value
+            # in this case, we are converting a test model
+            self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)

     def tensor_force_quant(self, name, new_name, bid, n_dims):
         del bid, new_name, n_dims  # unused
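The proj_scale_factor arithmetic above is easier to follow with concrete numbers. A minimal sketch, assuming the commonly published Gemma 3 vision defaults (image_size=896, patch_size=14, image_seq_length=256 — illustrative values, not taken from this diff):

```python
# Illustrative values only (assumed Gemma 3 defaults, not from this commit).
image_seq_length = 256                       # pooled image tokens
n_per_side = int(image_seq_length ** 0.5)    # 16 pooled tokens per side
image_size = 896                             # input resolution
patch_size = 14                              # ViT patch size

patches_per_side = image_size // patch_size          # 896 // 14 = 64
proj_scale_factor = patches_per_side // n_per_side   # 64 // 16 = 4
print(proj_scale_factor)                             # -> 4
```

With these defaults the result is 4, exactly the value the converter treats as the default and skips writing; only a differently sized test model such as tinygemma3 yields another non-zero value and triggers add_vision_projector_scale_factor.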
@@ -3928,6 +3938,9 @@ class Gemma3VisionModel(VisionModel):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused

+        if "vision_model.head." in name:
+            return [] # skip redundant tensors for tinygemma3
+
         if name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
                 or name.startswith("multimodal_projector.") or name.startswith("vision_model."):
             # process vision tensors
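In isolation, the new skip plus the existing prefix routing behaves roughly like the sketch below (tensor names are made up for illustration; route is a hypothetical helper, not part of the converter):

```python
# Sketch of the tensor-name routing in the hunk above; names are invented.
VISION_PREFIXES = ("multi_modal_projector.", "vision_tower.",
                   "multimodal_projector.", "vision_model.")

def route(name: str) -> str:
    if "vision_model.head." in name:
        return "skip"      # redundant head tensors (tinygemma3)
    if name.startswith(VISION_PREFIXES):   # str.startswith accepts a tuple
        return "vision"    # handled by the vision-tensor branch
    return "language"      # falls through to the LLM tensor path

for n in ("vision_model.head.attention.weight",
          "vision_tower.encoder.layers.0.mlp.fc1.weight",
          "model.layers.0.self_attn.q_proj.weight"):
    print(n, "->", route(n))  # skip / vision / language
```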

src/llama-sampling.cpp

@@ -1750,27 +1750,35 @@ static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler *
 static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;

-    if (ctx->n < 0.0f) {
+    if (ctx->n <= 0.0f || cur_p->size <= 1) {
         return;
     }

     // find max logit and calculate mean
     float max = cur_p->data[0].logit;
     float logits_sum = 0;
+    size_t valid_count = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (cur_p->data[i].logit > max) {
-            max = cur_p->data[i].logit;
+        // Only count non-negative infinity values
+        if (cur_p->data[i].logit != -INFINITY) {
+            if (cur_p->data[i].logit > max) {
+                max = cur_p->data[i].logit;
+            }
+            logits_sum += cur_p->data[i].logit;
+            valid_count++;
         }
-        logits_sum += cur_p->data[i].logit;
     }
-    float mean = logits_sum/cur_p->size;
+    float mean = valid_count > 0 ? logits_sum/valid_count : 0;

     // calculate standard deviation
     float acc = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        acc += pow(cur_p->data[i].logit - mean, 2);
+        // Skip -infinity in std calculation
+        if (cur_p->data[i].logit != -INFINITY) {
+            acc += pow(cur_p->data[i].logit - mean, 2);
+        }
     }
-    float std = sqrt(acc/cur_p->size);
+    float std = valid_count > 0 ? sqrt(acc/valid_count) : 0;

     //apply mask
     for (size_t i = 0; i < cur_p->size; ++i) {
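The intent of the C++ change is easier to see as a standalone sketch. A minimal Python rendering of top-n-sigma with the corrected -infinity handling (assuming the usual masking rule, logit below max - n*std becomes -inf; this mirrors, but is not, the library code):

```python
import math

NEG_INF = float("-inf")

def top_n_sigma(logits, n):
    # Corrected behaviour: statistics ignore tokens already masked to -inf.
    if n <= 0.0 or len(logits) <= 1:
        return logits
    valid = [x for x in logits if x != NEG_INF]
    if not valid:
        return logits
    mx = max(valid)
    mean = sum(valid) / len(valid)
    std = math.sqrt(sum((x - mean) ** 2 for x in valid) / len(valid))
    # mask everything more than n standard deviations below the max
    return [x if x == NEG_INF or x >= mx - n * std else NEG_INF
            for x in logits]

# A single pre-masked -inf entry no longer drags mean/std toward -inf:
print(top_n_sigma([2.0, 1.5, 0.2, NEG_INF], n=1.0))
# -> [2.0, 1.5, -inf, -inf]
```

Before the fix, any -INFINITY logit (for example one left by an earlier sampler in the chain) was summed into the mean, making mean and std ill-defined and effectively disabling the filter.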

File diff suppressed because it is too large