server : add support for embd_normalize parameter (#14964)

This commit adds support for the `embd_normalize` parameter in the
server code.

The motivation for this is that currently if the server is started with
a pooling type that is not `none`, then Euclidean/L2 normalization will
be the normalization method used for embeddings. However, this is not
always the desired behavior, and users may want to use other
normalization (or none) and this commit allows that.

Example usage:
```console
curl --request POST \
    --url http://localhost:8080/embedding \
    --header "Content-Type: application/json" \
    --data '{"input": "Hello world today", "embd_normalize": -1}
```
This commit is contained in:
Daniel Bevenius 2025-07-30 18:07:11 +02:00 committed by GitHub
parent ad4a700117
commit 41e78c567e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 22 additions and 1 deletions

View file

@ -138,6 +138,9 @@ struct slot_params {
std::string oaicompat_cmpl_id;
common_chat_syntax oaicompat_chat_syntax;
// Embeddings
int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
json to_json() const {
std::vector<std::string> samplers;
samplers.reserve(sampling.samplers.size());
@ -2601,7 +2604,7 @@ struct server_context {
// normalize only when there is pooling
if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) {
common_embd_normalize(embd, embd_res.data(), n_embd, 2);
common_embd_normalize(embd, embd_res.data(), n_embd, slot.params.embd_normalize);
res->embedding.push_back(embd_res);
break;
} else {
@ -4614,6 +4617,14 @@ int main(int argc, char ** argv) {
}
}
int embd_normalize = 2; // default to Euclidean/L2 norm
if (body.count("embd_normalize") != 0) {
embd_normalize = body.at("embd_normalize");
if (llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) {
SRV_DBG("embd_normalize is not supported by pooling type %d, ignoring it\n", llama_pooling_type(ctx_server.ctx));
}
}
// create and queue the task
json responses = json::array();
bool error = false;
@ -4629,6 +4640,7 @@ int main(int argc, char ** argv) {
// OAI-compat
task.params.oaicompat = oaicompat;
task.params.embd_normalize = embd_normalize;
tasks.push_back(std::move(task));
}