diff --git a/tools/server/README.md b/tools/server/README.md index aa07f1ef5..f3f4caed8 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -644,6 +644,15 @@ The same as [the embedding example](../embedding) does. `image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `content`. You can determine the place of the image in the content as in the following: `Image: [img-21].\nCaption: This is a picture of a house`. In this case, `[img-21]` will be replaced by the embeddings of the image with id `21` in the following `image_data` array: `{..., "image_data": [{"data": "", "id": 21}]}`. Use `image_data` only with multimodal models, e.g., LLaVA. +`embd_normalize`: Normalization applied to pooled embeddings (default: `2`). It is ignored when the pooling type is `none`. Can be one of the following values: +``` + -1: No normalization + 0: Max absolute int16 + 1: Taxicab/L1 + 2: Euclidean/L2 + >2: P-Norm +``` + ### POST `/reranking`: Rerank documents according to a given query Similar to https://jina.ai/reranker/ but might change in the future. 
diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 022b5d0b3..2e4c40af7 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -138,6 +138,9 @@ struct slot_params { std::string oaicompat_cmpl_id; common_chat_syntax oaicompat_chat_syntax; + // Embeddings + int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm) + json to_json() const { std::vector samplers; samplers.reserve(sampling.samplers.size()); @@ -2601,7 +2604,7 @@ struct server_context { // normalize only when there is pooling if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) { - common_embd_normalize(embd, embd_res.data(), n_embd, 2); + common_embd_normalize(embd, embd_res.data(), n_embd, slot.params.embd_normalize); res->embedding.push_back(embd_res); break; } else { @@ -4614,6 +4617,14 @@ int main(int argc, char ** argv) { } } + int embd_normalize = 2; // default to Euclidean/L2 norm + if (body.count("embd_normalize") != 0) { + embd_normalize = body.at("embd_normalize"); + if (llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) { + SRV_DBG("embd_normalize is not supported by pooling type %d, ignoring it\n", llama_pooling_type(ctx_server.ctx)); + } + } + // create and queue the task json responses = json::array(); bool error = false; @@ -4629,6 +4640,7 @@ int main(int argc, char ** argv) { // OAI-compat task.params.oaicompat = oaicompat; + task.params.embd_normalize = embd_normalize; tasks.push_back(std::move(task)); }