diff --git a/common/common.cpp b/common/common.cpp index 43c8840ad..e7b8ab1d1 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1635,7 +1635,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() }); options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port }); options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() }); - options.push_back({ "server", " --embedding(s)", "enable embedding endpoint (default: %s)", params.embedding ? "enabled" : "disabled" }); + options.push_back({ "server", " --embedding(s)", "restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled" }); options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" }); options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" }); options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" }); diff --git a/examples/server/README.md b/examples/server/README.md index 33a2b95cc..de83ee7d0 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -247,7 +247,7 @@ server: --host HOST ip address to listen (default: 127.0.0.1) --port PORT port to listen (default: 8080) --path PATH path to serve static files from (default: ) - --embedding(s) enable embedding endpoint (default: disabled) + --embedding(s) restrict to only support embedding use case; use only with dedicated embedding models (default: disabled) --api-key KEY API key to use for authentication (default: none) --api-key-file FNAME path to file containing API keys (default: none) --ssl-key-file FNAME path to file a PEM-encoded SSL private key diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index ba6f53cda..2e0b335ee 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -312,6 +312,8 @@ class GGUFWriter: self.add_key_value(key, val, GGUFValueType.STRING) def add_array(self, key: str, val: Sequence[Any]) -> None: + if len(val) == 0: + return self.add_key_value(key, val, GGUFValueType.ARRAY) @staticmethod @@ -845,7 +847,14 @@ class GGUFWriter: encoded_val = val.encode("utf-8") if isinstance(val, str) else val kv_data += self._pack("Q", len(encoded_val)) kv_data += encoded_val - elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val: + elif vtype == GGUFValueType.ARRAY: + + if not isinstance(val, Sequence): + raise ValueError("Invalid GGUF metadata array, expecting sequence") + + if len(val) == 0: + raise ValueError("Invalid GGUF metadata array. Empty array") + if isinstance(val, bytes): ltype = GGUFValueType.UINT8 else: diff --git a/src/llama.cpp b/src/llama.cpp index f610a54dc..a4f628905 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -4988,6 +4988,7 @@ static void llm_load_hparams( hparams.attn_soft_cap = true; switch (hparams.n_layer) { + case 26: model.type = e_model::MODEL_2B; break; case 42: model.type = e_model::MODEL_9B; break; case 46: model.type = e_model::MODEL_27B; break; default: model.type = e_model::MODEL_UNKNOWN; @@ -11800,6 +11801,7 @@ struct llm_build_context { // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e switch (model.type) { + case e_model::MODEL_2B: case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break; case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break; default: GGML_ABORT("fatal error");