mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	Makefile
This commit is contained in:
commit
101efb66af
4 changed files with 14 additions and 3 deletions
|
@@ -1635,7 +1635,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
|||
options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
|
||||
options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });
|
||||
options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() });
|
||||
options.push_back({ "server", " --embedding(s)", "enable embedding endpoint (default: %s)", params.embedding ? "enabled" : "disabled" });
|
||||
options.push_back({ "server", " --embedding(s)", "restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled" });
|
||||
options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" });
|
||||
options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" });
|
||||
options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" });
|
||||
|
|
|
@@ -247,7 +247,7 @@ server:
|
|||
--host HOST ip address to listen (default: 127.0.0.1)
|
||||
--port PORT port to listen (default: 8080)
|
||||
--path PATH path to serve static files from (default: )
|
||||
--embedding(s) enable embedding endpoint (default: disabled)
|
||||
--embedding(s) restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)
|
||||
--api-key KEY API key to use for authentication (default: none)
|
||||
--api-key-file FNAME path to file containing API keys (default: none)
|
||||
--ssl-key-file FNAME path to file a PEM-encoded SSL private key
|
||||
|
|
|
@@ -312,6 +312,8 @@ class GGUFWriter:
|
|||
self.add_key_value(key, val, GGUFValueType.STRING)
|
||||
|
||||
def add_array(self, key: str, val: Sequence[Any]) -> None:
|
||||
if len(val) == 0:
|
||||
return
|
||||
self.add_key_value(key, val, GGUFValueType.ARRAY)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -845,7 +847,14 @@ class GGUFWriter:
|
|||
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
|
||||
kv_data += self._pack("Q", len(encoded_val))
|
||||
kv_data += encoded_val
|
||||
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
|
||||
elif vtype == GGUFValueType.ARRAY:
|
||||
|
||||
if not isinstance(val, Sequence):
|
||||
raise ValueError("Invalid GGUF metadata array, expecting sequence")
|
||||
|
||||
if len(val) == 0:
|
||||
raise ValueError("Invalid GGUF metadata array. Empty array")
|
||||
|
||||
if isinstance(val, bytes):
|
||||
ltype = GGUFValueType.UINT8
|
||||
else:
|
||||
|
|
|
@@ -4988,6 +4988,7 @@ static void llm_load_hparams(
|
|||
hparams.attn_soft_cap = true;
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 26: model.type = e_model::MODEL_2B; break;
|
||||
case 42: model.type = e_model::MODEL_9B; break;
|
||||
case 46: model.type = e_model::MODEL_27B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
|
@@ -11800,6 +11801,7 @@ struct llm_build_context {
|
|||
|
||||
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
|
||||
switch (model.type) {
|
||||
case e_model::MODEL_2B:
|
||||
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
||||
default: GGML_ABORT("fatal error");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue