mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	Makefile
This commit is contained in:
commit
101efb66af
4 changed files with 14 additions and 3 deletions
|
@ -1635,7 +1635,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||||
options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
|
options.push_back({ "server", " --host HOST", "ip address to listen (default: %s)", params.hostname.c_str() });
|
||||||
options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });
|
options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });
|
||||||
options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() });
|
options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() });
|
||||||
options.push_back({ "server", " --embedding(s)", "enable embedding endpoint (default: %s)", params.embedding ? "enabled" : "disabled" });
|
options.push_back({ "server", " --embedding(s)", "restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled" });
|
||||||
options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" });
|
options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" });
|
||||||
options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" });
|
options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" });
|
||||||
options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" });
|
options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" });
|
||||||
|
|
|
@ -247,7 +247,7 @@ server:
|
||||||
--host HOST ip address to listen (default: 127.0.0.1)
|
--host HOST ip address to listen (default: 127.0.0.1)
|
||||||
--port PORT port to listen (default: 8080)
|
--port PORT port to listen (default: 8080)
|
||||||
--path PATH path to serve static files from (default: )
|
--path PATH path to serve static files from (default: )
|
||||||
--embedding(s) enable embedding endpoint (default: disabled)
|
--embedding(s) restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)
|
||||||
--api-key KEY API key to use for authentication (default: none)
|
--api-key KEY API key to use for authentication (default: none)
|
||||||
--api-key-file FNAME path to file containing API keys (default: none)
|
--api-key-file FNAME path to file containing API keys (default: none)
|
||||||
--ssl-key-file FNAME path to file a PEM-encoded SSL private key
|
--ssl-key-file FNAME path to file a PEM-encoded SSL private key
|
||||||
|
|
|
@ -312,6 +312,8 @@ class GGUFWriter:
|
||||||
self.add_key_value(key, val, GGUFValueType.STRING)
|
self.add_key_value(key, val, GGUFValueType.STRING)
|
||||||
|
|
||||||
def add_array(self, key: str, val: Sequence[Any]) -> None:
|
def add_array(self, key: str, val: Sequence[Any]) -> None:
|
||||||
|
if len(val) == 0:
|
||||||
|
return
|
||||||
self.add_key_value(key, val, GGUFValueType.ARRAY)
|
self.add_key_value(key, val, GGUFValueType.ARRAY)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -845,7 +847,14 @@ class GGUFWriter:
|
||||||
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
|
encoded_val = val.encode("utf-8") if isinstance(val, str) else val
|
||||||
kv_data += self._pack("Q", len(encoded_val))
|
kv_data += self._pack("Q", len(encoded_val))
|
||||||
kv_data += encoded_val
|
kv_data += encoded_val
|
||||||
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
|
elif vtype == GGUFValueType.ARRAY:
|
||||||
|
|
||||||
|
if not isinstance(val, Sequence):
|
||||||
|
raise ValueError("Invalid GGUF metadata array, expecting sequence")
|
||||||
|
|
||||||
|
if len(val) == 0:
|
||||||
|
raise ValueError("Invalid GGUF metadata array. Empty array")
|
||||||
|
|
||||||
if isinstance(val, bytes):
|
if isinstance(val, bytes):
|
||||||
ltype = GGUFValueType.UINT8
|
ltype = GGUFValueType.UINT8
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -4988,6 +4988,7 @@ static void llm_load_hparams(
|
||||||
hparams.attn_soft_cap = true;
|
hparams.attn_soft_cap = true;
|
||||||
|
|
||||||
switch (hparams.n_layer) {
|
switch (hparams.n_layer) {
|
||||||
|
case 26: model.type = e_model::MODEL_2B; break;
|
||||||
case 42: model.type = e_model::MODEL_9B; break;
|
case 42: model.type = e_model::MODEL_9B; break;
|
||||||
case 46: model.type = e_model::MODEL_27B; break;
|
case 46: model.type = e_model::MODEL_27B; break;
|
||||||
default: model.type = e_model::MODEL_UNKNOWN;
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
|
@ -11800,6 +11801,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
|
// ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
|
||||||
switch (model.type) {
|
switch (model.type) {
|
||||||
|
case e_model::MODEL_2B:
|
||||||
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
case e_model::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
|
||||||
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
case e_model::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
|
||||||
default: GGML_ABORT("fatal error");
|
default: GGML_ABORT("fatal error");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue