mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-14 02:49:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # Makefile # build.zig # common/CMakeLists.txt # common/common.cpp # tests/CMakeLists.txt
This commit is contained in:
commit
c80e24a96f
9 changed files with 115 additions and 18 deletions
|
@ -21,12 +21,12 @@ not have to be performed at all.
|
|||
### Running the example
|
||||
Download a Grit model:
|
||||
```console
|
||||
$ scripts/hf.sh --repo cohesionet/GritLM-7B_gguf --file gritlm-7b_q4_1.gguf
|
||||
$ scripts/hf.sh --repo cohesionet/GritLM-7B_gguf --file gritlm-7b_q4_1.gguf --outdir models
|
||||
```
|
||||
|
||||
Run the example using the downloaded model:
|
||||
```console
|
||||
$ ./gritlm -m gritlm-7b_q4_1.gguf
|
||||
$ ./gritlm -m models/gritlm-7b_q4_1.gguf
|
||||
|
||||
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "A purely peer-to-peer version of electronic cash w" is: 0.605
|
||||
Cosine similarity between "Bitcoin: A Peer-to-Peer Electronic Cash System" and "All text-based language problems can be reduced to" is: 0.103
|
||||
|
|
|
@ -304,10 +304,12 @@ These options help improve the performance and memory usage of the LLaMA models.
|
|||
|
||||
- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not restore the exact state of the session at the point it was saved, so even when you specify a particular seed, you are not guaranteed to get the same sequence of tokens as the original generation.
|
||||
|
||||
### Grammars
|
||||
### Grammars & JSON schemas
|
||||
|
||||
- `--grammar GRAMMAR`, `--grammar-file FILE`: Specify a grammar (defined inline or in a file) to constrain model output to a specific format. For example, you could force the model to output JSON or to speak only in emojis. See the [GBNF guide](../../grammars/README.md) for details on the syntax.
|
||||
|
||||
- `--json-schema SCHEMA`: Specify a [JSON schema](https://json-schema.org/) to constrain model output to (e.g. `{}` for any JSON object, or `{"items": {"type": "string", "minLength": 10, "maxLength": 100}, "minItems": 10}` for a JSON array of strings with size constraints). If a schema uses external `$ref`s, you should use `--grammar "$( python examples/json_schema_to_grammar.py myschema.json )"` instead.
|
||||
|
||||
### Quantization
|
||||
|
||||
For information about 4-bit quantization, which can significantly improve performance and reduce memory usage, please refer to llama.cpp's primary [README](../../README.md#prepare-and-quantize).
|
||||
|
|
|
@ -1853,12 +1853,20 @@ int main(int argc, char ** argv) {
|
|||
|
||||
const int32_t n_ctx = params.n_ctx;
|
||||
|
||||
if (n_ctx <= 0) {
|
||||
fprintf(stderr, "%s: perplexity tool requires '--ctx-size' > 0\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const bool ppl = !params.hellaswag && !params.winogrande && !params.multiple_choice && !params.kl_divergence;
|
||||
|
||||
if (ppl) {
|
||||
int n_seq = std::max(1, params.n_batch / n_ctx);
|
||||
int32_t n_kv = n_seq * n_ctx;
|
||||
const int32_t n_seq = std::max(1, params.n_batch / n_ctx);
|
||||
const int32_t n_kv = n_seq * n_ctx;
|
||||
|
||||
params.n_parallel = n_seq;
|
||||
params.n_ctx = n_kv;
|
||||
params.n_ctx = n_kv;
|
||||
|
||||
params.n_batch = std::min(params.n_batch, n_kv);
|
||||
} else {
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
|
|
|
@ -11,7 +11,7 @@ install(TARGETS ${TARGET} RUNTIME)
|
|||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE common json-schema-to-grammar ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (LLAMA_SERVER_SSL)
|
||||
find_package(OpenSSL REQUIRED)
|
||||
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue