Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	tests/test-grad0.cpp
#	tests/test-quantize-perf.cpp

Commit c2c238b4f3 · 20 changed files with 44 additions and 38 deletions
@@ -739,7 +739,7 @@ bool clip_image_preprocess(const clip_ctx * ctx, const clip_image_u8 * img, clip
         temp->ny = longer_side;
         temp->size = 3 * longer_side * longer_side;
         temp->data = new uint8_t[temp->size]();
-        uint8_t bc[3] = {122, 116, 104}; // bakground color in RGB from LLaVA
+        uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA

         // fill with background color
         for (size_t i = 0; i < temp->size; i++) {
@@ -51,7 +51,7 @@ def bytes_to_unicode():
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-    This is a signficant percentage of your normal, say, 32K bpe vocab.
+    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    And avoids mapping to whitespace/control characters the bpe code barfs on.
    """
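For context, the docstring above describes the standard GPT-2 byte-to-unicode table. A sketch of how such a table is typically built (illustrative; not necessarily this file's exact body):

def bytes_to_unicode():
    # Printable bytes keep their own code point; the remaining bytes
    # (whitespace/control characters) are remapped above 255 so every
    # byte corresponds to a visible character the bpe code can handle.
    bs = list(range(ord("!"), ord("~") + 1)) + \
         list(range(ord("\xa1"), ord("\xac") + 1)) + \
         list(range(ord("\xae"), ord("\xff") + 1))
    cs = bs[:]
    n = 0
    for b in range(256):
        if b not in bs:
            bs.append(b)
            cs.append(256 + n)
            n += 1
    return dict(zip(bs, (chr(c) for c in cs)))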
@@ -1,6 +1,6 @@
 # llama.cpp/examples/lookahead

-Demonstartion of lookahead decoding technique:
+Demonstration of lookahead decoding technique:

 https://lmsys.org/blog/2023-11-21-lookahead-decoding/
@@ -222,7 +222,7 @@ node index.js

 `content`: Set the text to process.

-**POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.
+- **POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.

 *Options:*
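A minimal sketch of calling the /infill endpoint documented above, assuming the server's default port and the input_prefix/input_suffix request fields; adjust both to your deployment:

import json
import urllib.request

# Hypothetical example payload; field names assumed from the server docs.
payload = {
    "input_prefix": "def helloworld():\n    print(\"hell",
    "input_suffix": "\n    print(\"goodbye world\")\n",
    "n_predict": 32,
}
req = urllib.request.Request(
    "http://localhost:8080/infill",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
# Without "stream": true the server answers with a single JSON object.
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["content"])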
@@ -11227,7 +11227,7 @@ class binary_reader
            }
            if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
            {
-                return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
+                return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr));
            }
            std::vector<size_t> dim;
            if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))
@@ -114,7 +114,7 @@ export async function* llama(prompt, params = {}, config = {}) {
    return content;
 }

-// Call llama, return an event target that you can subcribe to
+// Call llama, return an event target that you can subscribe to
 //
 // Example:
 //
@@ -223,7 +223,7 @@
      repeat_last_n: 256, // 0 = disable penalty, -1 = context size
      repeat_penalty: 1.18, // 1.0 = disabled
      top_k: 40, // <= 0 to use vocab size
-      top_p: 0.5, // 1.0 = disabled
+      top_p: 0.95, // 1.0 = disabled
      min_p: 0.05, // 0 = disabled
      tfs_z: 1.0, // 1.0 = disabled
      typical_p: 1.0, // 1.0 = disabled
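The top_p default here moves from 0.5 to 0.95, loosening nucleus sampling. As an illustrative sketch of what that parameter controls (not the server's actual sampler code):

def top_p_filter(probs, top_p=0.8):
    # Keep the smallest set of highest-probability tokens whose cumulative
    # probability reaches top_p, then renormalize what is kept.
    ranked = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
    kept, total = [], 0.0
    for tok, p in ranked:
        kept.append((tok, p))
        total += p
        if total >= top_p:
            break
    return {tok: p / total for tok, p in kept}

print(top_p_filter({"a": 0.6, "b": 0.25, "c": 0.1, "d": 0.05}, top_p=0.8))
# -> {'a': 0.705..., 'b': 0.294...}: "c" and "d" fall outside the nucleus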
@@ -238,7 +238,7 @@
      cache_prompt: true
    })

-    /* START: Support for storing prompt templates and parameters in borwser LocalStorage */
+    /* START: Support for storing prompt templates and parameters in browsers LocalStorage */

    const local_storage_storageKey = "llamacpp_server_local_storage";
@@ -282,7 +282,7 @@
    let importedTemplates = local_storage_getDataAsObject('user_templates')

    if (importedTemplates) {
-      // saved templates were successfuly imported.
+      // saved templates were successfully imported.

      console.log('Processing saved templates and updating default template')
      params.value = { ...params.value, image_data: [] };
@@ -303,7 +303,7 @@
    }

    function userTemplateResetToDefault() {
-      console.log('Reseting themplate to default')
+      console.log('Resetting template to default')
      selectedUserTemplate.value.name = 'default';
      selectedUserTemplate.value.data = savedUserTemplates.value['default'];
    }
@@ -762,7 +762,7 @@

          <fieldset class="two">
            ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
-            ${FloatField({ label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
+            ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
            ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
            ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
            ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
@@ -2383,6 +2383,7 @@ json oaicompat_completion_params_parse(
    llama_params["__oaicompat"] = true;

    // Map OpenAI parameters to llama.cpp parameters
    llama_params["model"]        = json_value(body, "model", std::string("uknown"));
    llama_params["prompt"]       = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
+    llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
    llama_params["temperature"]  = json_value(body, "temperature", 0.8);
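format_chatml is the server-side helper that flattens the OpenAI-style 'messages' array into a single prompt string. A Python sketch of the ChatML layout it targets (template assumed from the standard ChatML format; the C++ helper may differ in details):

def format_chatml(messages):
    # Each message becomes an <|im_start|>role ... <|im_end|> block;
    # a trailing open assistant block cues the model to respond.
    out = []
    for m in messages:
        out.append(f"<|im_start|>{m.get('role', 'user')}\n"
                   f"{m.get('content', '')}<|im_end|>\n")
    out.append("<|im_start|>assistant\n")
    return "".join(out)

print(format_chatml([
    {"role": "system", "content": "You are helpful."},
    {"role": "user", "content": "Hi!"},
]))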
@@ -1,6 +1,6 @@
 # llama.cpp/examples/speculative

-Demonstartion of speculative decoding and tree-based speculative decoding techniques
+Demonstration of speculative decoding and tree-based speculative decoding techniques

 More info:
@@ -430,7 +430,7 @@ int main(int argc, char ** argv) {
                ++n_past_tgt;
            }

-            // the first token is always proposed by the traget model before the speculation loop so we erase it here
+            // the first token is always proposed by the target model before the speculation loop so we erase it here
            for (int s = 0; s < n_seq_dft; ++s) {
                if (!drafts[s].active) {
                    continue;
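For orientation, a toy sketch of the speculative step this comment refers to: the draft model proposes a run of tokens, the target model verifies them and always contributes the next token itself, which is why the target-proposed token is erased from the draft bookkeeping. Illustrative only, not the example's actual implementation:

def speculative_step(target_next, draft_next, ctx, n_draft=8):
    # The cheap draft model proposes a run of tokens autoregressively.
    proposal = []
    for _ in range(n_draft):
        proposal.append(draft_next(ctx + proposal))
    # The target model verifies the run: the longest agreeing prefix is
    # accepted, then the target contributes the next token itself; that
    # token was not proposed by the draft, hence the erasure above.
    accepted = []
    for tok in proposal:
        if target_next(ctx + accepted) != tok:
            break
        accepted.append(tok)
    accepted.append(target_next(ctx + accepted))
    return accepted

# Toy models that "generate" a fixed string one character per token.
text = list("hello world")
next_tok = lambda ctx: text[len(ctx)] if len(ctx) < len(text) else "<eos>"
print(speculative_step(next_tok, next_tok, []))  # drafted prefix + 1 token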