Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	tests/test-grad0.cpp
#	tests/test-quantize-perf.cpp
Concedo 2023-12-13 14:49:03 +08:00
commit c2c238b4f3
20 changed files with 44 additions and 38 deletions


@@ -739,7 +739,7 @@ bool clip_image_preprocess(const clip_ctx * ctx, const clip_image_u8 * img, clip
temp->ny = longer_side;
temp->size = 3 * longer_side * longer_side;
temp->data = new uint8_t[temp->size]();
-uint8_t bc[3] = {122, 116, 104}; // bakground color in RGB from LLaVA
+uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA
// fill with background color
for (size_t i = 0; i < temp->size; i++) {
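The hunk above pads a non-square image to a square canvas that is pre-filled with LLaVA's mean background color before the source pixels are copied in. A minimal Python sketch of that step (illustrative only; the real code is C++ in clip.cpp, and the paste position here is an assumption):

```python
# Sketch of the pad-to-square preprocessing above, using Pillow.
# (122, 116, 104) is the LLaVA background color from the hunk.
from PIL import Image

def pad_to_square(img: Image.Image) -> Image.Image:
    longer_side = max(img.size)
    # Allocate a square canvas pre-filled with the background color,
    # mirroring the "fill with background color" loop above.
    canvas = Image.new("RGB", (longer_side, longer_side), (122, 116, 104))
    # Copy the source image onto the canvas (top-left placement is an
    # assumption for illustration; clip.cpp may position differently).
    canvas.paste(img, (0, 0))
    return canvas
```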


@@ -51,7 +51,7 @@ def bytes_to_unicode():
The reversible bpe codes work on unicode strings.
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
-This is a signficant percentage of your normal, say, 32K bpe vocab.
+This is a significant percentage of your normal, say, 32K bpe vocab.
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
And avoids mapping to whitespace/control characters the bpe code barfs on.
"""


@@ -1,6 +1,6 @@
# llama.cpp/examples/lookahead
-Demonstartion of lookahead decoding technique:
+Demonstration of lookahead decoding technique:
https://lmsys.org/blog/2023-11-21-lookahead-decoding/


@@ -222,7 +222,7 @@ node index.js
`content`: Set the text to process.
-**POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.
+- **POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.
*Options:*
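As a quick illustration of the endpoint above, a hedged Python sketch of an infill request (the `input_prefix`/`input_suffix` field names follow this README's documented options, and the local server address is an assumption):

```python
# Illustrative call to the server's /infill endpoint; assumes a
# server is already running locally (e.g. on the default port 8080).
import requests

resp = requests.post(
    "http://localhost:8080/infill",
    json={
        "input_prefix": "def add(a, b):\n    ",   # code before the gap
        "input_suffix": "\n    return result\n",  # code after the gap
        "n_predict": 32,                          # cap on generated tokens
    },
)
print(resp.json()["content"])
```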


@@ -11227,7 +11227,7 @@ class binary_reader
}
if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
{
-return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
+return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr));
}
std::vector<size_t> dim;
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))


@@ -114,7 +114,7 @@ export async function* llama(prompt, params = {}, config = {}) {
return content;
}
-// Call llama, return an event target that you can subcribe to
+// Call llama, return an event target that you can subscribe to
//
// Example:
//


@@ -223,7 +223,7 @@
repeat_last_n: 256, // 0 = disable penalty, -1 = context size
repeat_penalty: 1.18, // 1.0 = disabled
top_k: 40, // <= 0 to use vocab size
-top_p: 0.5, // 1.0 = disabled
+top_p: 0.95, // 1.0 = disabled
min_p: 0.05, // 0 = disabled
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
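For readers tuning the defaults above, here is a toy Python illustration of what the `top_k` / `top_p` / `min_p` cutoffs mean for a next-token distribution (a sketch of the sampling semantics only, not the server's actual sampler code; the probabilities are made up):

```python
# Toy next-token distribution, sorted by probability.
probs = {"the": 0.50, "a": 0.30, "an": 0.15, "zebra": 0.05}

def top_k(p, k=40):
    # Keep only the k most probable tokens.
    return dict(sorted(p.items(), key=lambda kv: -kv[1])[:k])

def top_p(p, top_p=0.95):
    # Keep the smallest high-probability prefix whose mass >= top_p.
    kept, total = {}, 0.0
    for tok, pr in sorted(p.items(), key=lambda kv: -kv[1]):
        kept[tok] = pr
        total += pr
        if total >= top_p:
            break
    return kept

def min_p(p, min_p=0.05):
    # Drop tokens less probable than min_p times the top token.
    cutoff = min_p * max(p.values())
    return {t: pr for t, pr in p.items() if pr >= cutoff}

print(top_p(probs))  # {'the': 0.5, 'a': 0.3, 'an': 0.15}
print(min_p(probs))  # keeps everything >= 0.05 * 0.5 = 0.025
```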
@@ -238,7 +238,7 @@
cache_prompt: true
})
-/* START: Support for storing prompt templates and parameters in borwser LocalStorage */
+/* START: Support for storing prompt templates and parameters in browsers LocalStorage */
const local_storage_storageKey = "llamacpp_server_local_storage";
@@ -282,7 +282,7 @@
let importedTemplates = local_storage_getDataAsObject('user_templates')
if (importedTemplates) {
-// saved templates were successfuly imported.
+// saved templates were successfully imported.
console.log('Processing saved templates and updating default template')
params.value = { ...params.value, image_data: [] };
@@ -303,7 +303,7 @@
}
function userTemplateResetToDefault() {
-console.log('Reseting themplate to default')
+console.log('Resetting template to default')
selectedUserTemplate.value.name = 'default';
selectedUserTemplate.value.data = savedUserTemplates.value['default'];
}
@@ -762,7 +762,7 @@
<fieldset class="two">
${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
-${FloatField({ label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
+${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}


@@ -2383,6 +2383,7 @@ json oaicompat_completion_params_parse(
llama_params["__oaicompat"] = true;
// Map OpenAI parameters to llama.cpp parameters
llama_params["model"] = json_value(body, "model", std::string("uknown"));
llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
llama_params["temperature"] = json_value(body, "temperature", 0.8);


@@ -1,6 +1,6 @@
# llama.cpp/examples/speculative
-Demonstartion of speculative decoding and tree-based speculative decoding techniques
+Demonstration of speculative decoding and tree-based speculative decoding techniques
More info:


@@ -430,7 +430,7 @@ int main(int argc, char ** argv) {
++n_past_tgt;
}
-// the first token is always proposed by the traget model before the speculation loop so we erase it here
+// the first token is always proposed by the target model before the speculation loop so we erase it here
for (int s = 0; s < n_seq_dft; ++s) {
if (!drafts[s].active) {
continue;
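For context on the erase noted above: each speculation round starts from a token the target model produced itself, after which the draft model proposes a run of tokens for the target to verify. A toy greedy sketch of that verify loop in Python (illustrative only; `target_next`/`draft_next` are hypothetical single-token greedy samplers, not the speculative.cpp API):

```python
# Toy greedy speculative decoding: the draft proposes k tokens and the
# target accepts the longest prefix that matches its own greedy picks.
def speculative_step(target_next, draft_next, ctx, k=4):
    # Draft model proposes k tokens from the current context.
    proposal, dctx = [], list(ctx)
    for _ in range(k):
        t = draft_next(dctx)
        proposal.append(t)
        dctx.append(t)
    # Target verifies: accept while its greedy choice agrees.
    accepted = []
    for t in proposal:
        want = target_next(ctx + accepted)
        if want != t:
            accepted.append(want)  # first disagreement: keep the target's token
            break
        accepted.append(t)
    else:
        # All k draft tokens matched; the target adds one bonus token.
        accepted.append(target_next(ctx + accepted))
    return accepted
```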