mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/llama-server-cuda.Dockerfile # .devops/llama-server-rocm.Dockerfile # .devops/llama-server-vulkan.Dockerfile # .devops/llama-server.Dockerfile # .github/workflows/docker.yml # README.md # llama.cpp # tests/test-chat-template.cpp # tests/test-grammar-integration.cpp # tests/test-json-schema-to-grammar.cpp # tests/test-llama-grammar.cpp
This commit is contained in:
commit
f3dfa96dbc
29 changed files with 2097 additions and 431 deletions
|
@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
|||
return minItems === 0 ? `(${result})?` : result;
|
||||
}
|
||||
|
||||
function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
|
||||
const hasMin = minValue !== null;
|
||||
const hasMax = maxValue !== null;
|
||||
|
||||
function digitRange(fromChar, toChar) {
|
||||
out.push("[");
|
||||
if (fromChar === toChar) {
|
||||
out.push(fromChar);
|
||||
} else {
|
||||
out.push(fromChar);
|
||||
out.push("-");
|
||||
out.push(toChar);
|
||||
}
|
||||
out.push("]");
|
||||
}
|
||||
|
||||
function moreDigits(minDigits, maxDigits) {
|
||||
out.push("[0-9]");
|
||||
if (minDigits === maxDigits && minDigits === 1) {
|
||||
return;
|
||||
}
|
||||
out.push("{");
|
||||
out.push(minDigits.toString());
|
||||
if (maxDigits !== minDigits) {
|
||||
out.push(",");
|
||||
if (maxDigits !== Number.MAX_SAFE_INTEGER) {
|
||||
out.push(maxDigits.toString());
|
||||
}
|
||||
}
|
||||
out.push("}");
|
||||
}
|
||||
|
||||
function uniformRange(fromStr, toStr) {
|
||||
let i = 0;
|
||||
while (i < fromStr.length && fromStr[i] === toStr[i]) {
|
||||
i++;
|
||||
}
|
||||
if (i > 0) {
|
||||
out.push("\"");
|
||||
out.push(fromStr.slice(0, i));
|
||||
out.push("\"");
|
||||
}
|
||||
if (i < fromStr.length) {
|
||||
if (i > 0) {
|
||||
out.push(" ");
|
||||
}
|
||||
const subLen = fromStr.length - i - 1;
|
||||
if (subLen > 0) {
|
||||
const fromSub = fromStr.slice(i + 1);
|
||||
const toSub = toStr.slice(i + 1);
|
||||
const subZeros = "0".repeat(subLen);
|
||||
const subNines = "9".repeat(subLen);
|
||||
|
||||
let toReached = false;
|
||||
out.push("(");
|
||||
if (fromSub === subZeros) {
|
||||
digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
|
||||
out.push(" ");
|
||||
moreDigits(subLen, subLen);
|
||||
} else {
|
||||
out.push("[");
|
||||
out.push(fromStr[i]);
|
||||
out.push("] ");
|
||||
out.push("(");
|
||||
uniformRange(fromSub, subNines);
|
||||
out.push(")");
|
||||
if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
|
||||
out.push(" | ");
|
||||
if (toSub === subNines) {
|
||||
digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
|
||||
toReached = true;
|
||||
} else {
|
||||
digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
|
||||
}
|
||||
out.push(" ");
|
||||
moreDigits(subLen, subLen);
|
||||
}
|
||||
}
|
||||
if (!toReached) {
|
||||
out.push(" | ");
|
||||
digitRange(toStr[i], toStr[i]);
|
||||
out.push(" ");
|
||||
uniformRange(subZeros, toSub);
|
||||
}
|
||||
out.push(")");
|
||||
} else {
|
||||
out.push("[");
|
||||
out.push(fromStr[i]);
|
||||
out.push("-");
|
||||
out.push(toStr[i]);
|
||||
out.push("]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasMin && hasMax) {
|
||||
if (minValue < 0 && maxValue < 0) {
|
||||
out.push("\"-\" (");
|
||||
_generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
|
||||
out.push(")");
|
||||
return;
|
||||
}
|
||||
|
||||
if (minValue < 0) {
|
||||
out.push("\"-\" (");
|
||||
_generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
|
||||
out.push(") | ");
|
||||
minValue = 0;
|
||||
}
|
||||
|
||||
let minS = minValue.toString();
|
||||
const maxS = maxValue.toString();
|
||||
const minDigits = minS.length;
|
||||
const maxDigits = maxS.length;
|
||||
|
||||
for (let digits = minDigits; digits < maxDigits; digits++) {
|
||||
uniformRange(minS, "9".repeat(digits));
|
||||
minS = "1" + "0".repeat(digits);
|
||||
out.push(" | ");
|
||||
}
|
||||
uniformRange(minS, maxS);
|
||||
return;
|
||||
}
|
||||
|
||||
const lessDecimals = Math.max(decimalsLeft - 1, 1);
|
||||
|
||||
if (hasMin) {
|
||||
if (minValue < 0) {
|
||||
out.push("\"-\" (");
|
||||
_generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
|
||||
out.push(") | [0] | [1-9] ");
|
||||
moreDigits(0, decimalsLeft - 1);
|
||||
} else if (minValue === 0) {
|
||||
if (topLevel) {
|
||||
out.push("[0] | [1-9] ");
|
||||
moreDigits(0, lessDecimals);
|
||||
} else {
|
||||
moreDigits(1, decimalsLeft);
|
||||
}
|
||||
} else if (minValue <= 9) {
|
||||
const c = minValue.toString();
|
||||
const range_start = topLevel ? '1' : '0';
|
||||
if (c > range_start) {
|
||||
digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
|
||||
out.push(" ");
|
||||
moreDigits(1, lessDecimals);
|
||||
out.push(" | ");
|
||||
}
|
||||
digitRange(c, "9");
|
||||
out.push(" ");
|
||||
moreDigits(0, lessDecimals);
|
||||
} else {
|
||||
const minS = minValue.toString();
|
||||
const length = minS.length;
|
||||
const c = minS[0];
|
||||
|
||||
if (c > "1") {
|
||||
digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
|
||||
out.push(" ");
|
||||
moreDigits(length, lessDecimals);
|
||||
out.push(" | ");
|
||||
}
|
||||
digitRange(c, c);
|
||||
out.push(" (");
|
||||
_generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
|
||||
out.push(")");
|
||||
if (c < "9") {
|
||||
out.push(" | ");
|
||||
digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
|
||||
out.push(" ");
|
||||
moreDigits(length - 1, lessDecimals);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (hasMax) {
|
||||
if (maxValue >= 0) {
|
||||
if (topLevel) {
|
||||
out.push("\"-\" [1-9] ");
|
||||
moreDigits(0, lessDecimals);
|
||||
out.push(" | ");
|
||||
}
|
||||
_generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
|
||||
} else {
|
||||
out.push("\"-\" (");
|
||||
_generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
|
||||
out.push(")");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw new Error("At least one of minValue or maxValue must be set");
|
||||
}
|
||||
|
||||
class BuiltinRule {
|
||||
constructor(content, deps) {
|
||||
this.content = content;
|
||||
|
@ -337,6 +532,64 @@ export class SchemaConverter {
|
|||
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
||||
}
|
||||
|
||||
_notStrings(strings) {
|
||||
class TrieNode {
|
||||
constructor() {
|
||||
this.children = {};
|
||||
this.isEndOfString = false;
|
||||
}
|
||||
|
||||
insert(str) {
|
||||
let node = this;
|
||||
for (const c of str) {
|
||||
node = node.children[c] = node.children[c] || new TrieNode();
|
||||
}
|
||||
node.isEndOfString = true;
|
||||
}
|
||||
}
|
||||
|
||||
const trie = new TrieNode();
|
||||
for (const s of strings) {
|
||||
trie.insert(s);
|
||||
}
|
||||
|
||||
const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
|
||||
const out = ['["] ( '];
|
||||
|
||||
const visit = (node) => {
|
||||
const rejects = [];
|
||||
let first = true;
|
||||
for (const c of Object.keys(node.children).sort()) {
|
||||
const child = node.children[c];
|
||||
rejects.push(c);
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out.push(' | ');
|
||||
}
|
||||
out.push(`[${c}]`);
|
||||
if (Object.keys(child.children).length > 0) {
|
||||
out.push(' (');
|
||||
visit(child);
|
||||
out.push(')');
|
||||
} else if (child.isEndOfString) {
|
||||
out.push(` ${charRuleName}+`);
|
||||
}
|
||||
}
|
||||
if (Object.keys(node.children).length > 0) {
|
||||
if (!first) {
|
||||
out.push(' | ');
|
||||
}
|
||||
out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
|
||||
}
|
||||
};
|
||||
|
||||
visit(trie);
|
||||
|
||||
out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
|
||||
return out.join('');
|
||||
}
|
||||
|
||||
_resolveRef(ref) {
|
||||
let refName = ref.split('/').pop();
|
||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
||||
|
@ -363,11 +616,11 @@ export class SchemaConverter {
|
|||
} else if (schema.oneOf || schema.anyOf) {
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
||||
} else if (Array.isArray(schemaType)) {
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
|
||||
} else if ('const' in schema) {
|
||||
return this._addRule(ruleName, this._generateConstantRule(schema.const));
|
||||
return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
|
||||
} else if ('enum' in schema) {
|
||||
const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
|
||||
const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
|
||||
return this._addRule(ruleName, rule);
|
||||
} else if ((schemaType === undefined || schemaType === 'object') &&
|
||||
('properties' in schema ||
|
||||
|
@ -404,7 +657,7 @@ export class SchemaConverter {
|
|||
}
|
||||
}
|
||||
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
|
||||
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
||||
const items = schema.items ?? schema.prefixItems;
|
||||
if (Array.isArray(items)) {
|
||||
|
@ -435,6 +688,24 @@ export class SchemaConverter {
|
|||
const minLen = schema.minLength || 0;
|
||||
const maxLen = schema.maxLength;
|
||||
return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
|
||||
} else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
|
||||
let minValue = null;
|
||||
let maxValue = null;
|
||||
if ('minimum' in schema) {
|
||||
minValue = schema.minimum;
|
||||
} else if ('exclusiveMinimum' in schema) {
|
||||
minValue = schema.exclusiveMinimum + 1;
|
||||
}
|
||||
if ('maximum' in schema) {
|
||||
maxValue = schema.maximum;
|
||||
} else if ('exclusiveMaximum' in schema) {
|
||||
maxValue = schema.exclusiveMaximum - 1;
|
||||
}
|
||||
|
||||
const out = ["("];
|
||||
_generateMinMaxInt(minValue, maxValue, out);
|
||||
out.push(") space");
|
||||
return this._addRule(ruleName, out.join(''));
|
||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||
} else {
|
||||
|
@ -480,12 +751,19 @@ export class SchemaConverter {
|
|||
const requiredProps = sortedProps.filter(k => required.has(k));
|
||||
const optionalProps = sortedProps.filter(k => !required.has(k));
|
||||
|
||||
if (typeof additionalProperties === 'object' || additionalProperties === true) {
|
||||
if (additionalProperties !== false) {
|
||||
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
|
||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||
const valueRule =
|
||||
additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
|
||||
: this._addPrimitive('value', PRIMITIVE_RULES['value']);
|
||||
|
||||
const key_rule =
|
||||
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
|
||||
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
|
||||
|
||||
propKvRuleNames['*'] = this._addRule(
|
||||
`${subName}-kv`,
|
||||
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||
`${key_rule} ":" space ${valueRule}`);
|
||||
optionalProps.push('*');
|
||||
}
|
||||
|
||||
|
@ -502,15 +780,11 @@ export class SchemaConverter {
|
|||
const [k, ...rest] = ks;
|
||||
const kvRuleName = propKvRuleNames[k];
|
||||
let res;
|
||||
if (k === '*') {
|
||||
res = this._addRule(
|
||||
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
|
||||
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
|
||||
)
|
||||
} else if (firstIsOptional) {
|
||||
res = `( "," space ${kvRuleName} )?`;
|
||||
const commaRef = `( "," space ${kvRuleName} )`;
|
||||
if (firstIsOptional) {
|
||||
res = commaRef + (k === '*' ? '*' : '?');
|
||||
} else {
|
||||
res = kvRuleName;
|
||||
res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
|
||||
}
|
||||
if (rest.length > 0) {
|
||||
res += ' ' + this._addRule(
|
||||
|
|
|
@ -3,6 +3,13 @@
|
|||
|
||||
by Humans for All.
|
||||
|
||||
## quickstart
|
||||
|
||||
To run from the build dir
|
||||
|
||||
bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat
|
||||
|
||||
Continue reading for the details.
|
||||
|
||||
## overview
|
||||
|
||||
|
@ -14,6 +21,8 @@ own system prompts.
|
|||
This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
|
||||
or potentially as it is being generated, in a streamed manner from the server/ai-model.
|
||||
|
||||

|
||||
|
||||
Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you
|
||||
open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
|
||||
|
||||
|
@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t
|
|||
The histogram/freq based trimming logic is currently tuned for english language wrt its
|
||||
is-it-a-alpabetic|numeral-char regex match logic.
|
||||
|
||||
chatRequestOptions - maintains the list of options/fields to send along with chat request,
|
||||
apiRequestOptions - maintains the list of options/fields to send along with api request,
|
||||
irrespective of whether /chat/completions or /completions endpoint.
|
||||
|
||||
If you want to add additional options/fields to send to the server/ai-model, and or
|
||||
modify the existing options value or remove them, for now you can update this global var
|
||||
using browser's development-tools/console.
|
||||
|
||||
For string and numeric fields in chatRequestOptions, including even those added by a user
|
||||
at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto
|
||||
For string, numeric and boolean fields in apiRequestOptions, including even those added by a
|
||||
user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto
|
||||
created.
|
||||
|
||||
cache_prompt option supported by example/server is allowed to be controlled by user, so that
|
||||
any caching supported wrt system-prompt and chat history, if usable can get used. When chat
|
||||
history sliding window is enabled, cache_prompt logic may or may not kick in at the backend
|
||||
wrt same, based on aspects related to model, positional encoding, attention mechanism etal.
|
||||
However system prompt should ideally get the benefit of caching.
|
||||
|
||||
headers - maintains the list of http headers sent when request is made to the server. By default
|
||||
Content-Type is set to application/json. Additionally Authorization entry is provided, which can
|
||||
be set if needed using the settings ui.
|
||||
|
@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t
|
|||
>0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
|
||||
|
||||
|
||||
By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the
|
||||
implications of loading of the ai-model's context window by chat history, wrt chat response to
|
||||
some extent in a simple crude way. You may also want to control the context size enabled when
|
||||
the server loads ai-model, on the server end.
|
||||
By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
|
||||
the implications of loading of the ai-model's context window by chat history, wrt chat response to
|
||||
some extent in a simple crude way. You may also want to control the context size enabled when the
|
||||
server loads ai-model, on the server end.
|
||||
|
||||
|
||||
Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js
|
||||
|
@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side.
|
|||
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
|
||||
to /completions endpoint handling code on server side.
|
||||
|
||||
NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions
|
||||
wrt the set of fields sent to server along with the user query. To check how the model behaves
|
||||
NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
|
||||
wrt the set of fields sent to server along with the user query, to check how the model behaves
|
||||
wrt repeatations in general in the generated text response.
|
||||
|
||||
A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by
|
||||
using the providing settings ui.
|
||||
using the provided settings ui (for settings exposed through the ui).
|
||||
|
||||
|
||||
### OpenAi / Equivalent API WebService
|
||||
|
@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below.
|
|||
* the baseUrl in settings ui
|
||||
* https://api.openai.com/v1 or similar
|
||||
|
||||
* Wrt request body - gMe.chatRequestOptions
|
||||
* Wrt request body - gMe.apiRequestOptions
|
||||
* model (settings ui)
|
||||
* any additional fields if required in future
|
||||
|
||||
|
|
|
@ -222,8 +222,8 @@ class SimpleChat {
|
|||
* @param {Object} obj
|
||||
*/
|
||||
request_jsonstr_extend(obj) {
|
||||
for(let k in gMe.chatRequestOptions) {
|
||||
obj[k] = gMe.chatRequestOptions[k];
|
||||
for(let k in gMe.apiRequestOptions) {
|
||||
obj[k] = gMe.apiRequestOptions[k];
|
||||
}
|
||||
if (gMe.bStream) {
|
||||
obj["stream"] = true;
|
||||
|
@ -740,11 +740,12 @@ class Me {
|
|||
"Authorization": "", // Authorization: Bearer OPENAI_API_KEY
|
||||
}
|
||||
// Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
|
||||
this.chatRequestOptions = {
|
||||
this.apiRequestOptions = {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 1024,
|
||||
"n_predict": 1024,
|
||||
"cache_prompt": false,
|
||||
//"frequency_penalty": 1.2,
|
||||
//"presence_penalty": 1.2,
|
||||
};
|
||||
|
@ -800,51 +801,55 @@ class Me {
|
|||
|
||||
ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);
|
||||
|
||||
ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
|
||||
|
||||
}
|
||||
|
||||
ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv);
|
||||
ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv);
|
||||
ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Auto create ui input elements for fields in ChatRequestOptions
|
||||
* Auto create ui input elements for fields in apiRequestOptions
|
||||
* Currently supports text and number field types.
|
||||
* @param {HTMLDivElement} elDiv
|
||||
*/
|
||||
show_settings_chatrequestoptions(elDiv) {
|
||||
show_settings_apirequestoptions(elDiv) {
|
||||
let typeDict = {
|
||||
"string": "text",
|
||||
"number": "number",
|
||||
};
|
||||
let fs = document.createElement("fieldset");
|
||||
let legend = document.createElement("legend");
|
||||
legend.innerText = "ChatRequestOptions";
|
||||
legend.innerText = "ApiRequestOptions";
|
||||
fs.appendChild(legend);
|
||||
elDiv.appendChild(fs);
|
||||
for(const k in this.chatRequestOptions) {
|
||||
let val = this.chatRequestOptions[k];
|
||||
for(const k in this.apiRequestOptions) {
|
||||
let val = this.apiRequestOptions[k];
|
||||
let type = typeof(val);
|
||||
if (!((type == "string") || (type == "number"))) {
|
||||
continue;
|
||||
if (((type == "string") || (type == "number"))) {
|
||||
let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{
|
||||
if (type == "number") {
|
||||
val = Number(val);
|
||||
}
|
||||
this.apiRequestOptions[k] = val;
|
||||
});
|
||||
fs.appendChild(inp.div);
|
||||
} else if (type == "boolean") {
|
||||
let bbtn = ui.el_creatediv_boolbutton(`Set{k}`, k, {true: "true", false: "false"}, val, (userVal)=>{
|
||||
this.apiRequestOptions[k] = userVal;
|
||||
});
|
||||
fs.appendChild(bbtn.div);
|
||||
}
|
||||
let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{
|
||||
if (type == "number") {
|
||||
val = Number(val);
|
||||
}
|
||||
this.chatRequestOptions[k] = val;
|
||||
});
|
||||
fs.appendChild(inp.div);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -870,6 +875,23 @@ class Me {
|
|||
});
|
||||
elDiv.appendChild(bb.div);
|
||||
|
||||
bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
|
||||
this.bTrimGarbage = val;
|
||||
});
|
||||
elDiv.appendChild(bb.div);
|
||||
|
||||
this.show_settings_apirequestoptions(elDiv);
|
||||
|
||||
let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
|
||||
this.apiEP = ApiEP.Type[val];
|
||||
});
|
||||
elDiv.appendChild(sel.div);
|
||||
|
||||
sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
|
||||
this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
|
||||
});
|
||||
elDiv.appendChild(sel.div);
|
||||
|
||||
bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
|
||||
this.bCompletionFreshChatAlways = val;
|
||||
});
|
||||
|
@ -880,23 +902,6 @@ class Me {
|
|||
});
|
||||
elDiv.appendChild(bb.div);
|
||||
|
||||
bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
|
||||
this.bTrimGarbage = val;
|
||||
});
|
||||
elDiv.appendChild(bb.div);
|
||||
|
||||
let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
|
||||
this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
|
||||
});
|
||||
elDiv.appendChild(sel.div);
|
||||
|
||||
sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
|
||||
this.apiEP = ApiEP.Type[val];
|
||||
});
|
||||
elDiv.appendChild(sel.div);
|
||||
|
||||
this.show_settings_chatrequestoptions(elDiv);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
BIN
examples/server/public_simplechat/simplechat_screens.webp
Normal file
BIN
examples/server/public_simplechat/simplechat_screens.webp
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
|
@ -2607,17 +2607,9 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// print sample chat example to make it clear which template is used
|
||||
{
|
||||
json chat;
|
||||
chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}});
|
||||
chat.push_back({{"role", "user"}, {"content", "Hello"}});
|
||||
chat.push_back({{"role", "assistant"}, {"content", "Hi there"}});
|
||||
chat.push_back({{"role", "user"}, {"content", "How are you?"}});
|
||||
|
||||
const std::string chat_example = format_chat(ctx_server.model, params.chat_template, chat);
|
||||
|
||||
LOG_INFO("chat template", {
|
||||
{"chat_example", chat_example},
|
||||
{"built_in", params.chat_template.empty()},
|
||||
{"chat_example", llama_chat_format_example(ctx_server.model, params.chat_template)},
|
||||
{"built_in", params.chat_template.empty()},
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ Feature: llama.cpp server
|
|||
|
||||
Examples: Prompts
|
||||
| response_format | n_predicted | re_content |
|
||||
| {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
|
||||
| {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" |
|
||||
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
|
||||
| {"type": "json_object"} | 10 | \{ " Jacky. |
|
||||
|
||||
|
|
|
@ -118,36 +118,17 @@ static inline void server_log(const char * level, const char * function, int lin
|
|||
|
||||
// Format given chat. If tmpl is empty, we take the template from model metadata
|
||||
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
|
||||
size_t alloc_size = 0;
|
||||
// vector holding all allocated string to be passed to llama_chat_apply_template
|
||||
std::vector<std::string> str(messages.size() * 2);
|
||||
std::vector<llama_chat_message> chat(messages.size());
|
||||
std::vector<llama_chat_msg> chat;
|
||||
|
||||
for (size_t i = 0; i < messages.size(); ++i) {
|
||||
const auto & curr_msg = messages[i];
|
||||
str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
|
||||
str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
|
||||
alloc_size += str[i*2 + 1].length();
|
||||
chat[i].role = str[i*2 + 0].c_str();
|
||||
chat[i].content = str[i*2 + 1].c_str();
|
||||
std::string role = json_value(curr_msg, "role", std::string(""));
|
||||
std::string content = json_value(curr_msg, "content", std::string(""));
|
||||
chat.push_back({role, content});
|
||||
}
|
||||
|
||||
const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
|
||||
std::vector<char> buf(alloc_size * 2);
|
||||
|
||||
// run the first time to get the total output length
|
||||
int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
|
||||
|
||||
// if it turns out that our buffer is too small, we resize it
|
||||
if ((size_t) res > buf.size()) {
|
||||
buf.resize(res);
|
||||
res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
|
||||
}
|
||||
|
||||
const std::string formatted_chat(buf.data(), res);
|
||||
|
||||
auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
|
||||
LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
|
||||
|
||||
return formatted_chat;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue