mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 09:59:41 +00:00

Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/llama-server-cuda.Dockerfile
#	.devops/llama-server-rocm.Dockerfile
#	.devops/llama-server-vulkan.Dockerfile
#	.devops/llama-server.Dockerfile
#	.github/workflows/docker.yml
#	README.md
#	llama.cpp
#	tests/test-chat-template.cpp
#	tests/test-grammar-integration.cpp
#	tests/test-json-schema-to-grammar.cpp
#	tests/test-llama-grammar.cpp

commit f3dfa96dbc

29 changed files with 2097 additions and 431 deletions
@@ -11,13 +11,16 @@ Related PRs:

```sh
# CPU only
./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
./cvector-generator -m ./llama-3.Q4_K_M.gguf

# With GPU
./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99

# With advanced options
./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100
./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100

# Using mean value instead of PCA
./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean

# To see help message
./cvector-generator -h
```

@@ -32,3 +35,11 @@ If you have multiple lines per prompt, you can escape the newline character (cha

<|im_start|>system\nAct like a person who is extremely happy.<|im_end|>
<|im_start|>system\nYou are in a very good mood today<|im_end|>
```

Example to use output file with `llama-cli`:

(Tip: the control vector works better when applied to layers higher than 10)

```sh
./llama-cli -m ./llama-3.Q4_K_M.gguf -p "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSing a song<|im_end|><|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" --special --control-vector-scaled ./control_vector.gguf 0.8 --control-vector-layer-range 10 31
```
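How a control vector acts at inference time is simple to picture: each selected layer's hidden state is nudged along a learned direction. A minimal Python sketch of the idea (hypothetical names and shapes; llama.cpp applies this in C++ inside the compute graph):

```python
import numpy as np

def apply_control_vector(hidden_states, directions, scale=0.8, layer_range=(10, 31)):
    # hidden_states: dict layer -> (n_tokens, n_embd) activations
    # directions:    dict layer -> (n_embd,) unit direction from control_vector.gguf
    for il in range(layer_range[0], layer_range[1] + 1):
        if il in directions:
            # broadcast the scaled direction over all token positions
            hidden_states[il] = hidden_states[il] + scale * directions[il]
    return hidden_states
```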
@@ -2,6 +2,7 @@
#include "llama.h"
#include "ggml.h"
#include "pca.hpp"
#include "mean.hpp"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
@@ -38,9 +39,10 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
    gpt_params_print_usage(argc, argv, params);

    printf("\nexample usage:\n");
    printf("\n CPU only: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf\n", argv[0]);
    printf("\n with GPU: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99\n", argv[0]);
    printf("\n advanced: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --pca-batch 100\n", argv[0]);
    printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
    printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
    printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]);
    printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]);
    printf("\n");
}
@@ -223,23 +225,30 @@ struct train_context {

    // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
    // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method
    void build_v_diff() {
    void build_v_diff(bool transpose) {
        printf("build_v_diff\n");
        for (int il = 0; il < n_layers - 1; il++) {
            auto & diff_tmp = v_diff_tmp[il];
            int n_elem = diff_tmp.size() / sizeof(float);
            GGML_ASSERT(n_elem % n_embd == 0);
            int n_rows = n_elem / n_embd;
            struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
            struct ggml_tensor * diff = transpose
                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
            ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
            // copy data & transpose
            diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
            float * arr = (float *) diff_tmp.data();
            for (int ir = 0; ir < n_rows; ++ir) {
                for (int ic = 0; ic < n_embd; ++ic) {
                    float f = arr[ir*n_embd + ic];
                    ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
            if (transpose) {
                // copy data & transpose
                float * arr = (float *) diff_tmp.data();
                for (int ir = 0; ir < n_rows; ++ir) {
                    for (int ic = 0; ic < n_embd; ++ic) {
                        float f = arr[ir*n_embd + ic];
                        ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
                    }
                }
            } else {
                // only copy
                memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
            }
            v_diff.push_back(diff);
            print_debug_tensor(diff);
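The `transpose` flag exists because the two downstream methods want different memory layouts: the PCA path consumes the element-wise transposed copy, while the new mean path can take the sample-major buffer as-is via a plain memcpy. A small numpy analogy (illustrative only; ggml's dimension ordering differs from numpy's):

```python
import numpy as np

diff_tmp = np.random.randn(128, 4096).astype(np.float32)  # one row per sample

for_pca  = np.ascontiguousarray(diff_tmp.T)  # transposed copy, like the element-wise loop
for_mean = diff_tmp                          # taken as-is, like the memcpy branch
```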
@@ -263,8 +272,8 @@ struct tokenized_prompt {

    tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
        tokens_pos = ::llama_tokenize(ctx, pos, add_bos);
        tokens_neg = ::llama_tokenize(ctx, neg, add_bos);
        tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
        tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
        max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
        padding_seq(ctx, tokens_pos, max_seq_len);
        padding_seq(ctx, tokens_neg, max_seq_len);
@@ -373,20 +382,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) {
        fprintf(stderr, "must provide at least one prompt pair\n");
        return 1;
    }

    // create templated prompts
    std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
    auto format_template = [](std::string persona, std::string suffix) {
        // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST] "
        return persona + suffix;
    };
    for (size_t i = 0; i < positive_prompts.size(); ++i) {
        for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
            // TODO replicate the truncations done by the python implementation
            ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
            ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
        }
    }
    ctx_train.positive_entries = positive_prompts;
    ctx_train.negative_entries = negative_prompts;
    return 0;
}
@@ -480,15 +477,22 @@ int main(int argc, char ** argv) {
    llama_free(ctx);
    llama_free_model(model);

    // prepare ctx_train for PCA
    ctx_train.build_v_diff();
    bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;

    // run PCA
    PCA::pca_params pca_params;
    pca_params.n_threads    = params.n_threads;
    pca_params.n_batch      = params.n_pca_batch;
    pca_params.n_iterations = params.n_pca_iterations;
    PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
    // prepare ctx_train for PCA
    ctx_train.build_v_diff(use_pca);

    if (use_pca) {
        // run PCA
        PCA::pca_params pca_params;
        pca_params.n_threads    = params.n_threads;
        pca_params.n_batch      = params.n_pca_batch;
        pca_params.n_iterations = params.n_pca_iterations;
        PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
    } else {
        // run mean
        mean::run(ctx_train.v_diff, ctx_train.v_final);
    }

    // write output vectors to gguf
    export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
48	examples/cvector-generator/mean.hpp (new file)
@@ -0,0 +1,48 @@
#include "common.h"
#include "llama.h"
#include "ggml.h"

#include <string>
#include <vector>
#include <math.h>

namespace mean {

static void run(
        const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_embd, n_samples]
        const std::vector<struct ggml_tensor *> & v_output) {
    printf("%s: Running mean...\n", __func__);
    for (size_t il = 0; il < v_input.size(); ++il) {
        // prepare output vector
        struct ggml_tensor * ctrl_out = v_output[il];
        ggml_format_name(ctrl_out, "direction.%ld", il+1);

        // calculate mean vector
        struct ggml_tensor * t_layer = v_input[il];
        GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd
        for (int ic = 0; ic < t_layer->ne[0]; ic++) {
            float f = 0.0;
            for (int ir = 0; ir < t_layer->ne[1]; ir++) {
                f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0);
            }
            f /= t_layer->ne[1];
            ggml_set_f32_1d(ctrl_out, ic, f);
        }

        // normalize output vector
        float norm = 0.0;
        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
            float f = ggml_get_f32_1d(ctrl_out, i);
            norm += f*f;
        }
        norm = sqrt(norm);
        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
            float f = ggml_get_f32_1d(ctrl_out, i);
            ggml_set_f32_1d(ctrl_out, i, f / norm);
        }

        printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size());
    }
}

}
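The mean method above reduces the positive-minus-negative activation differences to one unit-length direction per layer: average over samples, then L2-normalize. A minimal numpy sketch of the same computation (hypothetical shapes, not the ggml API):

```python
import numpy as np

def mean_direction(diff):
    # diff: (n_samples, n_embd) positive-minus-negative activations for one layer
    v = diff.mean(axis=0)          # average over samples
    return v / np.linalg.norm(v)   # normalize to unit length

# usage: one direction per layer
layer_diff = np.random.randn(128, 4096)
direction = mean_direction(layer_diff)  # shape (4096,)
```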
@@ -1 +1,4 @@
[INST] Act like a person who is extremely sad. [/INST]
<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest
<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow
<|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me
<|start_header_id|>system<|end_header_id|>\n\nYou are the saddest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow
@@ -290,7 +290,7 @@ static void power_iteration(
        }

        printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n",
            __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch);
            __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch);
    }

    // get output tensor
@@ -298,6 +298,9 @@ static void power_iteration(
    ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
    //print_debug_tensor(output);
    ggml_gallocr_free(allocr);

    // TODO @ngxson : The output vector is randomly inverted
    // Solution: https://github.com/ggerganov/llama.cpp/pull/8069#issuecomment-2185328171
}

static void run_pca(
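Power iteration returns an eigenvector only up to sign, which is why the TODO above notes that the output is randomly inverted. A common fix, sketched here in Python as an illustration of the general idea (not necessarily the exact approach in the linked comment), is to orient each direction so it points from negative toward positive examples:

```python
import numpy as np

def orient(direction, diff):
    # diff: (n_samples, n_embd) positive-minus-negative activations
    # flip the eigenvector if most samples project onto it negatively
    if np.median(diff @ direction) < 0:
        direction = -direction
    return direction
```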
@@ -1 +1,4 @@
[INST] Act like a person who is extremely happy. [/INST]
<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI'm the happiest person in this world
<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello, I'm having the best day ever!
<|start_header_id|>system<|end_header_id|>\n\nYou are in a very good mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi, I'm very excited to meet you
<|start_header_id|>system<|end_header_id|>\n\nYou are the happiest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEverything is just perfect right now!
@@ -101,7 +101,9 @@ int main(int argc, char** argv) {
    auto grammar = llama_grammar_init(
        grammar_rules.data(),
        grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));

    if (grammar == nullptr) {
        throw std::runtime_error("Failed to initialize llama_grammar");
    }
    // Read the input file
    std::string input_str;
    {
@@ -3,7 +3,7 @@
#! pip install pydantic
#! python json-schema-pydantic-example.py

from pydantic import BaseModel, TypeAdapter
from pydantic import BaseModel, Extra, TypeAdapter
from annotated_types import MinLen
from typing import Annotated, List, Optional
import json, requests
@@ -50,11 +50,16 @@ else:
if __name__ == '__main__':

    class QAPair(BaseModel):
        class Config:
            extra = 'forbid' # triggers additionalProperties: false in the JSON schema
        question: str
        concise_answer: str
        justification: str
        stars: Annotated[int, Field(ge=1, le=5)]

    class PyramidalSummary(BaseModel):
        class Config:
            extra = 'forbid' # triggers additionalProperties: false in the JSON schema
        title: str
        summary: str
        question_answers: Annotated[List[QAPair], MinLen(2)]
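For reference, `extra = 'forbid'` is what makes pydantic emit `additionalProperties: false`, which the converter changes below turn into a closed object rule. A quick way to inspect the schema it produces (standard pydantic v2 API; field set trimmed for brevity):

```python
from pydantic import BaseModel, TypeAdapter

class QAPair(BaseModel):
    class Config:
        extra = 'forbid'
    question: str
    concise_answer: str

schema = TypeAdapter(QAPair).json_schema()
print(schema['additionalProperties'])  # False
```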
@@ -4,8 +4,7 @@ import itertools
import json
import re
import sys
from typing import Any, Dict, List, Set, Tuple, Union

from typing import Any, List, Optional, Set, Tuple, Union

def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
@@ -23,6 +22,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
    result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None)
    return f'({result})?' if min_items == 0 else result

def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True):
    has_min = min_value != None
    has_max = max_value != None

    def digit_range(from_char: str, to_char: str):
        out.append("[")
        if from_char == to_char:
            out.append(from_char)
        else:
            out.append(from_char)
            out.append("-")
            out.append(to_char)
        out.append("]")

    def more_digits(min_digits: int, max_digits: int):
        out.append("[0-9]")
        if min_digits == max_digits and min_digits == 1:
            return
        out.append("{")
        out.append(str(min_digits))
        if max_digits != min_digits:
            out.append(",")
            if max_digits != sys.maxsize:
                out.append(str(max_digits))
        out.append("}")

    def uniform_range(from_str: str, to_str: str):
        i = 0
        while i < len(from_str) and from_str[i] == to_str[i]:
            i += 1
        if i > 0:
            out.append("\"")
            out.append(from_str[:i])
            out.append("\"")
        if i < len(from_str):
            if i > 0:
                out.append(" ")
            sub_len = len(from_str) - i - 1
            if sub_len > 0:
                from_sub = from_str[i+1:]
                to_sub = to_str[i+1:]
                sub_zeros = "0" * sub_len
                sub_nines = "9" * sub_len

                to_reached = False
                out.append("(")
                if from_sub == sub_zeros:
                    digit_range(from_str[i], chr(ord(to_str[i]) - 1))
                    out.append(" ")
                    more_digits(sub_len, sub_len)
                else:
                    out.append("[")
                    out.append(from_str[i])
                    out.append("] ")
                    out.append("(")
                    uniform_range(from_sub, sub_nines)
                    out.append(")")
                    if ord(from_str[i]) < ord(to_str[i]) - 1:
                        out.append(" | ")
                        if to_sub == sub_nines:
                            digit_range(chr(ord(from_str[i]) + 1), to_str[i])
                            to_reached = True
                        else:
                            digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1))
                        out.append(" ")
                        more_digits(sub_len, sub_len)
                if not to_reached:
                    out.append(" | ")
                    digit_range(to_str[i], to_str[i])
                    out.append(" ")
                    uniform_range(sub_zeros, to_sub)
                out.append(")")
            else:
                out.append("[")
                out.append(from_str[i])
                out.append("-")
                out.append(to_str[i])
                out.append("]")

    if has_min and has_max:
        if min_value < 0 and max_value < 0:
            out.append("\"-\" (")
            _generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True)
            out.append(")")
            return

        if min_value < 0:
            out.append("\"-\" (")
            _generate_min_max_int(0, -min_value, out, decimals_left, top_level=True)
            out.append(") | ")
            min_value = 0

        min_s = str(min_value)
        max_s = str(max_value)
        min_digits = len(min_s)
        max_digits = len(max_s)

        for digits in range(min_digits, max_digits):
            uniform_range(min_s, "9" * digits)
            min_s = "1" + "0" * digits
            out.append(" | ")
        uniform_range(min_s, max_s)
        return

    less_decimals = max(decimals_left - 1, 1)

    if has_min:
        if min_value < 0:
            out.append("\"-\" (")
            _generate_min_max_int(None, -min_value, out, decimals_left, top_level=False)
            out.append(") | [0] | [1-9] ")
            more_digits(0, decimals_left - 1)
        elif min_value == 0:
            if top_level:
                out.append("[0] | [1-9] ")
                more_digits(0, less_decimals)
            else:
                more_digits(1, decimals_left)
        elif min_value <= 9:
            c = str(min_value)
            range_start = '1' if top_level else '0'
            if c > range_start:
                digit_range(range_start, chr(ord(c) - 1))
                out.append(" ")
                more_digits(1, less_decimals)
                out.append(" | ")
            digit_range(c, "9")
            out.append(" ")
            more_digits(0, less_decimals)
        else:
            min_s = str(min_value)
            length = len(min_s)
            c = min_s[0]

            if c > "1":
                digit_range("1" if top_level else "0", chr(ord(c) - 1))
                out.append(" ")
                more_digits(length, less_decimals)
                out.append(" | ")
            digit_range(c, c)
            out.append(" (")
            _generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False)
            out.append(")")
            if c < "9":
                out.append(" | ")
                digit_range(chr(ord(c) + 1), "9")
                out.append(" ")
                more_digits(length - 1, less_decimals)
        return

    if has_max:
        if max_value >= 0:
            if top_level:
                out.append("\"-\" [1-9] ")
                more_digits(0, less_decimals)
                out.append(" | ")
            _generate_min_max_int(0, max_value, out, decimals_left, top_level=True)
        else:
            out.append("\"-\" (")
            _generate_min_max_int(-max_value, None, out, decimals_left, top_level=False)
            out.append(")")
        return

    raise RuntimeError("At least one of min_value or max_value must be set")

class BuiltinRule:
    def __init__(self, content: str, deps: list = None):
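To see what this helper produces, you can call it directly (hypothetical REPL session; the exact grammar text may vary by version of the script):

```python
out = []
_generate_min_max_int(1, 42, out)
print(''.join(out))
# something like: [1-9] | ([1-3] [0-9] | [4] [0-2])
# i.e. 1-9, 10-39, or 40-42
```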
@@ -112,6 +275,51 @@ class SchemaConverter:

        return ''.join(('(', *recurse(0), ')'))

    def _not_strings(self, strings):
        class TrieNode:
            def __init__(self):
                self.children = {}
                self.is_end_of_string = False

            def insert(self, string):
                node = self
                for c in string:
                    node = node.children.setdefault(c, TrieNode())
                node.is_end_of_string = True

        trie = TrieNode()
        for s in strings:
            trie.insert(s)

        char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
        out = ['["] ( ']

        def visit(node):
            rejects = []
            first = True
            for c in sorted(node.children.keys()):
                child = node.children[c]
                rejects.append(c)
                if first:
                    first = False
                else:
                    out.append(' | ')
                out.append(f'[{c}]')
                if child.children:
                    out.append(f' (')
                    visit(child)
                    out.append(')')
                elif child.is_end_of_string:
                    out.append(f' {char_rule}+')
            if node.children:
                if not first:
                    out.append(' | ')
                out.append(f'[^"{"".join(rejects)}] {char_rule}*')
        visit(trie)

        out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')
        return ''.join(out)

    def _add_rule(self, name, rule):
        esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
        if esc_name not in self._rules or self._rules[esc_name] == rule:
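`_not_strings` builds a character trie over the given keys and emits a rule matching any quoted string except those exact ones; it is used further down so that additional-property keys cannot collide with declared property names. A rough illustration of the output shape (hypothetical, simplified rule text):

```python
# _not_strings(['id', 'ip']) shares the "i" prefix in the trie; the rule is roughly:
#
#   ["] ( [i] ( [d] char+ | [p] char+ | [^"dp] char* ) | [^"i] char* )? ["] space
#
# i.e. any JSON string key except exactly "id" or "ip"
# ("idx" is still allowed, since char+ requires at least one more character).
```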
@@ -357,13 +565,13 @@ class SchemaConverter:
            return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))

        elif isinstance(schema_type, list):
            return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
            return self._add_rule(rule_name, self._generate_union_rule(name, [{**schema, 'type': t} for t in schema_type]))

        elif 'const' in schema:
            return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
            return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')

        elif 'enum' in schema:
            rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
            rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
            return self._add_rule(rule_name, rule)

        elif schema_type in (None, 'object') and \
@@ -432,6 +640,24 @@ class SchemaConverter:

            return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')

        elif schema_type in (None, 'integer') and \
                ('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema):
            min_value = None
            max_value = None
            if 'minimum' in schema:
                min_value = schema['minimum']
            elif 'exclusiveMinimum' in schema:
                min_value = schema['exclusiveMinimum'] + 1
            if 'maximum' in schema:
                max_value = schema['maximum']
            elif 'exclusiveMaximum' in schema:
                max_value = schema['exclusiveMaximum'] - 1

            out = ["("]
            _generate_min_max_int(min_value, max_value, out)
            out.append(") space")
            return self._add_rule(rule_name, ''.join(out))

        elif (schema_type == 'object') or (len(schema) == 0):
            return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
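End to end, a bounded integer schema now becomes a digit-range rule instead of the generic `integer` primitive. A small example of the mapping (rule text approximate):

```python
# {"type": "integer", "exclusiveMinimum": 0, "maximum": 9}
# exclusiveMinimum 0 becomes min_value = 1, so the emitted rule is roughly:
#   root ::= ([1-9]) space
```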
@@ -450,7 +676,7 @@ class SchemaConverter:
            self._add_primitive(dep, dep_rule)
        return n

    def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
    def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
        prop_order = self._prop_order
        # sort by position in prop_order (if specified) then by original order
        sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@@ -465,12 +691,16 @@ class SchemaConverter:
        required_props = [k for k in sorted_props if k in required]
        optional_props = [k for k in sorted_props if k not in required]

        if additional_properties == True or isinstance(additional_properties, dict):
        if additional_properties != False:
            sub_name = f'{name}{"-" if name else ""}additional'
            value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
            value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
                self._add_primitive('value', PRIMITIVE_RULES['value'])
            key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
                else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))

            prop_kv_rule_names["*"] = self._add_rule(
                f'{sub_name}-kv',
                self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
                f'{key_rule} ":" space {value_rule}'
            )
            optional_props.append("*")
@@ -485,15 +715,11 @@ class SchemaConverter:
        def get_recursive_refs(ks, first_is_optional):
            [k, *rest] = ks
            kv_rule_name = prop_kv_rule_names[k]
            if k == '*':
                res = self._add_rule(
                    f'{name}{"-" if name else ""}additional-kvs',
                    f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
                )
            elif first_is_optional:
                res = f'( "," space {kv_rule_name} )?'
            comma_ref = f'( "," space {kv_rule_name} )'
            if first_is_optional:
                res = comma_ref + ('*' if k == '*' else '?')
            else:
                res = kv_rule_name
                res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
            if len(rest) > 0:
                res += ' ' + self._add_rule(
                    f'{name}{"-" if name else ""}{k}-rest',
@@ -40,12 +40,12 @@ static std::ostringstream * g_output_ss;
static std::vector<llama_token> * g_output_tokens;
static bool is_interacting = false;

static bool file_exists(const std::string &path) {
static bool file_exists(const std::string & path) {
    std::ifstream f(path.c_str());
    return f.good();
}

static bool file_is_empty(const std::string &path) {
static bool file_is_empty(const std::string & path) {
    std::ifstream f;
    f.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    f.open(path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
@@ -118,6 +118,14 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
    LOG_TEE("%s", text);
}

static std::string chat_add_and_format(struct llama_model * model, std::vector<llama_chat_msg> & chat_msgs, std::string role, std::string content) {
    llama_chat_msg new_msg{role, content};
    auto formatted = llama_chat_format_single(
        model, g_params->chat_template, chat_msgs, new_msg, role == "user");
    chat_msgs.push_back({role, content});
    return formatted;
}

int main(int argc, char ** argv) {
    gpt_params params;
    g_params = &params;
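`chat_add_and_format` formats only the newly added message against the accumulated history, so template prefixes are not re-emitted for the whole conversation on every turn. A rough Python sketch of the same delta idea (hypothetical `apply_template` function, not the llama.cpp API):

```python
def chat_add_and_format(chat_msgs, role, content, apply_template):
    # format history with and without the new message; the difference is
    # exactly the text the template adds for this one message
    before = apply_template(chat_msgs, add_assistant_prefix=False)
    after = apply_template(chat_msgs + [{"role": role, "content": content}],
                           add_assistant_prefix=(role == "user"))
    chat_msgs.append({"role": role, "content": content})
    return after[len(before):]   # only the newly formatted part
```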
@@ -191,6 +199,7 @@ int main(int argc, char ** argv) {
    llama_model * model;
    llama_context * ctx;
    llama_context * ctx_guidance = NULL;
    std::vector<llama_chat_msg> chat_msgs;
    g_model = &model;
    g_ctx = &ctx;
@@ -216,6 +225,8 @@ int main(int argc, char ** argv) {
            __func__, n_ctx_train, n_ctx);
    }

    LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());

    // print system information
    {
        LOG_TEE("\n");
@@ -250,16 +261,21 @@ int main(int argc, char ** argv) {

    std::vector<llama_token> embd_inp;

    if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
        LOG("tokenize the prompt\n");
        embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
    } else {
        LOG("use session tokens\n");
        embd_inp = session_tokens;
    }
    {
        auto prompt = params.conversation
            ? chat_add_and_format(model, chat_msgs, "system", params.prompt) // format the system prompt in conversation mode
            : params.prompt;
        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
            LOG("tokenize the prompt\n");
            embd_inp = ::llama_tokenize(ctx, prompt, true, true);
        } else {
            LOG("use session tokens\n");
            embd_inp = session_tokens;
        }

        LOG("prompt: \"%s\"\n", log_tostr(params.prompt));
        LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
        LOG("prompt: \"%s\"\n", log_tostr(prompt));
        LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
    }

    // Should not run without any tokens
    if (embd_inp.empty()) {
@@ -479,6 +495,7 @@ int main(int argc, char ** argv) {
    std::vector<int> input_tokens;  g_input_tokens  = &input_tokens;
    std::vector<int> output_tokens; g_output_tokens = &output_tokens;
    std::ostringstream output_ss;   g_output_ss     = &output_ss;
    std::ostringstream assistant_ss; // for storing current assistant message, used in conversation mode

    // the first thing we will do is to output the prompt, so set color accordingly
    console::set_display(console::prompt);
@@ -794,11 +811,18 @@ int main(int argc, char ** argv) {
                    is_antiprompt = true;
                }

                chat_add_and_format(model, chat_msgs, "system", assistant_ss.str());
                is_interacting = true;
                printf("\n");
            }
        }

        // if current token is not EOG, we add it to current assistant message
        if (params.conversation) {
            auto id = llama_sampling_last(ctx_sampling);
            assistant_ss << llama_token_to_piece(ctx, id, false);
        }

        if (n_past > 0 && is_interacting) {
            LOG("waiting for user input\n");
@@ -849,8 +873,12 @@ int main(int argc, char ** argv) {
                string_process_escapes(buffer);
            }

            std::string user_inp = params.conversation
                ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer))
                : std::move(buffer);
            // TODO: one inconvenience of the current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
            const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
            const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
            const auto line_inp = ::llama_tokenize(ctx, user_inp, false, params.conversation);
            const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);

            LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
@@ -865,6 +893,9 @@ int main(int argc, char ** argv) {
                output_ss << llama_token_to_piece(ctx, token);
            }

            // reset assistant message
            assistant_ss.str("");

            n_remain -= line_inp.size();
            LOG("n_remain: %d\n", n_remain);
        } else {
@@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
    return minItems === 0 ? `(${result})?` : result;
}

function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
    const hasMin = minValue !== null;
    const hasMax = maxValue !== null;

    function digitRange(fromChar, toChar) {
        out.push("[");
        if (fromChar === toChar) {
            out.push(fromChar);
        } else {
            out.push(fromChar);
            out.push("-");
            out.push(toChar);
        }
        out.push("]");
    }

    function moreDigits(minDigits, maxDigits) {
        out.push("[0-9]");
        if (minDigits === maxDigits && minDigits === 1) {
            return;
        }
        out.push("{");
        out.push(minDigits.toString());
        if (maxDigits !== minDigits) {
            out.push(",");
            if (maxDigits !== Number.MAX_SAFE_INTEGER) {
                out.push(maxDigits.toString());
            }
        }
        out.push("}");
    }

    function uniformRange(fromStr, toStr) {
        let i = 0;
        while (i < fromStr.length && fromStr[i] === toStr[i]) {
            i++;
        }
        if (i > 0) {
            out.push("\"");
            out.push(fromStr.slice(0, i));
            out.push("\"");
        }
        if (i < fromStr.length) {
            if (i > 0) {
                out.push(" ");
            }
            const subLen = fromStr.length - i - 1;
            if (subLen > 0) {
                const fromSub = fromStr.slice(i + 1);
                const toSub = toStr.slice(i + 1);
                const subZeros = "0".repeat(subLen);
                const subNines = "9".repeat(subLen);

                let toReached = false;
                out.push("(");
                if (fromSub === subZeros) {
                    digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
                    out.push(" ");
                    moreDigits(subLen, subLen);
                } else {
                    out.push("[");
                    out.push(fromStr[i]);
                    out.push("] ");
                    out.push("(");
                    uniformRange(fromSub, subNines);
                    out.push(")");
                    if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
                        out.push(" | ");
                        if (toSub === subNines) {
                            digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
                            toReached = true;
                        } else {
                            digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
                        }
                        out.push(" ");
                        moreDigits(subLen, subLen);
                    }
                }
                if (!toReached) {
                    out.push(" | ");
                    digitRange(toStr[i], toStr[i]);
                    out.push(" ");
                    uniformRange(subZeros, toSub);
                }
                out.push(")");
            } else {
                out.push("[");
                out.push(fromStr[i]);
                out.push("-");
                out.push(toStr[i]);
                out.push("]");
            }
        }
    }

    if (hasMin && hasMax) {
        if (minValue < 0 && maxValue < 0) {
            out.push("\"-\" (");
            _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
            out.push(")");
            return;
        }

        if (minValue < 0) {
            out.push("\"-\" (");
            _generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
            out.push(") | ");
            minValue = 0;
        }

        let minS = minValue.toString();
        const maxS = maxValue.toString();
        const minDigits = minS.length;
        const maxDigits = maxS.length;

        for (let digits = minDigits; digits < maxDigits; digits++) {
            uniformRange(minS, "9".repeat(digits));
            minS = "1" + "0".repeat(digits);
            out.push(" | ");
        }
        uniformRange(minS, maxS);
        return;
    }

    const lessDecimals = Math.max(decimalsLeft - 1, 1);

    if (hasMin) {
        if (minValue < 0) {
            out.push("\"-\" (");
            _generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
            out.push(") | [0] | [1-9] ");
            moreDigits(0, decimalsLeft - 1);
        } else if (minValue === 0) {
            if (topLevel) {
                out.push("[0] | [1-9] ");
                moreDigits(0, lessDecimals);
            } else {
                moreDigits(1, decimalsLeft);
            }
        } else if (minValue <= 9) {
            const c = minValue.toString();
            const range_start = topLevel ? '1' : '0';
            if (c > range_start) {
                digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
                out.push(" ");
                moreDigits(1, lessDecimals);
                out.push(" | ");
            }
            digitRange(c, "9");
            out.push(" ");
            moreDigits(0, lessDecimals);
        } else {
            const minS = minValue.toString();
            const length = minS.length;
            const c = minS[0];

            if (c > "1") {
                digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
                out.push(" ");
                moreDigits(length, lessDecimals);
                out.push(" | ");
            }
            digitRange(c, c);
            out.push(" (");
            _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
            out.push(")");
            if (c < "9") {
                out.push(" | ");
                digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
                out.push(" ");
                moreDigits(length - 1, lessDecimals);
            }
        }
        return;
    }

    if (hasMax) {
        if (maxValue >= 0) {
            if (topLevel) {
                out.push("\"-\" [1-9] ");
                moreDigits(0, lessDecimals);
                out.push(" | ");
            }
            _generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
        } else {
            out.push("\"-\" (");
            _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
            out.push(")");
        }
        return;
    }

    throw new Error("At least one of minValue or maxValue must be set");
}

class BuiltinRule {
    constructor(content, deps) {
        this.content = content;
@@ -337,6 +532,64 @@ export class SchemaConverter {
        return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
    }

    _notStrings(strings) {
        class TrieNode {
            constructor() {
                this.children = {};
                this.isEndOfString = false;
            }

            insert(str) {
                let node = this;
                for (const c of str) {
                    node = node.children[c] = node.children[c] || new TrieNode();
                }
                node.isEndOfString = true;
            }
        }

        const trie = new TrieNode();
        for (const s of strings) {
            trie.insert(s);
        }

        const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
        const out = ['["] ( '];

        const visit = (node) => {
            const rejects = [];
            let first = true;
            for (const c of Object.keys(node.children).sort()) {
                const child = node.children[c];
                rejects.push(c);
                if (first) {
                    first = false;
                } else {
                    out.push(' | ');
                }
                out.push(`[${c}]`);
                if (Object.keys(child.children).length > 0) {
                    out.push(' (');
                    visit(child);
                    out.push(')');
                } else if (child.isEndOfString) {
                    out.push(` ${charRuleName}+`);
                }
            }
            if (Object.keys(node.children).length > 0) {
                if (!first) {
                    out.push(' | ');
                }
                out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
            }
        };

        visit(trie);

        out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
        return out.join('');
    }

    _resolveRef(ref) {
        let refName = ref.split('/').pop();
        if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
@@ -363,11 +616,11 @@ export class SchemaConverter {
        } else if (schema.oneOf || schema.anyOf) {
            return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
        } else if (Array.isArray(schemaType)) {
            return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
            return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
        } else if ('const' in schema) {
            return this._addRule(ruleName, this._generateConstantRule(schema.const));
            return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
        } else if ('enum' in schema) {
            const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
            const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
            return this._addRule(ruleName, rule);
        } else if ((schemaType === undefined || schemaType === 'object') &&
                   ('properties' in schema ||
@@ -404,7 +657,7 @@ export class SchemaConverter {
                }
            }

            return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
            return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
        } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
            const items = schema.items ?? schema.prefixItems;
            if (Array.isArray(items)) {
@@ -435,6 +688,24 @@ export class SchemaConverter {
            const minLen = schema.minLength || 0;
            const maxLen = schema.maxLength;
            return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
        } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
            let minValue = null;
            let maxValue = null;
            if ('minimum' in schema) {
                minValue = schema.minimum;
            } else if ('exclusiveMinimum' in schema) {
                minValue = schema.exclusiveMinimum + 1;
            }
            if ('maximum' in schema) {
                maxValue = schema.maximum;
            } else if ('exclusiveMaximum' in schema) {
                maxValue = schema.exclusiveMaximum - 1;
            }

            const out = ["("];
            _generateMinMaxInt(minValue, maxValue, out);
            out.push(") space");
            return this._addRule(ruleName, out.join(''));
        } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
            return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
        } else {
@@ -480,12 +751,19 @@ export class SchemaConverter {
        const requiredProps = sortedProps.filter(k => required.has(k));
        const optionalProps = sortedProps.filter(k => !required.has(k));

        if (typeof additionalProperties === 'object' || additionalProperties === true) {
        if (additionalProperties !== false) {
            const subName = `${name ?? ''}${name ? '-' : ''}additional`;
            const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
            const valueRule =
                additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
                : this._addPrimitive('value', PRIMITIVE_RULES['value']);

            const key_rule =
                sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
                : this._addRule(`${subName}-k`, this._notStrings(sortedProps));

            propKvRuleNames['*'] = this._addRule(
                `${subName}-kv`,
                `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
                `${key_rule} ":" space ${valueRule}`);
            optionalProps.push('*');
        }
@@ -502,15 +780,11 @@ export class SchemaConverter {
            const [k, ...rest] = ks;
            const kvRuleName = propKvRuleNames[k];
            let res;
            if (k === '*') {
                res = this._addRule(
                    `${name ?? ''}${name ? '-' : ''}additional-kvs`,
                    `${kvRuleName} ( "," space ` + kvRuleName + ` )*`
                )
            } else if (firstIsOptional) {
                res = `( "," space ${kvRuleName} )?`;
            const commaRef = `( "," space ${kvRuleName} )`;
            if (firstIsOptional) {
                res = commaRef + (k === '*' ? '*' : '?');
            } else {
                res = kvRuleName;
                res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
            }
            if (rest.length > 0) {
                res += ' ' + this._addRule(
                    `${name ?? ''}${name ? '-' : ''}${k}-rest`,
@@ -3,6 +3,13 @@

by Humans for All.

## quickstart

To run from the build dir

bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat

Continue reading for the details.

## overview
@@ -14,6 +21,8 @@ own system prompts.
This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
or potentially as it is being generated, in a streamed manner from the server/ai-model.

![Chat and Settings screens](./simplechat_screens.webp "Chat and Settings screens")

Auto saves the chat session locally as and when the chat is progressing and in turn at a later time when you
open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
@@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t
The histogram/freq based trimming logic is currently tuned for the english language wrt its
is-it-an-alphabetic|numeral-char regex match logic.

chatRequestOptions - maintains the list of options/fields to send along with chat request,
apiRequestOptions - maintains the list of options/fields to send along with api request,
irrespective of whether /chat/completions or /completions endpoint.

If you want to add additional options/fields to send to the server/ai-model, and or
modify the existing options value or remove them, for now you can update this global var
using browser's development-tools/console.

For string and numeric fields in chatRequestOptions, including even those added by a user
at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto
For string, numeric and boolean fields in apiRequestOptions, including even those added by a
user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto
created.

cache_prompt option supported by example/server is allowed to be controlled by user, so that
any caching supported wrt system-prompt and chat history, if usable can get used. When chat
history sliding window is enabled, cache_prompt logic may or may not kick in at the backend
wrt same, based on aspects related to model, positional encoding, attention mechanism et al.
However the system prompt should ideally get the benefit of caching.

headers - maintains the list of http headers sent when request is made to the server. By default
Content-Type is set to application/json. Additionally Authorization entry is provided, which can
be set if needed using the settings ui.
@@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t
    >0 : Send the latest chat history from the latest system prompt, limited to specified cnt.


By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the
implications of loading of the ai-model's context window by chat history, wrt chat response to
some extent in a simple crude way. You may also want to control the context size enabled when
the server loads ai-model, on the server end.
By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
the implications of loading of the ai-model's context window by chat history, wrt chat response to
some extent in a simple crude way. You may also want to control the context size enabled when the
server loads ai-model, on the server end.


Sometimes the browser may be stubborn with caching of the file, so your updates to html/css/js
@@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side.
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
to /completions endpoint handling code on server side.

NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions
wrt the set of fields sent to server along with the user query. To check how the model behaves
NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
wrt the set of fields sent to server along with the user query, to check how the model behaves
wrt repetitions in general in the generated text response.

An end-user can change these behaviours by editing gMe from browser's devel-tool/console or by
using the providing settings ui.
using the provided settings ui (for settings exposed through the ui).


### OpenAi / Equivalent API WebService

@@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below.
* the baseUrl in settings ui
    * https://api.openai.com/v1 or similar

* Wrt request body - gMe.chatRequestOptions
* Wrt request body - gMe.apiRequestOptions
    * model (settings ui)
    * any additional fields if required in future
@@ -222,8 +222,8 @@ class SimpleChat {
     * @param {Object} obj
     */
    request_jsonstr_extend(obj) {
        for(let k in gMe.chatRequestOptions) {
            obj[k] = gMe.chatRequestOptions[k];
        for(let k in gMe.apiRequestOptions) {
            obj[k] = gMe.apiRequestOptions[k];
        }
        if (gMe.bStream) {
            obj["stream"] = true;
@@ -740,11 +740,12 @@ class Me {
            "Authorization": "", // Authorization: Bearer OPENAI_API_KEY
        }
        // Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
        this.chatRequestOptions = {
        this.apiRequestOptions = {
            "model": "gpt-3.5-turbo",
            "temperature": 0.7,
            "max_tokens": 1024,
            "n_predict": 1024,
            "cache_prompt": false,
            //"frequency_penalty": 1.2,
            //"presence_penalty": 1.2,
        };
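These defaults go out with every request; `max_tokens` matters for /chat/completions, while `n_predict` and `cache_prompt` are llama-server /completions fields. A hedged Python sketch of the resulting request body (endpoint path and field set approximate, assuming a local llama-server on port 8080):

```python
import json, requests

body = {
    "model": "gpt-3.5-turbo",
    "temperature": 0.7,
    "max_tokens": 1024,      # honored by /chat/completions
    "n_predict": 1024,       # honored by llama-server /completions
    "cache_prompt": False,
    "messages": [{"role": "user", "content": "Hello"}],
}
r = requests.post("http://127.0.0.1:8080/chat/completions",
                  headers={"Content-Type": "application/json"},
                  data=json.dumps(body))
print(r.json())
```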
@@ -800,51 +801,55 @@ class Me {

        ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);

        ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);

        ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);

        ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);

        ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);

        ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);

        ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);

        ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);

        ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);

    }

    ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv);
    ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv);
    ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);

    }

    /**
     * Auto create ui input elements for fields in ChatRequestOptions
     * Auto create ui input elements for fields in apiRequestOptions
     * Currently supports text and number field types.
     * @param {HTMLDivElement} elDiv
     */
    show_settings_chatrequestoptions(elDiv) {
    show_settings_apirequestoptions(elDiv) {
        let typeDict = {
            "string": "text",
            "number": "number",
        };
        let fs = document.createElement("fieldset");
        let legend = document.createElement("legend");
        legend.innerText = "ChatRequestOptions";
        legend.innerText = "ApiRequestOptions";
        fs.appendChild(legend);
        elDiv.appendChild(fs);
        for(const k in this.chatRequestOptions) {
            let val = this.chatRequestOptions[k];
        for(const k in this.apiRequestOptions) {
            let val = this.apiRequestOptions[k];
            let type = typeof(val);
            if (!((type == "string") || (type == "number"))) {
                continue;
            if (((type == "string") || (type == "number"))) {
                let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{
                    if (type == "number") {
                        val = Number(val);
                    }
                    this.apiRequestOptions[k] = val;
                });
                fs.appendChild(inp.div);
            } else if (type == "boolean") {
                let bbtn = ui.el_creatediv_boolbutton(`Set${k}`, k, {true: "true", false: "false"}, val, (userVal)=>{
                    this.apiRequestOptions[k] = userVal;
                });
                fs.appendChild(bbtn.div);
            }
            let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{
                if (type == "number") {
                    val = Number(val);
                }
                this.chatRequestOptions[k] = val;
            });
            fs.appendChild(inp.div);
        }
    }
@@ -870,6 +875,23 @@ class Me {
        });
        elDiv.appendChild(bb.div);

        bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
            this.bTrimGarbage = val;
        });
        elDiv.appendChild(bb.div);

        this.show_settings_apirequestoptions(elDiv);

        let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
            this.apiEP = ApiEP.Type[val];
        });
        elDiv.appendChild(sel.div);

        sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
            this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
        });
        elDiv.appendChild(sel.div);

        bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
            this.bCompletionFreshChatAlways = val;
        });
@@ -880,23 +902,6 @@ class Me {
        });
        elDiv.appendChild(bb.div);

        bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
            this.bTrimGarbage = val;
        });
        elDiv.appendChild(bb.div);

        let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
            this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
        });
        elDiv.appendChild(sel.div);

        sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
            this.apiEP = ApiEP.Type[val];
        });
        elDiv.appendChild(sel.div);

        this.show_settings_chatrequestoptions(elDiv);

    }

}
BIN	examples/server/public_simplechat/simplechat_screens.webp (new binary file, 21 KiB; not shown)
@@ -2607,17 +2607,9 @@ int main(int argc, char ** argv) {

    // print sample chat example to make it clear which template is used
    {
        json chat;
        chat.push_back({{"role", "system"},    {"content", "You are a helpful assistant"}});
        chat.push_back({{"role", "user"},      {"content", "Hello"}});
        chat.push_back({{"role", "assistant"}, {"content", "Hi there"}});
        chat.push_back({{"role", "user"},      {"content", "How are you?"}});

        const std::string chat_example = format_chat(ctx_server.model, params.chat_template, chat);

        LOG_INFO("chat template", {
            {"chat_example", chat_example},
            {"built_in", params.chat_template.empty()},
            {"chat_example", llama_chat_format_example(ctx_server.model, params.chat_template)},
            {"built_in", params.chat_template.empty()},
        });
    }
@@ -82,7 +82,7 @@ Feature: llama.cpp server

    Examples: Prompts
      | response_format | n_predicted | re_content |
      | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
      | {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" |
      | {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
      | {"type": "json_object"} | 10 | \{ " Jacky. |
@@ -118,36 +118,17 @@ static inline void server_log(const char * level, const char * function, int lin

// Format given chat. If tmpl is empty, we take the template from model metadata
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
    size_t alloc_size = 0;
    // vector holding all allocated string to be passed to llama_chat_apply_template
    std::vector<std::string> str(messages.size() * 2);
    std::vector<llama_chat_message> chat(messages.size());
    std::vector<llama_chat_msg> chat;

    for (size_t i = 0; i < messages.size(); ++i) {
        const auto & curr_msg = messages[i];
        str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
        str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
        alloc_size += str[i*2 + 1].length();
        chat[i].role    = str[i*2 + 0].c_str();
        chat[i].content = str[i*2 + 1].c_str();
        std::string role = json_value(curr_msg, "role", std::string(""));
        std::string content = json_value(curr_msg, "content", std::string(""));
        chat.push_back({role, content});
    }

    const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
    std::vector<char> buf(alloc_size * 2);

    // run the first time to get the total output length
    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());

    // if it turns out that our buffer is too small, we resize it
    if ((size_t) res > buf.size()) {
        buf.resize(res);
        res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
    }

    const std::string formatted_chat(buf.data(), res);

    auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
    LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});

    return formatted_chat;
}