mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 01:54:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/llama-server-cuda.Dockerfile # .devops/llama-server-rocm.Dockerfile # .devops/llama-server-vulkan.Dockerfile # .devops/llama-server.Dockerfile # .github/workflows/docker.yml # README.md # llama.cpp # tests/test-chat-template.cpp # tests/test-grammar-integration.cpp # tests/test-json-schema-to-grammar.cpp # tests/test-llama-grammar.cpp
This commit is contained in:
commit
f3dfa96dbc
29 changed files with 2097 additions and 431 deletions
|
@ -1264,11 +1264,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
return true;
|
||||
}
|
||||
// cvector params
|
||||
if (arg == "--completions-file") {
|
||||
CHECK_ARG
|
||||
params.cvector_completions_file = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "--positive-file") {
|
||||
CHECK_ARG
|
||||
params.cvector_positive_file = argv[i];
|
||||
|
@ -1279,11 +1274,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
params.cvector_negative_file = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "--completions") {
|
||||
CHECK_ARG
|
||||
params.n_completions = std::stoi(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--pca-batch") {
|
||||
CHECK_ARG
|
||||
params.n_pca_batch = std::stoi(argv[i]);
|
||||
|
@ -1294,6 +1284,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
params.n_pca_iterations = std::stoi(argv[i]);
|
||||
return true;
|
||||
}
|
||||
if (arg == "--method") {
|
||||
CHECK_ARG
|
||||
std::string value(argv[i]);
|
||||
/**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; }
|
||||
else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; }
|
||||
else { invalid_param = true; }
|
||||
return true;
|
||||
}
|
||||
#ifndef LOG_DISABLE_LOGS
|
||||
// Parse args for logging parameters
|
||||
if (log_param_single_parse(argv[i])) {
|
||||
|
@ -1445,7 +1443,10 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
|||
options.push_back({ "main", " --cfg-negative-prompt-file FNAME",
|
||||
"negative prompt file to use for guidance" });
|
||||
options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale });
|
||||
|
||||
options.push_back({ "main", " --chat-template JINJA_TEMPLATE",
|
||||
"set custom jinja chat template (default: template taken from model's metadata)\n"
|
||||
"only commonly used templates are accepted:\n"
|
||||
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
|
||||
options.push_back({ "grammar" });
|
||||
options.push_back({ "*", " --grammar GRAMMAR", "BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", sparams.grammar.c_str() });
|
||||
options.push_back({ "*", " --grammar-file FNAME", "file to read grammar from" });
|
||||
|
@ -1624,11 +1625,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
|||
options.push_back({ "cvector", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() });
|
||||
options.push_back({ "cvector", " --positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
|
||||
options.push_back({ "cvector", " --negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
|
||||
options.push_back({ "cvector", " --completions-file FNAME",
|
||||
"completions file (default: '%s')", params.cvector_completions_file.c_str() });
|
||||
options.push_back({ "cvector", " --completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
|
||||
options.push_back({ "cvector", " --pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
|
||||
options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
|
||||
options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" });
|
||||
|
||||
printf("usage: %s [options]\n", argv[0]);
|
||||
|
||||
|
@ -2605,12 +2604,67 @@ bool llama_should_add_bos_token(const llama_model * model) {
|
|||
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
||||
}
|
||||
|
||||
//
|
||||
// Chat template utils
|
||||
//
|
||||
|
||||
bool llama_chat_verify_template(const std::string & tmpl) {
|
||||
llama_chat_message chat[] = {{"user", "test"}};
|
||||
int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
|
||||
return res >= 0;
|
||||
}
|
||||
|
||||
std::string llama_chat_apply_template(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & msgs,
|
||||
bool add_ass) {
|
||||
int alloc_size = 0;
|
||||
std::vector<llama_chat_message> chat;
|
||||
for (auto & msg : msgs) {
|
||||
chat.push_back({msg.role.c_str(), msg.content.c_str()});
|
||||
alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
|
||||
}
|
||||
|
||||
const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
|
||||
std::vector<char> buf(alloc_size);
|
||||
|
||||
// run the first time to get the total output length
|
||||
int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
|
||||
|
||||
// if it turns out that our buffer is too small, we resize it
|
||||
if ((size_t) res > buf.size()) {
|
||||
buf.resize(res);
|
||||
res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
|
||||
}
|
||||
|
||||
std::string formatted_chat(buf.data(), res);
|
||||
return formatted_chat;
|
||||
}
|
||||
|
||||
std::string llama_chat_format_single(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & past_msg,
|
||||
const llama_chat_msg & new_msg,
|
||||
bool add_ass) {
|
||||
auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
|
||||
std::vector<llama_chat_msg> chat_new(past_msg);
|
||||
chat_new.push_back(new_msg);
|
||||
auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
|
||||
auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
|
||||
return formatted;
|
||||
}
|
||||
|
||||
std::string llama_chat_format_example(const struct llama_model * model,
|
||||
const std::string & tmpl) {
|
||||
std::vector<llama_chat_msg> msgs = {
|
||||
{"system", "You are a helpful assistant"},
|
||||
{"user", "Hello"},
|
||||
{"assistant", "Hi there"},
|
||||
{"user", "How are you?"},
|
||||
};
|
||||
return llama_chat_apply_template(model, tmpl, msgs, true);
|
||||
}
|
||||
|
||||
//
|
||||
// KV cache utils
|
||||
//
|
||||
|
|
|
@ -48,6 +48,12 @@ int32_t cpu_get_num_math();
|
|||
// CLI argument parsing
|
||||
//
|
||||
|
||||
// dimensionality reduction methods, used by cvector-generator
// (selected on the command line with --method {pca,mean})
enum dimre_method {
    DIMRE_METHOD_PCA  = 0,
    DIMRE_METHOD_MEAN = 1,
};
|
||||
|
||||
struct gpt_params {
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
||||
|
||||
|
@ -255,13 +261,12 @@ struct gpt_params {
|
|||
bool compute_ppl = true; // whether to compute perplexity
|
||||
|
||||
// cvector-generator params
|
||||
int n_completions = 64;
|
||||
int n_pca_batch = 20;
|
||||
int n_pca_batch = 100;
|
||||
int n_pca_iterations = 1000;
|
||||
std::string cvector_outfile = "control_vector.gguf";
|
||||
std::string cvector_completions_file = "examples/cvector-generator/completions.txt";
|
||||
std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
|
||||
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
|
||||
dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
|
||||
std::string cvector_outfile = "control_vector.gguf";
|
||||
std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
|
||||
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
|
||||
};
|
||||
|
||||
void gpt_params_handle_model_default(gpt_params & params);
|
||||
|
@ -382,9 +387,32 @@ bool llama_should_add_bos_token(const llama_model * model);
|
|||
// Chat template utils
|
||||
//
|
||||
|
||||
// Same role/content pair as llama_chat_message, but stored as std::string
struct llama_chat_msg {
    std::string role;    // e.g. "system", "user" or "assistant"
    std::string content; // text of the message
};
|
||||
|
||||
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
||||
bool llama_chat_verify_template(const std::string & tmpl);
|
||||
|
||||
// CPP wrapper for llama_chat_apply_template
|
||||
std::string llama_chat_apply_template(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & chat,
|
||||
bool add_ass);
|
||||
|
||||
// Format single message, while taking into account the position of that message in chat history
|
||||
std::string llama_chat_format_single(const struct llama_model * model,
|
||||
const std::string & tmpl,
|
||||
const std::vector<llama_chat_msg> & past_msg,
|
||||
const llama_chat_msg & new_msg,
|
||||
bool add_ass);
|
||||
|
||||
// Returns an example of formatted chat
|
||||
std::string llama_chat_format_example(const struct llama_model * model,
|
||||
const std::string & tmpl);
|
||||
|
||||
//
|
||||
// KV cache utils
|
||||
//
|
||||
|
|
|
@ -40,6 +40,233 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards.
 *
 * The view does not own any characters: it keeps a reference to a std::string plus the
 * half-open index range [_start, _end) into it, so the referenced string has to outlive
 * the view.
 */
class string_view {
    const std::string & _str;
    const size_t _start;
    const size_t _end;
public:
    // `end == npos` means "up to the end of `str`"
    string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}

    // number of characters covered by the view
    size_t size() const {
        return _end - _start;
    }

    size_t length() const {
        return size();
    }

    operator std::string() const {
        return str();
    }

    // copy of the viewed characters
    std::string str() const {
        return _str.substr(_start, _end - _start);
    }

    // sub-view starting `pos` characters into this view; `len == npos` keeps this view's end
    string_view substr(size_t pos, size_t len = std::string::npos) const {
        return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
    }

    // bounds-checked access; throws std::out_of_range when `pos` is past the end of the view
    char operator[](size_t pos) const {
        const auto index = _start + pos;
        if (index >= _end) {
            throw std::out_of_range("string_view index out of range");
        }
        return _str[index];
    }

    // compares the viewed characters in place instead of materializing two std::string copies
    bool operator==(const string_view & other) const {
        return _str.compare(_start, size(), other._str, other._start, other.size()) == 0;
    }
};
|
||||
|
||||
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
||||
auto has_min = min_value != std::numeric_limits<int>::min();
|
||||
auto has_max = max_value != std::numeric_limits<int>::max();
|
||||
|
||||
auto digit_range = [&](char from, char to) {
|
||||
out << "[";
|
||||
if (from == to) {
|
||||
out << from;
|
||||
} else {
|
||||
out << from << "-" << to;
|
||||
}
|
||||
out << "]";
|
||||
};
|
||||
auto more_digits = [&](int min_digits, int max_digits) {
|
||||
out << "[0-9]";
|
||||
if (min_digits == max_digits && min_digits == 1) {
|
||||
return;
|
||||
}
|
||||
out << "{";
|
||||
out << min_digits;
|
||||
if (max_digits != min_digits) {
|
||||
out << ",";
|
||||
if (max_digits != std::numeric_limits<int>::max()) {
|
||||
out << max_digits;
|
||||
}
|
||||
}
|
||||
out << "}";
|
||||
};
|
||||
std::function<void(const string_view &, const string_view &)> uniform_range =
|
||||
[&](const string_view & from, const string_view & to) {
|
||||
size_t i = 0;
|
||||
while (i < from.length() && i < to.length() && from[i] == to[i]) {
|
||||
i++;
|
||||
}
|
||||
if (i > 0) {
|
||||
out << "\"" << from.substr(0, i).str() << "\"";
|
||||
}
|
||||
if (i < from.length() && i < to.length()) {
|
||||
if (i > 0) {
|
||||
out << " ";
|
||||
}
|
||||
auto sub_len = from.length() - i - 1;
|
||||
if (sub_len > 0) {
|
||||
auto from_sub = from.substr(i + 1);
|
||||
auto to_sub = to.substr(i + 1);
|
||||
auto sub_zeros = repeat("0", sub_len);
|
||||
auto sub_nines = repeat("9", sub_len);
|
||||
|
||||
auto to_reached = false;
|
||||
out << "(";
|
||||
if (from_sub == sub_zeros) {
|
||||
digit_range(from[i], to[i] - 1);
|
||||
out << " ";
|
||||
more_digits(sub_len, sub_len);
|
||||
} else {
|
||||
out << "[" << from[i] << "] ";
|
||||
out << "(";
|
||||
uniform_range(from_sub, sub_nines);
|
||||
out << ")";
|
||||
if (from[i] < to[i] - 1) {
|
||||
out << " | ";
|
||||
if (to_sub == sub_nines) {
|
||||
digit_range(from[i] + 1, to[i]);
|
||||
to_reached = true;
|
||||
} else {
|
||||
digit_range(from[i] + 1, to[i] - 1);
|
||||
}
|
||||
out << " ";
|
||||
more_digits(sub_len, sub_len);
|
||||
}
|
||||
}
|
||||
if (!to_reached) {
|
||||
out << " | ";
|
||||
digit_range(to[i], to[i]);
|
||||
out << " ";
|
||||
uniform_range(sub_zeros, to_sub);
|
||||
}
|
||||
out << ")";
|
||||
} else {
|
||||
out << "[" << from[i] << "-" << to[i] << "]";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (has_min && has_max) {
|
||||
if (min_value < 0 && max_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
out << ")";
|
||||
return;
|
||||
}
|
||||
|
||||
if (min_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
|
||||
out << ") | ";
|
||||
min_value = 0;
|
||||
}
|
||||
|
||||
auto min_s = std::to_string(min_value);
|
||||
auto max_s = std::to_string(max_value);
|
||||
auto min_digits = min_s.length();
|
||||
auto max_digits = max_s.length();
|
||||
|
||||
for (auto digits = min_digits; digits < max_digits; digits++) {
|
||||
uniform_range(min_s, repeat("9", digits));
|
||||
min_s = "1" + repeat("0", digits);
|
||||
out << " | ";
|
||||
}
|
||||
uniform_range(min_s, max_s);
|
||||
return;
|
||||
}
|
||||
|
||||
auto less_decimals = std::max(decimals_left - 1, 1);
|
||||
|
||||
if (has_min) {
|
||||
if (min_value < 0) {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
||||
out << ") | [0] | [1-9] ";
|
||||
more_digits(0, decimals_left - 1);
|
||||
} else if (min_value == 0) {
|
||||
if (top_level) {
|
||||
out << "[0] | [1-9] ";
|
||||
more_digits(0, less_decimals);
|
||||
} else {
|
||||
more_digits(1, decimals_left);
|
||||
}
|
||||
} else if (min_value <= 9) {
|
||||
char c = '0' + min_value;
|
||||
auto range_start = top_level ? '1' : '0';
|
||||
if (c > range_start) {
|
||||
digit_range(range_start, c - 1);
|
||||
out << " ";
|
||||
more_digits(1, less_decimals);
|
||||
out << " | ";
|
||||
}
|
||||
digit_range(c, '9');
|
||||
out << " ";
|
||||
more_digits(0, less_decimals);
|
||||
} else {
|
||||
auto min_s = std::to_string(min_value);
|
||||
auto len = min_s.length();
|
||||
auto c = min_s[0];
|
||||
|
||||
if (c > '1') {
|
||||
digit_range(top_level ? '1' : '0', c - 1);
|
||||
out << " ";
|
||||
more_digits(len, less_decimals);
|
||||
out << " | ";
|
||||
}
|
||||
digit_range(c, c);
|
||||
out << " (";
|
||||
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
|
||||
out << ")";
|
||||
if (c < '9') {
|
||||
out << " | ";
|
||||
digit_range(c + 1, '9');
|
||||
out << " ";
|
||||
more_digits(len - 1, less_decimals);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (has_max) {
|
||||
if (max_value >= 0) {
|
||||
if (top_level) {
|
||||
out << "\"-\" [1-9] ";
|
||||
more_digits(0, less_decimals);
|
||||
out << " | ";
|
||||
}
|
||||
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
||||
} else {
|
||||
out << "\"-\" (";
|
||||
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
|
||||
out << ")";
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
throw std::runtime_error("At least one of min_value or max_value must be set");
|
||||
}
|
||||
|
||||
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
|
||||
|
||||
struct BuiltinRule {
|
||||
|
@ -160,7 +387,6 @@ static std::string format_literal(const std::string & literal) {
|
|||
return "\"" + escaped + "\"";
|
||||
}
|
||||
|
||||
|
||||
class SchemaConverter {
|
||||
private:
|
||||
std::function<json(const std::string &)> _fetch_json;
|
||||
|
@ -388,6 +614,75 @@ private:
|
|||
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
||||
}
|
||||
|
||||
/*
|
||||
Returns a rule that matches a JSON string that is none of the provided strings
|
||||
|
||||
not_strings({"a"})
|
||||
-> ["] ( [a] char+ | [^"a] char* )? ["] space
|
||||
not_strings({"and", "also"})
|
||||
-> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
|
||||
*/
|
||||
std::string _not_strings(const std::vector<std::string> & strings) {
|
||||
|
||||
struct TrieNode {
|
||||
std::map<char, TrieNode> children;
|
||||
bool is_end_of_string;
|
||||
|
||||
TrieNode() : is_end_of_string(false) {}
|
||||
|
||||
void insert(const std::string & string) {
|
||||
auto node = this;
|
||||
for (char c : string) {
|
||||
node = &node->children[c];
|
||||
}
|
||||
node->is_end_of_string = true;
|
||||
}
|
||||
};
|
||||
|
||||
TrieNode trie;
|
||||
for (const auto & s : strings) {
|
||||
trie.insert(s);
|
||||
}
|
||||
|
||||
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
|
||||
std::ostringstream out;
|
||||
out << "[\"] ( ";
|
||||
std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
|
||||
std::ostringstream rejects;
|
||||
auto first = true;
|
||||
for (const auto & kv : node.children) {
|
||||
rejects << kv.first;
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out << " | ";
|
||||
}
|
||||
out << "[" << kv.first << "]";
|
||||
if (!kv.second.children.empty()) {
|
||||
out << " (";
|
||||
visit(kv.second);
|
||||
out << ")";
|
||||
} else if (kv.second.is_end_of_string) {
|
||||
out << " " << char_rule << "+";
|
||||
}
|
||||
}
|
||||
if (!node.children.empty()) {
|
||||
if (!first) {
|
||||
out << " | ";
|
||||
}
|
||||
out << "[^\"" << rejects.str() << "] " << char_rule << "*";
|
||||
}
|
||||
};
|
||||
visit(trie);
|
||||
|
||||
out << " )";
|
||||
if (!trie.is_end_of_string) {
|
||||
out << "?";
|
||||
}
|
||||
out << " [\"] space";
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string _resolve_ref(const std::string & ref) {
|
||||
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
||||
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
||||
|
@ -408,6 +703,7 @@ private:
|
|||
std::vector<std::string> required_props;
|
||||
std::vector<std::string> optional_props;
|
||||
std::unordered_map<std::string, std::string> prop_kv_rule_names;
|
||||
std::vector<std::string> prop_names;
|
||||
for (const auto & kv : properties) {
|
||||
const auto &prop_name = kv.first;
|
||||
const auto &prop_schema = kv.second;
|
||||
|
@ -422,11 +718,18 @@ private:
|
|||
} else {
|
||||
optional_props.push_back(prop_name);
|
||||
}
|
||||
prop_names.push_back(prop_name);
|
||||
}
|
||||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
||||
if (!(additional_properties.is_boolean() && !additional_properties.get<bool>())) {
|
||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||
std::string value_rule =
|
||||
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
|
||||
: _add_primitive("value", PRIMITIVE_RULES.at("value"));
|
||||
|
||||
auto key_rule =
|
||||
prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
|
||||
: _add_rule(sub_name + "-k", _not_strings(prop_names));
|
||||
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
|
||||
prop_kv_rule_names["*"] = kv_rule;
|
||||
optional_props.push_back("*");
|
||||
}
|
||||
|
@ -452,15 +755,11 @@ private:
|
|||
}
|
||||
std::string k = ks[0];
|
||||
std::string kv_rule_name = prop_kv_rule_names[k];
|
||||
if (k == "*") {
|
||||
res = _add_rule(
|
||||
name + (name.empty() ? "" : "-") + "additional-kvs",
|
||||
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
|
||||
);
|
||||
} else if (first_is_optional) {
|
||||
res = "( \",\" space " + kv_rule_name + " )?";
|
||||
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
|
||||
if (first_is_optional) {
|
||||
res = comma_ref + (k == "*" ? "*" : "?");
|
||||
} else {
|
||||
res = kv_rule_name;
|
||||
res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
|
||||
}
|
||||
if (ks.size() > 1) {
|
||||
res += " " + _add_rule(
|
||||
|
@ -594,17 +893,19 @@ public:
|
|||
} else if (schema_type.is_array()) {
|
||||
std::vector<json> schema_types;
|
||||
for (const auto & t : schema_type) {
|
||||
schema_types.push_back({{"type", t}});
|
||||
json schema_copy(schema);
|
||||
schema_copy["type"] = t;
|
||||
schema_types.push_back(schema_copy);
|
||||
}
|
||||
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
||||
} else if (schema.contains("const")) {
|
||||
return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
|
||||
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
|
||||
} else if (schema.contains("enum")) {
|
||||
std::vector<std::string> enum_values;
|
||||
for (const auto & v : schema["enum"]) {
|
||||
enum_values.push_back(_generate_constant_rule(v));
|
||||
}
|
||||
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
|
||||
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
||||
} else if ((schema_type.is_null() || schema_type == "object")
|
||||
&& (schema.contains("properties") ||
|
||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
||||
|
@ -686,6 +987,24 @@ public:
|
|||
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
||||
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
||||
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
||||
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
||||
int min_value = std::numeric_limits<int>::min();
|
||||
int max_value = std::numeric_limits<int>::max();
|
||||
if (schema.contains("minimum")) {
|
||||
min_value = schema["minimum"].get<int>();
|
||||
} else if (schema.contains("exclusiveMinimum")) {
|
||||
min_value = schema["exclusiveMinimum"].get<int>() + 1;
|
||||
}
|
||||
if (schema.contains("maximum")) {
|
||||
max_value = schema["maximum"].get<int>();
|
||||
} else if (schema.contains("exclusiveMaximum")) {
|
||||
max_value = schema["exclusiveMaximum"].get<int>() - 1;
|
||||
}
|
||||
std::stringstream out;
|
||||
out << "(";
|
||||
_build_min_max_int(min_value, max_value, out);
|
||||
out << ") space";
|
||||
return _add_rule(rule_name, out.str());
|
||||
} else if (schema.empty() || schema_type == "object") {
|
||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||
} else {
|
||||
|
|
|
@ -28,9 +28,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_
|
|||
|
||||
std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());
|
||||
|
||||
result->grammar = llama_grammar_init(
|
||||
struct llama_grammar * grammar = llama_grammar_init(
|
||||
grammar_rules.data(),
|
||||
grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root"));
|
||||
if (grammar == nullptr) {
|
||||
throw std::runtime_error("Failed to initialize llama_grammar");
|
||||
}
|
||||
result->grammar = grammar;
|
||||
}
|
||||
|
||||
result->prev.resize(params.n_prev);
|
||||
|
@ -59,9 +63,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
|
|||
if (!ctx->parsed_grammar.rules.empty()) {
|
||||
std::vector<const llama_grammar_element *> grammar_rules(ctx->parsed_grammar.c_rules());
|
||||
|
||||
ctx->grammar = llama_grammar_init(
|
||||
struct llama_grammar * grammar = llama_grammar_init(
|
||||
grammar_rules.data(),
|
||||
grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root"));
|
||||
if (grammar == nullptr) {
|
||||
throw std::runtime_error("Failed to initialize llama_grammar");
|
||||
}
|
||||
ctx->grammar = grammar;
|
||||
}
|
||||
|
||||
std::fill(ctx->prev.begin(), ctx->prev.end(), 0);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue