Merge commit '88d23ad515' into concedo_experimental

# Conflicts:
#	CODEOWNERS
#	docs/build.md
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	ggml/src/ggml-zendnn/CMakeLists.txt
#	tests/test-chat-template.cpp
This commit is contained in:
Concedo 2026-01-29 22:25:56 +08:00
commit 46cd17c17e
29 changed files with 916 additions and 719 deletions

View file

@ -1298,9 +1298,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
add_opt(common_arg(
{"-kvu", "--kv-unified"},
{"-no-kvu", "--no-kv-unified"},
"use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)",
[](common_params & params) {
params.kv_unified = true;
[](common_params & params, bool value) {
params.kv_unified = value;
}
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED}));
add_opt(common_arg(
@ -2201,18 +2202,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
add_opt(common_arg(
{"--mmap"},
{"--no-mmap"},
string_format("whether to memory-map model. Explicitly enabling mmap disables direct-io. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
string_format("whether to memory-map model. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
[](common_params & params, bool value) {
params.use_mmap = value;
if (value) {
params.use_direct_io = false; // disable direct io when mmap is explicitly enabled
}
}
).set_env("LLAMA_ARG_MMAP"));
add_opt(common_arg(
{"-dio", "--direct-io"},
{"-ndio", "--no-direct-io"},
string_format("use DirectIO if available. Takes precedence over --mmap (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
string_format("use DirectIO if available. (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
[](common_params & params, bool value) {
params.use_direct_io = value;
}

View file

@ -435,7 +435,7 @@ struct common_params {
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool use_mmap = true; // enable mmap to use filesystem cache
bool use_direct_io = true; // read from disk without buffering for faster model loading
bool use_direct_io = false; // read from disk without buffering
bool use_mlock = false; // use mlock to keep model in memory
bool verbose_prompt = false; // print prompt tokens before generation
bool display_prompt = true; // print prompt before generation

View file

@ -44,6 +44,12 @@ static std::string get_line_col(const std::string & source, size_t pos) {
return "line " + std::to_string(line) + ", column " + std::to_string(col);
}
static void ensure_key_type_allowed(const value & val) {
if (!val->is_hashable()) {
throw std::runtime_error("Type: " + val->type() + " is not allowed as object key");
}
}
// execute with error handling
value statement::execute(context & ctx) {
try {
@ -95,20 +101,10 @@ value identifier::execute_impl(context & ctx) {
value object_literal::execute_impl(context & ctx) {
auto obj = mk_val<value_object>();
for (const auto & pair : val) {
value key_val = pair.first->execute(ctx);
if (!is_val<value_string>(key_val) && !is_val<value_int>(key_val)) {
throw std::runtime_error("Object literal: keys must be string or int values, got " + key_val->type());
}
std::string key = key_val->as_string().str();
value key = pair.first->execute(ctx);
value val = pair.second->execute(ctx);
JJ_DEBUG("Object literal: setting key '%s' with value type %s", key.c_str(), val->type().c_str());
JJ_DEBUG("Object literal: setting key '%s' with value type %s", key->as_string().str().c_str(), val->type().c_str());
obj->insert(key, val);
if (is_val<value_int>(key_val)) {
obj->val_obj.is_key_numeric = true;
} else if (obj->val_obj.is_key_numeric) {
throw std::runtime_error("Object literal: cannot mix numeric and non-numeric keys");
}
}
return obj;
}
@ -127,9 +123,9 @@ value binary_expression::execute_impl(context & ctx) {
value right_val = right->execute(ctx);
JJ_DEBUG("Executing binary expression %s '%s' %s", left_val->type().c_str(), op.value.c_str(), right_val->type().c_str());
if (op.value == "==") {
return mk_val<value_bool>(value_compare(left_val, right_val, value_compare_op::eq));
return mk_val<value_bool>(*left_val == *right_val);
} else if (op.value == "!=") {
return mk_val<value_bool>(!value_compare(left_val, right_val, value_compare_op::eq));
return mk_val<value_bool>(!(*left_val == *right_val));
}
auto workaround_concat_null_with_str = [&](value & res) -> bool {
@ -230,7 +226,7 @@ value binary_expression::execute_impl(context & ctx) {
auto & arr = right_val->as_array();
bool member = false;
for (const auto & item : arr) {
if (value_compare(left_val, item, value_compare_op::eq)) {
if (*left_val == *item) {
member = true;
break;
}
@ -265,10 +261,9 @@ value binary_expression::execute_impl(context & ctx) {
}
}
// String in object
if (is_val<value_string>(left_val) && is_val<value_object>(right_val)) {
auto key = left_val->as_string().str();
bool has_key = right_val->has_key(key);
// Value key in object
if (is_val<value_object>(right_val)) {
bool has_key = right_val->has_key(left_val);
if (op.value == "in") {
return mk_val<value_bool>(has_key);
} else if (op.value == "not in") {
@ -465,14 +460,8 @@ value for_statement::execute_impl(context & ctx) {
JJ_DEBUG("%s", "For loop over object keys");
auto & obj = iterable_val->as_ordered_object();
for (auto & p : obj) {
auto tuple = mk_val<value_array>();
if (iterable_val->val_obj.is_key_numeric) {
tuple->push_back(mk_val<value_int>(std::stoll(p.first)));
} else {
tuple->push_back(mk_val<value_string>(p.first));
}
tuple->push_back(p.second);
items.push_back(tuple);
auto tuple = mk_val<value_tuple>(p);
items.push_back(std::move(tuple));
}
if (ctx.is_get_stats) {
iterable_val->stats.used = true;
@ -602,11 +591,13 @@ value set_statement::execute_impl(context & ctx) {
auto rhs = val ? val->execute(ctx) : exec_statements(body, ctx);
if (is_stmt<identifier>(assignee)) {
// case: {% set my_var = value %}
auto var_name = cast_stmt<identifier>(assignee)->val;
JJ_DEBUG("Setting global variable '%s' with value type %s", var_name.c_str(), rhs->type().c_str());
ctx.set_val(var_name, rhs);
} else if (is_stmt<tuple_literal>(assignee)) {
// case: {% set a, b = value %}
auto tuple = cast_stmt<tuple_literal>(assignee);
if (!is_val<value_array>(rhs)) {
throw std::runtime_error("Cannot unpack non-iterable type in set: " + rhs->type());
@ -625,6 +616,7 @@ value set_statement::execute_impl(context & ctx) {
}
} else if (is_stmt<member_expression>(assignee)) {
// case: {% set ns.my_var = value %}
auto member = cast_stmt<member_expression>(assignee);
if (member->computed) {
throw std::runtime_error("Cannot assign to computed member");
@ -767,22 +759,22 @@ value member_expression::execute_impl(context & ctx) {
}
JJ_DEBUG("Member expression on object type %s, property type %s", object->type().c_str(), property->type().c_str());
ensure_key_type_allowed(property);
value val = mk_val<value_undefined>("object_property");
if (is_val<value_undefined>(object)) {
JJ_DEBUG("%s", "Accessing property on undefined object, returning undefined");
return val;
} else if (is_val<value_object>(object)) {
if (!is_val<value_string>(property)) {
throw std::runtime_error("Cannot access object with non-string: got " + property->type());
}
auto key = property->as_string().str();
val = object->at(key, val);
val = object->at(property, val);
if (is_val<value_undefined>(val)) {
val = try_builtin_func(ctx, key, object, true);
}
JJ_DEBUG("Accessed property '%s' value, got type: %s", key.c_str(), val->type().c_str());
} else if (is_val<value_array>(object) || is_val<value_string>(object)) {
if (is_val<value_int>(property)) {
int64_t index = property->as_int();
@ -806,6 +798,7 @@ value member_expression::execute_impl(context & ctx) {
auto key = property->as_string().str();
JJ_DEBUG("Accessing %s built-in '%s'", is_val<value_array>(object) ? "array" : "string", key.c_str());
val = try_builtin_func(ctx, key, object, true);
} else {
throw std::runtime_error("Cannot access property with non-string/non-number: got " + property->type());
}

View file

@ -79,18 +79,18 @@ struct context {
}
value get_val(const std::string & name) {
auto it = env->val_obj.unordered.find(name);
if (it != env->val_obj.unordered.end()) {
return it->second;
} else {
return mk_val<value_undefined>(name);
}
value default_val = mk_val<value_undefined>(name);
return env->at(name, default_val);
}
void set_val(const std::string & name, const value & val) {
env->insert(name, val);
}
void set_val(const value & name, const value & val) {
env->insert(name, val);
}
void print_vars() const {
printf("Context Variables:\n%s\n", value_to_json(env, 2).c_str());
}
@ -344,9 +344,19 @@ struct array_literal : public expression {
}
};
struct tuple_literal : public array_literal {
explicit tuple_literal(statements && val) : array_literal(std::move(val)) {}
struct tuple_literal : public expression {
statements val;
explicit tuple_literal(statements && val) : val(std::move(val)) {
for (const auto& item : this->val) chk_type<expression>(item);
}
std::string type() const override { return "TupleLiteral"; }
value execute_impl(context & ctx) override {
auto arr = mk_val<value_array>();
for (const auto & item_stmt : val) {
arr->push_back(item_stmt->execute(ctx));
}
return mk_val<value_tuple>(std::move(arr->as_array()));
}
};
struct object_literal : public expression {

View file

@ -61,6 +61,12 @@ size_t string::length() const {
return len;
}
void string::hash_update(hasher & hash) const noexcept {
for (const auto & part : parts) {
hash.update(part.val.data(), part.val.length());
}
}
bool string::all_parts_are_input() const {
for (const auto & part : parts) {
if (!part.is_input) {

View file

@ -4,6 +4,8 @@
#include <string>
#include <vector>
#include "utils.h"
namespace jinja {
// allow differentiate between user input strings and template strings
@ -37,6 +39,7 @@ struct string {
std::string str() const;
size_t length() const;
void hash_update(hasher & hash) const noexcept;
bool all_parts_are_input() const;
bool is_uppercase() const;
bool is_lowercase() const;

View file

@ -3,6 +3,8 @@
#include <string>
#include <sstream>
#include <algorithm>
#include <cstdint>
#include <cstring>
namespace jinja {
@ -46,4 +48,102 @@ static std::string fmt_error_with_source(const std::string & tag, const std::str
return oss.str();
}
// Note: this is a simple hasher, not cryptographically secure, just for hash table usage
struct hasher {
static constexpr auto size_t_digits = sizeof(size_t) * 8;
static constexpr size_t prime = size_t_digits == 64 ? 0x100000001b3 : 0x01000193;
static constexpr size_t seed = size_t_digits == 64 ? 0xcbf29ce484222325 : 0x811c9dc5;
static constexpr auto block_size = sizeof(size_t); // in bytes; allowing the compiler to vectorize the computation
static_assert(size_t_digits == 64 || size_t_digits == 32);
static_assert(block_size == 8 || block_size == 4);
uint8_t buffer[block_size];
size_t idx = 0; // current index in buffer
size_t state = seed;
hasher() = default;
hasher(const std::type_info & type_inf) noexcept {
const auto type_hash = type_inf.hash_code();
update(&type_hash, sizeof(type_hash));
}
// Properties:
// - update is not associative: update(a).update(b) != update(b).update(a)
// - update(a ~ b) == update(a).update(b) with ~ as concatenation operator --> useful for streaming
// - update("", 0) --> state unchanged with empty input
hasher& update(void const * bytes, size_t len) noexcept {
const uint8_t * c = static_cast<uint8_t const *>(bytes);
if (len == 0) {
return *this;
}
size_t processed = 0;
// first, fill the existing buffer if it's partial
if (idx > 0) {
size_t to_fill = block_size - idx;
if (to_fill > len) {
to_fill = len;
}
std::memcpy(buffer + idx, c, to_fill);
idx += to_fill;
processed += to_fill;
if (idx == block_size) {
update_block(buffer);
idx = 0;
}
}
// process full blocks from the remaining input
for (; processed + block_size <= len; processed += block_size) {
update_block(c + processed);
}
// buffer any remaining bytes
size_t remaining = len - processed;
if (remaining > 0) {
std::memcpy(buffer, c + processed, remaining);
idx = remaining;
}
return *this;
}
// convenience function for testing only
hasher& update(const std::string & s) noexcept {
return update(s.data(), s.size());
}
// finalize and get the hash value
// note: after calling digest, the hasher state is modified, do not call update() again
size_t digest() noexcept {
// if there are remaining bytes in buffer, fill the rest with zeros and process
if (idx > 0) {
for (size_t i = idx; i < block_size; ++i) {
buffer[i] = 0;
}
update_block(buffer);
idx = 0;
}
return state;
}
private:
// IMPORTANT: block must have at least block_size bytes
void update_block(const uint8_t * block) noexcept {
size_t blk = static_cast<uint32_t>(block[0])
| (static_cast<uint32_t>(block[1]) << 8)
| (static_cast<uint32_t>(block[2]) << 16)
| (static_cast<uint32_t>(block[3]) << 24);
if constexpr (block_size == 8) {
blk = blk | (static_cast<uint64_t>(block[4]) << 32)
| (static_cast<uint64_t>(block[5]) << 40)
| (static_cast<uint64_t>(block[6]) << 48)
| (static_cast<uint64_t>(block[7]) << 56);
}
state ^= blk;
state *= prime;
}
};
} // namespace jinja

View file

@ -163,7 +163,7 @@ static value selectattr(const func_args & args) {
args.ensure_vals<value_array, value_string, value_string, value_string>(true, true, false, false);
auto arr = args.get_pos(0)->as_array();
auto attr_name = args.get_pos(1)->as_string().str();
auto attribute = args.get_pos(1);
auto out = mk_val<value_array>();
value val_default = mk_val<value_undefined>();
@ -173,7 +173,7 @@ static value selectattr(const func_args & args) {
if (!is_val<value_object>(item)) {
throw raised_exception("selectattr: item is not an object");
}
value attr_val = item->at(attr_name, val_default);
value attr_val = item->at(attribute, val_default);
bool is_selected = attr_val->as_bool();
if constexpr (is_reject) is_selected = !is_selected;
if (is_selected) out->push_back(item);
@ -217,7 +217,7 @@ static value selectattr(const func_args & args) {
if (!is_val<value_object>(item)) {
throw raised_exception("selectattr: item is not an object");
}
value attr_val = item->at(attr_name, val_default);
value attr_val = item->at(attribute, val_default);
func_args test_args(args.ctx);
test_args.push_back(attr_val); // attribute value
test_args.push_back(extra_arg); // extra argument
@ -741,6 +741,7 @@ const func_builtins & value_array_t::get_builtins() const {
args.ensure_count(1, 4);
args.ensure_vals<value_array, value_int, value_int, value_int>(true, true, false, false);
auto val = args.get_pos(0);
auto arg0 = args.get_pos(1);
auto arg1 = args.get_pos(2, mk_val<value_undefined>());
auto arg2 = args.get_pos(3, mk_val<value_undefined>());
@ -762,10 +763,8 @@ const func_builtins & value_array_t::get_builtins() const {
if (step == 0) {
throw raised_exception("slice step cannot be zero");
}
auto arr = slice(args.get_pos(0)->as_array(), start, stop, step);
auto res = mk_val<value_array>();
res->val_arr = std::move(arr);
return res;
auto arr = slice(val->as_array(), start, stop, step);
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
}},
{"selectattr", selectattr<false>},
{"select", selectattr<false>},
@ -785,15 +784,14 @@ const func_builtins & value_array_t::get_builtins() const {
}
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
const std::string delim = val_delim->is_undefined() ? "" : val_delim->as_string().str();
const std::string attr_name = attribute->is_undefined() ? "" : attribute->as_string().str();
std::string result;
for (size_t i = 0; i < arr.size(); ++i) {
value val_arr = arr[i];
if (!attribute->is_undefined()) {
if (attr_is_int && is_val<value_array>(val_arr)) {
val_arr = val_arr->at(attr_int);
} else if (!attr_is_int && !attr_name.empty() && is_val<value_object>(val_arr)) {
val_arr = val_arr->at(attr_name);
} else if (!attr_is_int && is_val<value_object>(val_arr)) {
val_arr = val_arr->at(attribute);
}
}
if (!is_val<value_string>(val_arr) && !is_val<value_int>(val_arr) && !is_val<value_float>(val_arr)) {
@ -808,9 +806,7 @@ const func_builtins & value_array_t::get_builtins() const {
}},
{"string", [](const func_args & args) -> value {
args.ensure_vals<value_array>();
auto str = mk_val<value_string>();
gather_string_parts_recursive(args.get_pos(0), str);
return str;
return mk_val<value_string>(args.get_pos(0)->as_string());
}},
{"tojson", tojson},
{"map", [](const func_args & args) -> value {
@ -821,26 +817,26 @@ const func_builtins & value_array_t::get_builtins() const {
if (!is_val<value_kwarg>(args.get_args().at(1))) {
throw not_implemented_exception("map: filter-mapping not implemented");
}
value val = args.get_pos(0);
value attribute = args.get_kwarg_or_pos("attribute", 1);
const bool attr_is_int = is_val<value_int>(attribute);
if (!is_val<value_string>(attribute) && !attr_is_int) {
throw raised_exception("map: attribute must be string or integer");
}
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
const std::string attr_name = attribute->as_string().str();
value default_val = args.get_kwarg("default", mk_val<value_undefined>());
auto out = mk_val<value_array>();
auto arr = args.get_pos(0)->as_array();
auto arr = val->as_array();
for (const auto & item : arr) {
value attr_val;
if (attr_is_int) {
attr_val = is_val<value_array>(item) ? item->at(attr_int, default_val) : default_val;
} else {
attr_val = is_val<value_object>(item) ? item->at(attr_name, default_val) : default_val;
attr_val = is_val<value_object>(item) ? item->at(attribute, default_val) : default_val;
}
out->push_back(attr_val);
}
return out;
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(out->as_array())) : out;
}},
{"append", [](const func_args & args) -> value {
args.ensure_count(2);
@ -867,6 +863,7 @@ const func_builtins & value_array_t::get_builtins() const {
if (!is_val<value_array>(args.get_pos(0))) {
throw raised_exception("sort: first argument must be an array");
}
value val = args.get_pos(0);
value val_reverse = args.get_kwarg_or_pos("reverse", 1);
value val_case = args.get_kwarg_or_pos("case_sensitive", 2);
value attribute = args.get_kwarg_or_pos("attribute", 3);
@ -875,8 +872,7 @@ const func_builtins & value_array_t::get_builtins() const {
const bool reverse = val_reverse->as_bool(); // undefined == false
const bool attr_is_int = is_val<value_int>(attribute);
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
const std::string attr_name = attribute->is_undefined() ? "" : attribute->as_string().str();
std::vector<value> arr = cast_val<value_array>(args.get_pos(0))->as_array(); // copy
std::vector<value> arr = val->as_array(); // copy
std::sort(arr.begin(), arr.end(),[&](const value & a, const value & b) {
value val_a = a;
value val_b = b;
@ -884,22 +880,23 @@ const func_builtins & value_array_t::get_builtins() const {
if (attr_is_int && is_val<value_array>(a) && is_val<value_array>(b)) {
val_a = a->at(attr_int);
val_b = b->at(attr_int);
} else if (!attr_is_int && !attr_name.empty() && is_val<value_object>(a) && is_val<value_object>(b)) {
val_a = a->at(attr_name);
val_b = b->at(attr_name);
} else if (!attr_is_int && is_val<value_object>(a) && is_val<value_object>(b)) {
val_a = a->at(attribute);
val_b = b->at(attribute);
} else {
throw raised_exception("sort: unsupported object attribute comparison");
throw raised_exception("sort: unsupported object attribute comparison between " + a->type() + " and " + b->type());
}
}
return value_compare(val_a, val_b, reverse ? value_compare_op::gt : value_compare_op::lt);
});
return mk_val<value_array>(arr);
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
}},
{"reverse", [](const func_args & args) -> value {
args.ensure_vals<value_array>();
std::vector<value> arr = cast_val<value_array>(args.get_pos(0))->as_array(); // copy
value val = args.get_pos(0);
std::vector<value> arr = val->as_array(); // copy
std::reverse(arr.begin(), arr.end());
return mk_val<value_array>(arr);
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
}},
{"unique", [](const func_args &) -> value {
throw not_implemented_exception("Array unique builtin not implemented");
@ -930,7 +927,7 @@ const func_builtins & value_object_t::get_builtins() const {
default_val = args.get_pos(2);
}
const value obj = args.get_pos(0);
std::string key = args.get_pos(1)->as_string().str();
const value key = args.get_pos(1);
return obj->at(key, default_val);
}},
{"keys", [](const func_args & args) -> value {
@ -938,7 +935,7 @@ const func_builtins & value_object_t::get_builtins() const {
const auto & obj = args.get_pos(0)->as_ordered_object();
auto result = mk_val<value_array>();
for (const auto & pair : obj) {
result->push_back(mk_val<value_string>(pair.first));
result->push_back(pair.first);
}
return result;
}},
@ -956,15 +953,16 @@ const func_builtins & value_object_t::get_builtins() const {
const auto & obj = args.get_pos(0)->as_ordered_object();
auto result = mk_val<value_array>();
for (const auto & pair : obj) {
auto item = mk_val<value_array>();
item->push_back(mk_val<value_string>(pair.first));
item->push_back(pair.second);
auto item = mk_val<value_tuple>(pair);
result->push_back(std::move(item));
}
return result;
}},
{"tojson", tojson},
{"string", tojson},
{"string", [](const func_args & args) -> value {
args.ensure_vals<value_object>();
return mk_val<value_string>(args.get_pos(0)->as_string());
}},
{"length", [](const func_args & args) -> value {
args.ensure_vals<value_object>();
const auto & obj = args.get_pos(0)->as_ordered_object();
@ -985,11 +983,11 @@ const func_builtins & value_object_t::get_builtins() const {
const bool reverse = val_reverse->as_bool(); // undefined == false
const bool by_value = is_val<value_string>(val_by) && val_by->as_string().str() == "value" ? true : false;
auto result = mk_val<value_object>(val_input); // copy
std::sort(result->val_obj.ordered.begin(), result->val_obj.ordered.end(), [&](const auto & a, const auto & b) {
std::sort(result->val_obj.begin(), result->val_obj.end(), [&](const auto & a, const auto & b) {
if (by_value) {
return value_compare(a.second, b.second, reverse ? value_compare_op::gt : value_compare_op::lt);
} else {
return reverse ? a.first > b.first : a.first < b.first;
return value_compare(a.first, b.first, reverse ? value_compare_op::gt : value_compare_op::lt);
}
});
return result;
@ -1134,6 +1132,8 @@ void global_from_json(context & ctx, const nlohmann::ordered_json & json_obj, bo
}
}
// recursively convert value to JSON string
// TODO: avoid circular references
static void value_to_json_internal(std::ostringstream & oss, const value & val, int curr_lvl, int indent, const std::string_view item_sep, const std::string_view key_sep) {
auto indent_str = [indent, curr_lvl]() -> std::string {
return (indent > 0) ? std::string(curr_lvl * indent, ' ') : "";
@ -1196,7 +1196,8 @@ static void value_to_json_internal(std::ostringstream & oss, const value & val,
size_t i = 0;
for (const auto & pair : obj) {
oss << indent_str() << (indent > 0 ? std::string(indent, ' ') : "");
oss << "\"" << pair.first << "\"" << key_sep;
value_to_json_internal(oss, mk_val<value_string>(pair.first->as_string().str()), curr_lvl + 1, indent, item_sep, key_sep);
oss << key_sep;
value_to_json_internal(oss, pair.second, curr_lvl + 1, indent, item_sep, key_sep);
if (i < obj.size() - 1) {
oss << item_sep;
@ -1219,4 +1220,19 @@ std::string value_to_json(const value & val, int indent, const std::string_view
return oss.str();
}
// TODO: avoid circular references
std::string value_to_string_repr(const value & val) {
if (is_val<value_string>(val)) {
const std::string val_str = val->as_string().str();
if (val_str.find('\'') != std::string::npos) {
return value_to_json(val);
} else {
return "'" + val_str + "'";
}
} else {
return val->as_repr();
}
}
} // namespace jinja

View file

@ -1,8 +1,10 @@
#pragma once
#include "string.h"
#include "utils.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <map>
@ -93,7 +95,8 @@ void global_from_json(context & ctx, const T_JSON & json_obj, bool mark_input);
struct func_args; // function argument values
using func_handler = std::function<value(const func_args &)>;
using func_hptr = value(const func_args &);
using func_handler = std::function<func_hptr>;
using func_builtins = std::map<std::string, func_handler>;
enum value_compare_op { eq, ge, gt, lt, ne };
@ -103,28 +106,9 @@ struct value_t {
int64_t val_int;
double val_flt;
string val_str;
bool val_bool;
std::vector<value> val_arr;
struct map {
// once set to true, all keys must be numeric
// caveat: we only allow either all numeric keys or all non-numeric keys
// for now, this only applied to for_statement in case of iterating over object keys/items
bool is_key_numeric = false;
std::map<std::string, value> unordered;
std::vector<std::pair<std::string, value>> ordered;
void insert(const std::string & key, const value & val) {
if (unordered.find(key) != unordered.end()) {
// if key exists, remove from ordered list
ordered.erase(std::remove_if(ordered.begin(), ordered.end(),
[&](const std::pair<std::string, value> & p) { return p.first == key; }),
ordered.end());
}
unordered[key] = val;
ordered.push_back({key, val});
}
} val_obj;
std::vector<std::pair<value, value>> val_obj;
func_handler val_func;
@ -139,6 +123,7 @@ struct value_t {
value_t(const value_t &) = default;
virtual ~value_t() = default;
// Note: only for debugging and error reporting purposes
virtual std::string type() const { return ""; }
virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); }
@ -146,7 +131,7 @@ struct value_t {
virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); }
virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); }
virtual const std::vector<value> & as_array() const { throw std::runtime_error(type() + " is not an array value"); }
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); }
virtual bool is_none() const { return false; }
virtual bool is_undefined() const { return false; }
@ -154,43 +139,66 @@ struct value_t {
throw std::runtime_error("No builtins available for type " + type());
}
virtual bool has_key(const std::string & key) {
return val_obj.unordered.find(key) != val_obj.unordered.end();
}
virtual value & at(const std::string & key, value & default_val) {
auto it = val_obj.unordered.find(key);
if (it == val_obj.unordered.end()) {
return default_val;
}
return val_obj.unordered.at(key);
}
virtual value & at(const std::string & key) {
auto it = val_obj.unordered.find(key);
if (it == val_obj.unordered.end()) {
throw std::runtime_error("Key '" + key + "' not found in value of type " + type());
}
return val_obj.unordered.at(key);
}
virtual value & at(int64_t index, value & default_val) {
if (index < 0) {
index += val_arr.size();
}
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
return default_val;
}
return val_arr[index];
}
virtual value & at(int64_t index) {
if (index < 0) {
index += val_arr.size();
}
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
}
return val_arr[index];
}
virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); }
virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); }
virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); }
virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); }
virtual bool is_numeric() const { return false; }
virtual bool is_hashable() const { return false; }
virtual bool is_immutable() const { return true; }
virtual hasher unique_hash() const noexcept = 0;
// TODO: C++20 <=> operator
// NOTE: We are treating == as equivalent (for normal comparisons) and != as strict nonequal (for strict (is) comparisons)
virtual bool operator==(const value_t & other) const { return equivalent(other); }
virtual bool operator!=(const value_t & other) const { return nonequal(other); }
// Note: only for debugging purposes
virtual std::string as_repr() const { return as_string().str(); }
protected:
virtual bool equivalent(const value_t &) const = 0;
virtual bool nonequal(const value_t & other) const { return !equivalent(other); }
};
//
// utils
//
const func_builtins & global_builtins();
std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
// Note: only used for debugging purposes
std::string value_to_string_repr(const value & val);
struct not_implemented_exception : public std::runtime_error {
not_implemented_exception(const std::string & msg) : std::runtime_error("NotImplemented: " + msg) {}
};
struct value_hasher {
size_t operator()(const value & val) const noexcept {
return val->unique_hash().digest();
}
};
struct value_equivalence {
bool operator()(const value & lhs, const value & rhs) const {
return *lhs == *rhs;
}
bool operator()(const std::pair<value, value> & lhs, const std::pair<value, value> & rhs) const {
return *(lhs.first) == *(rhs.first) && *(lhs.second) == *(rhs.second);
}
};
struct value_equality {
bool operator()(const value & lhs, const value & rhs) const {
return !(*lhs != *rhs);
}
};
//
@ -198,24 +206,49 @@ struct value_t {
//
struct value_int_t : public value_t {
value_int_t(int64_t v) { val_int = v; }
value_int_t(int64_t v) {
val_int = v;
val_flt = static_cast<double>(v);
if (static_cast<int64_t>(val_flt) != v) {
val_flt = v < 0 ? -INFINITY : INFINITY;
}
}
virtual std::string type() const override { return "Integer"; }
virtual int64_t as_int() const override { return val_int; }
virtual double as_float() const override { return static_cast<double>(val_int); }
virtual double as_float() const override { return val_flt; }
virtual string as_string() const override { return std::to_string(val_int); }
virtual bool as_bool() const override {
return val_int != 0;
}
virtual const func_builtins & get_builtins() const override;
virtual bool is_numeric() const override { return true; }
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
return hasher(typeid(*this))
.update(&val_int, sizeof(val_int))
.update(&val_flt, sizeof(val_flt));
}
protected:
virtual bool equivalent(const value_t & other) const override {
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
}
virtual bool nonequal(const value_t & other) const override {
return !(typeid(*this) == typeid(other) && val_int == other.val_int);
}
};
using value_int = std::shared_ptr<value_int_t>;
struct value_float_t : public value_t {
value_float_t(double v) { val_flt = v; }
value val;
value_float_t(double v) {
val_flt = v;
val_int = std::isfinite(v) ? static_cast<int64_t>(v) : 0;
val = mk_val<value_int>(val_int);
}
virtual std::string type() const override { return "Float"; }
virtual double as_float() const override { return val_flt; }
virtual int64_t as_int() const override { return static_cast<int64_t>(val_flt); }
virtual int64_t as_int() const override { return val_int; }
virtual string as_string() const override {
std::string out = std::to_string(val_flt);
out.erase(out.find_last_not_of('0') + 1, std::string::npos); // remove trailing zeros
@ -226,6 +259,24 @@ struct value_float_t : public value_t {
return val_flt != 0.0;
}
virtual const func_builtins & get_builtins() const override;
virtual bool is_numeric() const override { return true; }
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
if (static_cast<double>(val_int) == val_flt) {
return val->unique_hash();
} else {
return hasher(typeid(*this))
.update(&val_int, sizeof(val_int))
.update(&val_flt, sizeof(val_flt));
}
}
protected:
virtual bool equivalent(const value_t & other) const override {
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
}
virtual bool nonequal(const value_t & other) const override {
return !(typeid(*this) == typeid(other) && val_flt == other.val_flt);
}
};
using value_float = std::shared_ptr<value_float_t>;
@ -247,19 +298,49 @@ struct value_string_t : public value_t {
return val_str.length() > 0;
}
virtual const func_builtins & get_builtins() const override;
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
const auto type_hash = typeid(*this).hash_code();
auto hash = hasher();
hash.update(&type_hash, sizeof(type_hash));
val_str.hash_update(hash);
return hash;
}
void mark_input() {
val_str.mark_input();
}
protected:
virtual bool equivalent(const value_t & other) const override {
return typeid(*this) == typeid(other) && val_str.str() == other.val_str.str();
}
};
using value_string = std::shared_ptr<value_string_t>;
struct value_bool_t : public value_t {
value_bool_t(bool v) { val_bool = v; }
value val;
value_bool_t(bool v) {
val_int = static_cast<int64_t>(v);
val_flt = static_cast<double>(v);
val = mk_val<value_int>(val_int);
}
virtual std::string type() const override { return "Boolean"; }
virtual bool as_bool() const override { return val_bool; }
virtual string as_string() const override { return std::string(val_bool ? "True" : "False"); }
virtual int64_t as_int() const override { return val_int; }
virtual bool as_bool() const override { return val_int; }
virtual string as_string() const override { return std::string(val_int ? "True" : "False"); }
virtual const func_builtins & get_builtins() const override;
virtual bool is_numeric() const override { return true; }
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
return val->unique_hash();
}
protected:
virtual bool equivalent(const value_t & other) const override {
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
}
virtual bool nonequal(const value_t & other) const override {
return !(typeid(*this) == typeid(other) && val_int == other.val_int);
}
};
using value_bool = std::shared_ptr<value_bool_t>;
@ -269,13 +350,34 @@ struct value_array_t : public value_t {
value_array_t(value & v) {
val_arr = v->val_arr;
}
value_array_t(std::vector<value> && arr) {
val_arr = arr;
}
value_array_t(const std::vector<value> & arr) {
val_arr = arr;
}
void reverse() { std::reverse(val_arr.begin(), val_arr.end()); }
void push_back(const value & val) { val_arr.push_back(val); }
void push_back(value && val) { val_arr.push_back(std::move(val)); }
void reverse() {
if (is_immutable()) {
throw std::runtime_error("Attempting to modify immutable type");
}
std::reverse(val_arr.begin(), val_arr.end());
}
void push_back(const value & val) {
if (is_immutable()) {
throw std::runtime_error("Attempting to modify immutable type");
}
val_arr.push_back(val);
}
void push_back(value && val) {
if (is_immutable()) {
throw std::runtime_error("Attempting to modify immutable type");
}
val_arr.push_back(std::move(val));
}
value pop_at(int64_t index) {
if (is_immutable()) {
throw std::runtime_error("Attempting to modify immutable type");
}
if (index < 0) {
index = static_cast<int64_t>(val_arr.size()) + index;
}
@ -287,64 +389,225 @@ struct value_array_t : public value_t {
return val;
}
virtual std::string type() const override { return "Array"; }
virtual bool is_immutable() const override { return false; }
virtual const std::vector<value> & as_array() const override { return val_arr; }
virtual string as_string() const override {
const bool immutable = is_immutable();
std::ostringstream ss;
ss << "[";
ss << (immutable ? "(" : "[");
for (size_t i = 0; i < val_arr.size(); i++) {
if (i > 0) ss << ", ";
ss << val_arr.at(i)->as_repr();
value val = val_arr.at(i);
ss << value_to_string_repr(val);
}
ss << "]";
if (immutable && val_arr.size() == 1) {
ss << ",";
}
ss << (immutable ? ")" : "]");
return ss.str();
}
virtual bool as_bool() const override {
return !val_arr.empty();
}
virtual value & at(int64_t index, value & default_val) override {
if (index < 0) {
index += val_arr.size();
}
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
return default_val;
}
return val_arr[index];
}
virtual value & at(int64_t index) override {
if (index < 0) {
index += val_arr.size();
}
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
}
return val_arr[index];
}
virtual const func_builtins & get_builtins() const override;
virtual bool is_hashable() const override {
if (std::all_of(val_arr.begin(), val_arr.end(), [&](auto & val) -> bool {
return val->is_immutable() && val->is_hashable();
})) {
return true;
}
return false;
}
virtual hasher unique_hash() const noexcept override {
auto hash = hasher(typeid(*this));
for (const auto & val : val_arr) {
// must use digest to prevent problems from "concatenation" property of hasher
// for ex. hash of [ "ab", "c" ] should be different from [ "a", "bc" ]
const size_t val_hash = val->unique_hash().digest();
hash.update(&val_hash, sizeof(size_t));
}
return hash;
}
protected:
virtual bool equivalent(const value_t & other) const override {
return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_arr.begin(), val_arr.end(), other.val_arr.begin(), value_equivalence());
}
};
using value_array = std::shared_ptr<value_array_t>;
struct value_tuple_t : public value_array_t {
value_tuple_t(value & v) {
val_arr = v->val_arr;
}
value_tuple_t(std::vector<value> && arr) {
val_arr = arr;
}
value_tuple_t(const std::vector<value> & arr) {
val_arr = arr;
}
value_tuple_t(const std::pair<value, value> & pair) {
val_arr.push_back(pair.first);
val_arr.push_back(pair.second);
}
virtual std::string type() const override { return "Tuple"; }
virtual bool is_immutable() const override { return true; }
};
using value_tuple = std::shared_ptr<value_tuple_t>;
struct value_object_t : public value_t {
std::unordered_map<value, value, value_hasher, value_equivalence> unordered;
bool has_builtins = true; // context and loop objects do not have builtins
value_object_t() = default;
value_object_t(value & v) {
val_obj = v->val_obj;
}
value_object_t(const std::map<std::string, value> & obj) {
for (const auto & pair : obj) {
val_obj.insert(pair.first, pair.second);
for (const auto & pair : val_obj) {
unordered[pair.first] = pair.second;
}
}
value_object_t(const std::vector<std::pair<std::string, value>> & obj) {
value_object_t(const std::map<value, value> & obj) {
for (const auto & pair : obj) {
val_obj.insert(pair.first, pair.second);
insert(pair.first, pair.second);
}
}
value_object_t(const std::vector<std::pair<value, value>> & obj) {
for (const auto & pair : obj) {
insert(pair.first, pair.second);
}
}
void insert(const std::string & key, const value & val) {
val_obj.insert(key, val);
insert(mk_val<value_string>(key), val);
}
virtual std::string type() const override { return "Object"; }
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const override { return val_obj.ordered; }
virtual bool is_immutable() const override { return false; }
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const override { return val_obj; }
virtual string as_string() const override {
std::ostringstream ss;
ss << "{";
for (size_t i = 0; i < val_obj.size(); i++) {
if (i > 0) ss << ", ";
auto & [key, val] = val_obj.at(i);
ss << value_to_string_repr(key) << ": " << value_to_string_repr(val);
}
ss << "}";
return ss.str();
}
virtual bool as_bool() const override {
return !val_obj.unordered.empty();
return !unordered.empty();
}
virtual bool has_key(const value & key) override {
if (!key->is_immutable() || !key->is_hashable()) {
throw std::runtime_error("Object key of unhashable type: " + key->type());
}
return unordered.find(key) != unordered.end();
}
virtual void insert(const value & key, const value & val) override {
bool replaced = false;
if (is_immutable()) {
throw std::runtime_error("Attempting to modify immutable type");
}
if (has_key(key)) {
// if key exists, replace value in ordered list instead of appending
for (auto & pair : val_obj) {
if (*(pair.first) == *key) {
pair.second = val;
replaced = true;
break;
}
}
}
unordered[key] = val;
if (!replaced) {
val_obj.push_back({key, val});
}
}
virtual value & at(const value & key, value & default_val) override {
if (!has_key(key)) {
return default_val;
}
return unordered.at(key);
}
virtual value & at(const value & key) override {
if (!has_key(key)) {
throw std::runtime_error("Key '" + key->as_string().str() + "' not found in value of type " + type());
}
return unordered.at(key);
}
virtual value & at(const std::string & key, value & default_val) override {
value key_val = mk_val<value_string>(key);
return at(key_val, default_val);
}
virtual value & at(const std::string & key) override {
value key_val = mk_val<value_string>(key);
return at(key_val);
}
virtual const func_builtins & get_builtins() const override;
virtual bool is_hashable() const override {
if (std::all_of(val_obj.begin(), val_obj.end(), [&](auto & pair) -> bool {
const auto & val = pair.second;
return val->is_immutable() && val->is_hashable();
})) {
return true;
}
return false;
}
virtual hasher unique_hash() const noexcept override {
auto hash = hasher(typeid(*this));
for (const auto & [key, val] : val_obj) {
// must use digest to prevent problems from "concatenation" property of hasher
// for ex. hash of key="ab", value="c" should be different from key="a", value="bc"
const size_t key_hash = key->unique_hash().digest();
const size_t val_hash = val->unique_hash().digest();
hash.update(&key_hash, sizeof(key_hash));
hash.update(&val_hash, sizeof(val_hash));
}
return hash;
}
protected:
virtual bool equivalent(const value_t & other) const override {
return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_obj.begin(), val_obj.end(), other.val_obj.begin(), value_equivalence());
}
};
using value_object = std::shared_ptr<value_object_t>;
//
// null and undefined types
// none and undefined types
//
struct value_none_t : public value_t {
virtual std::string type() const override { return "None"; }
virtual bool is_none() const override { return true; }
virtual bool as_bool() const override { return false; }
virtual string as_string() const override { return string("None"); }
virtual string as_string() const override { return string(type()); }
virtual std::string as_repr() const override { return type(); }
virtual const func_builtins & get_builtins() const override;
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
return hasher(typeid(*this));
}
protected:
virtual bool equivalent(const value_t & other) const override {
return typeid(*this) == typeid(other);
}
};
using value_none = std::shared_ptr<value_none_t>;
@ -356,6 +619,13 @@ struct value_undefined_t : public value_t {
virtual bool as_bool() const override { return false; }
virtual std::string as_repr() const override { return type(); }
virtual const func_builtins & get_builtins() const override;
virtual hasher unique_hash() const noexcept override {
return hasher(typeid(*this));
}
protected:
virtual bool equivalent(const value_t & other) const override {
return is_undefined() == other.is_undefined();
}
};
using value_undefined = std::shared_ptr<value_undefined_t>;
@ -436,7 +706,23 @@ struct value_func_t : public value_t {
return val_func(new_args);
}
virtual std::string type() const override { return "Function"; }
virtual std::string as_repr() const override { return type(); }
virtual std::string as_repr() const override { return type() + "<" + name + ">(" + (arg0 ? arg0->as_repr() : "") + ")"; }
virtual bool is_hashable() const override { return false; }
virtual hasher unique_hash() const noexcept override {
// Note: this is unused for now, we don't support function as object keys
// use function pointer as unique identifier
const auto target = val_func.target<func_hptr>();
return hasher(typeid(*this)).update(&target, sizeof(target));
}
protected:
virtual bool equivalent(const value_t & other) const override {
// Note: this is unused for now, we don't support function as object keys
// compare function pointers
// (val_func == other.val_func does not work as std::function::operator== is only used for nullptr check)
const auto target_this = this->val_func.target<func_hptr>();
const auto target_other = other.val_func.target<func_hptr>();
return typeid(*this) == typeid(other) && target_this == target_other;
}
};
using value_func = std::shared_ptr<value_func_t>;
@ -447,18 +733,21 @@ struct value_kwarg_t : public value_t {
value_kwarg_t(const std::string & k, const value & v) : key(k), val(v) {}
virtual std::string type() const override { return "KwArg"; }
virtual std::string as_repr() const override { return type(); }
virtual bool is_hashable() const override { return true; }
virtual hasher unique_hash() const noexcept override {
const auto type_hash = typeid(*this).hash_code();
auto hash = val->unique_hash();
hash.update(&type_hash, sizeof(type_hash))
.update(key.data(), key.size());
return hash;
}
protected:
virtual bool equivalent(const value_t & other) const override {
const value_kwarg_t & other_val = static_cast<const value_kwarg_t &>(other);
return typeid(*this) == typeid(other) && key == other_val.key && val == other_val.val;
}
};
using value_kwarg = std::shared_ptr<value_kwarg_t>;
// utils
const func_builtins & global_builtins();
std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
struct not_implemented_exception : public std::runtime_error {
not_implemented_exception(const std::string & msg) : std::runtime_error("NotImplemented: " + msg) {}
};
} // namespace jinja

View file

@ -0,0 +1,16 @@
#pragma once
#include "ggml.h"
#include "ggml-backend.h"
#ifdef __cplusplus
extern "C" {
#endif
#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();
#ifdef __cplusplus
}
#endif

View file

@ -69,6 +69,10 @@
#include "ggml-rpc.h"
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
#include "ggml-virtgpu.h"
#endif
#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
@ -180,7 +184,12 @@ struct ggml_backend_registry {
register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
// Add runtime disable check
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
register_backend(ggml_backend_vk_reg());
} else {
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
}
#endif
#ifdef GGML_USE_WEBGPU
register_backend(ggml_backend_webgpu_reg());
@ -188,6 +197,10 @@ struct ggml_backend_registry {
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
register_backend(ggml_backend_virtgpu_reg());
#endif
#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
@ -605,6 +618,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("virtgpu", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);

View file

@ -3148,16 +3148,17 @@ void ggml_gemm_q4_K_8x8_q8_K(int n,
// Scales[i] corresponds to column i
const int scale_offset = cp * 2;
for (int blk = 0; blk < 2; blk++) {
const int32x4_t block_scale = {
(int32_t) q4sb_scales[blk][scale_offset],
(int32_t) q4sb_scales[blk][scale_offset],
(int32_t) q4sb_scales[blk][scale_offset + 1],
(int32_t) q4sb_scales[blk][scale_offset + 1],
};
acc[cp] = vmlaq_s32(acc[cp], sb_acc[blk], block_scale);
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[blk + 2], block_scale);
}
const int32_t scale_00 = q4sb_scales[0][scale_offset];
const int32_t scale_01 = q4sb_scales[0][scale_offset + 1];
const int32_t scale_10 = q4sb_scales[1][scale_offset];
const int32_t scale_11 = q4sb_scales[1][scale_offset + 1];
const int32x4_t block_scale_0 = vcombine_s32(vdup_n_s32(scale_00), vdup_n_s32(scale_01));
const int32x4_t block_scale_1 = vcombine_s32(vdup_n_s32(scale_10), vdup_n_s32(scale_11));
acc[cp] = vmlaq_s32(acc[cp], sb_acc[0], block_scale_0);
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[2], block_scale_0);
acc[cp] = vmlaq_s32(acc[cp], sb_acc[1], block_scale_1);
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[3], block_scale_1);
}
// Multiply Acc bsum + mins

View file

@ -53,6 +53,7 @@
// While BW spans CC 1000, 1100 & 1200, we are integrating Tensor Core instructions available to 1200 family, see
// https://docs.nvidia.com/cutlass/media/docs/cpp/blackwell_functionality.html#blackwell-sm120-gemms
#define GGML_CUDA_CC_BLACKWELL 1200
#define GGML_CUDA_CC_DGX_SPARK 1210
#define GGML_CUDA_CC_RUBIN 1300
#define GGML_CUDA_CC_OFFSET_AMD 0x1000000
#define GGML_CUDA_CC_OFFSET_MTHREADS 0x0100000

View file

@ -789,7 +789,7 @@ void launch_fattn(
const ggml_tensor * K = dst->src[1];
const ggml_tensor * V = dst->src[2];
const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
const ggml_tensor * mask = dst->src[3];
const ggml_tensor * sinks = dst->src[4];

View file

@ -147,6 +147,14 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg
GGML_ASSERT(Q->ne[2] % K->ne[2] == 0);
const int gqa_ratio = Q->ne[2] / K->ne[2];
if (gqa_ratio == 20) { // GLM 4.7 Flash
if (cc >= GGML_CUDA_CC_DGX_SPARK) {
if (Q->ne[1] <= 8) {
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
break;
}
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 4>(ctx, dst);
break;
}
if (cc >= GGML_CUDA_CC_BLACKWELL) {
if (Q->ne[1] <= 4 && K->ne[1] >= 65536) {
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
@ -302,7 +310,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
}
}
const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
const int cc = ggml_cuda_info().devices[device].cc;

View file

@ -1,153 +0,0 @@
#ifndef OP_DESC_H
#define OP_DESC_H
#define GGML_COMMON_IMPL_CPP
#include "ggml-backend-impl.h"
#include "ggml-common.h"
#include <string>
#include <stdio.h>
struct op_desc {
char strides[64 * GGML_MAX_SRC];
char dims[64 * GGML_MAX_SRC];
char types[16 * GGML_MAX_SRC];
char buffs[64 * GGML_MAX_SRC];
char names[64 * GGML_MAX_SRC];
int format_tensor_dims(char * str, const struct ggml_tensor * t) {
if (t->ne[2] == 1 && t->ne[3] == 1) {
return sprintf(str, "%d:%d", (int) t->ne[0], (int) t->ne[1]);
} else {
return sprintf(str, "%d:%d:%d:%d", (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
}
}
void format_op_dims(char * str, const struct ggml_tensor * t) {
char * p = str;
// append src0 and src1 (if any)
if (t->src[0]) {
p += format_tensor_dims(p, t->src[0]);
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
p += sprintf(p, " x ");
p += format_tensor_dims(p, t->src[i]);
}
p += sprintf(p, " -> ");
}
// format self dims separately for better visual alignment
char self[64];
format_tensor_dims(self, t);
p += sprintf(p, "%s", self);
}
int format_tensor_strides(char * str, const struct ggml_tensor * t) {
const char * c = ggml_is_contiguous(t) ? "" : "!";
if (t->ne[2] == 1 && t->ne[3] == 1) {
return sprintf(str, "%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], c);
} else {
return sprintf(str, "%zu:%zu:%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], (size_t) t->nb[2], (size_t) t->nb[3], c);
}
}
void format_op_strides(char * str, const struct ggml_tensor * t) {
char * p = str;
// append src0 and src1 (if any)
if (t->src[0]) {
p += format_tensor_strides(p, t->src[0]);
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
p += sprintf(p, " x ");
p += format_tensor_strides(p, t->src[i]);
}
p += sprintf(p, " -> ");
}
// format self dims separately for better visual alignment
char self[64];
format_tensor_strides(self, t);
p += sprintf(p, "%s", self);
}
void format_op_types(char * str, const struct ggml_tensor * t) {
char * p = str;
// append src0 and src1 (if any)
if (t->src[0]) {
p += sprintf(p, "%s", ggml_type_name(t->src[0]->type));
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", ggml_type_name(t->src[i]->type));
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", ggml_type_name(t->type));
}
const char * tensor_buff_name(const struct ggml_tensor * t) {
if (t->buffer) {
return ggml_backend_buffer_name(t->buffer);
}
return "NONE";
}
void format_op_buffs(char * str, const struct ggml_tensor * t) {
char * p = str;
// append src0 and src1 (if any)
if (t->src[0]) {
p += sprintf(p, "%s", tensor_buff_name(t->src[0]));
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", tensor_buff_name(t->src[i]));
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", tensor_buff_name(t));
}
void format_op_names(char * str, const struct ggml_tensor * t) {
char * p = str;
// append src0 and src1 (if any)
if (t->src[0]) {
p += sprintf(p, "%s", t->src[0]->name);
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
p += sprintf(p, " x ");
p += sprintf(p, "%s", t->src[i]->name);
}
p += sprintf(p, " -> ");
}
p += sprintf(p, "%s", t->name);
}
void format(const ggml_tensor * op) {
format_op_dims(dims, op);
format_op_strides(strides, op);
format_op_types(types, op);
format_op_buffs(buffs, op);
format_op_names(names, op);
}
op_desc() {}
op_desc(const ggml_tensor * op) { format(op); }
};
#endif // OP_DESC_H

View file

@ -1,194 +0,0 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#ifdef cl_intel_subgroups
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
#else
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
#endif
#ifdef cl_intel_required_subgroup_size
#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
#define INTEL_GPU 1
#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
#elif defined(cl_qcom_reqd_sub_group_size)
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
#define ADRENO_GPU 1
#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
#endif
//------------------------------------------------------------------------------
// kernel_mul_mv_q6_K_f32_flat
//------------------------------------------------------------------------------
#define Q6_K_MASK1 0x03
#define Q6_K_MASK2 0x0C
#define Q6_K_MASK3 0x30
#define Q6_K_MASK4 0xC0
#define QK_K 256
inline float block_q_6_K_dot_y_flat(
global uchar * blk_ql,
global uchar * blk_qh,
global char * blk_scales,
global half * blk_d,
global float * yy,
int ib,
int ip,
int is,
int l0
) {
int y_offset = 128*ip + l0;
int q_offset_l = 64*ip + l0;
int q_offset_h = 32*ip + l0;
global uchar * q1 = blk_ql + ib*128 + q_offset_l;
global uchar * q2 = q1 + QK_K/8;
global uchar * qh = blk_qh + ib*64 + q_offset_h;
global char * sc = blk_scales + ib*16 + is;
global float * y = yy + ib * QK_K + y_offset;
float dall = blk_d[ib];
float sumf = 0;
float4 sums = {0.f, 0.f, 0.f, 0.f};
sums.s0 += y[0+ 0] * ((float)((q1[0] & 0xF) | ((qh[0] & Q6_K_MASK1) << 4)) - 32.f);
sums.s1 += y[0+32] * ((float)((q2[0] & 0xF) | ((qh[0] & Q6_K_MASK2) << 2)) - 32.f);
sums.s2 += y[0+64] * ((float)((q1[0] >> 4) | ((qh[0] & Q6_K_MASK3) << 0)) - 32.f);
sums.s3 += y[0+96] * ((float)((q2[0] >> 4) | ((qh[0] & Q6_K_MASK4) >> 2)) - 32.f);
sums.s0 += y[1+ 0] * ((float)((q1[1] & 0xF) | ((qh[1] & Q6_K_MASK1) << 4)) - 32.f);
sums.s1 += y[1+32] * ((float)((q2[1] & 0xF) | ((qh[1] & Q6_K_MASK2) << 2)) - 32.f);
sums.s2 += y[1+64] * ((float)((q1[1] >> 4) | ((qh[1] & Q6_K_MASK3) << 0)) - 32.f);
sums.s3 += y[1+96] * ((float)((q2[1] >> 4) | ((qh[1] & Q6_K_MASK4) >> 2)) - 32.f);
sums.s0 += y[2+ 0] * ((float)((q1[2] & 0xF) | ((qh[2] & Q6_K_MASK1) << 4)) - 32.f);
sums.s1 += y[2+32] * ((float)((q2[2] & 0xF) | ((qh[2] & Q6_K_MASK2) << 2)) - 32.f);
sums.s2 += y[2+64] * ((float)((q1[2] >> 4) | ((qh[2] & Q6_K_MASK3) << 0)) - 32.f);
sums.s3 += y[2+96] * ((float)((q2[2] >> 4) | ((qh[2] & Q6_K_MASK4) >> 2)) - 32.f);
sums.s0 += y[3+ 0] * ((float)((q1[3] & 0xF) | ((qh[3] & Q6_K_MASK1) << 4)) - 32.f);
sums.s1 += y[3+32] * ((float)((q2[3] & 0xF) | ((qh[3] & Q6_K_MASK2) << 2)) - 32.f);
sums.s2 += y[3+64] * ((float)((q1[3] >> 4) | ((qh[3] & Q6_K_MASK3) << 0)) - 32.f);
sums.s3 += y[3+96] * ((float)((q2[3] >> 4) | ((qh[3] & Q6_K_MASK4) >> 2)) - 32.f);
sumf += dall * (sums.s0 * sc[0] + sums.s1 * sc[2] + sums.s2 * sc[4] + sums.s3 * sc[6]);
return sumf;
}
#undef N_DST
#undef N_SIMDGROUP
#undef N_SIMDWIDTH
#ifdef INTEL_GPU
#define N_DST 4
#define N_SIMDGROUP 2
#define N_SIMDWIDTH 16
#elif defined (ADRENO_GPU)
#define N_DST 4
#define N_SIMDGROUP 2
#define N_SIMDWIDTH 64
#endif
#define BLOCK_STRIDE (N_SIMDWIDTH/16) // number of blocks each subgroup processes
#ifdef INTEL_GPU
REQD_SUBGROUP_SIZE_16
#elif defined (ADRENO_GPU)
REQD_SUBGROUP_SIZE_64
#endif
kernel void kernel_mul_mv_q6_K_f32_flat(
global uchar * src0_ql,
global uchar * src0_qh,
global char * src0_s,
global half * src0_d,
global float * src1,
ulong offset1,
global float * dst,
ulong offsetd,
int ne00,
int ne01,
int ne02,
int ne10,
int ne12,
int ne0,
int ne1,
int r2,
int r3
) {
src1 = (global float*)((global char*)src1 + offset1);
dst = (global float*)((global char*)dst + offsetd);
int nb = ne00/QK_K;
int r0 = get_group_id(0);
int r1 = get_group_id(1);
int im = get_group_id(2);
int i12 = im%ne12;
int i13 = im/ne12;
int first_row = (N_SIMDGROUP * r0 + get_sub_group_id()) * N_DST;
ulong offset_src0 = first_row*nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02);
ulong offset_src0_ql = offset_src0 * 128;
ulong offset_src0_qh = offset_src0 * 64;
ulong offset_src0_s = offset_src0 * 16;
ulong offset_src0_d = offset_src0;
global uchar * blk_ql = (global uchar *) src0_ql + offset_src0_ql;
global uchar * blk_qh = (global uchar *) src0_qh + offset_src0_qh;
global char * blk_scales = (global char *) src0_s + offset_src0_s;
global half * blk_d = (global half *) src0_d + offset_src0_d;
global float * yy = (global float *) src1 + r1*ne10 + im*ne00*ne1;
int tid = get_sub_group_local_id()/BLOCK_STRIDE; // first block_stride groups have tid=0
int ix = get_sub_group_local_id()%BLOCK_STRIDE; // first block is 0..block_stride-1
int ip = tid/8; // first or second half of (super) block (0 or 1)
int il = tid%8; // each half has 8 parts, one per scale
int n = 4; // 4 scales at a time (and 4 sums)
int l0 = n*il; // offset into half-block, 0..28
int is = 8*ip + l0/16; // 0, 1, 8, 9
float4 sumf = 0;
for (int ib = ix; ib < nb; ib += BLOCK_STRIDE) {
if (first_row + 0 < ne01) {
sumf.s0 += block_q_6_K_dot_y_flat(blk_ql + 0*nb*128, blk_qh + 0*nb*64, blk_scales + 0*nb*16, blk_d + 0*nb, yy, ib, ip, is, l0);
}
if (first_row + 1 < ne01) {
sumf.s1 += block_q_6_K_dot_y_flat(blk_ql + 1*nb*128, blk_qh + 1*nb*64, blk_scales + 1*nb*16, blk_d + 1*nb, yy, ib, ip, is, l0);
}
if (first_row + 2 < ne01) {
sumf.s2 += block_q_6_K_dot_y_flat(blk_ql + 2*nb*128, blk_qh + 2*nb*64, blk_scales + 2*nb*16, blk_d + 2*nb, yy, ib, ip, is, l0);
}
if (first_row + 3 < ne01) {
sumf.s3 += block_q_6_K_dot_y_flat(blk_ql + 3*nb*128, blk_qh + 3*nb*64, blk_scales + 3*nb*16, blk_d + 3*nb, yy, ib, ip, is, l0);
}
}
float4 tot = (float4)(
sub_group_reduce_add(sumf.s0),
sub_group_reduce_add(sumf.s1),
sub_group_reduce_add(sumf.s2),
sub_group_reduce_add(sumf.s3)
);
if (get_sub_group_local_id() == 0) {
if (first_row + 0 < ne01) {
dst[r1*ne0 + im*ne0*ne1 + first_row + 0] = tot.s0;
}
if (first_row + 1 < ne01) {
dst[r1*ne0 + im*ne0*ne1 + first_row + 1] = tot.s1;
}
if (first_row + 2 < ne01) {
dst[r1*ne0 + im*ne0*ne1 + first_row + 2] = tot.s2;
}
if (first_row + 3 < ne01) {
dst[r1*ne0 + im*ne0*ne1 + first_row + 3] = tot.s3;
}
}
}

View file

@ -1,32 +0,0 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
//------------------------------------------------------------------------------
// tri
//------------------------------------------------------------------------------
__kernel void kernel_tri_f32(
global float * src0,
ulong offset0,
global float * dst,
ulong offsetd,
int n,
int ne0,
int ne1,
int tri_type
) {
src0 = (global float*)((global char*)src0 + offset0);
dst = (global float*)((global char*)dst + offsetd);
int idx = get_global_id(0);
if (idx >= n) return;
int i0 = idx % ne0;
int i1 = (idx / ne0) % ne1;
int keep = 0;
if (tri_type == 0) keep = (i0 >= i1);
else if (tri_type == 1) keep = (i0 > i1);
else if (tri_type == 2) keep = (i0 <= i1);
else keep = (i0 < i1);
dst[idx] = keep ? src0[idx] : 0.0f;
}

View file

@ -1,77 +0,0 @@
#include <sycl/sycl.hpp>
#include "common.hpp"
#include "add-id.hpp"
static void add_id_kernel(
const float* src0,
const float* src1,
const int32_t* src2,
float* dst,
int64_t ne0,
int64_t ne1,
size_t nb01,
size_t nb02,
size_t nb11,
size_t nb21,
sycl::nd_item<3> item_ct1) {
const int64_t i1 = item_ct1.get_group(2);
const int64_t i2 = item_ct1.get_group(1);
const int i11 =
*(const int32_t*)((const char*)src2 + i1 * sizeof(int32_t) + i2 * nb21);
const size_t nb1 = ne0 * sizeof(float);
const size_t nb2 = ne1 * nb1;
float* dst_row = (float*)((char*)dst + i1 * nb1 + i2 * nb2);
const float* src0_row =
(const float*)((const char*)src0 + i1 * nb01 + i2 * nb02);
const float* src1_row = (const float*)((const char*)src1 + i11 * nb11);
for (int64_t i0 = item_ct1.get_local_id(2); i0 < ne0;
i0 += item_ct1.get_local_range(2)) {
dst_row[i0] = src0_row[i0] + src1_row[i0];
}
}
void ggml_sycl_add_id(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
const ggml_tensor* src0 = dst->src[0];
const ggml_tensor* src1 = dst->src[1];
const ggml_tensor* src2 = dst->src[2];
GGML_TENSOR_TERNARY_OP_LOCALS
GGML_ASSERT(dst->type == GGML_TYPE_F32);
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(src2->type == GGML_TYPE_I32);
GGML_ASSERT(nb00 == sizeof(float));
GGML_ASSERT(nb10 == sizeof(float));
GGML_ASSERT(nb20 == sizeof(int32_t));
const float* src0_d = (const float*)src0->data;
const float* src1_d = (const float*)src1->data;
const int32_t* src2_d = (const int32_t*)src2->data;
float* dst_d = (float*)dst->data;
int threads = std::min((int)ne00, 768); // cols
ctx.stream()->parallel_for(
sycl::nd_range<3>(
sycl::range<3>(1, ne02, ne01) * sycl::range<3>(1, 1, threads),
sycl::range<3>(1, 1, threads)),
[=](sycl::nd_item<3> item_ct1) {
add_id_kernel(
src0_d,
src1_d,
src2_d,
dst_d,
ne0,
ne1,
nb01,
nb02,
nb11,
nb21,
item_ct1);
});
}

View file

@ -1,8 +0,0 @@
#ifndef GGML_SYCL_ADD_ID_HPP
#define GGML_SYCL_ADD_ID_HPP
#include "common.hpp"
void ggml_sycl_add_id(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
#endif // GGML_SYCL_ADD_ID_HPP

View file

@ -5552,22 +5552,32 @@ static void ggml_vk_instance_init() {
if ((new_props.properties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu || new_props.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) && ggml_vk_device_is_supported(devices[i])) {
// Check if there are two physical devices corresponding to the same GPU
// This handles the case where the same GPU appears with different drivers (e.g., RADV + AMDVLK on Linux),
// see https://github.com/ggml-org/llama.cpp/pull/7582 for original deduplication.
// However, for MoltenVK on macOS, multiple GPUs on the same card may report the same UUID,
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Until this is fixed, we'll only deduplicate
// when drivers differ (same driver + same UUID = likely different GPUs)
auto old_device = std::find_if(
vk_instance.device_indices.begin(),
vk_instance.device_indices.end(),
[&devices, &new_id](const size_t k){
[&devices, &new_id, &new_driver](const size_t k){
vk::PhysicalDeviceProperties2 old_props;
vk::PhysicalDeviceDriverProperties old_driver;
vk::PhysicalDeviceIDProperties old_id;
old_props.pNext = &old_id;
old_props.pNext = &old_driver;
old_driver.pNext = &old_id;
devices[k].getProperties2(&old_props);
bool equals = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID));
equals = equals || (
bool same_uuid = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID));
same_uuid = same_uuid || (
old_id.deviceLUIDValid && new_id.deviceLUIDValid &&
std::equal(std::begin(old_id.deviceLUID), std::end(old_id.deviceLUID), std::begin(new_id.deviceLUID))
);
return equals;
// Only deduplicate if same UUID AND different drivers
// (same driver + same UUID on MoltenVK = likely different GPUs on multi-GPU card)
bool different_driver = (old_driver.driverID != new_driver.driverID);
return same_uuid && different_driver;
}
);
if (old_device == vk_instance.device_indices.end()) {

View file

@ -312,7 +312,7 @@ extern "C" {
// Keep the booleans together to avoid misalignment during copy-by-value.
bool vocab_only; // only load the vocabulary, no weights
bool use_mmap; // use mmap if possible
bool use_direct_io; // use direct io, takes precedence over use_mmap
bool use_direct_io; // use direct io, takes precedence over use_mmap when supported
bool use_mlock; // force system to keep model in RAM
bool check_tensors; // validate model tensor data
bool use_extra_bufts; // use extra buffer types (used for weight repacking)

View file

@ -256,11 +256,7 @@ llama_context::llama_context(
// graph outputs buffer
{
// resized during inference when a batch uses more outputs
// Create a dummy batch for initialization.
llama_batch dummy_batch = {};
dummy_batch.n_tokens = 0;
if (output_reserve(params.n_seq_max, dummy_batch) < params.n_seq_max) {
if (output_reserve(params.n_seq_max) < params.n_seq_max) {
throw std::runtime_error("failed to reserve initial output buffer");
}
@ -1235,7 +1231,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
n_queued_tokens += n_tokens;
// reserve output buffer
if (output_reserve(n_tokens, batch_inp) < n_tokens) {
if (output_reserve(n_tokens) < n_tokens) {
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %u outputs\n", __func__, n_tokens);
return -2;
};
@ -1466,6 +1462,23 @@ static void copy_tensor_async_candidates(
}
}
static bool needs_raw_logits(const llama_ubatch & ubatch, const std::map<llama_seq_id, llama_sampler *> & samplers) {
for (uint32_t i = 0; i < ubatch.n_tokens; i++) {
if (!ubatch.output[i]) {
continue;
}
// Check if the output token has at least one sequence without a backend sampler.
for (int32_t j = 0; j < ubatch.n_seq_id[i]; ++j) {
llama_seq_id seq_id = ubatch.seq_id[i][j];
if (samplers.find(seq_id) == samplers.end()) {
return true;
}
}
}
return false; // all sequences use backend sampling
}
int llama_context::decode(const llama_batch & batch_inp) {
GGML_ASSERT((!batch_inp.token && batch_inp.embd) || (batch_inp.token && !batch_inp.embd)); // NOLINT
@ -1598,7 +1611,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
}
// reserve output buffer
if (output_reserve(n_outputs_all, balloc->get_batch()) < n_outputs_all) {
if (output_reserve(n_outputs_all) < n_outputs_all) {
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %d outputs\n", __func__, n_outputs_all);
return -2;
};
@ -1671,10 +1684,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
}
// extract logits
// For multi-sequence batches that mix backend samplers and CPU sampler
// this is currently inefficient as we copy all logits even for the
// backend sampled tokens.
if (logits && t_logits && n_outputs > 0) {
if (logits && t_logits && n_outputs > 0 && needs_raw_logits(ubatch, sampling.samplers)) {
ggml_backend_t backend_res = ggml_backend_sched_get_tensor_backend(sched.get(), t_logits);
GGML_ASSERT(backend_res != nullptr);
GGML_ASSERT(logits != nullptr);
@ -1744,11 +1754,8 @@ int llama_context::decode(const llama_batch & batch_inp) {
}
}
// This flag indicates whether a backend sampler has actually sampled a specific
// token, or if it has produced probabilites. If true, we can skip the normal copying of logits and embeddings.
const bool has_sampled = !res->t_sampled.empty() || !res->t_sampled_probs.empty() || !res->t_sampled_logits.empty();
if (has_samplers && has_sampled) {
// Copy backend sampling output if this ubatch produced any sampling tensors.
if (has_samplers && (!res->t_sampled.empty() || !res->t_sampled_probs.empty() || !res->t_sampled_logits.empty())) {
const auto seq_to_output_row = build_seq_to_output_row(ubatch, n_outputs_prev);
const auto stride = n_vocab;
@ -1823,7 +1830,8 @@ int llama_context::decode(const llama_batch & batch_inp) {
// output
//
uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & batch) {
uint32_t llama_context::output_reserve(int32_t n_outputs) {
const auto & hparams = model.hparams;
const auto & vocab = model.vocab;
@ -1842,45 +1850,16 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
has_embd = true;
}
// Check which sampling modes are needed for the current batch.
// TODO: avoid this branching by working with the worst-case
bool has_sampling = false;
bool cpu_logits = false;
if (batch.logits) {
for (int32_t i = 0; i < batch.n_tokens; i++) {
if (!batch.logits[i]) {
continue;
}
for (int32_t j = 0; j < batch.n_seq_id[i]; j++) {
llama_seq_id seq_id = batch.seq_id[i][j];
if (sampling.samplers.find(seq_id) != sampling.samplers.end()) {
has_sampling = true;
} else {
cpu_logits = true;
}
}
}
} else {
// When batch.logits is nullptr (when loading state with a dummy batch),
// allocate CPU logits.
cpu_logits = true;
}
size_t backend_float_count = 0;
size_t backend_token_count = 0;
// Allocate CPU logits buffer only if needed by sequences in this batch
logits_size = (has_logits && cpu_logits) ? n_vocab*n_outputs_max : 0;
logits_size = has_logits ? n_vocab*n_outputs_max : 0;
embd_size = has_embd ? n_embd_out*n_outputs_max : 0;
// TODO: avoid this branching by working with the worst-case
if (!has_sampling) {
sampling.logits_size = 0;
sampling.probs_size = 0;
sampling.sampled_size = 0;
sampling.candidates_size = 0;
} else {
// Allocate backend sampling output buffers if there are backend samplers configured.
const bool has_sampling = !sampling.samplers.empty();
if (has_sampling) {
sampling.logits_size = n_vocab*n_outputs_max;
sampling.probs_size = n_vocab*n_outputs_max;
sampling.sampled_size = n_outputs_max;
@ -1938,7 +1917,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
size_t offset = 0;
uint8_t * base = (uint8_t *) output_base;
logits = (has_logits && cpu_logits) ? output_base : nullptr;
logits = has_logits ? output_base : nullptr;
offset += logits_size * sizeof(float);
embd = has_embd ? (float *) (base + offset) : nullptr;
@ -2624,10 +2603,7 @@ size_t llama_context::state_read_data(llama_io_read_i & io) {
auto n_outputs = this->n_outputs;
io.read_to(&n_outputs, sizeof(n_outputs));
// Create a dummy batch for state loading.
llama_batch dummy_batch = {};
dummy_batch.n_tokens = 0;
if (n_outputs > output_reserve(n_outputs, dummy_batch)) {
if (n_outputs > output_reserve(n_outputs)) {
throw std::runtime_error("could not reserve outputs");
}
@ -2872,7 +2848,7 @@ void llama_context::opt_epoch_iter(
}
// reserve output buffer
if (output_reserve(n_outputs_all, balloc->get_batch()) < n_outputs_all) {
if (output_reserve(n_outputs_all) < n_outputs_all) {
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %d outputs\n", __func__, n_outputs_all);
GGML_ABORT("TODO: handle this error");
};

View file

@ -212,7 +212,7 @@ private:
// Make sure enough space is available for outputs.
// Returns max number of outputs for which space was reserved.
uint32_t output_reserve(int32_t n_outputs, const llama_batch & batch);
uint32_t output_reserve(int32_t n_outputs);
void output_reorder();

View file

@ -541,15 +541,15 @@ llama_model_loader::llama_model_loader(
if (use_mmap && use_direct_io) {
if (files.back()->has_direct_io()) {
// Disable mmap, as DirectIO is available
use_mmap = false;
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
use_mmap = false;
} else {
// Disable DirectIO and reopen file using std::fopen for mmap
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
use_direct_io = false;
// reopen file using std::fopen for mmap
files.pop_back();
files.emplace_back(new llama_file(fname.c_str(), "rb", false));
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
}
}

View file

@ -8279,7 +8279,7 @@ llama_model_params llama_model_default_params() {
/*.kv_overrides =*/ nullptr,
/*.vocab_only =*/ false,
/*.use_mmap =*/ true,
/*.use_direct_io =*/ true,
/*.use_direct_io =*/ false,
/*.use_mlock =*/ false,
/*.check_tensors =*/ false,
/*.use_extra_bufts =*/ true,

View file

@ -545,7 +545,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
}
std::vector<std::string> splits = {};
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ true, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
ml.init_mappings(false); // no prefetching
llama_model model(llama_model_default_params());

View file

@ -9,6 +9,7 @@
#include "jinja/runtime.h"
#include "jinja/parser.h"
#include "jinja/lexer.h"
#include "jinja/utils.h"
#include "testing.h"
@ -30,6 +31,7 @@ static void test_tests(testing & t);
static void test_string_methods(testing & t);
static void test_array_methods(testing & t);
static void test_object_methods(testing & t);
static void test_hasher(testing & t);
static void test_fuzzing(testing & t);
static bool g_python_mode = false;
@ -67,6 +69,7 @@ int main(int argc, char *argv[]) {
t.test("array methods", test_array_methods);
t.test("object methods", test_object_methods);
if (!g_python_mode) {
t.test("hasher", test_hasher);
t.test("fuzzing", test_fuzzing);
}
@ -156,6 +159,18 @@ static void test_conditionals(testing & t) {
"big"
);
test_template(t, "object comparison",
"{% if {0: 1, none: 2, 1.0: 3, '0': 4, true: 5} == {false: 1, none: 2, 1: 5, '0': 4} %}equal{% endif %}",
json::object(),
"equal"
);
test_template(t, "array comparison",
"{% if [0, 1.0, false] == [false, 1, 0.0] %}equal{% endif %}",
json::object(),
"equal"
);
test_template(t, "logical and",
"{% if a and b %}both{% endif %}",
{{"a", true}, {"b", true}},
@ -358,6 +373,30 @@ static void test_expressions(testing & t) {
"b"
);
test_template(t, "array negative access",
"{{ items[-1] }}",
{{"items", json::array({"a", "b", "c"})}},
"c"
);
test_template(t, "array slice",
"{{ items[1:-1]|string }}",
{{"items", json::array({"a", "b", "c"})}},
"['b']"
);
test_template(t, "array slice step",
"{{ items[::2]|string }}",
{{"items", json::array({"a", "b", "c"})}},
"['a', 'c']"
);
test_template(t, "tuple slice",
"{{ ('a', 'b', 'c')[::-1]|string }}",
json::object(),
"('c', 'b', 'a')"
);
test_template(t, "arithmetic",
"{{ (a + b) * c }}",
{{"a", 2}, {"b", 3}, {"c", 4}},
@ -401,6 +440,36 @@ static void test_set_statement(testing & t) {
json::object(),
"1"
);
test_template(t, "set dict with mixed type keys",
"{% set d = {0: 1, none: 2, 1.0: 3, '0': 4, (0, 0): 5, false: 6, 1: 7} %}{{ d[(0, 0)] + d[0] + d[none] + d['0'] + d[false] + d[1.0] + d[1] }}",
json::object(),
"37"
);
test_template(t, "print dict with mixed type keys",
"{% set d = {0: 1, none: 2, 1.0: 3, '0': 4, (0, 0): 5, true: 6} %}{{ d|string }}",
json::object(),
"{0: 1, None: 2, 1.0: 6, '0': 4, (0, 0): 5}"
);
test_template(t, "print array with mixed types",
"{% set d = [0, none, 1.0, '0', true, (0, 0)] %}{{ d|string }}",
json::object(),
"[0, None, 1.0, '0', True, (0, 0)]"
);
test_template(t, "object member assignment with mixed key types",
"{% set d = namespace() %}{% set d.a = 123 %}{{ d['a'] == 123 }}",
json::object(),
"True"
);
test_template(t, "tuple unpacking",
"{% set t = (1, 2, 3) %}{% set a, b, c = t %}{{ a + b + c }}",
json::object(),
"6"
);
}
static void test_filters(testing & t) {
@ -1312,6 +1381,154 @@ static void test_object_methods(testing & t) {
{{"obj", {{"a", "b"}}}},
"True True"
);
test_template(t, "expression as object key",
"{% set d = {'ab': 123} %}{{ d['a' + 'b'] == 123 }}",
json::object(),
"True"
);
test_template(t, "numeric as object key (template: Seed-OSS)",
"{% set d = {1: 'a', 2: 'b'} %}{{ d[1] == 'a' and d[2] == 'b' }}",
json::object(),
"True"
);
}
static void test_hasher(testing & t) {
static const std::vector<std::pair<size_t, size_t>> chunk_sizes = {
{1, 2},
{1, 16},
{8, 1},
{1, 1024},
{5, 512},
{16, 256},
{45, 122},
{70, 634},
};
static auto random_bytes = [](size_t length) -> std::string {
std::string data;
data.resize(length);
for (size_t i = 0; i < length; ++i) {
data[i] = static_cast<char>(rand() % 256);
}
return data;
};
t.test("state unchanged with empty input", [](testing & t) {
jinja::hasher hasher;
hasher.update("some data");
size_t initial_state = hasher.digest();
hasher.update("", 0);
size_t final_state = hasher.digest();
t.assert_true("Hasher state should remain unchanged", initial_state == final_state);
});
t.test("different inputs produce different hashes", [](testing & t) {
jinja::hasher hasher1;
hasher1.update("data one");
size_t hash1 = hasher1.digest();
jinja::hasher hasher2;
hasher2.update("data two");
size_t hash2 = hasher2.digest();
t.assert_true("Different inputs should produce different hashes", hash1 != hash2);
});
t.test("same inputs produce same hashes", [](testing & t) {
jinja::hasher hasher1;
hasher1.update("consistent data");
size_t hash1 = hasher1.digest();
jinja::hasher hasher2;
hasher2.update("consistent data");
size_t hash2 = hasher2.digest();
t.assert_true("Same inputs should produce same hashes", hash1 == hash2);
});
t.test("property: update(a ~ b) == update(a).update(b)", [](testing & t) {
for (const auto & [size1, size2] : chunk_sizes) {
std::string data1 = random_bytes(size1);
std::string data2 = random_bytes(size2);
jinja::hasher hasher1;
hasher1.update(data1);
hasher1.update(data2);
size_t hash1 = hasher1.digest();
jinja::hasher hasher2;
hasher2.update(data1 + data2);
size_t hash2 = hasher2.digest();
t.assert_true(
"Hashing in multiple updates should match single update (" + std::to_string(size1) + ", " + std::to_string(size2) + ")",
hash1 == hash2);
}
});
t.test("property: update(a ~ b) == update(a).update(b) with more update passes", [](testing & t) {
static const std::vector<size_t> sizes = {3, 732, 131, 13, 17, 256, 436, 99, 4};
jinja::hasher hasher1;
jinja::hasher hasher2;
std::string combined_data;
for (size_t size : sizes) {
std::string data = random_bytes(size);
hasher1.update(data);
combined_data += data;
}
hasher2.update(combined_data);
size_t hash1 = hasher1.digest();
size_t hash2 = hasher2.digest();
t.assert_true(
"Hashing in multiple updates should match single update with many chunks",
hash1 == hash2);
});
t.test("property: non associativity of update", [](testing & t) {
for (const auto & [size1, size2] : chunk_sizes) {
std::string data1 = random_bytes(size1);
std::string data2 = random_bytes(size2);
jinja::hasher hasher1;
hasher1.update(data1);
hasher1.update(data2);
size_t hash1 = hasher1.digest();
jinja::hasher hasher2;
hasher2.update(data2);
hasher2.update(data1);
size_t hash2 = hasher2.digest();
t.assert_true(
"Hashing order should matter (" + std::to_string(size1) + ", " + std::to_string(size2) + ")",
hash1 != hash2);
}
});
t.test("property: different lengths produce different hashes (padding block size)", [](testing & t) {
std::string random_data = random_bytes(64);
jinja::hasher hasher1;
hasher1.update(random_data);
size_t hash1 = hasher1.digest();
for (int i = 0; i < 16; ++i) {
random_data.push_back('A'); // change length
jinja::hasher hasher2;
hasher2.update(random_data);
size_t hash2 = hasher2.digest();
t.assert_true("Different lengths should produce different hashes (length " + std::to_string(random_data.size()) + ")", hash1 != hash2);
hash1 = hash2;
}
});
}
static void test_template_cpp(testing & t, const std::string & name, const std::string & tmpl, const json & vars, const std::string & expect) {

View file

@ -30,6 +30,7 @@ def test_with_and_without_draft():
"prompt": "I believe the meaning of life is",
"temperature": 0.0,
"top_k": 1,
"n_predict": 16,
})
assert res.status_code == 200
content_no_draft = res.body["content"]
@ -42,6 +43,7 @@ def test_with_and_without_draft():
"prompt": "I believe the meaning of life is",
"temperature": 0.0,
"top_k": 1,
"n_predict": 16,
})
assert res.status_code == 200
content_draft = res.body["content"]
@ -68,6 +70,7 @@ def test_different_draft_min_draft_max():
"prompt": "I believe the meaning of life is",
"temperature": 0.0,
"top_k": 1,
"n_predict": 16,
})
assert res.status_code == 200
if last_content is not None: