mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
Merge commit '88d23ad515' into concedo_experimental
# Conflicts: # CODEOWNERS # docs/build.md # ggml/CMakeLists.txt # ggml/src/CMakeLists.txt # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-zendnn/CMakeLists.txt # tests/test-chat-template.cpp
This commit is contained in:
commit
46cd17c17e
29 changed files with 916 additions and 719 deletions
|
|
@ -1298,9 +1298,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
|
||||
add_opt(common_arg(
|
||||
{"-kvu", "--kv-unified"},
|
||||
{"-no-kvu", "--no-kv-unified"},
|
||||
"use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)",
|
||||
[](common_params & params) {
|
||||
params.kv_unified = true;
|
||||
[](common_params & params, bool value) {
|
||||
params.kv_unified = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED}));
|
||||
add_opt(common_arg(
|
||||
|
|
@ -2201,18 +2202,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
add_opt(common_arg(
|
||||
{"--mmap"},
|
||||
{"--no-mmap"},
|
||||
string_format("whether to memory-map model. Explicitly enabling mmap disables direct-io. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
|
||||
string_format("whether to memory-map model. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.use_mmap = value;
|
||||
if (value) {
|
||||
params.use_direct_io = false; // disable direct io when mmap is explicitly enabled
|
||||
}
|
||||
}
|
||||
).set_env("LLAMA_ARG_MMAP"));
|
||||
add_opt(common_arg(
|
||||
{"-dio", "--direct-io"},
|
||||
{"-ndio", "--no-direct-io"},
|
||||
string_format("use DirectIO if available. Takes precedence over --mmap (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
|
||||
string_format("use DirectIO if available. (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.use_direct_io = value;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -435,7 +435,7 @@ struct common_params {
|
|||
|
||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||
bool use_mmap = true; // enable mmap to use filesystem cache
|
||||
bool use_direct_io = true; // read from disk without buffering for faster model loading
|
||||
bool use_direct_io = false; // read from disk without buffering
|
||||
bool use_mlock = false; // use mlock to keep model in memory
|
||||
bool verbose_prompt = false; // print prompt tokens before generation
|
||||
bool display_prompt = true; // print prompt before generation
|
||||
|
|
|
|||
|
|
@ -44,6 +44,12 @@ static std::string get_line_col(const std::string & source, size_t pos) {
|
|||
return "line " + std::to_string(line) + ", column " + std::to_string(col);
|
||||
}
|
||||
|
||||
static void ensure_key_type_allowed(const value & val) {
|
||||
if (!val->is_hashable()) {
|
||||
throw std::runtime_error("Type: " + val->type() + " is not allowed as object key");
|
||||
}
|
||||
}
|
||||
|
||||
// execute with error handling
|
||||
value statement::execute(context & ctx) {
|
||||
try {
|
||||
|
|
@ -95,20 +101,10 @@ value identifier::execute_impl(context & ctx) {
|
|||
value object_literal::execute_impl(context & ctx) {
|
||||
auto obj = mk_val<value_object>();
|
||||
for (const auto & pair : val) {
|
||||
value key_val = pair.first->execute(ctx);
|
||||
if (!is_val<value_string>(key_val) && !is_val<value_int>(key_val)) {
|
||||
throw std::runtime_error("Object literal: keys must be string or int values, got " + key_val->type());
|
||||
}
|
||||
std::string key = key_val->as_string().str();
|
||||
value key = pair.first->execute(ctx);
|
||||
value val = pair.second->execute(ctx);
|
||||
JJ_DEBUG("Object literal: setting key '%s' with value type %s", key.c_str(), val->type().c_str());
|
||||
JJ_DEBUG("Object literal: setting key '%s' with value type %s", key->as_string().str().c_str(), val->type().c_str());
|
||||
obj->insert(key, val);
|
||||
|
||||
if (is_val<value_int>(key_val)) {
|
||||
obj->val_obj.is_key_numeric = true;
|
||||
} else if (obj->val_obj.is_key_numeric) {
|
||||
throw std::runtime_error("Object literal: cannot mix numeric and non-numeric keys");
|
||||
}
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
|
@ -127,9 +123,9 @@ value binary_expression::execute_impl(context & ctx) {
|
|||
value right_val = right->execute(ctx);
|
||||
JJ_DEBUG("Executing binary expression %s '%s' %s", left_val->type().c_str(), op.value.c_str(), right_val->type().c_str());
|
||||
if (op.value == "==") {
|
||||
return mk_val<value_bool>(value_compare(left_val, right_val, value_compare_op::eq));
|
||||
return mk_val<value_bool>(*left_val == *right_val);
|
||||
} else if (op.value == "!=") {
|
||||
return mk_val<value_bool>(!value_compare(left_val, right_val, value_compare_op::eq));
|
||||
return mk_val<value_bool>(!(*left_val == *right_val));
|
||||
}
|
||||
|
||||
auto workaround_concat_null_with_str = [&](value & res) -> bool {
|
||||
|
|
@ -230,7 +226,7 @@ value binary_expression::execute_impl(context & ctx) {
|
|||
auto & arr = right_val->as_array();
|
||||
bool member = false;
|
||||
for (const auto & item : arr) {
|
||||
if (value_compare(left_val, item, value_compare_op::eq)) {
|
||||
if (*left_val == *item) {
|
||||
member = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -265,10 +261,9 @@ value binary_expression::execute_impl(context & ctx) {
|
|||
}
|
||||
}
|
||||
|
||||
// String in object
|
||||
if (is_val<value_string>(left_val) && is_val<value_object>(right_val)) {
|
||||
auto key = left_val->as_string().str();
|
||||
bool has_key = right_val->has_key(key);
|
||||
// Value key in object
|
||||
if (is_val<value_object>(right_val)) {
|
||||
bool has_key = right_val->has_key(left_val);
|
||||
if (op.value == "in") {
|
||||
return mk_val<value_bool>(has_key);
|
||||
} else if (op.value == "not in") {
|
||||
|
|
@ -465,14 +460,8 @@ value for_statement::execute_impl(context & ctx) {
|
|||
JJ_DEBUG("%s", "For loop over object keys");
|
||||
auto & obj = iterable_val->as_ordered_object();
|
||||
for (auto & p : obj) {
|
||||
auto tuple = mk_val<value_array>();
|
||||
if (iterable_val->val_obj.is_key_numeric) {
|
||||
tuple->push_back(mk_val<value_int>(std::stoll(p.first)));
|
||||
} else {
|
||||
tuple->push_back(mk_val<value_string>(p.first));
|
||||
}
|
||||
tuple->push_back(p.second);
|
||||
items.push_back(tuple);
|
||||
auto tuple = mk_val<value_tuple>(p);
|
||||
items.push_back(std::move(tuple));
|
||||
}
|
||||
if (ctx.is_get_stats) {
|
||||
iterable_val->stats.used = true;
|
||||
|
|
@ -602,11 +591,13 @@ value set_statement::execute_impl(context & ctx) {
|
|||
auto rhs = val ? val->execute(ctx) : exec_statements(body, ctx);
|
||||
|
||||
if (is_stmt<identifier>(assignee)) {
|
||||
// case: {% set my_var = value %}
|
||||
auto var_name = cast_stmt<identifier>(assignee)->val;
|
||||
JJ_DEBUG("Setting global variable '%s' with value type %s", var_name.c_str(), rhs->type().c_str());
|
||||
ctx.set_val(var_name, rhs);
|
||||
|
||||
} else if (is_stmt<tuple_literal>(assignee)) {
|
||||
// case: {% set a, b = value %}
|
||||
auto tuple = cast_stmt<tuple_literal>(assignee);
|
||||
if (!is_val<value_array>(rhs)) {
|
||||
throw std::runtime_error("Cannot unpack non-iterable type in set: " + rhs->type());
|
||||
|
|
@ -625,6 +616,7 @@ value set_statement::execute_impl(context & ctx) {
|
|||
}
|
||||
|
||||
} else if (is_stmt<member_expression>(assignee)) {
|
||||
// case: {% set ns.my_var = value %}
|
||||
auto member = cast_stmt<member_expression>(assignee);
|
||||
if (member->computed) {
|
||||
throw std::runtime_error("Cannot assign to computed member");
|
||||
|
|
@ -767,22 +759,22 @@ value member_expression::execute_impl(context & ctx) {
|
|||
}
|
||||
|
||||
JJ_DEBUG("Member expression on object type %s, property type %s", object->type().c_str(), property->type().c_str());
|
||||
ensure_key_type_allowed(property);
|
||||
|
||||
value val = mk_val<value_undefined>("object_property");
|
||||
|
||||
if (is_val<value_undefined>(object)) {
|
||||
JJ_DEBUG("%s", "Accessing property on undefined object, returning undefined");
|
||||
return val;
|
||||
|
||||
} else if (is_val<value_object>(object)) {
|
||||
if (!is_val<value_string>(property)) {
|
||||
throw std::runtime_error("Cannot access object with non-string: got " + property->type());
|
||||
}
|
||||
auto key = property->as_string().str();
|
||||
val = object->at(key, val);
|
||||
val = object->at(property, val);
|
||||
if (is_val<value_undefined>(val)) {
|
||||
val = try_builtin_func(ctx, key, object, true);
|
||||
}
|
||||
JJ_DEBUG("Accessed property '%s' value, got type: %s", key.c_str(), val->type().c_str());
|
||||
|
||||
} else if (is_val<value_array>(object) || is_val<value_string>(object)) {
|
||||
if (is_val<value_int>(property)) {
|
||||
int64_t index = property->as_int();
|
||||
|
|
@ -806,6 +798,7 @@ value member_expression::execute_impl(context & ctx) {
|
|||
auto key = property->as_string().str();
|
||||
JJ_DEBUG("Accessing %s built-in '%s'", is_val<value_array>(object) ? "array" : "string", key.c_str());
|
||||
val = try_builtin_func(ctx, key, object, true);
|
||||
|
||||
} else {
|
||||
throw std::runtime_error("Cannot access property with non-string/non-number: got " + property->type());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,18 +79,18 @@ struct context {
|
|||
}
|
||||
|
||||
value get_val(const std::string & name) {
|
||||
auto it = env->val_obj.unordered.find(name);
|
||||
if (it != env->val_obj.unordered.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
return mk_val<value_undefined>(name);
|
||||
}
|
||||
value default_val = mk_val<value_undefined>(name);
|
||||
return env->at(name, default_val);
|
||||
}
|
||||
|
||||
void set_val(const std::string & name, const value & val) {
|
||||
env->insert(name, val);
|
||||
}
|
||||
|
||||
void set_val(const value & name, const value & val) {
|
||||
env->insert(name, val);
|
||||
}
|
||||
|
||||
void print_vars() const {
|
||||
printf("Context Variables:\n%s\n", value_to_json(env, 2).c_str());
|
||||
}
|
||||
|
|
@ -344,9 +344,19 @@ struct array_literal : public expression {
|
|||
}
|
||||
};
|
||||
|
||||
struct tuple_literal : public array_literal {
|
||||
explicit tuple_literal(statements && val) : array_literal(std::move(val)) {}
|
||||
struct tuple_literal : public expression {
|
||||
statements val;
|
||||
explicit tuple_literal(statements && val) : val(std::move(val)) {
|
||||
for (const auto& item : this->val) chk_type<expression>(item);
|
||||
}
|
||||
std::string type() const override { return "TupleLiteral"; }
|
||||
value execute_impl(context & ctx) override {
|
||||
auto arr = mk_val<value_array>();
|
||||
for (const auto & item_stmt : val) {
|
||||
arr->push_back(item_stmt->execute(ctx));
|
||||
}
|
||||
return mk_val<value_tuple>(std::move(arr->as_array()));
|
||||
}
|
||||
};
|
||||
|
||||
struct object_literal : public expression {
|
||||
|
|
|
|||
|
|
@ -61,6 +61,12 @@ size_t string::length() const {
|
|||
return len;
|
||||
}
|
||||
|
||||
void string::hash_update(hasher & hash) const noexcept {
|
||||
for (const auto & part : parts) {
|
||||
hash.update(part.val.data(), part.val.length());
|
||||
}
|
||||
}
|
||||
|
||||
bool string::all_parts_are_input() const {
|
||||
for (const auto & part : parts) {
|
||||
if (!part.is_input) {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
namespace jinja {
|
||||
|
||||
// allow differentiate between user input strings and template strings
|
||||
|
|
@ -37,6 +39,7 @@ struct string {
|
|||
|
||||
std::string str() const;
|
||||
size_t length() const;
|
||||
void hash_update(hasher & hash) const noexcept;
|
||||
bool all_parts_are_input() const;
|
||||
bool is_uppercase() const;
|
||||
bool is_lowercase() const;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
#include <string>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
namespace jinja {
|
||||
|
||||
|
|
@ -46,4 +48,102 @@ static std::string fmt_error_with_source(const std::string & tag, const std::str
|
|||
return oss.str();
|
||||
}
|
||||
|
||||
// Note: this is a simple hasher, not cryptographically secure, just for hash table usage
|
||||
struct hasher {
|
||||
static constexpr auto size_t_digits = sizeof(size_t) * 8;
|
||||
static constexpr size_t prime = size_t_digits == 64 ? 0x100000001b3 : 0x01000193;
|
||||
static constexpr size_t seed = size_t_digits == 64 ? 0xcbf29ce484222325 : 0x811c9dc5;
|
||||
static constexpr auto block_size = sizeof(size_t); // in bytes; allowing the compiler to vectorize the computation
|
||||
|
||||
static_assert(size_t_digits == 64 || size_t_digits == 32);
|
||||
static_assert(block_size == 8 || block_size == 4);
|
||||
|
||||
uint8_t buffer[block_size];
|
||||
size_t idx = 0; // current index in buffer
|
||||
size_t state = seed;
|
||||
|
||||
hasher() = default;
|
||||
hasher(const std::type_info & type_inf) noexcept {
|
||||
const auto type_hash = type_inf.hash_code();
|
||||
update(&type_hash, sizeof(type_hash));
|
||||
}
|
||||
|
||||
// Properties:
|
||||
// - update is not associative: update(a).update(b) != update(b).update(a)
|
||||
// - update(a ~ b) == update(a).update(b) with ~ as concatenation operator --> useful for streaming
|
||||
// - update("", 0) --> state unchanged with empty input
|
||||
hasher& update(void const * bytes, size_t len) noexcept {
|
||||
const uint8_t * c = static_cast<uint8_t const *>(bytes);
|
||||
if (len == 0) {
|
||||
return *this;
|
||||
}
|
||||
size_t processed = 0;
|
||||
|
||||
// first, fill the existing buffer if it's partial
|
||||
if (idx > 0) {
|
||||
size_t to_fill = block_size - idx;
|
||||
if (to_fill > len) {
|
||||
to_fill = len;
|
||||
}
|
||||
std::memcpy(buffer + idx, c, to_fill);
|
||||
idx += to_fill;
|
||||
processed += to_fill;
|
||||
if (idx == block_size) {
|
||||
update_block(buffer);
|
||||
idx = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process full blocks from the remaining input
|
||||
for (; processed + block_size <= len; processed += block_size) {
|
||||
update_block(c + processed);
|
||||
}
|
||||
|
||||
// buffer any remaining bytes
|
||||
size_t remaining = len - processed;
|
||||
if (remaining > 0) {
|
||||
std::memcpy(buffer, c + processed, remaining);
|
||||
idx = remaining;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// convenience function for testing only
|
||||
hasher& update(const std::string & s) noexcept {
|
||||
return update(s.data(), s.size());
|
||||
}
|
||||
|
||||
// finalize and get the hash value
|
||||
// note: after calling digest, the hasher state is modified, do not call update() again
|
||||
size_t digest() noexcept {
|
||||
// if there are remaining bytes in buffer, fill the rest with zeros and process
|
||||
if (idx > 0) {
|
||||
for (size_t i = idx; i < block_size; ++i) {
|
||||
buffer[i] = 0;
|
||||
}
|
||||
update_block(buffer);
|
||||
idx = 0;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
private:
|
||||
// IMPORTANT: block must have at least block_size bytes
|
||||
void update_block(const uint8_t * block) noexcept {
|
||||
size_t blk = static_cast<uint32_t>(block[0])
|
||||
| (static_cast<uint32_t>(block[1]) << 8)
|
||||
| (static_cast<uint32_t>(block[2]) << 16)
|
||||
| (static_cast<uint32_t>(block[3]) << 24);
|
||||
if constexpr (block_size == 8) {
|
||||
blk = blk | (static_cast<uint64_t>(block[4]) << 32)
|
||||
| (static_cast<uint64_t>(block[5]) << 40)
|
||||
| (static_cast<uint64_t>(block[6]) << 48)
|
||||
| (static_cast<uint64_t>(block[7]) << 56);
|
||||
}
|
||||
state ^= blk;
|
||||
state *= prime;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace jinja
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ static value selectattr(const func_args & args) {
|
|||
args.ensure_vals<value_array, value_string, value_string, value_string>(true, true, false, false);
|
||||
|
||||
auto arr = args.get_pos(0)->as_array();
|
||||
auto attr_name = args.get_pos(1)->as_string().str();
|
||||
auto attribute = args.get_pos(1);
|
||||
auto out = mk_val<value_array>();
|
||||
value val_default = mk_val<value_undefined>();
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ static value selectattr(const func_args & args) {
|
|||
if (!is_val<value_object>(item)) {
|
||||
throw raised_exception("selectattr: item is not an object");
|
||||
}
|
||||
value attr_val = item->at(attr_name, val_default);
|
||||
value attr_val = item->at(attribute, val_default);
|
||||
bool is_selected = attr_val->as_bool();
|
||||
if constexpr (is_reject) is_selected = !is_selected;
|
||||
if (is_selected) out->push_back(item);
|
||||
|
|
@ -217,7 +217,7 @@ static value selectattr(const func_args & args) {
|
|||
if (!is_val<value_object>(item)) {
|
||||
throw raised_exception("selectattr: item is not an object");
|
||||
}
|
||||
value attr_val = item->at(attr_name, val_default);
|
||||
value attr_val = item->at(attribute, val_default);
|
||||
func_args test_args(args.ctx);
|
||||
test_args.push_back(attr_val); // attribute value
|
||||
test_args.push_back(extra_arg); // extra argument
|
||||
|
|
@ -741,6 +741,7 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
args.ensure_count(1, 4);
|
||||
args.ensure_vals<value_array, value_int, value_int, value_int>(true, true, false, false);
|
||||
|
||||
auto val = args.get_pos(0);
|
||||
auto arg0 = args.get_pos(1);
|
||||
auto arg1 = args.get_pos(2, mk_val<value_undefined>());
|
||||
auto arg2 = args.get_pos(3, mk_val<value_undefined>());
|
||||
|
|
@ -762,10 +763,8 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
if (step == 0) {
|
||||
throw raised_exception("slice step cannot be zero");
|
||||
}
|
||||
auto arr = slice(args.get_pos(0)->as_array(), start, stop, step);
|
||||
auto res = mk_val<value_array>();
|
||||
res->val_arr = std::move(arr);
|
||||
return res;
|
||||
auto arr = slice(val->as_array(), start, stop, step);
|
||||
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
|
||||
}},
|
||||
{"selectattr", selectattr<false>},
|
||||
{"select", selectattr<false>},
|
||||
|
|
@ -785,15 +784,14 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
}
|
||||
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
|
||||
const std::string delim = val_delim->is_undefined() ? "" : val_delim->as_string().str();
|
||||
const std::string attr_name = attribute->is_undefined() ? "" : attribute->as_string().str();
|
||||
std::string result;
|
||||
for (size_t i = 0; i < arr.size(); ++i) {
|
||||
value val_arr = arr[i];
|
||||
if (!attribute->is_undefined()) {
|
||||
if (attr_is_int && is_val<value_array>(val_arr)) {
|
||||
val_arr = val_arr->at(attr_int);
|
||||
} else if (!attr_is_int && !attr_name.empty() && is_val<value_object>(val_arr)) {
|
||||
val_arr = val_arr->at(attr_name);
|
||||
} else if (!attr_is_int && is_val<value_object>(val_arr)) {
|
||||
val_arr = val_arr->at(attribute);
|
||||
}
|
||||
}
|
||||
if (!is_val<value_string>(val_arr) && !is_val<value_int>(val_arr) && !is_val<value_float>(val_arr)) {
|
||||
|
|
@ -808,9 +806,7 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
}},
|
||||
{"string", [](const func_args & args) -> value {
|
||||
args.ensure_vals<value_array>();
|
||||
auto str = mk_val<value_string>();
|
||||
gather_string_parts_recursive(args.get_pos(0), str);
|
||||
return str;
|
||||
return mk_val<value_string>(args.get_pos(0)->as_string());
|
||||
}},
|
||||
{"tojson", tojson},
|
||||
{"map", [](const func_args & args) -> value {
|
||||
|
|
@ -821,26 +817,26 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
if (!is_val<value_kwarg>(args.get_args().at(1))) {
|
||||
throw not_implemented_exception("map: filter-mapping not implemented");
|
||||
}
|
||||
value val = args.get_pos(0);
|
||||
value attribute = args.get_kwarg_or_pos("attribute", 1);
|
||||
const bool attr_is_int = is_val<value_int>(attribute);
|
||||
if (!is_val<value_string>(attribute) && !attr_is_int) {
|
||||
throw raised_exception("map: attribute must be string or integer");
|
||||
}
|
||||
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
|
||||
const std::string attr_name = attribute->as_string().str();
|
||||
value default_val = args.get_kwarg("default", mk_val<value_undefined>());
|
||||
auto out = mk_val<value_array>();
|
||||
auto arr = args.get_pos(0)->as_array();
|
||||
auto arr = val->as_array();
|
||||
for (const auto & item : arr) {
|
||||
value attr_val;
|
||||
if (attr_is_int) {
|
||||
attr_val = is_val<value_array>(item) ? item->at(attr_int, default_val) : default_val;
|
||||
} else {
|
||||
attr_val = is_val<value_object>(item) ? item->at(attr_name, default_val) : default_val;
|
||||
attr_val = is_val<value_object>(item) ? item->at(attribute, default_val) : default_val;
|
||||
}
|
||||
out->push_back(attr_val);
|
||||
}
|
||||
return out;
|
||||
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(out->as_array())) : out;
|
||||
}},
|
||||
{"append", [](const func_args & args) -> value {
|
||||
args.ensure_count(2);
|
||||
|
|
@ -867,6 +863,7 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
if (!is_val<value_array>(args.get_pos(0))) {
|
||||
throw raised_exception("sort: first argument must be an array");
|
||||
}
|
||||
value val = args.get_pos(0);
|
||||
value val_reverse = args.get_kwarg_or_pos("reverse", 1);
|
||||
value val_case = args.get_kwarg_or_pos("case_sensitive", 2);
|
||||
value attribute = args.get_kwarg_or_pos("attribute", 3);
|
||||
|
|
@ -875,8 +872,7 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
const bool reverse = val_reverse->as_bool(); // undefined == false
|
||||
const bool attr_is_int = is_val<value_int>(attribute);
|
||||
const int64_t attr_int = attr_is_int ? attribute->as_int() : 0;
|
||||
const std::string attr_name = attribute->is_undefined() ? "" : attribute->as_string().str();
|
||||
std::vector<value> arr = cast_val<value_array>(args.get_pos(0))->as_array(); // copy
|
||||
std::vector<value> arr = val->as_array(); // copy
|
||||
std::sort(arr.begin(), arr.end(),[&](const value & a, const value & b) {
|
||||
value val_a = a;
|
||||
value val_b = b;
|
||||
|
|
@ -884,22 +880,23 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
if (attr_is_int && is_val<value_array>(a) && is_val<value_array>(b)) {
|
||||
val_a = a->at(attr_int);
|
||||
val_b = b->at(attr_int);
|
||||
} else if (!attr_is_int && !attr_name.empty() && is_val<value_object>(a) && is_val<value_object>(b)) {
|
||||
val_a = a->at(attr_name);
|
||||
val_b = b->at(attr_name);
|
||||
} else if (!attr_is_int && is_val<value_object>(a) && is_val<value_object>(b)) {
|
||||
val_a = a->at(attribute);
|
||||
val_b = b->at(attribute);
|
||||
} else {
|
||||
throw raised_exception("sort: unsupported object attribute comparison");
|
||||
throw raised_exception("sort: unsupported object attribute comparison between " + a->type() + " and " + b->type());
|
||||
}
|
||||
}
|
||||
return value_compare(val_a, val_b, reverse ? value_compare_op::gt : value_compare_op::lt);
|
||||
});
|
||||
return mk_val<value_array>(arr);
|
||||
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
|
||||
}},
|
||||
{"reverse", [](const func_args & args) -> value {
|
||||
args.ensure_vals<value_array>();
|
||||
std::vector<value> arr = cast_val<value_array>(args.get_pos(0))->as_array(); // copy
|
||||
value val = args.get_pos(0);
|
||||
std::vector<value> arr = val->as_array(); // copy
|
||||
std::reverse(arr.begin(), arr.end());
|
||||
return mk_val<value_array>(arr);
|
||||
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
|
||||
}},
|
||||
{"unique", [](const func_args &) -> value {
|
||||
throw not_implemented_exception("Array unique builtin not implemented");
|
||||
|
|
@ -930,7 +927,7 @@ const func_builtins & value_object_t::get_builtins() const {
|
|||
default_val = args.get_pos(2);
|
||||
}
|
||||
const value obj = args.get_pos(0);
|
||||
std::string key = args.get_pos(1)->as_string().str();
|
||||
const value key = args.get_pos(1);
|
||||
return obj->at(key, default_val);
|
||||
}},
|
||||
{"keys", [](const func_args & args) -> value {
|
||||
|
|
@ -938,7 +935,7 @@ const func_builtins & value_object_t::get_builtins() const {
|
|||
const auto & obj = args.get_pos(0)->as_ordered_object();
|
||||
auto result = mk_val<value_array>();
|
||||
for (const auto & pair : obj) {
|
||||
result->push_back(mk_val<value_string>(pair.first));
|
||||
result->push_back(pair.first);
|
||||
}
|
||||
return result;
|
||||
}},
|
||||
|
|
@ -956,15 +953,16 @@ const func_builtins & value_object_t::get_builtins() const {
|
|||
const auto & obj = args.get_pos(0)->as_ordered_object();
|
||||
auto result = mk_val<value_array>();
|
||||
for (const auto & pair : obj) {
|
||||
auto item = mk_val<value_array>();
|
||||
item->push_back(mk_val<value_string>(pair.first));
|
||||
item->push_back(pair.second);
|
||||
auto item = mk_val<value_tuple>(pair);
|
||||
result->push_back(std::move(item));
|
||||
}
|
||||
return result;
|
||||
}},
|
||||
{"tojson", tojson},
|
||||
{"string", tojson},
|
||||
{"string", [](const func_args & args) -> value {
|
||||
args.ensure_vals<value_object>();
|
||||
return mk_val<value_string>(args.get_pos(0)->as_string());
|
||||
}},
|
||||
{"length", [](const func_args & args) -> value {
|
||||
args.ensure_vals<value_object>();
|
||||
const auto & obj = args.get_pos(0)->as_ordered_object();
|
||||
|
|
@ -985,11 +983,11 @@ const func_builtins & value_object_t::get_builtins() const {
|
|||
const bool reverse = val_reverse->as_bool(); // undefined == false
|
||||
const bool by_value = is_val<value_string>(val_by) && val_by->as_string().str() == "value" ? true : false;
|
||||
auto result = mk_val<value_object>(val_input); // copy
|
||||
std::sort(result->val_obj.ordered.begin(), result->val_obj.ordered.end(), [&](const auto & a, const auto & b) {
|
||||
std::sort(result->val_obj.begin(), result->val_obj.end(), [&](const auto & a, const auto & b) {
|
||||
if (by_value) {
|
||||
return value_compare(a.second, b.second, reverse ? value_compare_op::gt : value_compare_op::lt);
|
||||
} else {
|
||||
return reverse ? a.first > b.first : a.first < b.first;
|
||||
return value_compare(a.first, b.first, reverse ? value_compare_op::gt : value_compare_op::lt);
|
||||
}
|
||||
});
|
||||
return result;
|
||||
|
|
@ -1134,6 +1132,8 @@ void global_from_json(context & ctx, const nlohmann::ordered_json & json_obj, bo
|
|||
}
|
||||
}
|
||||
|
||||
// recursively convert value to JSON string
|
||||
// TODO: avoid circular references
|
||||
static void value_to_json_internal(std::ostringstream & oss, const value & val, int curr_lvl, int indent, const std::string_view item_sep, const std::string_view key_sep) {
|
||||
auto indent_str = [indent, curr_lvl]() -> std::string {
|
||||
return (indent > 0) ? std::string(curr_lvl * indent, ' ') : "";
|
||||
|
|
@ -1196,7 +1196,8 @@ static void value_to_json_internal(std::ostringstream & oss, const value & val,
|
|||
size_t i = 0;
|
||||
for (const auto & pair : obj) {
|
||||
oss << indent_str() << (indent > 0 ? std::string(indent, ' ') : "");
|
||||
oss << "\"" << pair.first << "\"" << key_sep;
|
||||
value_to_json_internal(oss, mk_val<value_string>(pair.first->as_string().str()), curr_lvl + 1, indent, item_sep, key_sep);
|
||||
oss << key_sep;
|
||||
value_to_json_internal(oss, pair.second, curr_lvl + 1, indent, item_sep, key_sep);
|
||||
if (i < obj.size() - 1) {
|
||||
oss << item_sep;
|
||||
|
|
@ -1219,4 +1220,19 @@ std::string value_to_json(const value & val, int indent, const std::string_view
|
|||
return oss.str();
|
||||
}
|
||||
|
||||
// TODO: avoid circular references
|
||||
std::string value_to_string_repr(const value & val) {
|
||||
if (is_val<value_string>(val)) {
|
||||
const std::string val_str = val->as_string().str();
|
||||
|
||||
if (val_str.find('\'') != std::string::npos) {
|
||||
return value_to_json(val);
|
||||
} else {
|
||||
return "'" + val_str + "'";
|
||||
}
|
||||
} else {
|
||||
return val->as_repr();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace jinja
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
#pragma once
|
||||
|
||||
#include "string.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
|
|
@ -93,7 +95,8 @@ void global_from_json(context & ctx, const T_JSON & json_obj, bool mark_input);
|
|||
|
||||
struct func_args; // function argument values
|
||||
|
||||
using func_handler = std::function<value(const func_args &)>;
|
||||
using func_hptr = value(const func_args &);
|
||||
using func_handler = std::function<func_hptr>;
|
||||
using func_builtins = std::map<std::string, func_handler>;
|
||||
|
||||
enum value_compare_op { eq, ge, gt, lt, ne };
|
||||
|
|
@ -103,28 +106,9 @@ struct value_t {
|
|||
int64_t val_int;
|
||||
double val_flt;
|
||||
string val_str;
|
||||
bool val_bool;
|
||||
|
||||
std::vector<value> val_arr;
|
||||
|
||||
struct map {
|
||||
// once set to true, all keys must be numeric
|
||||
// caveat: we only allow either all numeric keys or all non-numeric keys
|
||||
// for now, this only applied to for_statement in case of iterating over object keys/items
|
||||
bool is_key_numeric = false;
|
||||
std::map<std::string, value> unordered;
|
||||
std::vector<std::pair<std::string, value>> ordered;
|
||||
void insert(const std::string & key, const value & val) {
|
||||
if (unordered.find(key) != unordered.end()) {
|
||||
// if key exists, remove from ordered list
|
||||
ordered.erase(std::remove_if(ordered.begin(), ordered.end(),
|
||||
[&](const std::pair<std::string, value> & p) { return p.first == key; }),
|
||||
ordered.end());
|
||||
}
|
||||
unordered[key] = val;
|
||||
ordered.push_back({key, val});
|
||||
}
|
||||
} val_obj;
|
||||
std::vector<std::pair<value, value>> val_obj;
|
||||
|
||||
func_handler val_func;
|
||||
|
||||
|
|
@ -139,6 +123,7 @@ struct value_t {
|
|||
value_t(const value_t &) = default;
|
||||
virtual ~value_t() = default;
|
||||
|
||||
// Note: only for debugging and error reporting purposes
|
||||
virtual std::string type() const { return ""; }
|
||||
|
||||
virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); }
|
||||
|
|
@ -146,7 +131,7 @@ struct value_t {
|
|||
virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); }
|
||||
virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); }
|
||||
virtual const std::vector<value> & as_array() const { throw std::runtime_error(type() + " is not an array value"); }
|
||||
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); }
|
||||
virtual bool is_none() const { return false; }
|
||||
virtual bool is_undefined() const { return false; }
|
||||
|
|
@ -154,43 +139,66 @@ struct value_t {
|
|||
throw std::runtime_error("No builtins available for type " + type());
|
||||
}
|
||||
|
||||
virtual bool has_key(const std::string & key) {
|
||||
return val_obj.unordered.find(key) != val_obj.unordered.end();
|
||||
}
|
||||
virtual value & at(const std::string & key, value & default_val) {
|
||||
auto it = val_obj.unordered.find(key);
|
||||
if (it == val_obj.unordered.end()) {
|
||||
return default_val;
|
||||
}
|
||||
return val_obj.unordered.at(key);
|
||||
}
|
||||
virtual value & at(const std::string & key) {
|
||||
auto it = val_obj.unordered.find(key);
|
||||
if (it == val_obj.unordered.end()) {
|
||||
throw std::runtime_error("Key '" + key + "' not found in value of type " + type());
|
||||
}
|
||||
return val_obj.unordered.at(key);
|
||||
}
|
||||
virtual value & at(int64_t index, value & default_val) {
|
||||
if (index < 0) {
|
||||
index += val_arr.size();
|
||||
}
|
||||
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
||||
return default_val;
|
||||
}
|
||||
return val_arr[index];
|
||||
}
|
||||
virtual value & at(int64_t index) {
|
||||
if (index < 0) {
|
||||
index += val_arr.size();
|
||||
}
|
||||
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
||||
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
|
||||
}
|
||||
return val_arr[index];
|
||||
}
|
||||
virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); }
|
||||
virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); }
|
||||
|
||||
virtual bool is_numeric() const { return false; }
|
||||
virtual bool is_hashable() const { return false; }
|
||||
virtual bool is_immutable() const { return true; }
|
||||
virtual hasher unique_hash() const noexcept = 0;
|
||||
// TODO: C++20 <=> operator
|
||||
// NOTE: We are treating == as equivalent (for normal comparisons) and != as strict nonequal (for strict (is) comparisons)
|
||||
virtual bool operator==(const value_t & other) const { return equivalent(other); }
|
||||
virtual bool operator!=(const value_t & other) const { return nonequal(other); }
|
||||
|
||||
// Note: only for debugging purposes
|
||||
virtual std::string as_repr() const { return as_string().str(); }
|
||||
|
||||
protected:
|
||||
virtual bool equivalent(const value_t &) const = 0;
|
||||
virtual bool nonequal(const value_t & other) const { return !equivalent(other); }
|
||||
};
|
||||
|
||||
//
|
||||
// utils
|
||||
//
|
||||
|
||||
const func_builtins & global_builtins();
|
||||
|
||||
std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
|
||||
|
||||
// Note: only used for debugging purposes
|
||||
std::string value_to_string_repr(const value & val);
|
||||
|
||||
struct not_implemented_exception : public std::runtime_error {
|
||||
not_implemented_exception(const std::string & msg) : std::runtime_error("NotImplemented: " + msg) {}
|
||||
};
|
||||
|
||||
struct value_hasher {
|
||||
size_t operator()(const value & val) const noexcept {
|
||||
return val->unique_hash().digest();
|
||||
}
|
||||
};
|
||||
|
||||
struct value_equivalence {
|
||||
bool operator()(const value & lhs, const value & rhs) const {
|
||||
return *lhs == *rhs;
|
||||
}
|
||||
bool operator()(const std::pair<value, value> & lhs, const std::pair<value, value> & rhs) const {
|
||||
return *(lhs.first) == *(rhs.first) && *(lhs.second) == *(rhs.second);
|
||||
}
|
||||
};
|
||||
|
||||
struct value_equality {
|
||||
bool operator()(const value & lhs, const value & rhs) const {
|
||||
return !(*lhs != *rhs);
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
|
|
@ -198,24 +206,49 @@ struct value_t {
|
|||
//
|
||||
|
||||
struct value_int_t : public value_t {
|
||||
value_int_t(int64_t v) { val_int = v; }
|
||||
value_int_t(int64_t v) {
|
||||
val_int = v;
|
||||
val_flt = static_cast<double>(v);
|
||||
if (static_cast<int64_t>(val_flt) != v) {
|
||||
val_flt = v < 0 ? -INFINITY : INFINITY;
|
||||
}
|
||||
}
|
||||
virtual std::string type() const override { return "Integer"; }
|
||||
virtual int64_t as_int() const override { return val_int; }
|
||||
virtual double as_float() const override { return static_cast<double>(val_int); }
|
||||
virtual double as_float() const override { return val_flt; }
|
||||
virtual string as_string() const override { return std::to_string(val_int); }
|
||||
virtual bool as_bool() const override {
|
||||
return val_int != 0;
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_numeric() const override { return true; }
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
return hasher(typeid(*this))
|
||||
.update(&val_int, sizeof(val_int))
|
||||
.update(&val_flt, sizeof(val_flt));
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
|
||||
}
|
||||
virtual bool nonequal(const value_t & other) const override {
|
||||
return !(typeid(*this) == typeid(other) && val_int == other.val_int);
|
||||
}
|
||||
};
|
||||
using value_int = std::shared_ptr<value_int_t>;
|
||||
|
||||
|
||||
struct value_float_t : public value_t {
|
||||
value_float_t(double v) { val_flt = v; }
|
||||
value val;
|
||||
value_float_t(double v) {
|
||||
val_flt = v;
|
||||
val_int = std::isfinite(v) ? static_cast<int64_t>(v) : 0;
|
||||
val = mk_val<value_int>(val_int);
|
||||
}
|
||||
virtual std::string type() const override { return "Float"; }
|
||||
virtual double as_float() const override { return val_flt; }
|
||||
virtual int64_t as_int() const override { return static_cast<int64_t>(val_flt); }
|
||||
virtual int64_t as_int() const override { return val_int; }
|
||||
virtual string as_string() const override {
|
||||
std::string out = std::to_string(val_flt);
|
||||
out.erase(out.find_last_not_of('0') + 1, std::string::npos); // remove trailing zeros
|
||||
|
|
@ -226,6 +259,24 @@ struct value_float_t : public value_t {
|
|||
return val_flt != 0.0;
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_numeric() const override { return true; }
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
if (static_cast<double>(val_int) == val_flt) {
|
||||
return val->unique_hash();
|
||||
} else {
|
||||
return hasher(typeid(*this))
|
||||
.update(&val_int, sizeof(val_int))
|
||||
.update(&val_flt, sizeof(val_flt));
|
||||
}
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
|
||||
}
|
||||
virtual bool nonequal(const value_t & other) const override {
|
||||
return !(typeid(*this) == typeid(other) && val_flt == other.val_flt);
|
||||
}
|
||||
};
|
||||
using value_float = std::shared_ptr<value_float_t>;
|
||||
|
||||
|
|
@ -247,19 +298,49 @@ struct value_string_t : public value_t {
|
|||
return val_str.length() > 0;
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
const auto type_hash = typeid(*this).hash_code();
|
||||
auto hash = hasher();
|
||||
hash.update(&type_hash, sizeof(type_hash));
|
||||
val_str.hash_update(hash);
|
||||
return hash;
|
||||
}
|
||||
void mark_input() {
|
||||
val_str.mark_input();
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return typeid(*this) == typeid(other) && val_str.str() == other.val_str.str();
|
||||
}
|
||||
};
|
||||
using value_string = std::shared_ptr<value_string_t>;
|
||||
|
||||
|
||||
struct value_bool_t : public value_t {
|
||||
value_bool_t(bool v) { val_bool = v; }
|
||||
value val;
|
||||
value_bool_t(bool v) {
|
||||
val_int = static_cast<int64_t>(v);
|
||||
val_flt = static_cast<double>(v);
|
||||
val = mk_val<value_int>(val_int);
|
||||
}
|
||||
virtual std::string type() const override { return "Boolean"; }
|
||||
virtual bool as_bool() const override { return val_bool; }
|
||||
virtual string as_string() const override { return std::string(val_bool ? "True" : "False"); }
|
||||
virtual int64_t as_int() const override { return val_int; }
|
||||
virtual bool as_bool() const override { return val_int; }
|
||||
virtual string as_string() const override { return std::string(val_int ? "True" : "False"); }
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_numeric() const override { return true; }
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
return val->unique_hash();
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return other.is_numeric() && val_int == other.val_int && val_flt == other.val_flt;
|
||||
}
|
||||
virtual bool nonequal(const value_t & other) const override {
|
||||
return !(typeid(*this) == typeid(other) && val_int == other.val_int);
|
||||
}
|
||||
};
|
||||
using value_bool = std::shared_ptr<value_bool_t>;
|
||||
|
||||
|
|
@ -269,13 +350,34 @@ struct value_array_t : public value_t {
|
|||
value_array_t(value & v) {
|
||||
val_arr = v->val_arr;
|
||||
}
|
||||
value_array_t(std::vector<value> && arr) {
|
||||
val_arr = arr;
|
||||
}
|
||||
value_array_t(const std::vector<value> & arr) {
|
||||
val_arr = arr;
|
||||
}
|
||||
void reverse() { std::reverse(val_arr.begin(), val_arr.end()); }
|
||||
void push_back(const value & val) { val_arr.push_back(val); }
|
||||
void push_back(value && val) { val_arr.push_back(std::move(val)); }
|
||||
void reverse() {
|
||||
if (is_immutable()) {
|
||||
throw std::runtime_error("Attempting to modify immutable type");
|
||||
}
|
||||
std::reverse(val_arr.begin(), val_arr.end());
|
||||
}
|
||||
void push_back(const value & val) {
|
||||
if (is_immutable()) {
|
||||
throw std::runtime_error("Attempting to modify immutable type");
|
||||
}
|
||||
val_arr.push_back(val);
|
||||
}
|
||||
void push_back(value && val) {
|
||||
if (is_immutable()) {
|
||||
throw std::runtime_error("Attempting to modify immutable type");
|
||||
}
|
||||
val_arr.push_back(std::move(val));
|
||||
}
|
||||
value pop_at(int64_t index) {
|
||||
if (is_immutable()) {
|
||||
throw std::runtime_error("Attempting to modify immutable type");
|
||||
}
|
||||
if (index < 0) {
|
||||
index = static_cast<int64_t>(val_arr.size()) + index;
|
||||
}
|
||||
|
|
@ -287,64 +389,225 @@ struct value_array_t : public value_t {
|
|||
return val;
|
||||
}
|
||||
virtual std::string type() const override { return "Array"; }
|
||||
virtual bool is_immutable() const override { return false; }
|
||||
virtual const std::vector<value> & as_array() const override { return val_arr; }
|
||||
virtual string as_string() const override {
|
||||
const bool immutable = is_immutable();
|
||||
std::ostringstream ss;
|
||||
ss << "[";
|
||||
ss << (immutable ? "(" : "[");
|
||||
for (size_t i = 0; i < val_arr.size(); i++) {
|
||||
if (i > 0) ss << ", ";
|
||||
ss << val_arr.at(i)->as_repr();
|
||||
value val = val_arr.at(i);
|
||||
ss << value_to_string_repr(val);
|
||||
}
|
||||
ss << "]";
|
||||
if (immutable && val_arr.size() == 1) {
|
||||
ss << ",";
|
||||
}
|
||||
ss << (immutable ? ")" : "]");
|
||||
return ss.str();
|
||||
}
|
||||
virtual bool as_bool() const override {
|
||||
return !val_arr.empty();
|
||||
}
|
||||
virtual value & at(int64_t index, value & default_val) override {
|
||||
if (index < 0) {
|
||||
index += val_arr.size();
|
||||
}
|
||||
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
||||
return default_val;
|
||||
}
|
||||
return val_arr[index];
|
||||
}
|
||||
virtual value & at(int64_t index) override {
|
||||
if (index < 0) {
|
||||
index += val_arr.size();
|
||||
}
|
||||
if (index < 0 || static_cast<size_t>(index) >= val_arr.size()) {
|
||||
throw std::runtime_error("Index " + std::to_string(index) + " out of bounds for array of size " + std::to_string(val_arr.size()));
|
||||
}
|
||||
return val_arr[index];
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_hashable() const override {
|
||||
if (std::all_of(val_arr.begin(), val_arr.end(), [&](auto & val) -> bool {
|
||||
return val->is_immutable() && val->is_hashable();
|
||||
})) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
auto hash = hasher(typeid(*this));
|
||||
for (const auto & val : val_arr) {
|
||||
// must use digest to prevent problems from "concatenation" property of hasher
|
||||
// for ex. hash of [ "ab", "c" ] should be different from [ "a", "bc" ]
|
||||
const size_t val_hash = val->unique_hash().digest();
|
||||
hash.update(&val_hash, sizeof(size_t));
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_arr.begin(), val_arr.end(), other.val_arr.begin(), value_equivalence());
|
||||
}
|
||||
};
|
||||
using value_array = std::shared_ptr<value_array_t>;
|
||||
|
||||
|
||||
struct value_tuple_t : public value_array_t {
|
||||
value_tuple_t(value & v) {
|
||||
val_arr = v->val_arr;
|
||||
}
|
||||
value_tuple_t(std::vector<value> && arr) {
|
||||
val_arr = arr;
|
||||
}
|
||||
value_tuple_t(const std::vector<value> & arr) {
|
||||
val_arr = arr;
|
||||
}
|
||||
value_tuple_t(const std::pair<value, value> & pair) {
|
||||
val_arr.push_back(pair.first);
|
||||
val_arr.push_back(pair.second);
|
||||
}
|
||||
virtual std::string type() const override { return "Tuple"; }
|
||||
virtual bool is_immutable() const override { return true; }
|
||||
};
|
||||
using value_tuple = std::shared_ptr<value_tuple_t>;
|
||||
|
||||
|
||||
struct value_object_t : public value_t {
|
||||
std::unordered_map<value, value, value_hasher, value_equivalence> unordered;
|
||||
bool has_builtins = true; // context and loop objects do not have builtins
|
||||
value_object_t() = default;
|
||||
value_object_t(value & v) {
|
||||
val_obj = v->val_obj;
|
||||
}
|
||||
value_object_t(const std::map<std::string, value> & obj) {
|
||||
for (const auto & pair : obj) {
|
||||
val_obj.insert(pair.first, pair.second);
|
||||
for (const auto & pair : val_obj) {
|
||||
unordered[pair.first] = pair.second;
|
||||
}
|
||||
}
|
||||
value_object_t(const std::vector<std::pair<std::string, value>> & obj) {
|
||||
value_object_t(const std::map<value, value> & obj) {
|
||||
for (const auto & pair : obj) {
|
||||
val_obj.insert(pair.first, pair.second);
|
||||
insert(pair.first, pair.second);
|
||||
}
|
||||
}
|
||||
value_object_t(const std::vector<std::pair<value, value>> & obj) {
|
||||
for (const auto & pair : obj) {
|
||||
insert(pair.first, pair.second);
|
||||
}
|
||||
}
|
||||
void insert(const std::string & key, const value & val) {
|
||||
val_obj.insert(key, val);
|
||||
insert(mk_val<value_string>(key), val);
|
||||
}
|
||||
virtual std::string type() const override { return "Object"; }
|
||||
virtual const std::vector<std::pair<std::string, value>> & as_ordered_object() const override { return val_obj.ordered; }
|
||||
virtual bool is_immutable() const override { return false; }
|
||||
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const override { return val_obj; }
|
||||
virtual string as_string() const override {
|
||||
std::ostringstream ss;
|
||||
ss << "{";
|
||||
for (size_t i = 0; i < val_obj.size(); i++) {
|
||||
if (i > 0) ss << ", ";
|
||||
auto & [key, val] = val_obj.at(i);
|
||||
ss << value_to_string_repr(key) << ": " << value_to_string_repr(val);
|
||||
}
|
||||
ss << "}";
|
||||
return ss.str();
|
||||
}
|
||||
virtual bool as_bool() const override {
|
||||
return !val_obj.unordered.empty();
|
||||
return !unordered.empty();
|
||||
}
|
||||
virtual bool has_key(const value & key) override {
|
||||
if (!key->is_immutable() || !key->is_hashable()) {
|
||||
throw std::runtime_error("Object key of unhashable type: " + key->type());
|
||||
}
|
||||
return unordered.find(key) != unordered.end();
|
||||
}
|
||||
virtual void insert(const value & key, const value & val) override {
|
||||
bool replaced = false;
|
||||
if (is_immutable()) {
|
||||
throw std::runtime_error("Attempting to modify immutable type");
|
||||
}
|
||||
if (has_key(key)) {
|
||||
// if key exists, replace value in ordered list instead of appending
|
||||
for (auto & pair : val_obj) {
|
||||
if (*(pair.first) == *key) {
|
||||
pair.second = val;
|
||||
replaced = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
unordered[key] = val;
|
||||
if (!replaced) {
|
||||
val_obj.push_back({key, val});
|
||||
}
|
||||
}
|
||||
virtual value & at(const value & key, value & default_val) override {
|
||||
if (!has_key(key)) {
|
||||
return default_val;
|
||||
}
|
||||
return unordered.at(key);
|
||||
}
|
||||
virtual value & at(const value & key) override {
|
||||
if (!has_key(key)) {
|
||||
throw std::runtime_error("Key '" + key->as_string().str() + "' not found in value of type " + type());
|
||||
}
|
||||
return unordered.at(key);
|
||||
}
|
||||
virtual value & at(const std::string & key, value & default_val) override {
|
||||
value key_val = mk_val<value_string>(key);
|
||||
return at(key_val, default_val);
|
||||
}
|
||||
virtual value & at(const std::string & key) override {
|
||||
value key_val = mk_val<value_string>(key);
|
||||
return at(key_val);
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_hashable() const override {
|
||||
if (std::all_of(val_obj.begin(), val_obj.end(), [&](auto & pair) -> bool {
|
||||
const auto & val = pair.second;
|
||||
return val->is_immutable() && val->is_hashable();
|
||||
})) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
auto hash = hasher(typeid(*this));
|
||||
for (const auto & [key, val] : val_obj) {
|
||||
// must use digest to prevent problems from "concatenation" property of hasher
|
||||
// for ex. hash of key="ab", value="c" should be different from key="a", value="bc"
|
||||
const size_t key_hash = key->unique_hash().digest();
|
||||
const size_t val_hash = val->unique_hash().digest();
|
||||
hash.update(&key_hash, sizeof(key_hash));
|
||||
hash.update(&val_hash, sizeof(val_hash));
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_obj.begin(), val_obj.end(), other.val_obj.begin(), value_equivalence());
|
||||
}
|
||||
};
|
||||
using value_object = std::shared_ptr<value_object_t>;
|
||||
|
||||
//
|
||||
// null and undefined types
|
||||
// none and undefined types
|
||||
//
|
||||
|
||||
struct value_none_t : public value_t {
|
||||
virtual std::string type() const override { return "None"; }
|
||||
virtual bool is_none() const override { return true; }
|
||||
virtual bool as_bool() const override { return false; }
|
||||
virtual string as_string() const override { return string("None"); }
|
||||
virtual string as_string() const override { return string(type()); }
|
||||
virtual std::string as_repr() const override { return type(); }
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
return hasher(typeid(*this));
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return typeid(*this) == typeid(other);
|
||||
}
|
||||
};
|
||||
using value_none = std::shared_ptr<value_none_t>;
|
||||
|
||||
|
|
@ -356,6 +619,13 @@ struct value_undefined_t : public value_t {
|
|||
virtual bool as_bool() const override { return false; }
|
||||
virtual std::string as_repr() const override { return type(); }
|
||||
virtual const func_builtins & get_builtins() const override;
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
return hasher(typeid(*this));
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
return is_undefined() == other.is_undefined();
|
||||
}
|
||||
};
|
||||
using value_undefined = std::shared_ptr<value_undefined_t>;
|
||||
|
||||
|
|
@ -436,7 +706,23 @@ struct value_func_t : public value_t {
|
|||
return val_func(new_args);
|
||||
}
|
||||
virtual std::string type() const override { return "Function"; }
|
||||
virtual std::string as_repr() const override { return type(); }
|
||||
virtual std::string as_repr() const override { return type() + "<" + name + ">(" + (arg0 ? arg0->as_repr() : "") + ")"; }
|
||||
virtual bool is_hashable() const override { return false; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
// Note: this is unused for now, we don't support function as object keys
|
||||
// use function pointer as unique identifier
|
||||
const auto target = val_func.target<func_hptr>();
|
||||
return hasher(typeid(*this)).update(&target, sizeof(target));
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
// Note: this is unused for now, we don't support function as object keys
|
||||
// compare function pointers
|
||||
// (val_func == other.val_func does not work as std::function::operator== is only used for nullptr check)
|
||||
const auto target_this = this->val_func.target<func_hptr>();
|
||||
const auto target_other = other.val_func.target<func_hptr>();
|
||||
return typeid(*this) == typeid(other) && target_this == target_other;
|
||||
}
|
||||
};
|
||||
using value_func = std::shared_ptr<value_func_t>;
|
||||
|
||||
|
|
@ -447,18 +733,21 @@ struct value_kwarg_t : public value_t {
|
|||
value_kwarg_t(const std::string & k, const value & v) : key(k), val(v) {}
|
||||
virtual std::string type() const override { return "KwArg"; }
|
||||
virtual std::string as_repr() const override { return type(); }
|
||||
virtual bool is_hashable() const override { return true; }
|
||||
virtual hasher unique_hash() const noexcept override {
|
||||
const auto type_hash = typeid(*this).hash_code();
|
||||
auto hash = val->unique_hash();
|
||||
hash.update(&type_hash, sizeof(type_hash))
|
||||
.update(key.data(), key.size());
|
||||
return hash;
|
||||
}
|
||||
protected:
|
||||
virtual bool equivalent(const value_t & other) const override {
|
||||
const value_kwarg_t & other_val = static_cast<const value_kwarg_t &>(other);
|
||||
return typeid(*this) == typeid(other) && key == other_val.key && val == other_val.val;
|
||||
}
|
||||
};
|
||||
using value_kwarg = std::shared_ptr<value_kwarg_t>;
|
||||
|
||||
|
||||
// utils
|
||||
|
||||
const func_builtins & global_builtins();
|
||||
std::string value_to_json(const value & val, int indent = -1, const std::string_view item_sep = ", ", const std::string_view key_sep = ": ");
|
||||
|
||||
struct not_implemented_exception : public std::runtime_error {
|
||||
not_implemented_exception(const std::string & msg) : std::runtime_error("NotImplemented: " + msg) {}
|
||||
};
|
||||
|
||||
|
||||
} // namespace jinja
|
||||
|
|
|
|||
16
ggml/include/ggml-virtgpu.h
Normal file
16
ggml/include/ggml-virtgpu.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"
|
||||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
@ -69,6 +69,10 @@
|
|||
#include "ggml-rpc.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_VIRTGPU_FRONTEND
|
||||
#include "ggml-virtgpu.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CANN
|
||||
#include "ggml-cann.h"
|
||||
#endif
|
||||
|
|
@ -180,7 +184,12 @@ struct ggml_backend_registry {
|
|||
register_backend(ggml_backend_sycl_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_VULKAN
|
||||
// Add runtime disable check
|
||||
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
|
||||
register_backend(ggml_backend_vk_reg());
|
||||
} else {
|
||||
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
|
||||
}
|
||||
#endif
|
||||
#ifdef GGML_USE_WEBGPU
|
||||
register_backend(ggml_backend_webgpu_reg());
|
||||
|
|
@ -188,6 +197,10 @@ struct ggml_backend_registry {
|
|||
#ifdef GGML_USE_ZDNN
|
||||
register_backend(ggml_backend_zdnn_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_VIRTGPU_FRONTEND
|
||||
register_backend(ggml_backend_virtgpu_reg());
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_OPENCL
|
||||
register_backend(ggml_backend_opencl_reg());
|
||||
#endif
|
||||
|
|
@ -605,6 +618,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
|
|||
ggml_backend_load_best("rpc", silent, dir_path);
|
||||
ggml_backend_load_best("sycl", silent, dir_path);
|
||||
ggml_backend_load_best("vulkan", silent, dir_path);
|
||||
ggml_backend_load_best("virtgpu", silent, dir_path);
|
||||
ggml_backend_load_best("opencl", silent, dir_path);
|
||||
ggml_backend_load_best("hexagon", silent, dir_path);
|
||||
ggml_backend_load_best("musa", silent, dir_path);
|
||||
|
|
|
|||
|
|
@ -3148,16 +3148,17 @@ void ggml_gemm_q4_K_8x8_q8_K(int n,
|
|||
|
||||
// Scales[i] corresponds to column i
|
||||
const int scale_offset = cp * 2;
|
||||
for (int blk = 0; blk < 2; blk++) {
|
||||
const int32x4_t block_scale = {
|
||||
(int32_t) q4sb_scales[blk][scale_offset],
|
||||
(int32_t) q4sb_scales[blk][scale_offset],
|
||||
(int32_t) q4sb_scales[blk][scale_offset + 1],
|
||||
(int32_t) q4sb_scales[blk][scale_offset + 1],
|
||||
};
|
||||
acc[cp] = vmlaq_s32(acc[cp], sb_acc[blk], block_scale);
|
||||
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[blk + 2], block_scale);
|
||||
}
|
||||
const int32_t scale_00 = q4sb_scales[0][scale_offset];
|
||||
const int32_t scale_01 = q4sb_scales[0][scale_offset + 1];
|
||||
const int32_t scale_10 = q4sb_scales[1][scale_offset];
|
||||
const int32_t scale_11 = q4sb_scales[1][scale_offset + 1];
|
||||
const int32x4_t block_scale_0 = vcombine_s32(vdup_n_s32(scale_00), vdup_n_s32(scale_01));
|
||||
const int32x4_t block_scale_1 = vcombine_s32(vdup_n_s32(scale_10), vdup_n_s32(scale_11));
|
||||
|
||||
acc[cp] = vmlaq_s32(acc[cp], sb_acc[0], block_scale_0);
|
||||
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[2], block_scale_0);
|
||||
acc[cp] = vmlaq_s32(acc[cp], sb_acc[1], block_scale_1);
|
||||
acc[cp + 4] = vmlaq_s32(acc[cp + 4], sb_acc[3], block_scale_1);
|
||||
}
|
||||
|
||||
// Multiply Acc bsum + mins
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@
|
|||
// While BW spans CC 1000, 1100 & 1200, we are integrating Tensor Core instructions available to 1200 family, see
|
||||
// https://docs.nvidia.com/cutlass/media/docs/cpp/blackwell_functionality.html#blackwell-sm120-gemms
|
||||
#define GGML_CUDA_CC_BLACKWELL 1200
|
||||
#define GGML_CUDA_CC_DGX_SPARK 1210
|
||||
#define GGML_CUDA_CC_RUBIN 1300
|
||||
#define GGML_CUDA_CC_OFFSET_AMD 0x1000000
|
||||
#define GGML_CUDA_CC_OFFSET_MTHREADS 0x0100000
|
||||
|
|
|
|||
|
|
@ -789,7 +789,7 @@ void launch_fattn(
|
|||
const ggml_tensor * K = dst->src[1];
|
||||
const ggml_tensor * V = dst->src[2];
|
||||
|
||||
const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
|
||||
const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
|
||||
|
||||
const ggml_tensor * mask = dst->src[3];
|
||||
const ggml_tensor * sinks = dst->src[4];
|
||||
|
|
|
|||
|
|
@ -147,6 +147,14 @@ static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, gg
|
|||
GGML_ASSERT(Q->ne[2] % K->ne[2] == 0);
|
||||
const int gqa_ratio = Q->ne[2] / K->ne[2];
|
||||
if (gqa_ratio == 20) { // GLM 4.7 Flash
|
||||
if (cc >= GGML_CUDA_CC_DGX_SPARK) {
|
||||
if (Q->ne[1] <= 8) {
|
||||
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
|
||||
break;
|
||||
}
|
||||
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 4>(ctx, dst);
|
||||
break;
|
||||
}
|
||||
if (cc >= GGML_CUDA_CC_BLACKWELL) {
|
||||
if (Q->ne[1] <= 4 && K->ne[1] >= 65536) {
|
||||
ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1<576, 512, 16>(ctx, dst);
|
||||
|
|
@ -302,7 +310,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
|
|||
}
|
||||
}
|
||||
|
||||
const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
|
||||
const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));
|
||||
|
||||
const int cc = ggml_cuda_info().devices[device].cc;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,153 +0,0 @@
|
|||
#ifndef OP_DESC_H
|
||||
#define OP_DESC_H
|
||||
|
||||
#define GGML_COMMON_IMPL_CPP
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
|
||||
struct op_desc {
|
||||
char strides[64 * GGML_MAX_SRC];
|
||||
char dims[64 * GGML_MAX_SRC];
|
||||
char types[16 * GGML_MAX_SRC];
|
||||
char buffs[64 * GGML_MAX_SRC];
|
||||
char names[64 * GGML_MAX_SRC];
|
||||
|
||||
int format_tensor_dims(char * str, const struct ggml_tensor * t) {
|
||||
if (t->ne[2] == 1 && t->ne[3] == 1) {
|
||||
return sprintf(str, "%d:%d", (int) t->ne[0], (int) t->ne[1]);
|
||||
} else {
|
||||
return sprintf(str, "%d:%d:%d:%d", (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
|
||||
}
|
||||
}
|
||||
|
||||
void format_op_dims(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += format_tensor_dims(p, t->src[0]);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += format_tensor_dims(p, t->src[i]);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
// format self dims separately for better visual alignment
|
||||
char self[64];
|
||||
format_tensor_dims(self, t);
|
||||
|
||||
p += sprintf(p, "%s", self);
|
||||
}
|
||||
|
||||
int format_tensor_strides(char * str, const struct ggml_tensor * t) {
|
||||
const char * c = ggml_is_contiguous(t) ? "" : "!";
|
||||
|
||||
if (t->ne[2] == 1 && t->ne[3] == 1) {
|
||||
return sprintf(str, "%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], c);
|
||||
} else {
|
||||
return sprintf(str, "%zu:%zu:%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], (size_t) t->nb[2], (size_t) t->nb[3], c);
|
||||
}
|
||||
}
|
||||
|
||||
void format_op_strides(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += format_tensor_strides(p, t->src[0]);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += format_tensor_strides(p, t->src[i]);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
// format self dims separately for better visual alignment
|
||||
char self[64];
|
||||
format_tensor_strides(self, t);
|
||||
|
||||
p += sprintf(p, "%s", self);
|
||||
}
|
||||
|
||||
void format_op_types(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", ggml_type_name(t->src[0]->type));
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", ggml_type_name(t->src[i]->type));
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", ggml_type_name(t->type));
|
||||
}
|
||||
|
||||
const char * tensor_buff_name(const struct ggml_tensor * t) {
|
||||
if (t->buffer) {
|
||||
return ggml_backend_buffer_name(t->buffer);
|
||||
}
|
||||
return "NONE";
|
||||
}
|
||||
|
||||
void format_op_buffs(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", tensor_buff_name(t->src[0]));
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", tensor_buff_name(t->src[i]));
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", tensor_buff_name(t));
|
||||
}
|
||||
|
||||
void format_op_names(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", t->src[0]->name);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", t->src[i]->name);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", t->name);
|
||||
}
|
||||
|
||||
void format(const ggml_tensor * op) {
|
||||
format_op_dims(dims, op);
|
||||
format_op_strides(strides, op);
|
||||
format_op_types(types, op);
|
||||
format_op_buffs(buffs, op);
|
||||
format_op_names(names, op);
|
||||
}
|
||||
|
||||
op_desc() {}
|
||||
op_desc(const ggml_tensor * op) { format(op); }
|
||||
};
|
||||
|
||||
#endif // OP_DESC_H
|
||||
|
|
@ -1,194 +0,0 @@
|
|||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
#ifdef cl_intel_subgroups
|
||||
#pragma OPENCL EXTENSION cl_intel_subgroups : enable
|
||||
#else
|
||||
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
|
||||
#endif
|
||||
|
||||
#ifdef cl_intel_required_subgroup_size
|
||||
#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
|
||||
#define INTEL_GPU 1
|
||||
#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
|
||||
#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
|
||||
#elif defined(cl_qcom_reqd_sub_group_size)
|
||||
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
|
||||
#define ADRENO_GPU 1
|
||||
#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
|
||||
#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// kernel_mul_mv_q6_K_f32_flat
|
||||
//------------------------------------------------------------------------------
|
||||
#define Q6_K_MASK1 0x03
|
||||
#define Q6_K_MASK2 0x0C
|
||||
#define Q6_K_MASK3 0x30
|
||||
#define Q6_K_MASK4 0xC0
|
||||
|
||||
#define QK_K 256
|
||||
|
||||
inline float block_q_6_K_dot_y_flat(
|
||||
global uchar * blk_ql,
|
||||
global uchar * blk_qh,
|
||||
global char * blk_scales,
|
||||
global half * blk_d,
|
||||
global float * yy,
|
||||
int ib,
|
||||
int ip,
|
||||
int is,
|
||||
int l0
|
||||
) {
|
||||
int y_offset = 128*ip + l0;
|
||||
int q_offset_l = 64*ip + l0;
|
||||
int q_offset_h = 32*ip + l0;
|
||||
|
||||
global uchar * q1 = blk_ql + ib*128 + q_offset_l;
|
||||
global uchar * q2 = q1 + QK_K/8;
|
||||
global uchar * qh = blk_qh + ib*64 + q_offset_h;
|
||||
global char * sc = blk_scales + ib*16 + is;
|
||||
|
||||
global float * y = yy + ib * QK_K + y_offset;
|
||||
|
||||
float dall = blk_d[ib];
|
||||
|
||||
float sumf = 0;
|
||||
float4 sums = {0.f, 0.f, 0.f, 0.f};
|
||||
|
||||
sums.s0 += y[0+ 0] * ((float)((q1[0] & 0xF) | ((qh[0] & Q6_K_MASK1) << 4)) - 32.f);
|
||||
sums.s1 += y[0+32] * ((float)((q2[0] & 0xF) | ((qh[0] & Q6_K_MASK2) << 2)) - 32.f);
|
||||
sums.s2 += y[0+64] * ((float)((q1[0] >> 4) | ((qh[0] & Q6_K_MASK3) << 0)) - 32.f);
|
||||
sums.s3 += y[0+96] * ((float)((q2[0] >> 4) | ((qh[0] & Q6_K_MASK4) >> 2)) - 32.f);
|
||||
|
||||
sums.s0 += y[1+ 0] * ((float)((q1[1] & 0xF) | ((qh[1] & Q6_K_MASK1) << 4)) - 32.f);
|
||||
sums.s1 += y[1+32] * ((float)((q2[1] & 0xF) | ((qh[1] & Q6_K_MASK2) << 2)) - 32.f);
|
||||
sums.s2 += y[1+64] * ((float)((q1[1] >> 4) | ((qh[1] & Q6_K_MASK3) << 0)) - 32.f);
|
||||
sums.s3 += y[1+96] * ((float)((q2[1] >> 4) | ((qh[1] & Q6_K_MASK4) >> 2)) - 32.f);
|
||||
|
||||
sums.s0 += y[2+ 0] * ((float)((q1[2] & 0xF) | ((qh[2] & Q6_K_MASK1) << 4)) - 32.f);
|
||||
sums.s1 += y[2+32] * ((float)((q2[2] & 0xF) | ((qh[2] & Q6_K_MASK2) << 2)) - 32.f);
|
||||
sums.s2 += y[2+64] * ((float)((q1[2] >> 4) | ((qh[2] & Q6_K_MASK3) << 0)) - 32.f);
|
||||
sums.s3 += y[2+96] * ((float)((q2[2] >> 4) | ((qh[2] & Q6_K_MASK4) >> 2)) - 32.f);
|
||||
|
||||
sums.s0 += y[3+ 0] * ((float)((q1[3] & 0xF) | ((qh[3] & Q6_K_MASK1) << 4)) - 32.f);
|
||||
sums.s1 += y[3+32] * ((float)((q2[3] & 0xF) | ((qh[3] & Q6_K_MASK2) << 2)) - 32.f);
|
||||
sums.s2 += y[3+64] * ((float)((q1[3] >> 4) | ((qh[3] & Q6_K_MASK3) << 0)) - 32.f);
|
||||
sums.s3 += y[3+96] * ((float)((q2[3] >> 4) | ((qh[3] & Q6_K_MASK4) >> 2)) - 32.f);
|
||||
|
||||
sumf += dall * (sums.s0 * sc[0] + sums.s1 * sc[2] + sums.s2 * sc[4] + sums.s3 * sc[6]);
|
||||
|
||||
return sumf;
|
||||
}
|
||||
|
||||
#undef N_DST
|
||||
#undef N_SIMDGROUP
|
||||
#undef N_SIMDWIDTH
|
||||
|
||||
#ifdef INTEL_GPU
|
||||
#define N_DST 4
|
||||
#define N_SIMDGROUP 2
|
||||
#define N_SIMDWIDTH 16
|
||||
#elif defined (ADRENO_GPU)
|
||||
#define N_DST 4
|
||||
#define N_SIMDGROUP 2
|
||||
#define N_SIMDWIDTH 64
|
||||
#endif
|
||||
|
||||
#define BLOCK_STRIDE (N_SIMDWIDTH/16) // number of blocks each subgroup processes
|
||||
|
||||
#ifdef INTEL_GPU
|
||||
REQD_SUBGROUP_SIZE_16
|
||||
#elif defined (ADRENO_GPU)
|
||||
REQD_SUBGROUP_SIZE_64
|
||||
#endif
|
||||
kernel void kernel_mul_mv_q6_K_f32_flat(
|
||||
global uchar * src0_ql,
|
||||
global uchar * src0_qh,
|
||||
global char * src0_s,
|
||||
global half * src0_d,
|
||||
global float * src1,
|
||||
ulong offset1,
|
||||
global float * dst,
|
||||
ulong offsetd,
|
||||
int ne00,
|
||||
int ne01,
|
||||
int ne02,
|
||||
int ne10,
|
||||
int ne12,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int r2,
|
||||
int r3
|
||||
) {
|
||||
src1 = (global float*)((global char*)src1 + offset1);
|
||||
dst = (global float*)((global char*)dst + offsetd);
|
||||
|
||||
int nb = ne00/QK_K;
|
||||
|
||||
int r0 = get_group_id(0);
|
||||
int r1 = get_group_id(1);
|
||||
int im = get_group_id(2);
|
||||
|
||||
int i12 = im%ne12;
|
||||
int i13 = im/ne12;
|
||||
|
||||
int first_row = (N_SIMDGROUP * r0 + get_sub_group_id()) * N_DST;
|
||||
|
||||
ulong offset_src0 = first_row*nb + (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02);
|
||||
ulong offset_src0_ql = offset_src0 * 128;
|
||||
ulong offset_src0_qh = offset_src0 * 64;
|
||||
ulong offset_src0_s = offset_src0 * 16;
|
||||
ulong offset_src0_d = offset_src0;
|
||||
|
||||
global uchar * blk_ql = (global uchar *) src0_ql + offset_src0_ql;
|
||||
global uchar * blk_qh = (global uchar *) src0_qh + offset_src0_qh;
|
||||
global char * blk_scales = (global char *) src0_s + offset_src0_s;
|
||||
global half * blk_d = (global half *) src0_d + offset_src0_d;
|
||||
global float * yy = (global float *) src1 + r1*ne10 + im*ne00*ne1;
|
||||
|
||||
int tid = get_sub_group_local_id()/BLOCK_STRIDE; // first block_stride groups have tid=0
|
||||
int ix = get_sub_group_local_id()%BLOCK_STRIDE; // first block is 0..block_stride-1
|
||||
int ip = tid/8; // first or second half of (super) block (0 or 1)
|
||||
int il = tid%8; // each half has 8 parts, one per scale
|
||||
int n = 4; // 4 scales at a time (and 4 sums)
|
||||
int l0 = n*il; // offset into half-block, 0..28
|
||||
int is = 8*ip + l0/16; // 0, 1, 8, 9
|
||||
|
||||
float4 sumf = 0;
|
||||
|
||||
for (int ib = ix; ib < nb; ib += BLOCK_STRIDE) {
|
||||
if (first_row + 0 < ne01) {
|
||||
sumf.s0 += block_q_6_K_dot_y_flat(blk_ql + 0*nb*128, blk_qh + 0*nb*64, blk_scales + 0*nb*16, blk_d + 0*nb, yy, ib, ip, is, l0);
|
||||
}
|
||||
if (first_row + 1 < ne01) {
|
||||
sumf.s1 += block_q_6_K_dot_y_flat(blk_ql + 1*nb*128, blk_qh + 1*nb*64, blk_scales + 1*nb*16, blk_d + 1*nb, yy, ib, ip, is, l0);
|
||||
}
|
||||
if (first_row + 2 < ne01) {
|
||||
sumf.s2 += block_q_6_K_dot_y_flat(blk_ql + 2*nb*128, blk_qh + 2*nb*64, blk_scales + 2*nb*16, blk_d + 2*nb, yy, ib, ip, is, l0);
|
||||
}
|
||||
if (first_row + 3 < ne01) {
|
||||
sumf.s3 += block_q_6_K_dot_y_flat(blk_ql + 3*nb*128, blk_qh + 3*nb*64, blk_scales + 3*nb*16, blk_d + 3*nb, yy, ib, ip, is, l0);
|
||||
}
|
||||
}
|
||||
|
||||
float4 tot = (float4)(
|
||||
sub_group_reduce_add(sumf.s0),
|
||||
sub_group_reduce_add(sumf.s1),
|
||||
sub_group_reduce_add(sumf.s2),
|
||||
sub_group_reduce_add(sumf.s3)
|
||||
);
|
||||
if (get_sub_group_local_id() == 0) {
|
||||
if (first_row + 0 < ne01) {
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + 0] = tot.s0;
|
||||
}
|
||||
if (first_row + 1 < ne01) {
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + 1] = tot.s1;
|
||||
}
|
||||
if (first_row + 2 < ne01) {
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + 2] = tot.s2;
|
||||
}
|
||||
if (first_row + 3 < ne01) {
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + 3] = tot.s3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// tri
|
||||
//------------------------------------------------------------------------------
|
||||
__kernel void kernel_tri_f32(
|
||||
global float * src0,
|
||||
ulong offset0,
|
||||
global float * dst,
|
||||
ulong offsetd,
|
||||
int n,
|
||||
int ne0,
|
||||
int ne1,
|
||||
int tri_type
|
||||
) {
|
||||
src0 = (global float*)((global char*)src0 + offset0);
|
||||
dst = (global float*)((global char*)dst + offsetd);
|
||||
|
||||
int idx = get_global_id(0);
|
||||
if (idx >= n) return;
|
||||
|
||||
int i0 = idx % ne0;
|
||||
int i1 = (idx / ne0) % ne1;
|
||||
|
||||
int keep = 0;
|
||||
if (tri_type == 0) keep = (i0 >= i1);
|
||||
else if (tri_type == 1) keep = (i0 > i1);
|
||||
else if (tri_type == 2) keep = (i0 <= i1);
|
||||
else keep = (i0 < i1);
|
||||
|
||||
dst[idx] = keep ? src0[idx] : 0.0f;
|
||||
}
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
#include <sycl/sycl.hpp>
|
||||
#include "common.hpp"
|
||||
#include "add-id.hpp"
|
||||
|
||||
static void add_id_kernel(
|
||||
const float* src0,
|
||||
const float* src1,
|
||||
const int32_t* src2,
|
||||
float* dst,
|
||||
int64_t ne0,
|
||||
int64_t ne1,
|
||||
size_t nb01,
|
||||
size_t nb02,
|
||||
size_t nb11,
|
||||
size_t nb21,
|
||||
sycl::nd_item<3> item_ct1) {
|
||||
const int64_t i1 = item_ct1.get_group(2);
|
||||
const int64_t i2 = item_ct1.get_group(1);
|
||||
|
||||
const int i11 =
|
||||
*(const int32_t*)((const char*)src2 + i1 * sizeof(int32_t) + i2 * nb21);
|
||||
|
||||
const size_t nb1 = ne0 * sizeof(float);
|
||||
const size_t nb2 = ne1 * nb1;
|
||||
|
||||
float* dst_row = (float*)((char*)dst + i1 * nb1 + i2 * nb2);
|
||||
const float* src0_row =
|
||||
(const float*)((const char*)src0 + i1 * nb01 + i2 * nb02);
|
||||
const float* src1_row = (const float*)((const char*)src1 + i11 * nb11);
|
||||
|
||||
for (int64_t i0 = item_ct1.get_local_id(2); i0 < ne0;
|
||||
i0 += item_ct1.get_local_range(2)) {
|
||||
dst_row[i0] = src0_row[i0] + src1_row[i0];
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_sycl_add_id(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
|
||||
const ggml_tensor* src0 = dst->src[0];
|
||||
const ggml_tensor* src1 = dst->src[1];
|
||||
const ggml_tensor* src2 = dst->src[2];
|
||||
|
||||
GGML_TENSOR_TERNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(src2->type == GGML_TYPE_I32);
|
||||
|
||||
GGML_ASSERT(nb00 == sizeof(float));
|
||||
GGML_ASSERT(nb10 == sizeof(float));
|
||||
GGML_ASSERT(nb20 == sizeof(int32_t));
|
||||
|
||||
const float* src0_d = (const float*)src0->data;
|
||||
const float* src1_d = (const float*)src1->data;
|
||||
const int32_t* src2_d = (const int32_t*)src2->data;
|
||||
float* dst_d = (float*)dst->data;
|
||||
|
||||
int threads = std::min((int)ne00, 768); // cols
|
||||
ctx.stream()->parallel_for(
|
||||
sycl::nd_range<3>(
|
||||
sycl::range<3>(1, ne02, ne01) * sycl::range<3>(1, 1, threads),
|
||||
sycl::range<3>(1, 1, threads)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
add_id_kernel(
|
||||
src0_d,
|
||||
src1_d,
|
||||
src2_d,
|
||||
dst_d,
|
||||
ne0,
|
||||
ne1,
|
||||
nb01,
|
||||
nb02,
|
||||
nb11,
|
||||
nb21,
|
||||
item_ct1);
|
||||
});
|
||||
}
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
#ifndef GGML_SYCL_ADD_ID_HPP
|
||||
#define GGML_SYCL_ADD_ID_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
void ggml_sycl_add_id(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
#endif // GGML_SYCL_ADD_ID_HPP
|
||||
|
|
@ -5552,22 +5552,32 @@ static void ggml_vk_instance_init() {
|
|||
|
||||
if ((new_props.properties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu || new_props.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu) && ggml_vk_device_is_supported(devices[i])) {
|
||||
// Check if there are two physical devices corresponding to the same GPU
|
||||
// This handles the case where the same GPU appears with different drivers (e.g., RADV + AMDVLK on Linux),
|
||||
// see https://github.com/ggml-org/llama.cpp/pull/7582 for original deduplication.
|
||||
// However, for MoltenVK on macOS, multiple GPUs on the same card may report the same UUID,
|
||||
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Until this is fixed, we'll only deduplicate
|
||||
// when drivers differ (same driver + same UUID = likely different GPUs)
|
||||
auto old_device = std::find_if(
|
||||
vk_instance.device_indices.begin(),
|
||||
vk_instance.device_indices.end(),
|
||||
[&devices, &new_id](const size_t k){
|
||||
[&devices, &new_id, &new_driver](const size_t k){
|
||||
vk::PhysicalDeviceProperties2 old_props;
|
||||
vk::PhysicalDeviceDriverProperties old_driver;
|
||||
vk::PhysicalDeviceIDProperties old_id;
|
||||
old_props.pNext = &old_id;
|
||||
old_props.pNext = &old_driver;
|
||||
old_driver.pNext = &old_id;
|
||||
devices[k].getProperties2(&old_props);
|
||||
|
||||
bool equals = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID));
|
||||
equals = equals || (
|
||||
bool same_uuid = std::equal(std::begin(old_id.deviceUUID), std::end(old_id.deviceUUID), std::begin(new_id.deviceUUID));
|
||||
same_uuid = same_uuid || (
|
||||
old_id.deviceLUIDValid && new_id.deviceLUIDValid &&
|
||||
std::equal(std::begin(old_id.deviceLUID), std::end(old_id.deviceLUID), std::begin(new_id.deviceLUID))
|
||||
);
|
||||
|
||||
return equals;
|
||||
// Only deduplicate if same UUID AND different drivers
|
||||
// (same driver + same UUID on MoltenVK = likely different GPUs on multi-GPU card)
|
||||
bool different_driver = (old_driver.driverID != new_driver.driverID);
|
||||
return same_uuid && different_driver;
|
||||
}
|
||||
);
|
||||
if (old_device == vk_instance.device_indices.end()) {
|
||||
|
|
|
|||
|
|
@ -312,7 +312,7 @@ extern "C" {
|
|||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||
bool vocab_only; // only load the vocabulary, no weights
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool use_direct_io; // use direct io, takes precedence over use_mmap
|
||||
bool use_direct_io; // use direct io, takes precedence over use_mmap when supported
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool check_tensors; // validate model tensor data
|
||||
bool use_extra_bufts; // use extra buffer types (used for weight repacking)
|
||||
|
|
|
|||
|
|
@ -256,11 +256,7 @@ llama_context::llama_context(
|
|||
|
||||
// graph outputs buffer
|
||||
{
|
||||
// resized during inference when a batch uses more outputs
|
||||
// Create a dummy batch for initialization.
|
||||
llama_batch dummy_batch = {};
|
||||
dummy_batch.n_tokens = 0;
|
||||
if (output_reserve(params.n_seq_max, dummy_batch) < params.n_seq_max) {
|
||||
if (output_reserve(params.n_seq_max) < params.n_seq_max) {
|
||||
throw std::runtime_error("failed to reserve initial output buffer");
|
||||
}
|
||||
|
||||
|
|
@ -1235,7 +1231,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
|
|||
n_queued_tokens += n_tokens;
|
||||
|
||||
// reserve output buffer
|
||||
if (output_reserve(n_tokens, batch_inp) < n_tokens) {
|
||||
if (output_reserve(n_tokens) < n_tokens) {
|
||||
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %u outputs\n", __func__, n_tokens);
|
||||
return -2;
|
||||
};
|
||||
|
|
@ -1466,6 +1462,23 @@ static void copy_tensor_async_candidates(
|
|||
}
|
||||
}
|
||||
|
||||
static bool needs_raw_logits(const llama_ubatch & ubatch, const std::map<llama_seq_id, llama_sampler *> & samplers) {
|
||||
for (uint32_t i = 0; i < ubatch.n_tokens; i++) {
|
||||
if (!ubatch.output[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the output token has at least one sequence without a backend sampler.
|
||||
for (int32_t j = 0; j < ubatch.n_seq_id[i]; ++j) {
|
||||
llama_seq_id seq_id = ubatch.seq_id[i][j];
|
||||
if (samplers.find(seq_id) == samplers.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false; // all sequences use backend sampling
|
||||
}
|
||||
|
||||
int llama_context::decode(const llama_batch & batch_inp) {
|
||||
GGML_ASSERT((!batch_inp.token && batch_inp.embd) || (batch_inp.token && !batch_inp.embd)); // NOLINT
|
||||
|
||||
|
|
@ -1598,7 +1611,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
|
|||
}
|
||||
|
||||
// reserve output buffer
|
||||
if (output_reserve(n_outputs_all, balloc->get_batch()) < n_outputs_all) {
|
||||
if (output_reserve(n_outputs_all) < n_outputs_all) {
|
||||
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %d outputs\n", __func__, n_outputs_all);
|
||||
return -2;
|
||||
};
|
||||
|
|
@ -1671,10 +1684,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
|
|||
}
|
||||
|
||||
// extract logits
|
||||
// For multi-sequence batches that mix backend samplers and CPU sampler
|
||||
// this is currently inefficient as we copy all logits even for the
|
||||
// backend sampled tokens.
|
||||
if (logits && t_logits && n_outputs > 0) {
|
||||
if (logits && t_logits && n_outputs > 0 && needs_raw_logits(ubatch, sampling.samplers)) {
|
||||
ggml_backend_t backend_res = ggml_backend_sched_get_tensor_backend(sched.get(), t_logits);
|
||||
GGML_ASSERT(backend_res != nullptr);
|
||||
GGML_ASSERT(logits != nullptr);
|
||||
|
|
@ -1744,11 +1754,8 @@ int llama_context::decode(const llama_batch & batch_inp) {
|
|||
}
|
||||
}
|
||||
|
||||
// This flag indicates whether a backend sampler has actually sampled a specific
|
||||
// token, or if it has produced probabilites. If true, we can skip the normal copying of logits and embeddings.
|
||||
const bool has_sampled = !res->t_sampled.empty() || !res->t_sampled_probs.empty() || !res->t_sampled_logits.empty();
|
||||
|
||||
if (has_samplers && has_sampled) {
|
||||
// Copy backend sampling output if this ubatch produced any sampling tensors.
|
||||
if (has_samplers && (!res->t_sampled.empty() || !res->t_sampled_probs.empty() || !res->t_sampled_logits.empty())) {
|
||||
const auto seq_to_output_row = build_seq_to_output_row(ubatch, n_outputs_prev);
|
||||
const auto stride = n_vocab;
|
||||
|
||||
|
|
@ -1823,7 +1830,8 @@ int llama_context::decode(const llama_batch & batch_inp) {
|
|||
// output
|
||||
//
|
||||
|
||||
uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & batch) {
|
||||
uint32_t llama_context::output_reserve(int32_t n_outputs) {
|
||||
|
||||
const auto & hparams = model.hparams;
|
||||
const auto & vocab = model.vocab;
|
||||
|
||||
|
|
@ -1842,45 +1850,16 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
|
|||
has_embd = true;
|
||||
}
|
||||
|
||||
// Check which sampling modes are needed for the current batch.
|
||||
// TODO: avoid this branching by working with the worst-case
|
||||
bool has_sampling = false;
|
||||
bool cpu_logits = false;
|
||||
|
||||
if (batch.logits) {
|
||||
for (int32_t i = 0; i < batch.n_tokens; i++) {
|
||||
if (!batch.logits[i]) {
|
||||
continue;
|
||||
}
|
||||
for (int32_t j = 0; j < batch.n_seq_id[i]; j++) {
|
||||
llama_seq_id seq_id = batch.seq_id[i][j];
|
||||
if (sampling.samplers.find(seq_id) != sampling.samplers.end()) {
|
||||
has_sampling = true;
|
||||
} else {
|
||||
cpu_logits = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// When batch.logits is nullptr (when loading state with a dummy batch),
|
||||
// allocate CPU logits.
|
||||
cpu_logits = true;
|
||||
}
|
||||
|
||||
size_t backend_float_count = 0;
|
||||
size_t backend_token_count = 0;
|
||||
|
||||
// Allocate CPU logits buffer only if needed by sequences in this batch
|
||||
logits_size = (has_logits && cpu_logits) ? n_vocab*n_outputs_max : 0;
|
||||
logits_size = has_logits ? n_vocab*n_outputs_max : 0;
|
||||
embd_size = has_embd ? n_embd_out*n_outputs_max : 0;
|
||||
|
||||
// TODO: avoid this branching by working with the worst-case
|
||||
if (!has_sampling) {
|
||||
sampling.logits_size = 0;
|
||||
sampling.probs_size = 0;
|
||||
sampling.sampled_size = 0;
|
||||
sampling.candidates_size = 0;
|
||||
} else {
|
||||
// Allocate backend sampling output buffers if there are backend samplers configured.
|
||||
const bool has_sampling = !sampling.samplers.empty();
|
||||
if (has_sampling) {
|
||||
sampling.logits_size = n_vocab*n_outputs_max;
|
||||
sampling.probs_size = n_vocab*n_outputs_max;
|
||||
sampling.sampled_size = n_outputs_max;
|
||||
|
|
@ -1938,7 +1917,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
|
|||
size_t offset = 0;
|
||||
uint8_t * base = (uint8_t *) output_base;
|
||||
|
||||
logits = (has_logits && cpu_logits) ? output_base : nullptr;
|
||||
logits = has_logits ? output_base : nullptr;
|
||||
offset += logits_size * sizeof(float);
|
||||
|
||||
embd = has_embd ? (float *) (base + offset) : nullptr;
|
||||
|
|
@ -2624,10 +2603,7 @@ size_t llama_context::state_read_data(llama_io_read_i & io) {
|
|||
auto n_outputs = this->n_outputs;
|
||||
io.read_to(&n_outputs, sizeof(n_outputs));
|
||||
|
||||
// Create a dummy batch for state loading.
|
||||
llama_batch dummy_batch = {};
|
||||
dummy_batch.n_tokens = 0;
|
||||
if (n_outputs > output_reserve(n_outputs, dummy_batch)) {
|
||||
if (n_outputs > output_reserve(n_outputs)) {
|
||||
throw std::runtime_error("could not reserve outputs");
|
||||
}
|
||||
|
||||
|
|
@ -2872,7 +2848,7 @@ void llama_context::opt_epoch_iter(
|
|||
}
|
||||
|
||||
// reserve output buffer
|
||||
if (output_reserve(n_outputs_all, balloc->get_batch()) < n_outputs_all) {
|
||||
if (output_reserve(n_outputs_all) < n_outputs_all) {
|
||||
LLAMA_LOG_ERROR("%s: could not reserve space for batch with %d outputs\n", __func__, n_outputs_all);
|
||||
GGML_ABORT("TODO: handle this error");
|
||||
};
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ private:
|
|||
|
||||
// Make sure enough space is available for outputs.
|
||||
// Returns max number of outputs for which space was reserved.
|
||||
uint32_t output_reserve(int32_t n_outputs, const llama_batch & batch);
|
||||
uint32_t output_reserve(int32_t n_outputs);
|
||||
|
||||
void output_reorder();
|
||||
|
||||
|
|
|
|||
|
|
@ -541,15 +541,15 @@ llama_model_loader::llama_model_loader(
|
|||
|
||||
if (use_mmap && use_direct_io) {
|
||||
if (files.back()->has_direct_io()) {
|
||||
// Disable mmap, as DirectIO is available
|
||||
use_mmap = false;
|
||||
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
|
||||
use_mmap = false;
|
||||
} else {
|
||||
// Disable DirectIO and reopen file using std::fopen for mmap
|
||||
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
|
||||
use_direct_io = false;
|
||||
|
||||
// reopen file using std::fopen for mmap
|
||||
files.pop_back();
|
||||
files.emplace_back(new llama_file(fname.c_str(), "rb", false));
|
||||
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8279,7 +8279,7 @@ llama_model_params llama_model_default_params() {
|
|||
/*.kv_overrides =*/ nullptr,
|
||||
/*.vocab_only =*/ false,
|
||||
/*.use_mmap =*/ true,
|
||||
/*.use_direct_io =*/ true,
|
||||
/*.use_direct_io =*/ false,
|
||||
/*.use_mlock =*/ false,
|
||||
/*.check_tensors =*/ false,
|
||||
/*.use_extra_bufts =*/ true,
|
||||
|
|
|
|||
|
|
@ -545,7 +545,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
|||
}
|
||||
|
||||
std::vector<std::string> splits = {};
|
||||
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ true, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
|
||||
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
|
||||
ml.init_mappings(false); // no prefetching
|
||||
|
||||
llama_model model(llama_model_default_params());
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "jinja/runtime.h"
|
||||
#include "jinja/parser.h"
|
||||
#include "jinja/lexer.h"
|
||||
#include "jinja/utils.h"
|
||||
|
||||
#include "testing.h"
|
||||
|
||||
|
|
@ -30,6 +31,7 @@ static void test_tests(testing & t);
|
|||
static void test_string_methods(testing & t);
|
||||
static void test_array_methods(testing & t);
|
||||
static void test_object_methods(testing & t);
|
||||
static void test_hasher(testing & t);
|
||||
static void test_fuzzing(testing & t);
|
||||
|
||||
static bool g_python_mode = false;
|
||||
|
|
@ -67,6 +69,7 @@ int main(int argc, char *argv[]) {
|
|||
t.test("array methods", test_array_methods);
|
||||
t.test("object methods", test_object_methods);
|
||||
if (!g_python_mode) {
|
||||
t.test("hasher", test_hasher);
|
||||
t.test("fuzzing", test_fuzzing);
|
||||
}
|
||||
|
||||
|
|
@ -156,6 +159,18 @@ static void test_conditionals(testing & t) {
|
|||
"big"
|
||||
);
|
||||
|
||||
test_template(t, "object comparison",
|
||||
"{% if {0: 1, none: 2, 1.0: 3, '0': 4, true: 5} == {false: 1, none: 2, 1: 5, '0': 4} %}equal{% endif %}",
|
||||
json::object(),
|
||||
"equal"
|
||||
);
|
||||
|
||||
test_template(t, "array comparison",
|
||||
"{% if [0, 1.0, false] == [false, 1, 0.0] %}equal{% endif %}",
|
||||
json::object(),
|
||||
"equal"
|
||||
);
|
||||
|
||||
test_template(t, "logical and",
|
||||
"{% if a and b %}both{% endif %}",
|
||||
{{"a", true}, {"b", true}},
|
||||
|
|
@ -358,6 +373,30 @@ static void test_expressions(testing & t) {
|
|||
"b"
|
||||
);
|
||||
|
||||
test_template(t, "array negative access",
|
||||
"{{ items[-1] }}",
|
||||
{{"items", json::array({"a", "b", "c"})}},
|
||||
"c"
|
||||
);
|
||||
|
||||
test_template(t, "array slice",
|
||||
"{{ items[1:-1]|string }}",
|
||||
{{"items", json::array({"a", "b", "c"})}},
|
||||
"['b']"
|
||||
);
|
||||
|
||||
test_template(t, "array slice step",
|
||||
"{{ items[::2]|string }}",
|
||||
{{"items", json::array({"a", "b", "c"})}},
|
||||
"['a', 'c']"
|
||||
);
|
||||
|
||||
test_template(t, "tuple slice",
|
||||
"{{ ('a', 'b', 'c')[::-1]|string }}",
|
||||
json::object(),
|
||||
"('c', 'b', 'a')"
|
||||
);
|
||||
|
||||
test_template(t, "arithmetic",
|
||||
"{{ (a + b) * c }}",
|
||||
{{"a", 2}, {"b", 3}, {"c", 4}},
|
||||
|
|
@ -401,6 +440,36 @@ static void test_set_statement(testing & t) {
|
|||
json::object(),
|
||||
"1"
|
||||
);
|
||||
|
||||
test_template(t, "set dict with mixed type keys",
|
||||
"{% set d = {0: 1, none: 2, 1.0: 3, '0': 4, (0, 0): 5, false: 6, 1: 7} %}{{ d[(0, 0)] + d[0] + d[none] + d['0'] + d[false] + d[1.0] + d[1] }}",
|
||||
json::object(),
|
||||
"37"
|
||||
);
|
||||
|
||||
test_template(t, "print dict with mixed type keys",
|
||||
"{% set d = {0: 1, none: 2, 1.0: 3, '0': 4, (0, 0): 5, true: 6} %}{{ d|string }}",
|
||||
json::object(),
|
||||
"{0: 1, None: 2, 1.0: 6, '0': 4, (0, 0): 5}"
|
||||
);
|
||||
|
||||
test_template(t, "print array with mixed types",
|
||||
"{% set d = [0, none, 1.0, '0', true, (0, 0)] %}{{ d|string }}",
|
||||
json::object(),
|
||||
"[0, None, 1.0, '0', True, (0, 0)]"
|
||||
);
|
||||
|
||||
test_template(t, "object member assignment with mixed key types",
|
||||
"{% set d = namespace() %}{% set d.a = 123 %}{{ d['a'] == 123 }}",
|
||||
json::object(),
|
||||
"True"
|
||||
);
|
||||
|
||||
test_template(t, "tuple unpacking",
|
||||
"{% set t = (1, 2, 3) %}{% set a, b, c = t %}{{ a + b + c }}",
|
||||
json::object(),
|
||||
"6"
|
||||
);
|
||||
}
|
||||
|
||||
static void test_filters(testing & t) {
|
||||
|
|
@ -1312,6 +1381,154 @@ static void test_object_methods(testing & t) {
|
|||
{{"obj", {{"a", "b"}}}},
|
||||
"True True"
|
||||
);
|
||||
|
||||
test_template(t, "expression as object key",
|
||||
"{% set d = {'ab': 123} %}{{ d['a' + 'b'] == 123 }}",
|
||||
json::object(),
|
||||
"True"
|
||||
);
|
||||
|
||||
test_template(t, "numeric as object key (template: Seed-OSS)",
|
||||
"{% set d = {1: 'a', 2: 'b'} %}{{ d[1] == 'a' and d[2] == 'b' }}",
|
||||
json::object(),
|
||||
"True"
|
||||
);
|
||||
}
|
||||
|
||||
static void test_hasher(testing & t) {
|
||||
static const std::vector<std::pair<size_t, size_t>> chunk_sizes = {
|
||||
{1, 2},
|
||||
{1, 16},
|
||||
{8, 1},
|
||||
{1, 1024},
|
||||
{5, 512},
|
||||
{16, 256},
|
||||
{45, 122},
|
||||
{70, 634},
|
||||
};
|
||||
|
||||
static auto random_bytes = [](size_t length) -> std::string {
|
||||
std::string data;
|
||||
data.resize(length);
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
data[i] = static_cast<char>(rand() % 256);
|
||||
}
|
||||
return data;
|
||||
};
|
||||
|
||||
t.test("state unchanged with empty input", [](testing & t) {
|
||||
jinja::hasher hasher;
|
||||
hasher.update("some data");
|
||||
size_t initial_state = hasher.digest();
|
||||
hasher.update("", 0);
|
||||
size_t final_state = hasher.digest();
|
||||
t.assert_true("Hasher state should remain unchanged", initial_state == final_state);
|
||||
});
|
||||
|
||||
t.test("different inputs produce different hashes", [](testing & t) {
|
||||
jinja::hasher hasher1;
|
||||
hasher1.update("data one");
|
||||
size_t hash1 = hasher1.digest();
|
||||
|
||||
jinja::hasher hasher2;
|
||||
hasher2.update("data two");
|
||||
size_t hash2 = hasher2.digest();
|
||||
|
||||
t.assert_true("Different inputs should produce different hashes", hash1 != hash2);
|
||||
});
|
||||
|
||||
t.test("same inputs produce same hashes", [](testing & t) {
|
||||
jinja::hasher hasher1;
|
||||
hasher1.update("consistent data");
|
||||
size_t hash1 = hasher1.digest();
|
||||
|
||||
jinja::hasher hasher2;
|
||||
hasher2.update("consistent data");
|
||||
size_t hash2 = hasher2.digest();
|
||||
|
||||
t.assert_true("Same inputs should produce same hashes", hash1 == hash2);
|
||||
});
|
||||
|
||||
t.test("property: update(a ~ b) == update(a).update(b)", [](testing & t) {
|
||||
for (const auto & [size1, size2] : chunk_sizes) {
|
||||
std::string data1 = random_bytes(size1);
|
||||
std::string data2 = random_bytes(size2);
|
||||
|
||||
jinja::hasher hasher1;
|
||||
hasher1.update(data1);
|
||||
hasher1.update(data2);
|
||||
size_t hash1 = hasher1.digest();
|
||||
|
||||
jinja::hasher hasher2;
|
||||
hasher2.update(data1 + data2);
|
||||
size_t hash2 = hasher2.digest();
|
||||
|
||||
t.assert_true(
|
||||
"Hashing in multiple updates should match single update (" + std::to_string(size1) + ", " + std::to_string(size2) + ")",
|
||||
hash1 == hash2);
|
||||
}
|
||||
});
|
||||
|
||||
t.test("property: update(a ~ b) == update(a).update(b) with more update passes", [](testing & t) {
|
||||
static const std::vector<size_t> sizes = {3, 732, 131, 13, 17, 256, 436, 99, 4};
|
||||
|
||||
jinja::hasher hasher1;
|
||||
jinja::hasher hasher2;
|
||||
|
||||
std::string combined_data;
|
||||
for (size_t size : sizes) {
|
||||
std::string data = random_bytes(size);
|
||||
hasher1.update(data);
|
||||
combined_data += data;
|
||||
}
|
||||
|
||||
hasher2.update(combined_data);
|
||||
size_t hash1 = hasher1.digest();
|
||||
size_t hash2 = hasher2.digest();
|
||||
t.assert_true(
|
||||
"Hashing in multiple updates should match single update with many chunks",
|
||||
hash1 == hash2);
|
||||
});
|
||||
|
||||
t.test("property: non associativity of update", [](testing & t) {
|
||||
for (const auto & [size1, size2] : chunk_sizes) {
|
||||
std::string data1 = random_bytes(size1);
|
||||
std::string data2 = random_bytes(size2);
|
||||
|
||||
jinja::hasher hasher1;
|
||||
hasher1.update(data1);
|
||||
hasher1.update(data2);
|
||||
size_t hash1 = hasher1.digest();
|
||||
|
||||
jinja::hasher hasher2;
|
||||
hasher2.update(data2);
|
||||
hasher2.update(data1);
|
||||
size_t hash2 = hasher2.digest();
|
||||
|
||||
t.assert_true(
|
||||
"Hashing order should matter (" + std::to_string(size1) + ", " + std::to_string(size2) + ")",
|
||||
hash1 != hash2);
|
||||
}
|
||||
});
|
||||
|
||||
t.test("property: different lengths produce different hashes (padding block size)", [](testing & t) {
|
||||
std::string random_data = random_bytes(64);
|
||||
|
||||
jinja::hasher hasher1;
|
||||
hasher1.update(random_data);
|
||||
size_t hash1 = hasher1.digest();
|
||||
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
random_data.push_back('A'); // change length
|
||||
jinja::hasher hasher2;
|
||||
hasher2.update(random_data);
|
||||
size_t hash2 = hasher2.digest();
|
||||
|
||||
t.assert_true("Different lengths should produce different hashes (length " + std::to_string(random_data.size()) + ")", hash1 != hash2);
|
||||
|
||||
hash1 = hash2;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static void test_template_cpp(testing & t, const std::string & name, const std::string & tmpl, const json & vars, const std::string & expect) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ def test_with_and_without_draft():
|
|||
"prompt": "I believe the meaning of life is",
|
||||
"temperature": 0.0,
|
||||
"top_k": 1,
|
||||
"n_predict": 16,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
content_no_draft = res.body["content"]
|
||||
|
|
@ -42,6 +43,7 @@ def test_with_and_without_draft():
|
|||
"prompt": "I believe the meaning of life is",
|
||||
"temperature": 0.0,
|
||||
"top_k": 1,
|
||||
"n_predict": 16,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
content_draft = res.body["content"]
|
||||
|
|
@ -68,6 +70,7 @@ def test_different_draft_min_draft_max():
|
|||
"prompt": "I believe the meaning of life is",
|
||||
"temperature": 0.0,
|
||||
"top_k": 1,
|
||||
"n_predict": 16,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
if last_content is not None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue