mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-26 10:41:25 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/intel.Dockerfile # .github/workflows/build-android.yml # .github/workflows/build.yml # .github/workflows/release.yml # .gitignore # docs/backend/SYCL.md # docs/backend/snapdragon/README.md # examples/model-conversion/scripts/causal/convert-model.sh # ggml/CMakeLists.txt # ggml/src/CMakeLists.txt # ggml/src/ggml-hexagon/ggml-hexagon.cpp # ggml/src/ggml-hexagon/htp/CMakeLists.txt # ggml/src/ggml-hexagon/htp/hex-utils.h # ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c # ggml/src/ggml-hexagon/htp/htp-ctx.h # ggml/src/ggml-hexagon/htp/htp-ops.h # ggml/src/ggml-hexagon/htp/htp_iface.idl # ggml/src/ggml-hexagon/htp/hvx-base.h # ggml/src/ggml-hexagon/htp/main.c # ggml/src/ggml-hexagon/htp/matmul-ops.c # ggml/src/ggml-hexagon/libggml-htp.inf # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-sycl/mmvq.cpp # ggml/src/ggml-sycl/mmvq.hpp # ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_blk.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl # scripts/server-test-structured.py # scripts/snapdragon/adb/run-bench.sh # scripts/snapdragon/adb/run-cli.sh # scripts/snapdragon/adb/run-completion.sh # scripts/snapdragon/adb/run-mtmd.sh # scripts/snapdragon/adb/run-tool.sh # scripts/snapdragon/qdc/requirements.txt # scripts/snapdragon/windows/run-bench.ps1 # scripts/snapdragon/windows/run-cli.ps1 # scripts/snapdragon/windows/run-completion.ps1 # scripts/snapdragon/windows/run-mtmd.ps1 # scripts/snapdragon/windows/run-tool.ps1 # tests/test-backend-ops.cpp # tools/cli/cli.cpp # ty.toml
This commit is contained in:
commit
340b22283e
20 changed files with 260 additions and 98 deletions
|
|
@ -558,6 +558,26 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
|
|||
return tmpls->has_explicit_template;
|
||||
}
|
||||
|
||||
// LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
|
||||
// and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
|
||||
static bool is_lfm2_template(const std::string & src) {
|
||||
return src.find("<|tool_list_start|>") != std::string::npos &&
|
||||
src.find("<|tool_list_end|>") != std::string::npos;
|
||||
}
|
||||
|
||||
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates) {
|
||||
common_chat_prompt_preset asr_preset;
|
||||
asr_preset.system = "";
|
||||
asr_preset.user = "Transcribe audio to text";
|
||||
|
||||
if (chat_templates && chat_templates->template_default && is_lfm2_template(chat_templates->template_default->source())) {
|
||||
asr_preset.system = "Perform ASR.";
|
||||
asr_preset.user = "";
|
||||
}
|
||||
|
||||
return asr_preset;
|
||||
}
|
||||
|
||||
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
|
||||
if (!variant.empty()) {
|
||||
if (variant == "tool_use") {
|
||||
|
|
@ -2067,10 +2087,7 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
|
|||
return common_chat_params_init_kimi_k2(tmpl, params);
|
||||
}
|
||||
|
||||
// LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
|
||||
// and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
|
||||
if (src.find("<|tool_list_start|>") != std::string::npos &&
|
||||
src.find("<|tool_list_end|>") != std::string::npos) {
|
||||
if (is_lfm2_template(src)) {
|
||||
LOG_DBG("Using specialized template: LFM2\n");
|
||||
return common_chat_params_init_lfm2(tmpl, params);
|
||||
}
|
||||
|
|
@ -2379,4 +2396,3 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
|
|||
GGML_ASSERT(chat_templates->template_default != nullptr);
|
||||
return chat_templates->template_default->caps.to_map();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -274,3 +274,11 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
|
|||
const common_chat_template & tmpl,
|
||||
const std::string & src,
|
||||
autoparser::generation_params & params);
|
||||
|
||||
// specialized per-task preset
|
||||
struct common_chat_prompt_preset {
|
||||
std::string system;
|
||||
std::string user;
|
||||
};
|
||||
|
||||
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates);
|
||||
|
|
|
|||
|
|
@ -747,6 +747,11 @@ inline bool string_starts_with(std::string_view str, std::string_view prefix) {
|
|||
str.compare(0, prefix.size(), prefix) == 0;
|
||||
}
|
||||
|
||||
// remove when moving to c++20
|
||||
inline bool string_starts_with(std::string_view str, char prefix) {
|
||||
return !str.empty() && str.front() == prefix;
|
||||
}
|
||||
|
||||
// remove when moving to c++20
|
||||
inline bool string_ends_with(std::string_view str, std::string_view suffix) {
|
||||
return str.size() >= suffix.size() &&
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
#include "log.h"
|
||||
#include "value.h"
|
||||
#include "runtime.h"
|
||||
#include "caps.h"
|
||||
|
|
|
|||
|
|
@ -106,10 +106,16 @@ struct statement {
|
|||
size_t pos; // position in source, for debugging
|
||||
virtual ~statement() = default;
|
||||
virtual std::string type() const { return "Statement"; }
|
||||
|
||||
// execute_impl must be overridden by derived classes
|
||||
virtual value execute_impl(context &) { throw std::runtime_error("cannot exec " + type()); }
|
||||
virtual value execute_impl(context &) { throw_exec_error(); }
|
||||
// execute is the public method to execute a statement with error handling
|
||||
value execute(context &);
|
||||
|
||||
private:
|
||||
[[noreturn]] void throw_exec_error() const {
|
||||
throw std::runtime_error("cannot exec " + type());
|
||||
}
|
||||
};
|
||||
|
||||
// Type Checking Utilities
|
||||
|
|
@ -143,7 +149,7 @@ struct program : public statement {
|
|||
program() = default;
|
||||
explicit program(statements && body) : body(std::move(body)) {}
|
||||
std::string type() const override { return "Program"; }
|
||||
value execute_impl(context &) override {
|
||||
[[noreturn]] value execute_impl(context &) override {
|
||||
throw std::runtime_error("Cannot execute program directly, use jinja::runtime instead");
|
||||
}
|
||||
};
|
||||
|
|
@ -195,7 +201,7 @@ struct break_statement : public statement {
|
|||
}
|
||||
};
|
||||
|
||||
value execute_impl(context &) override {
|
||||
[[noreturn]] value execute_impl(context &) override {
|
||||
throw break_statement::signal();
|
||||
}
|
||||
};
|
||||
|
|
@ -209,7 +215,7 @@ struct continue_statement : public statement {
|
|||
}
|
||||
};
|
||||
|
||||
value execute_impl(context &) override {
|
||||
[[noreturn]] value execute_impl(context &) override {
|
||||
throw continue_statement::signal();
|
||||
}
|
||||
};
|
||||
|
|
@ -509,7 +515,7 @@ struct slice_expression : public expression {
|
|||
chk_type<expression>(this->step_expr);
|
||||
}
|
||||
std::string type() const override { return "SliceExpression"; }
|
||||
value execute_impl(context &) override {
|
||||
[[noreturn]] value execute_impl(context &) override {
|
||||
throw std::runtime_error("must be handled by MemberExpression");
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -590,6 +590,10 @@ static bool string_endswith(const std::string & str, const std::string & suffix)
|
|||
return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
|
||||
}
|
||||
|
||||
[[noreturn]] static value string_join_not_implemented(const func_args &) {
|
||||
throw not_implemented_exception("String join builtin not implemented");
|
||||
}
|
||||
|
||||
const func_builtins & value_string_t::get_builtins() const {
|
||||
static const func_builtins builtins = {
|
||||
{"default", default_value},
|
||||
|
|
@ -851,9 +855,7 @@ const func_builtins & value_string_t::get_builtins() const {
|
|||
res->val_str.mark_input_based_on(val_input->as_string());
|
||||
return res;
|
||||
}},
|
||||
{"join", [](const func_args &) -> value {
|
||||
throw not_implemented_exception("String join builtin not implemented");
|
||||
}},
|
||||
{"join", string_join_not_implemented},
|
||||
};
|
||||
return builtins;
|
||||
}
|
||||
|
|
@ -884,6 +886,9 @@ const func_builtins & value_bool_t::get_builtins() const {
|
|||
return builtins;
|
||||
}
|
||||
|
||||
[[noreturn]] static value array_unique_not_implemented(const func_args &) {
|
||||
throw not_implemented_exception("Array unique builtin not implemented");
|
||||
}
|
||||
|
||||
const func_builtins & value_array_t::get_builtins() const {
|
||||
static const func_builtins builtins = {
|
||||
|
|
@ -1084,13 +1089,14 @@ const func_builtins & value_array_t::get_builtins() const {
|
|||
std::reverse(arr.begin(), arr.end());
|
||||
return is_val<value_tuple>(val) ? mk_val<value_tuple>(std::move(arr)) : mk_val<value_array>(std::move(arr));
|
||||
}},
|
||||
{"unique", [](const func_args &) -> value {
|
||||
throw not_implemented_exception("Array unique builtin not implemented");
|
||||
}},
|
||||
{"unique", array_unique_not_implemented},
|
||||
};
|
||||
return builtins;
|
||||
}
|
||||
|
||||
[[noreturn]] static value object_join_not_implemented(const func_args &) {
|
||||
throw not_implemented_exception("object join not implemented");
|
||||
}
|
||||
|
||||
const func_builtins & value_object_t::get_builtins() const {
|
||||
if (!has_builtins) {
|
||||
|
|
@ -1183,9 +1189,7 @@ const func_builtins & value_object_t::get_builtins() const {
|
|||
});
|
||||
return result;
|
||||
}},
|
||||
{"join", [](const func_args &) -> value {
|
||||
throw not_implemented_exception("object join not implemented");
|
||||
}},
|
||||
{"join", object_join_not_implemented},
|
||||
};
|
||||
return builtins;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -129,27 +129,25 @@ struct value_t {
|
|||
// Note: only for debugging and error reporting purposes
|
||||
virtual std::string type() const { return ""; }
|
||||
|
||||
virtual int64_t as_int() const { throw std::runtime_error(type() + " is not an int value"); }
|
||||
virtual double as_float() const { throw std::runtime_error(type() + " is not a float value"); }
|
||||
virtual string as_string() const { throw std::runtime_error(type() + " is not a string value"); }
|
||||
virtual bool as_bool() const { throw std::runtime_error(type() + " is not a bool value"); }
|
||||
virtual const std::vector<value> & as_array() const { throw std::runtime_error(type() + " is not an array value"); }
|
||||
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value invoke(const func_args &) const { throw std::runtime_error(type() + " is not a function value"); }
|
||||
virtual int64_t as_int() const { throw_type_error("is not an int value"); }
|
||||
virtual double as_float() const { throw_type_error("is not a float value"); }
|
||||
virtual string as_string() const { throw_type_error("is not a string value"); }
|
||||
virtual bool as_bool() const { throw_type_error("is not a bool value"); }
|
||||
virtual const std::vector<value> & as_array() const { throw_type_error("is not an array value"); }
|
||||
virtual const std::vector<std::pair<value, value>> & as_ordered_object() const { throw_type_error("is not an object value"); }
|
||||
virtual value invoke(const func_args &) const { throw_type_error("is not a function value"); }
|
||||
virtual bool is_none() const { return false; }
|
||||
virtual bool is_undefined() const { return false; }
|
||||
virtual const func_builtins & get_builtins() const {
|
||||
throw std::runtime_error("No builtins available for type " + type());
|
||||
}
|
||||
virtual const func_builtins & get_builtins() const { throw_type_error("has no builtins"); }
|
||||
|
||||
virtual bool has_key(const value &) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual void insert(const value & /* key */, const value & /* val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const value & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const value & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */, value & /* default_val */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */) { throw std::runtime_error(type() + " is not an object value"); }
|
||||
virtual value & at(int64_t /* idx */, value & /* default_val */) { throw std::runtime_error(type() + " is not an array value"); }
|
||||
virtual value & at(int64_t /* idx */) { throw std::runtime_error(type() + " is not an array value"); }
|
||||
virtual bool has_key(const value &) { throw_type_error("is not an object value"); }
|
||||
virtual void insert(const value & /* key */, const value & /* val */) { throw_type_error("is not an object value"); }
|
||||
virtual value & at(const value & /* key */, value & /* default_val */) { throw_type_error("is not an object value"); }
|
||||
virtual value & at(const value & /* key */) { throw_type_error("is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */, value & /* default_val */) { throw_type_error("is not an object value"); }
|
||||
virtual value & at(const std::string & /* key */) { throw_type_error("is not an object value"); }
|
||||
virtual value & at(int64_t /* idx */, value & /* default_val */) { throw_type_error("is not an array value"); }
|
||||
virtual value & at(int64_t /* idx */) { throw_type_error("is not an array value"); }
|
||||
|
||||
virtual bool is_numeric() const { return false; }
|
||||
virtual bool is_hashable() const { return false; }
|
||||
|
|
@ -163,6 +161,11 @@ struct value_t {
|
|||
// Note: only for debugging purposes
|
||||
virtual std::string as_repr() const { return as_string().str(); }
|
||||
|
||||
private:
|
||||
[[noreturn]] void throw_type_error(const char* expected) const {
|
||||
throw std::runtime_error(type() + " " + expected);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual bool equivalent(const value_t &) const = 0;
|
||||
virtual bool nonequal(const value_t & other) const { return !equivalent(other); }
|
||||
|
|
|
|||
|
|
@ -746,7 +746,12 @@ class ModelBase:
|
|||
|
||||
if (not quant_algo or not quant_layers) and quant_config_file.is_file():
|
||||
with open(quant_config_file, "r", encoding="utf-8") as f:
|
||||
quant_config = json.load(f).get("quantization") or {}
|
||||
hf_quant_config = json.load(f)
|
||||
quant_config = hf_quant_config.get("quantization") or {}
|
||||
producer = hf_quant_config.get("producer") or {}
|
||||
producer_name = (producer.get("name") or "").lower()
|
||||
if quant_method is None:
|
||||
self.hparams.setdefault("quantization_config", {})["quant_method"] = producer_name
|
||||
quant_algo = quant_config.get("quant_algo", quant_algo)
|
||||
quant_layers = quant_config.get("quantized_layers", quant_layers) or {}
|
||||
|
||||
|
|
|
|||
|
|
@ -3608,6 +3608,30 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (ops.size() == 2 && ops.begin()[0] == GGML_OP_UNARY && ops.begin()[1] == GGML_OP_SQR
|
||||
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_RELU) {
|
||||
const ggml_tensor * unary = cgraph->nodes[node_idx];
|
||||
const ggml_tensor * sqr = cgraph->nodes[node_idx+1];
|
||||
|
||||
if (ggml_get_unary_op(unary) != GGML_UNARY_OP_RELU) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (unary->type != GGML_TYPE_F32 && unary->type != GGML_TYPE_F16) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (unary->type != sqr->type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ggml_is_contiguous(unary->src[0])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SCALE && ops.begin()[1] == GGML_OP_UNARY && ops.begin()[2] == GGML_OP_SCALE
|
||||
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_TANH) {
|
||||
const ggml_tensor *scale = cgraph->nodes[node_idx];
|
||||
|
|
@ -4116,6 +4140,12 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud
|
|||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_SQR }, { GGML_UNARY_OP_RELU })) {
|
||||
ggml_cuda_op_relu_sqr(*cuda_ctx, node, cgraph->nodes[i+1]);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) {
|
||||
i += 2;
|
||||
ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i], node);
|
||||
|
|
|
|||
|
|
@ -65,6 +65,11 @@ static __device__ __forceinline__ float op_sqr(float x) {
|
|||
return x * x;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ float op_relu_sqr(float x) {
|
||||
const float r = fmaxf(x, 0.0f);
|
||||
return r * r;
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ float op_sqrt(float x) {
|
||||
return sqrtf(x);
|
||||
}
|
||||
|
|
@ -615,3 +620,21 @@ void ggml_cuda_op_unary_mul(ggml_backend_cuda_context & ctx, ggml_tensor * unary
|
|||
GGML_ABORT("Unsupported unary op for fused unary+mul");
|
||||
}
|
||||
}
|
||||
|
||||
/* fused relu + sqr */
|
||||
|
||||
void ggml_cuda_op_relu_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * relu_node, ggml_tensor * sqr_node) {
|
||||
const ggml_tensor * src = relu_node->src[0];
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(src));
|
||||
GGML_ASSERT(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src->type == sqr_node->type);
|
||||
|
||||
const int k = ggml_nelements(src);
|
||||
if (src->type == GGML_TYPE_F16) {
|
||||
unary_cuda<op_relu_sqr>((const half *)src->data, (half *)sqr_node->data, k, stream);
|
||||
} else {
|
||||
unary_cuda<op_relu_sqr>((const float *)src->data, (float *)sqr_node->data, k, stream);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -91,6 +91,8 @@ void ggml_cuda_op_xielu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|||
|
||||
void ggml_cuda_op_unary_mul(ggml_backend_cuda_context & ctx, ggml_tensor * unary_node, ggml_tensor * mul_node);
|
||||
|
||||
void ggml_cuda_op_relu_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * relu_node, ggml_tensor * sqr_node);
|
||||
|
||||
__device__ __forceinline__ float ggml_cuda_op_silu_single(float x) {
|
||||
return x / (1.0f + expf(-x));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -820,7 +820,7 @@ ggml_metal_device_t ggml_metal_device_init(int device) {
|
|||
}
|
||||
|
||||
// print MTL GPU family:
|
||||
GGML_LOG_INFO("%s: GPU name: %s\n", __func__, dev->props.name);
|
||||
GGML_LOG_INFO("%s: GPU name: %s (%s)\n", __func__, dev->props.name, dev->props.desc);
|
||||
|
||||
// determine max supported GPU family
|
||||
// https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
|
||||
|
|
@ -937,13 +937,13 @@ void ggml_metal_device_rsets_keep_alive(ggml_metal_device_t dev) {
|
|||
}
|
||||
|
||||
struct ggml_metal_event {
|
||||
void * obj; // id<MTLEvent>
|
||||
void * obj; // id<MTLSharedEvent>
|
||||
|
||||
atomic_int value;
|
||||
};
|
||||
|
||||
void ggml_metal_event_encode_signal(ggml_metal_event_t ev, ggml_metal_cmd_buf_t cmd_buf_raw) {
|
||||
id<MTLEvent> event = (id<MTLEvent>)ev->obj;
|
||||
id<MTLSharedEvent> event = (id<MTLSharedEvent>)ev->obj;
|
||||
|
||||
id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>) cmd_buf_raw;
|
||||
|
||||
|
|
@ -951,7 +951,7 @@ void ggml_metal_event_encode_signal(ggml_metal_event_t ev, ggml_metal_cmd_buf_t
|
|||
}
|
||||
|
||||
void ggml_metal_event_encode_wait(ggml_metal_event_t ev, ggml_metal_cmd_buf_t cmd_buf_raw) {
|
||||
id<MTLEvent> event = (id<MTLEvent>)ev->obj;
|
||||
id<MTLSharedEvent> event = (id<MTLSharedEvent>)ev->obj;
|
||||
|
||||
id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>) cmd_buf_raw;
|
||||
|
||||
|
|
@ -959,7 +959,7 @@ void ggml_metal_event_encode_wait(ggml_metal_event_t ev, ggml_metal_cmd_buf_t cm
|
|||
}
|
||||
|
||||
ggml_metal_event_t ggml_metal_device_event_init(ggml_metal_device_t dev) {
|
||||
id<MTLEvent> event = [dev->mtl_device newEvent];
|
||||
id<MTLSharedEvent> event = [dev->mtl_device newSharedEvent];
|
||||
|
||||
ggml_metal_event_t ev = calloc(1, sizeof(struct ggml_metal_event));
|
||||
|
||||
|
|
@ -970,7 +970,7 @@ ggml_metal_event_t ggml_metal_device_event_init(ggml_metal_device_t dev) {
|
|||
}
|
||||
|
||||
void ggml_metal_device_event_free(ggml_metal_device_t dev, ggml_metal_event_t ev) {
|
||||
id<MTLEvent> event = ev->obj;
|
||||
id<MTLSharedEvent> event = ev->obj;
|
||||
[event release];
|
||||
|
||||
free(ev);
|
||||
|
|
@ -979,14 +979,13 @@ void ggml_metal_device_event_free(ggml_metal_device_t dev, ggml_metal_event_t ev
|
|||
}
|
||||
|
||||
void ggml_metal_device_event_synchronize(ggml_metal_device_t dev, ggml_metal_event_t ev) {
|
||||
@autoreleasepool {
|
||||
id<MTLEvent> event = ev->obj;
|
||||
|
||||
id<MTLCommandBuffer> cmd_buf = [dev->mtl_queue commandBuffer];
|
||||
[cmd_buf encodeWaitForEvent:event value:atomic_load_explicit(&ev->value, memory_order_relaxed)];
|
||||
[cmd_buf commit];
|
||||
[cmd_buf waitUntilCompleted];
|
||||
id<MTLSharedEvent> event = ev->obj;
|
||||
const bool res = [event waitUntilSignaledValue:atomic_load_explicit(&ev->value, memory_order_relaxed) timeoutMS:60000];
|
||||
if (!res) {
|
||||
GGML_ABORT("%s: failed to wait for event\n", __func__);
|
||||
}
|
||||
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
void ggml_metal_device_get_memory(ggml_metal_device_t dev, size_t * free, size_t * total) {
|
||||
|
|
|
|||
|
|
@ -7672,7 +7672,7 @@ size_t ggml_quantize_chunk(
|
|||
int64_t nrows,
|
||||
int64_t n_per_row,
|
||||
const float * imatrix) {
|
||||
const int64_t n = (int64_t) nrows * n_per_row;
|
||||
const int64_t n = nrows * n_per_row;
|
||||
|
||||
if (ggml_quantize_requires_imatrix(type)) {
|
||||
GGML_ASSERT(imatrix != NULL);
|
||||
|
|
@ -7689,21 +7689,21 @@ size_t ggml_quantize_chunk(
|
|||
size_t result = 0;
|
||||
|
||||
switch (type) {
|
||||
case GGML_TYPE_Q1_0: result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_NVFP4: result = quantize_nvfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_K: result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q6_K: result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_TQ1_0: result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_TQ2_0: result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q1_0: result = quantize_q1_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_0: result = quantize_q4_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_1: result = quantize_q4_1 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_0: result = quantize_q5_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_1: result = quantize_q5_1 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q8_0: result = quantize_q8_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_MXFP4: result = quantize_mxfp4 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_NVFP4: result = quantize_nvfp4 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q2_K: result = quantize_q2_K (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q3_K: result = quantize_q3_K (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q4_K: result = quantize_q4_K (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q5_K: result = quantize_q5_K (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_Q6_K: result = quantize_q6_K (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_TQ1_0: result = quantize_tq1_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_TQ2_0: result = quantize_tq2_0 (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_IQ2_XS: result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||
|
|
@ -7768,10 +7768,10 @@ struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
|
|||
}
|
||||
|
||||
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
|
||||
if (p0->n_threads != p1->n_threads ) return false;
|
||||
if (p0->prio != p1->prio ) return false;
|
||||
if (p0->poll != p1->poll ) return false;
|
||||
if (p0->strict_cpu != p1->strict_cpu ) return false;
|
||||
if (p0->n_threads != p1->n_threads ) return false;
|
||||
if (p0->prio != p1->prio ) return false;
|
||||
if (p0->poll != p1->poll ) return false;
|
||||
if (p0->strict_cpu != p1->strict_cpu ) return false;
|
||||
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -281,6 +281,42 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) {
|
|||
return chatcmpl_body;
|
||||
}
|
||||
|
||||
// Edits the cch section of an "x-anthropic-billing-header" system prompt.
|
||||
// Does nothing to any other prompt.
|
||||
//
|
||||
// This is a claude message with a "cch=ef01a" attribute that breaks prefix caching.
|
||||
// The cch stamp is a whitebox end-to-end integrity hint. It's not meaningful as a
|
||||
// system prompt data, particularly to llama.cpp, but its presence means the prefix
|
||||
// cache will not get past it: It changes on each request.
|
||||
//
|
||||
// Reference: https://github.com/ggml-org/llama.cpp/pull/21793
|
||||
// Example header:
|
||||
// ```
|
||||
// x-anthropic-billing-header: cc_version=2.1.101.e51; cc_entrypoint=cli; cch=a5145;You are Claude Code, Anthropic's official CLI for Claude.
|
||||
// ^^^^^
|
||||
// ```
|
||||
static void normalize_anthropic_billing_header(std::string & system_text) {
|
||||
if (system_text.rfind("x-anthropic-billing-header:", 0) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t header_prefix_length = strlen("x-anthropic-billing-header:");
|
||||
const size_t cch_length = 5;
|
||||
const size_t index_cch = system_text.find("cch=", header_prefix_length);
|
||||
if (index_cch == std::string::npos) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t index_replace = index_cch + 4;
|
||||
if (index_replace + cch_length < system_text.length() && system_text[index_replace + cch_length] == ';') {
|
||||
for (size_t i = 0; i < cch_length; ++i) {
|
||||
system_text[index_replace + i] = 'f';
|
||||
}
|
||||
} else {
|
||||
LOG_ERR("anthropic string not as expected: %s", system_text.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
json server_chat_convert_anthropic_to_oai(const json & body) {
|
||||
json oai_body;
|
||||
|
||||
|
|
@ -292,10 +328,13 @@ json server_chat_convert_anthropic_to_oai(const json & body) {
|
|||
|
||||
if (system_param.is_string()) {
|
||||
system_content = system_param.get<std::string>();
|
||||
normalize_anthropic_billing_header(system_content);
|
||||
} else if (system_param.is_array()) {
|
||||
for (const auto & block : system_param) {
|
||||
if (json_value(block, "type", std::string()) == "text") {
|
||||
system_content += json_value(block, "text", std::string());
|
||||
auto system_text = json_value(block, "text", std::string());
|
||||
normalize_anthropic_billing_header(system_text);
|
||||
system_content += system_text;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -475,7 +514,7 @@ json server_chat_convert_anthropic_to_oai(const json & body) {
|
|||
}
|
||||
|
||||
// Pass through common params
|
||||
for (const auto & key : {"temperature", "top_p", "top_k", "stream"}) {
|
||||
for (const auto & key : {"temperature", "top_p", "top_k", "stream", "chat_template_kwargs"}) {
|
||||
if (body.contains(key)) {
|
||||
oai_body[key] = body.at(key);
|
||||
}
|
||||
|
|
@ -535,6 +574,7 @@ json server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
|
|||
|
||||
json convert_transcriptions_to_chatcmpl(
|
||||
const json & inp_body,
|
||||
const common_chat_templates * tmpls,
|
||||
const std::map<std::string, raw_buffer> & in_files,
|
||||
std::vector<raw_buffer> & out_files) {
|
||||
// TODO @ngxson : this function may need to be improved in the future
|
||||
|
|
@ -548,27 +588,29 @@ json convert_transcriptions_to_chatcmpl(
|
|||
}
|
||||
|
||||
// handle input data
|
||||
std::string prompt = json_value(inp_body, "prompt", std::string());
|
||||
std::string language = json_value(inp_body, "language", std::string());
|
||||
std::string prompt = json_value(inp_body, "prompt", std::string());
|
||||
std::string language = json_value(inp_body, "language", std::string());
|
||||
std::string response_format = json_value(inp_body, "response_format", std::string("json"));
|
||||
if (response_format != "json") {
|
||||
throw std::invalid_argument("Only 'json' response_format is supported for transcription");
|
||||
}
|
||||
const common_chat_prompt_preset preset = common_chat_get_asr_prompt(tmpls);
|
||||
if (prompt.empty()) {
|
||||
prompt = "Transcribe audio to text";
|
||||
prompt = preset.user;
|
||||
}
|
||||
if (!language.empty()) {
|
||||
prompt += string_format(" (language: %s)", language.c_str());
|
||||
}
|
||||
prompt += get_media_marker();
|
||||
|
||||
json messages = json::array();
|
||||
if (!preset.system.empty()) {
|
||||
messages.push_back({{"role", "system"}, {"content", preset.system}});
|
||||
}
|
||||
messages.push_back({{"role", "user"}, {"content", prompt}});
|
||||
|
||||
json chatcmpl_body = inp_body; // copy all fields
|
||||
chatcmpl_body["messages"] = json::array({
|
||||
{
|
||||
{"role", "user"},
|
||||
{"content", prompt},
|
||||
},
|
||||
});
|
||||
chatcmpl_body["messages"] = messages;
|
||||
|
||||
// because input from form-data, everything is string, we need to correct the types here
|
||||
std::string stream = json_value(inp_body, "stream", std::string("false"));
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ json server_chat_convert_anthropic_to_oai(const json & body);
|
|||
// convert OpenAI transcriptions API format to OpenAI Chat Completions API format
|
||||
json convert_transcriptions_to_chatcmpl(
|
||||
const json & body,
|
||||
const common_chat_templates * tmpls,
|
||||
const std::map<std::string, raw_buffer> & in_files,
|
||||
std::vector<raw_buffer> & out_files);
|
||||
|
||||
|
|
|
|||
|
|
@ -947,7 +947,9 @@ json oaicompat_chat_params_parse(
|
|||
json response_format = json_value(body, "response_format", json::object());
|
||||
std::string response_type = json_value(response_format, "type", std::string());
|
||||
if (response_type == "json_object") {
|
||||
json_schema = json_value(response_format, "schema", json::object());
|
||||
if (response_format.contains("schema") || json_schema.empty()) {
|
||||
json_schema = json_value(response_format, "schema", json::object());
|
||||
}
|
||||
} else if (response_type == "json_schema") {
|
||||
auto schema_wrapper = json_value(response_format, "json_schema", json::object());
|
||||
json_schema = json_value(schema_wrapper, "schema", json::object());
|
||||
|
|
|
|||
|
|
@ -675,6 +675,10 @@ private:
|
|||
|
||||
int32_t n_ctx; // total context for all clients / slots
|
||||
|
||||
// set to llama_model_n_swa(model)
|
||||
// if swa_full is enabled, this is set to 0 to simulate a non-SWA model
|
||||
int32_t n_swa;
|
||||
|
||||
// slots / clients
|
||||
std::vector<server_slot> slots;
|
||||
|
||||
|
|
@ -719,7 +723,7 @@ private:
|
|||
return;
|
||||
}
|
||||
SLT_INF(slot, "%s", "saving idle slot to prompt cache\n");
|
||||
SLT_DBG(slot, "%s", "__TEST_TAG_CLEAR_IDLE_SLOT__\n");
|
||||
SLT_DBG(slot, "%s", "__TEST_TAG_CACHE_IDLE_SLOT__\n");
|
||||
slot.prompt_save(*prompt_cache);
|
||||
slot.prompt_clear(false);
|
||||
prompt_cache->update();
|
||||
|
|
@ -854,6 +858,8 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
n_swa = params_base.swa_full ? 0 : llama_model_n_swa(model);
|
||||
|
||||
// Necessary similarity of prompt for slot selection
|
||||
slot_prompt_similarity = params_base.slot_prompt_similarity;
|
||||
|
||||
|
|
@ -996,7 +1002,7 @@ private:
|
|||
params_base.cache_idle_slots = false;
|
||||
} else {
|
||||
SRV_INF("%s: idle slots will be saved to prompt cache and cleared upon starting a new task\n", __func__);
|
||||
SRV_DBG("%s", "__TEST_TAG_CLEAR_IDLE_ENABLED__\n");
|
||||
SRV_DBG("%s", "__TEST_TAG_CACHE_IDLE_SLOTS_ENABLED__\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2415,9 +2421,6 @@ private:
|
|||
|
||||
llama_pos pos_next = slot.prompt.tokens.pos_next(n_past);
|
||||
|
||||
// note: when n_swa == 0, the model does not use SWA
|
||||
const auto n_swa = std::max(0, llama_model_n_swa(model));
|
||||
|
||||
// the largest pos_min required for a checkpoint to be useful
|
||||
const auto pos_min_thold = std::max(0, pos_next - n_swa);
|
||||
|
||||
|
|
@ -2589,10 +2592,10 @@ private:
|
|||
// make a checkpoint of the parts of the memory that cannot be rolled back.
|
||||
// checkpoints are created only if:
|
||||
// - the model does not support partial sequence removal
|
||||
// - the model uses SWA and we are not using `swa_full`
|
||||
// - the model uses SWA (and we are not using `swa_full`)
|
||||
do_checkpoint = do_checkpoint && (
|
||||
(slot.ctx_seq_rm_type == COMMON_CONTEXT_SEQ_RM_TYPE_FULL) ||
|
||||
(llama_model_n_swa(model) > 0 && !params_base.swa_full));
|
||||
(n_swa > 0));
|
||||
|
||||
bool has_mtmd = false;
|
||||
|
||||
|
|
@ -3807,6 +3810,7 @@ void server_routes::init_routes() {
|
|||
std::vector<raw_buffer> files;
|
||||
json body = convert_transcriptions_to_chatcmpl(
|
||||
json::parse(req.body),
|
||||
meta->chat_params.tmpls.get(),
|
||||
req.files,
|
||||
files);
|
||||
SRV_DBG("%s\n", "Request converted: OpenAI Transcriptions -> OpenAI Chat Completions");
|
||||
|
|
|
|||
|
|
@ -270,6 +270,7 @@ task_params server_task::params_from_json_cmpl(
|
|||
params.n_indent = json_value(data, "n_indent", defaults.n_indent);
|
||||
params.n_keep = json_value(data, "n_keep", defaults.n_keep);
|
||||
params.n_discard = json_value(data, "n_discard", defaults.n_discard);
|
||||
params.n_discard = std::max(0, params.n_discard);
|
||||
params.n_cmpl = json_value(data, "n_cmpl", json_value(data, "n", 1));
|
||||
params.n_cache_reuse = json_value(data, "n_cache_reuse", defaults.n_cache_reuse);
|
||||
//params.t_max_prompt_ms = json_value(data, "t_max_prompt_ms", defaults.t_max_prompt_ms); // TODO: implement
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ def test_clear_and_restore():
|
|||
log = LogReader(server.log_path)
|
||||
|
||||
# verify feature is enabled
|
||||
assert "__TEST_TAG_CLEAR_IDLE_ENABLED__" in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOTS_ENABLED__" in log.drain()
|
||||
|
||||
res = server.make_request("POST", "/completion", data={
|
||||
"prompt": LONG_PROMPT,
|
||||
|
|
@ -59,7 +59,7 @@ def test_clear_and_restore():
|
|||
original_prompt_n = res.body["timings"]["prompt_n"]
|
||||
|
||||
# Slot 0 is the only slot with KV — should NOT be cleared
|
||||
assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOT__" not in log.drain()
|
||||
|
||||
# Launching slot 1 clears idle slot 0
|
||||
res = server.make_request("POST", "/completion", data={
|
||||
|
|
@ -68,7 +68,7 @@ def test_clear_and_restore():
|
|||
"cache_prompt": True,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert "__TEST_TAG_CLEAR_IDLE_SLOT__" in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOT__" in log.drain()
|
||||
|
||||
# Re-send same prompt — should restore from cache-ram
|
||||
res = server.make_request("POST", "/completion", data={
|
||||
|
|
@ -86,7 +86,7 @@ def test_clear_and_restore():
|
|||
"cache_prompt": True,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOT__" not in log.drain()
|
||||
|
||||
|
||||
def test_disabled_with_flag():
|
||||
|
|
@ -96,7 +96,7 @@ def test_disabled_with_flag():
|
|||
log = LogReader(server.log_path)
|
||||
|
||||
# Feature should not be enabled
|
||||
assert "__TEST_TAG_CLEAR_IDLE_ENABLED__" not in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOTS_ENABLED__" not in log.drain()
|
||||
|
||||
res = server.make_request("POST", "/completion", data={
|
||||
"prompt": LONG_PROMPT,
|
||||
|
|
@ -112,4 +112,4 @@ def test_disabled_with_flag():
|
|||
"cache_prompt": True,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert "__TEST_TAG_CLEAR_IDLE_SLOT__" not in log.drain()
|
||||
assert "__TEST_TAG_CACHE_IDLE_SLOT__" not in log.drain()
|
||||
|
|
|
|||
14
vendor/cpp-httplib/CMakeLists.txt
vendored
14
vendor/cpp-httplib/CMakeLists.txt
vendored
|
|
@ -81,7 +81,7 @@ if (LLAMA_BUILD_BORINGSSL)
|
|||
target_link_libraries(${TARGET} PUBLIC ssl crypto)
|
||||
|
||||
elseif (LLAMA_BUILD_LIBRESSL)
|
||||
set(LIBRESSL_VERSION "4.2.1" CACHE STRING "LibreSSL version")
|
||||
set(LIBRESSL_VERSION "4.3.1" CACHE STRING "LibreSSL version")
|
||||
|
||||
message(STATUS "Fetching LibreSSL version ${LIBRESSL_VERSION}")
|
||||
|
||||
|
|
@ -161,12 +161,24 @@ if(LLAMA_BUILD_BORINGSSL OR LLAMA_BUILD_LIBRESSL)
|
|||
if(LLAMA_BUILD_BORINGSSL)
|
||||
target_compile_options(fipsmodule PRIVATE /w)
|
||||
endif()
|
||||
if(LLAMA_BUILD_LIBRESSL)
|
||||
target_compile_options(ssl_obj PRIVATE /w)
|
||||
target_compile_options(bs_obj PRIVATE /w)
|
||||
target_compile_options(compat_obj PRIVATE /w)
|
||||
target_compile_options(crypto_obj PRIVATE /w)
|
||||
endif()
|
||||
else()
|
||||
target_compile_options(ssl PRIVATE -w)
|
||||
target_compile_options(crypto PRIVATE -w)
|
||||
if(LLAMA_BUILD_BORINGSSL)
|
||||
target_compile_options(fipsmodule PRIVATE -w)
|
||||
endif()
|
||||
if(LLAMA_BUILD_LIBRESSL)
|
||||
target_compile_options(ssl_obj PRIVATE -w)
|
||||
target_compile_options(bs_obj PRIVATE -w)
|
||||
target_compile_options(compat_obj PRIVATE -w)
|
||||
target_compile_options(crypto_obj PRIVATE -w)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue