From 423bee462b46323433e5f1e322e3076cedbeec83 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 4 Feb 2026 15:12:03 +0200 Subject: [PATCH 1/7] ci : fix sanitize workflow to enable ggml sanitizers too (#19323) --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fd251ac4c..8ce679bd9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -293,6 +293,7 @@ jobs: cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} cmake --build build --config ${{ matrix.build_type }} -j $(nproc) @@ -303,6 +304,7 @@ jobs: cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DGGML_OPENMP=OFF cmake --build build --config ${{ matrix.build_type }} -j $(nproc) From e0c93af2a03f5c53d052dfaefd86c06ed3784646 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Wed, 4 Feb 2026 17:55:31 +0100 Subject: [PATCH 2/7] debug: make common_debug_print_tensor readable (#19331) * debug: make common_debug_print_tensor readable * editorconfig --- common/debug.cpp | 34 ++++++++++++++++++---------------- src/models/qwen3next.cpp | 4 ++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/common/debug.cpp b/common/debug.cpp index fdaddb144..0df409a79 100644 --- a/common/debug.cpp +++ b/common/debug.cpp @@ -45,6 +45,8 @@ static float common_ggml_get_float_value(const uint8_t * data, return v; } +#define INDENT " " + template void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) { GGML_ASSERT(n > 0); @@ -60,41 +62,41 @@ void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * n } } for (int64_t i3 = 0; i3 < ne[3]; i3++) { - LOG_ERR(" [\n"); + LOG(INDENT "[\n"); for (int64_t i2 = 0; i2 < ne[2]; i2++) { if (i2 == n && ne[2] > 2 * n) { - LOG_ERR(" ..., \n"); + LOG(INDENT INDENT "..., \n"); i2 = ne[2] - n; } - LOG_ERR(" [\n"); + LOG(INDENT INDENT "[\n"); for (int64_t i1 = 0; i1 < ne[1]; i1++) { if (i1 == n && ne[1] > 2 * n) { - LOG_ERR(" ..., \n"); + LOG(INDENT INDENT INDENT "..., \n"); i1 = ne[1] - n; } - LOG_ERR(" ["); + LOG(INDENT INDENT INDENT "["); for (int64_t i0 = 0; i0 < ne[0]; i0++) { if (i0 == n && ne[0] > 2 * n) { - LOG_ERR("..., "); + LOG(" ..., "); i0 = ne[0] - n; } const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3); - LOG_ERR("%12.4f", v); + LOG("%12.4f", v); if (i0 < ne[0] - 1) { - LOG_ERR(", "); + LOG(", "); } } - LOG_ERR("],\n"); + LOG(" ],\n"); } - LOG_ERR(" ],\n"); + LOG(INDENT INDENT "],\n"); } - LOG_ERR(" ]\n"); - LOG_ERR(" sum = %f\n", sum); + LOG(INDENT "]\n"); + LOG(INDENT "sum = %f\n", sum); } if constexpr (abort) { if (std::isnan(sum)) { - LOG_ERR("encountered NaN - aborting\n"); + LOG("encountered NaN - aborting\n"); exit(0); } } @@ -137,9 +139,9 @@ template bool common_debug_cb_eval(struct ggml_tensor * t, b } if (matches_filter) { - LOG_ERR("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type), - ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "", - common_ggml_ne_string(t).c_str()); + LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type), + ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "", + common_ggml_ne_string(t).c_str()); } const bool is_host = ggml_backend_buffer_is_host(t->buffer); diff --git a/src/models/qwen3next.cpp b/src/models/qwen3next.cpp index 06d946c5f..99b1a76a4 100644 --- a/src/models/qwen3next.cpp +++ b/src/models/qwen3next.cpp @@ -265,13 +265,13 @@ std::pair llm_build_qwen3next::build_delta_net_chu cb(g_diff, "g_diff", il); // shape: (chunk_size, 1, n_chunks, H_v * n_seqs) ggml_tensor * g_diff_exp = ggml_exp(ctx0, g_diff); - ggml_tensor * g_diff_exp_t = ggml_reshape_4d(ctx0, g_diff_exp, + ggml_tensor * g_diff_exp_t = ggml_reshape_4d(ctx0, g_diff_exp, 1, chunk_size, n_chunks, g_diff_exp->ne[3]); ggml_tensor * key_gdiff = ggml_mul(ctx0, k, g_diff_exp_t); cb(key_gdiff, "key_gdiff", il); // shape: (S_k, chunk_size, n_chunks, H_v * n_seqs) - ggml_tensor * key_gdiff_t = ggml_cont(ctx0, ggml_transpose(ctx0, key_gdiff)); + ggml_tensor * key_gdiff_t = ggml_cont(ctx0, ggml_transpose(ctx0, key_gdiff)); cb(key_gdiff_t, "key_gdiff_t", il); // shape: (chunk_size, S_k, n_chunks, H_v * n_seqs) From b536eb023368701fe3564210440e2df6151c3e65 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 4 Feb 2026 20:20:40 +0100 Subject: [PATCH 3/7] codeowners : add danbev for examples/debug (#19332) * codeowners : add danbev for examples/debug * Add @pwilkin to CODEOWNERS for debug --------- Co-authored-by: Piotr Wilkin (ilintar) --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index e573a3d2e..9d252c9b8 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -27,6 +27,7 @@ /examples/batched.swift/ @ggerganov /examples/batched/ @ggerganov /examples/convert-llama2c-to-ggml/ @ggerganov +/examples/debug/ @danbev @pwilkin /examples/deprecation-warning/ @ggerganov /examples/diffusion/ @am17an /examples/embedding/ @ggerganov From e6e934c5ea1540522171ec91eee05d37909286d1 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Thu, 5 Feb 2026 05:15:03 +0800 Subject: [PATCH 4/7] vendor: update cpp-httplib version (#19313) Signed-off-by: Aaron Teo --- scripts/sync_vendor.py | 4 +- vendor/cpp-httplib/httplib.cpp | 238 ++++++++++++++++++++++++++------- vendor/cpp-httplib/httplib.h | 102 ++++++++++++-- 3 files changed, 285 insertions(+), 59 deletions(-) diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py index 0771942d4..1ff6a9a40 100755 --- a/scripts/sync_vendor.py +++ b/scripts/sync_vendor.py @@ -12,8 +12,8 @@ vendor = { # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h", "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h", - "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h", - "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE": "vendor/cpp-httplib/LICENSE", + "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h", + "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE", "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h", } diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp index d707e65fd..ba5f9c8ff 100644 --- a/vendor/cpp-httplib/httplib.cpp +++ b/vendor/cpp-httplib/httplib.cpp @@ -117,6 +117,8 @@ time_t parse_http_date(const std::string &date_str) { #ifdef _WIN32 return _mkgmtime(&tm_buf); +#elif defined _AIX + return mktime(&tm_buf); #else return timegm(&tm_buf); #endif @@ -1376,7 +1378,7 @@ int getaddrinfo_with_timeout(const char *node, const char *service, // Allocate on the heap, so the resolver thread can keep using the data. auto state = std::make_shared(); - state->node = node; + if (node) { state->node = node; } state->service = service; state->hints = *hints; @@ -2896,10 +2898,20 @@ bool parse_range_header(const std::string &s, Ranges &ranges) try { return; } - const auto first = - static_cast(lhs.empty() ? -1 : std::stoll(lhs)); - const auto last = - static_cast(rhs.empty() ? -1 : std::stoll(rhs)); + ssize_t first = -1; + if (!lhs.empty()) { + ssize_t v; + auto res = detail::from_chars(lhs.data(), lhs.data() + lhs.size(), v); + if (res.ec == std::errc{}) { first = v; } + } + + ssize_t last = -1; + if (!rhs.empty()) { + ssize_t v; + auto res = detail::from_chars(rhs.data(), rhs.data() + rhs.size(), v); + if (res.ec == std::errc{}) { last = v; } + } + if ((first == -1 && last == -1) || (first != -1 && last != -1 && first > last)) { all_valid_ranges = false; @@ -2974,25 +2986,17 @@ bool parse_accept_header(const std::string &s, return; } -#ifdef CPPHTTPLIB_NO_EXCEPTIONS { - std::istringstream iss(quality_str); - iss >> accept_entry.quality; - - // Check if conversion was successful and entire string was consumed - if (iss.fail() || !iss.eof()) { + double v = 0.0; + auto res = detail::from_chars( + quality_str.data(), quality_str.data() + quality_str.size(), v); + if (res.ec == std::errc{}) { + accept_entry.quality = v; + } else { has_invalid_entry = true; return; } } -#else - try { - accept_entry.quality = std::stod(quality_str); - } catch (...) { - has_invalid_entry = true; - return; - } -#endif // Check if quality is in valid range [0.0, 1.0] if (accept_entry.quality < 0.0 || accept_entry.quality > 1.0) { has_invalid_entry = true; @@ -5570,13 +5574,26 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) { strm, req, res, // Regular [&](const char *buf, size_t n) { + // Prevent arithmetic overflow when checking sizes. + // Avoid computing (req.body.size() + n) directly because + // adding two unsigned `size_t` values can wrap around and + // produce a small result instead of indicating overflow. + // Instead, check using subtraction: ensure `n` does not + // exceed the remaining capacity `max_size() - size()`. + if (req.body.size() >= req.body.max_size() || + n > req.body.max_size() - req.body.size()) { + return false; + } + // Limit decompressed body size to payload_max_length_ to protect // against "zip bomb" attacks where a small compressed payload // decompresses to a massive size. - if (req.body.size() + n > payload_max_length_ || - req.body.size() + n > req.body.max_size()) { + if (payload_max_length_ > 0 && + (req.body.size() >= payload_max_length_ || + n > payload_max_length_ - req.body.size())) { return false; } + req.body.append(buf, n); return true; }, @@ -5666,22 +5683,29 @@ bool Server::read_content_core( // oversized request and fail early (causing connection close). For SSL // builds we cannot reliably peek the decrypted application bytes, so keep // the original behaviour. -#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(_WIN32) +#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT) if (!req.has_header("Content-Length") && !detail::is_chunked_transfer_encoding(req.headers)) { - socket_t s = strm.socket(); - if (s != INVALID_SOCKET) { - // Peek up to payload_max_length_ + 1 bytes. If more than - // payload_max_length_ bytes are pending, reject the request. - size_t to_peek = - (payload_max_length_ > 0) - ? (std::min)(payload_max_length_ + 1, static_cast(4096)) - : 1; - std::vector peekbuf(to_peek); - ssize_t n = ::recv(s, peekbuf.data(), to_peek, MSG_PEEK); - if (n > 0 && static_cast(n) > payload_max_length_) { - // Indicate failure so connection will be closed. - return false; + // Only peek if payload_max_length is set to a finite value + if (payload_max_length_ > 0 && + payload_max_length_ < (std::numeric_limits::max)()) { + socket_t s = strm.socket(); + if (s != INVALID_SOCKET) { + // Peek to check if there is any pending data + char peekbuf[1]; + ssize_t n = ::recv(s, peekbuf, 1, MSG_PEEK); + if (n > 0) { + // There is data, so read it with payload limit enforcement + auto result = detail::read_content_without_length( + strm, payload_max_length_, out); + if (result == detail::ReadContentResult::PayloadTooLarge) { + res.status = StatusCode::PayloadTooLarge_413; + return false; + } else if (result != detail::ReadContentResult::Success) { + return false; + } + return true; + } } } return true; @@ -6656,7 +6680,8 @@ void ClientImpl::close_socket(Socket &socket) { } bool ClientImpl::read_response_line(Stream &strm, const Request &req, - Response &res) const { + Response &res, + bool skip_100_continue) const { std::array buf{}; detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); @@ -6677,8 +6702,8 @@ bool ClientImpl::read_response_line(Stream &strm, const Request &req, res.status = std::stoi(std::string(m[2])); res.reason = std::string(m[3]); - // Ignore '100 Continue' - while (res.status == StatusCode::Continue_100) { + // Ignore '100 Continue' (only when not using Expect: 100-continue explicitly) + while (skip_100_continue && res.status == StatusCode::Continue_100) { if (!line_reader.getline()) { return false; } // CRLF if (!line_reader.getline()) { return false; } // next response line @@ -7463,7 +7488,8 @@ bool ClientImpl::write_content_with_provider(Stream &strm, } bool ClientImpl::write_request(Stream &strm, Request &req, - bool close_connection, Error &error) { + bool close_connection, Error &error, + bool skip_body) { // Prepare additional headers if (close_connection) { if (!req.has_header("Connection")) { @@ -7582,7 +7608,59 @@ bool ClientImpl::write_request(Stream &strm, Request &req, } } + // After sending request line and headers, wait briefly for an early server + // response (e.g. 4xx) and avoid sending a potentially large request body + // unnecessarily. This workaround is only enabled on Windows because Unix + // platforms surface write errors (EPIPE) earlier; on Windows kernel send + // buffering can accept large writes even when the peer already responded. + // Check the stream first (which covers SSL via `is_readable()`), then + // fall back to select on the socket. Only perform the wait for very large + // request bodies to avoid interfering with normal small requests and + // reduce side-effects. Poll briefly (up to 50ms as default) for an early + // response. Skip this check when using Expect: 100-continue, as the protocol + // handles early responses properly. +#if defined(_WIN32) + if (!skip_body && + req.body.size() > CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD && + req.path.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { + auto start = std::chrono::high_resolution_clock::now(); + + for (;;) { + // Prefer socket-level readiness to avoid SSL_pending() false-positives + // from SSL internals. If the underlying socket is readable, assume an + // early response may be present. + auto sock = strm.socket(); + if (sock != INVALID_SOCKET && detail::select_read(sock, 0, 0) > 0) { + return false; + } + + // Fallback to stream-level check for non-socket streams or when the + // socket isn't reporting readable. Avoid using `is_readable()` for + // SSL, since `SSL_pending()` may report buffered records that do not + // indicate a complete application-level response yet. + if (!is_ssl() && strm.is_readable()) { return false; } + + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed = + std::chrono::duration_cast(now - start) + .count(); + if (elapsed >= CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND) { + break; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } +#endif + // Body + if (skip_body) { return true; } + + return write_request_body(strm, req, error); +} + +bool ClientImpl::write_request_body(Stream &strm, Request &req, + Error &error) { if (req.body.empty()) { return write_content_with_provider(strm, req, error); } @@ -7758,8 +7836,20 @@ void ClientImpl::output_error_log(const Error &err, bool ClientImpl::process_request(Stream &strm, Request &req, Response &res, bool close_connection, Error &error) { - // Send request - if (!write_request(strm, req, close_connection, error)) { return false; } + // Auto-add Expect: 100-continue for large bodies + if (CPPHTTPLIB_EXPECT_100_THRESHOLD > 0 && !req.has_header("Expect")) { + auto body_size = req.body.empty() ? req.content_length_ : req.body.size(); + if (body_size >= CPPHTTPLIB_EXPECT_100_THRESHOLD) { + req.set_header("Expect", "100-continue"); + } + } + + // Check for Expect: 100-continue + auto expect_100_continue = req.get_header_value("Expect") == "100-continue"; + + // Send request (skip body if using Expect: 100-continue) + auto write_request_success = + write_request(strm, req, close_connection, error, expect_100_continue); #ifdef CPPHTTPLIB_OPENSSL_SUPPORT if (is_ssl()) { @@ -7774,14 +7864,48 @@ bool ClientImpl::process_request(Stream &strm, Request &req, } #endif + // Handle Expect: 100-continue with timeout + if (expect_100_continue && CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND > 0) { + time_t sec = CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND / 1000; + time_t usec = (CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND % 1000) * 1000; + auto ret = detail::select_read(strm.socket(), sec, usec); + if (ret <= 0) { + // Timeout or error: send body anyway (server didn't respond in time) + if (!write_request_body(strm, req, error)) { return false; } + expect_100_continue = false; // Switch to normal response handling + } + } + // Receive response and headers - if (!read_response_line(strm, req, res) || + // When using Expect: 100-continue, don't auto-skip `100 Continue` response + if (!read_response_line(strm, req, res, !expect_100_continue) || !detail::read_headers(strm, res.headers)) { - error = Error::Read; + if (write_request_success) { error = Error::Read; } output_error_log(error, &req); return false; } + if (!write_request_success) { return false; } + + // Handle Expect: 100-continue response + if (expect_100_continue) { + if (res.status == StatusCode::Continue_100) { + // Server accepted, send the body + if (!write_request_body(strm, req, error)) { return false; } + + // Read the actual response + res.headers.clear(); + res.body.clear(); + if (!read_response_line(strm, req, res) || + !detail::read_headers(strm, res.headers)) { + error = Error::Read; + output_error_log(error, &req); + return false; + } + } + // If not 100 Continue, server returned an error; proceed with that response + } + // Body if ((res.status != StatusCode::NoContent_204) && req.method != "HEAD" && req.method != "CONNECT") { @@ -9543,7 +9667,7 @@ bool SSLClient::load_certs() { last_openssl_error_ = ERR_get_error(); ret = false; } - } else { + } else if (!ca_cert_store_) { auto loaded = false; #ifdef _WIN32 loaded = @@ -9790,7 +9914,11 @@ bool SSLClient::verify_host_with_common_name(X509 *server_cert) const { bool SSLClient::check_host_name(const char *pattern, size_t pattern_len) const { - if (host_.size() == pattern_len && host_ == pattern) { return true; } + // Exact match (case-insensitive) + if (host_.size() == pattern_len && + detail::case_ignore::equal(host_, std::string(pattern, pattern_len))) { + return true; + } // Wildcard match // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484 @@ -9805,9 +9933,23 @@ bool SSLClient::check_host_name(const char *pattern, auto itr = pattern_components.begin(); for (const auto &h : host_components_) { auto &p = *itr; - if (p != h && p != "*") { - auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' && - !p.compare(0, p.size() - 1, h)); + if (!httplib::detail::case_ignore::equal(p, h) && p != "*") { + bool partial_match = false; + if (!p.empty() && p[p.size() - 1] == '*') { + const auto prefix_length = p.size() - 1; + if (prefix_length == 0) { + partial_match = true; + } else if (h.size() >= prefix_length) { + partial_match = + std::equal(p.begin(), + p.begin() + static_cast( + prefix_length), + h.begin(), [](const char ca, const char cb) { + return httplib::detail::case_ignore::to_lower(ca) == + httplib::detail::case_ignore::to_lower(cb); + }); + } + } if (!partial_match) { return false; } } ++itr; diff --git a/vendor/cpp-httplib/httplib.h b/vendor/cpp-httplib/httplib.h index 613020d12..7c7790f41 100644 --- a/vendor/cpp-httplib/httplib.h +++ b/vendor/cpp-httplib/httplib.h @@ -8,8 +8,8 @@ #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.30.1" -#define CPPHTTPLIB_VERSION_NUM "0x001E01" +#define CPPHTTPLIB_VERSION "0.30.2" +#define CPPHTTPLIB_VERSION_NUM "0x001E02" /* * Platform compatibility check @@ -98,6 +98,22 @@ #define CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND 0 #endif +#ifndef CPPHTTPLIB_EXPECT_100_THRESHOLD +#define CPPHTTPLIB_EXPECT_100_THRESHOLD 1024 +#endif + +#ifndef CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND +#define CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND 1000 +#endif + +#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD +#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD (1024 * 1024) +#endif + +#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND +#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND 50 +#endif + #ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND #define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0 #endif @@ -286,8 +302,10 @@ using socket_t = int; #include #include #include +#include #include #include +#include #include #include #include @@ -305,6 +323,7 @@ using socket_t = int; #include #include #include +#include #include #include #include @@ -494,6 +513,69 @@ private: bool execute_on_destruction; }; +// Simple from_chars implementation for integer and double types (C++17 +// substitute) +template struct from_chars_result { + const char *ptr; + std::errc ec; +}; + +template +inline from_chars_result from_chars(const char *first, const char *last, + T &value, int base = 10) { + value = 0; + const char *p = first; + bool negative = false; + + if (p != last && *p == '-') { + negative = true; + ++p; + } + if (p == last) { return {first, std::errc::invalid_argument}; } + + T result = 0; + for (; p != last; ++p) { + char c = *p; + int digit = -1; + if ('0' <= c && c <= '9') { + digit = c - '0'; + } else if ('a' <= c && c <= 'z') { + digit = c - 'a' + 10; + } else if ('A' <= c && c <= 'Z') { + digit = c - 'A' + 10; + } else { + break; + } + + if (digit < 0 || digit >= base) { break; } + if (result > ((std::numeric_limits::max)() - digit) / base) { + return {p, std::errc::result_out_of_range}; + } + result = result * base + digit; + } + + if (p == first || (negative && p == first + 1)) { + return {first, std::errc::invalid_argument}; + } + + value = negative ? -result : result; + return {p, std::errc{}}; +} + +// from_chars for double (simple wrapper for strtod) +inline from_chars_result from_chars(const char *first, const char *last, + double &value) { + std::string s(first, last); + char *endptr = nullptr; + errno = 0; + value = std::strtod(s.c_str(), &endptr); + if (endptr == s.c_str()) { return {first, std::errc::invalid_argument}; } + if (errno == ERANGE) { + return {first + (endptr - s.c_str()), std::errc::result_out_of_range}; + } + return {first + (endptr - s.c_str()), std::errc{}}; +} + } // namespace detail enum SSLVerifierResponse { @@ -1848,10 +1930,11 @@ private: Result send_(Request &&req); socket_t create_client_socket(Error &error) const; - bool read_response_line(Stream &strm, const Request &req, - Response &res) const; + bool read_response_line(Stream &strm, const Request &req, Response &res, + bool skip_100_continue = true) const; bool write_request(Stream &strm, Request &req, bool close_connection, - Error &error); + Error &error, bool skip_body = false); + bool write_request_body(Stream &strm, Request &req, Error &error); void prepare_default_headers(Request &r, bool for_stream, const std::string &ct); bool redirect(Request &req, Response &res, Error &error); @@ -3243,10 +3326,11 @@ private: msg.id = value; } else if (field == "retry") { // Parse retry interval in milliseconds - try { - retry_ms = std::stoi(value); - } catch (...) { - // Invalid retry value, ignore + { + int v = 0; + auto res = + detail::from_chars(value.data(), value.data() + value.size(), v); + if (res.ec == std::errc{}) { retry_ms = v; } } } // Unknown fields are ignored per SSE spec From 11fb327bf3846f390e3af1cbe929da9287c618da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 5 Feb 2026 02:27:38 +0100 Subject: [PATCH 5/7] vendor : add missing llama_add_compile_flags (#19322) * add missing llama_add_compile_flags * disable all warnings for ssl, crypto and fipsmodule --- vendor/cpp-httplib/CMakeLists.txt | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/vendor/cpp-httplib/CMakeLists.txt b/vendor/cpp-httplib/CMakeLists.txt index 3d938d9f3..18974d64c 100644 --- a/vendor/cpp-httplib/CMakeLists.txt +++ b/vendor/cpp-httplib/CMakeLists.txt @@ -3,9 +3,14 @@ license_add_file("cpp-httplib" "LICENSE") find_package(Threads REQUIRED) +llama_add_compile_flags() + add_library(${TARGET} STATIC httplib.cpp httplib.h) -if (NOT MSVC) - # disable warnings in 3rd party code + +# disable warnings in 3rd party code +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${TARGET} PRIVATE /w) +else() target_compile_options(${TARGET} PRIVATE -w) endif() @@ -146,6 +151,23 @@ elseif (LLAMA_OPENSSL) endif() endif() +# disable warnings in 3rd party code +if(LLAMA_BUILD_BORINGSSL OR LLAMA_BUILD_LIBRESSL) + if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(ssl PRIVATE /w) + target_compile_options(crypto PRIVATE /w) + if(LLAMA_BUILD_BORINGSSL) + target_compile_options(fipsmodule PRIVATE /w) + endif() + else() + target_compile_options(ssl PRIVATE -w) + target_compile_options(crypto PRIVATE -w) + if(LLAMA_BUILD_BORINGSSL) + target_compile_options(fipsmodule PRIVATE -w) + endif() + endif() +endif() + if (CPPHTTPLIB_OPENSSL_SUPPORT) target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) # used in server.cpp if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin") From af252d0758b0d3ed67c57bad2a735abf53d21c55 Mon Sep 17 00:00:00 2001 From: will-lms Date: Thu, 5 Feb 2026 01:05:09 -0500 Subject: [PATCH 6/7] metal : add missing includes (#19348) --- ggml/src/ggml-metal/ggml-metal.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp index a616dcdb4..1c705362f 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp @@ -7,6 +7,9 @@ #include "ggml-metal-context.h" #include "ggml-metal-ops.h" +#include +#include + #define GGML_METAL_NAME "MTL" #define GGML_METAL_MAX_DEVICES 16 From c342c3b93de358a4571941b41c35dc5ba2081145 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Thu, 5 Feb 2026 01:38:59 -0600 Subject: [PATCH 7/7] vulkan: fix non-contig rope (#19299) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 32 ++++-- .../ggml-vulkan/vulkan-shaders/rms_norm.comp | 5 +- .../vulkan-shaders/rope_funcs.glsl | 99 +++++++------------ .../vulkan-shaders/rope_multi.comp | 11 ++- .../ggml-vulkan/vulkan-shaders/rope_neox.comp | 11 ++- .../ggml-vulkan/vulkan-shaders/rope_norm.comp | 11 ++- .../vulkan-shaders/rope_params.glsl | 15 ++- .../vulkan-shaders/rope_vision.comp | 11 ++- 8 files changed, 100 insertions(+), 95 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index cb7fa2c9c..af57685a3 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -1263,25 +1263,30 @@ struct vk_op_diag_mask_push_constants { struct vk_op_rope_push_constants { uint32_t rope_mode; - uint32_t ncols; uint32_t nrows; uint32_t n_dims; float freq_scale; - uint32_t p_delta_rows; float freq_base; float ext_factor; float attn_factor; float corr_dims[2]; float theta_scale; uint32_t has_ff; - uint32_t ne02; - uint32_t s1; - uint32_t s2; int32_t sections[4]; uint32_t is_imrope; uint32_t is_back; uint32_t set_rows_stride; + uint32_t ne00; + uint32_t ne01; + uint32_t ne02; + uint32_t nb01; + uint32_t nb02; + uint32_t nb03; + uint32_t nb11; + uint32_t nb12; + uint32_t nb13; }; +static_assert(sizeof(vk_op_rope_push_constants) <= 128, "sizeof(vk_op_rope_push_constants) must be <= 128"); // For fused rms_norm+mul+rope(+view+set_rows) struct vk_op_rms_norm_mul_rope_push_constants { @@ -10405,12 +10410,22 @@ static vk_op_rope_push_constants ggml_vk_make_rope_constants(const ggml_tensor * uint32_t nb01 = src0->nb[1] / ggml_type_size(src0->type); uint32_t nb02 = src0->nb[2] / ggml_type_size(src0->type); + uint32_t nb03 = src0->nb[3] / ggml_type_size(src0->type); + + uint32_t nb11 = dst->nb[1] / ggml_type_size(dst->type); + uint32_t nb12 = dst->nb[2] / ggml_type_size(dst->type); + uint32_t nb13 = dst->nb[3] / ggml_type_size(dst->type); vk_op_rope_push_constants rope { - (uint32_t)mode, (uint32_t)src0->ne[0], (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1], - freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale, - has_ff, (uint32_t)src0->ne[2], nb01, nb02, + (uint32_t)mode, (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale, + freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale, has_ff, { sections[0], sections[1], sections[2], sections[3] }, is_imrope, backprop, set_rows_stride, + + (uint32_t)src0->ne[0], + (uint32_t)src0->ne[1], + (uint32_t)src0->ne[2], + nb01, nb02, nb03, + nb11, nb12, nb13, }; return rope; @@ -14798,6 +14813,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_REPEAT_BACK: return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32; case GGML_OP_ROPE: + return ggml_is_contiguous_rows(op) && ggml_is_contiguous_rows(op->src[0]); case GGML_OP_ROPE_BACK: case GGML_OP_NONE: case GGML_OP_RESHAPE: diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp index 9d6d36654..55b89f19a 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp @@ -112,12 +112,11 @@ void rms_norm(uint num_iters) { #if RMS_NORM_ROPE_FUSION barrier(); rope_params rp = p.rope; - uint rope_row = (samp*nchannels + channel)*nrows + row; for (uint t = 2*tid; t < ncols; t += 2*BLOCK_SIZE) { if (rp.rope_mode == GGML_ROPE_TYPE_NEOX) { - rope_neox(t, rope_row, rp); + rope_neox(t, row, channel, samp, rp); } else if (rp.rope_mode == GGML_ROPE_TYPE_NORMAL) { - rope_norm(t, rope_row, rp); + rope_norm(t, row, channel, samp, rp); } } #endif diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl b/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl index aacec9846..2e5345990 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_funcs.glsl @@ -4,12 +4,12 @@ float rope_yarn_ramp(const float low, const float high, const uint i0) { return 1.0f - min(1.0f, max(0.0f, y)); } -uint rope_a_coord(const uint i0, const uint i01, const uint i02, rope_params p) { +uint rope_a_coord(const uint i0, const uint i01, const uint i02, const uint i03, rope_params p) { #if RMS_NORM_ROPE_FUSION // Per-row offset in shared memory const uint ix = i0; #else - const uint ix = i02*p.nb02 + i01*p.nb01 + i0; + const uint ix = i03*p.nb03 + i02*p.nb02 + i01*p.nb01 + i0; #endif return ix; } @@ -34,26 +34,19 @@ void rope_yarn(const float theta_extrap, const uint i0, out float cos_theta, out sin_theta = sin(theta) * mscale; } -void rope_norm(const uint i0, const uint i1, rope_params p) { - uint ne0 = p.ncols; - uint ne1 = p.p_delta_rows; - - if (i0 >= ne0) { +void rope_norm(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) { + if (i0 >= p.ne00) { return; } - // i1 is actually i2*nb2+i1, but the rows are contiguous - const uint i01 = i1 % ne1; - const uint i02 = i1 / ne1; - - uint idst = i1*ne0 + i0; - const uint ix = rope_a_coord(i0, i01, i02, p); + uint idst = i0 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13; + const uint ix = rope_a_coord(i0, i1, i2, i3, p); // Fusion optimization: ROPE + VIEW + SET_ROWS. // The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i. if (p.set_rows_stride != 0) { - idst = i01*ne0 + i0; - idst += rope_data_i[i02].x * p.set_rows_stride; + idst = i1*p.nb11 + i0; + idst += rope_data_i[i2].x * p.set_rows_stride; } if (i0 >= p.n_dims) { @@ -63,7 +56,7 @@ void rope_norm(const uint i0, const uint i1, rope_params p) { return; } - const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f); + const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f); const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f; @@ -77,25 +70,19 @@ void rope_norm(const uint i0, const uint i1, rope_params p) { rope_data_d[idst + 1] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta); } -void rope_neox(const uint i0, const uint i1, rope_params p) { - uint ne0 = p.ncols; - uint ne1 = p.p_delta_rows; - - if (i0 >= ne0) { +void rope_neox(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) { + if (i0 >= p.ne00) { return; } - const uint i01 = i1 % ne1; - const uint i02 = i1 / ne1; - - uint idst = i1*ne0 + i0/2; - const uint ix = rope_a_coord(i0/2, i01, i02, p); + uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13; + const uint ix = rope_a_coord(i0/2, i1, i2, i3, p); // Fusion optimization: ROPE + VIEW + SET_ROWS. // The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i. if (p.set_rows_stride != 0) { - idst = i01*ne0 + i0/2; - idst += rope_data_i[i02].x * p.set_rows_stride; + idst = i1*p.nb11 + i0/2; + idst += rope_data_i[i2].x * p.set_rows_stride; } if (i0 >= p.n_dims) { @@ -105,7 +92,7 @@ void rope_neox(const uint i0, const uint i1, rope_params p) { return; } - const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f); + const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f); const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f; @@ -120,26 +107,19 @@ void rope_neox(const uint i0, const uint i1, rope_params p) { } -void rope_multi(const uint i0, const uint i1, rope_params p) { - uint ne0 = p.ncols; - uint ne1 = p.p_delta_rows; - uint ne2 = p.ne02; - - if (i0 >= ne0) { +void rope_multi(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) { + if (i0 >= p.ne00) { return; } - const uint i01 = i1 % ne1; - const uint i02 = i1 / ne1; - - uint idst = i1*ne0 + i0/2; - const uint ix = rope_a_coord(i0/2, i01, i02, p); + uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13; + const uint ix = rope_a_coord(i0/2, i1, i2, i3, p); // Fusion optimization: ROPE + VIEW + SET_ROWS. // The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i. if (p.set_rows_stride != 0) { - idst = i01*ne0 + i0/2; - idst += rope_data_i[i02].x * p.set_rows_stride; + idst = i1*p.nb11 + i0/2; + idst += rope_data_i[i2].x * p.set_rows_stride; } if (i0 >= p.n_dims) { @@ -156,26 +136,26 @@ void rope_multi(const uint i0, const uint i1, rope_params p) { float theta_base = 0.0; if (p.is_imrope != 0) { if (sector % 3 == 1 && sector < 3 * p.sections[1]) { - theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f); } else if (sector % 3 == 2 && sector < 3 * p.sections[2]) { - theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f); } else if (sector % 3 == 0 && sector < 3 * p.sections[0]) { - theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f); } else { - theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f); } } else { if (sector < p.sections[0]) { - theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f); } else if (sector >= p.sections[0] && sector < sec_w) { - theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f); } else if (sector >= sec_w && sector < sec_w + p.sections[2]) { - theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f); } else if (sector >= sec_w + p.sections[2]) { - theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f); + theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f); } } @@ -191,20 +171,13 @@ void rope_multi(const uint i0, const uint i1, rope_params p) { rope_data_d[idst + p.n_dims/2] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta); } -void rope_vision(const uint i0, const uint i1, rope_params p) { - uint ne0 = p.ncols; - uint ne1 = p.p_delta_rows; - uint ne2 = p.ne02; - - if (i0 >= ne0) { +void rope_vision(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) { + if (i0 >= p.ne00) { return; } - const uint i01 = i1 % ne1; - const uint i02 = i1 / ne1; - - const uint idst = i1*ne0 + i0/2; - const uint ix = rope_a_coord(i0/2, i01, i02, p); + const uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13; + const uint ix = rope_a_coord(i0/2, i1, i2, i3, p); const int sect_dims = p.sections[0] + p.sections[1]; const int sec_w = p.sections[1] + p.sections[0]; @@ -213,11 +186,11 @@ void rope_vision(const uint i0, const uint i1, rope_params p) { float theta_base = 0.0; if (sector < p.sections[0]) { const uint p0 = sector; - theta_base = rope_data_pos[i02]*pow(p.theta_scale, p0); + theta_base = rope_data_pos[i2]*pow(p.theta_scale, p0); } else if (sector >= p.sections[0] && sector < sec_w) { const uint p0 = sector - p.sections[0]; - theta_base = rope_data_pos[i02 + ne2]*pow(p.theta_scale, p0); + theta_base = rope_data_pos[i2 + p.ne02]*pow(p.theta_scale, p0); } const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp index f7587468a..1528fbeea 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp @@ -5,10 +5,13 @@ void main() { const uint i0 = 2*gl_GlobalInvocationID.y; - // i1 is actually i2*nb2+i1, but the rows are contiguous - const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; - if (i1 >= pc.nrows) { + const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; + if (row >= pc.nrows) { return; } - rope_multi(i0, i1, pc); + const uint i3 = row / (pc.ne01*pc.ne02); + const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01; + const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01); + + rope_multi(i0, i1, i2, i3, pc); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp index acb8ed781..ad0896095 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp @@ -5,10 +5,13 @@ void main() { const uint i0 = 2*gl_GlobalInvocationID.y; - // i1 is actually i2*nb2+i1, but the rows are contiguous - const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; - if (i1 >= pc.nrows) { + const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; + if (row >= pc.nrows) { return; } - rope_neox(i0, i1, pc); + const uint i3 = row / (pc.ne01*pc.ne02); + const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01; + const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01); + + rope_neox(i0, i1, i2, i3, pc); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp index 0033cdb22..11220817d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp @@ -5,10 +5,13 @@ void main() { const uint i0 = 2*gl_GlobalInvocationID.y; - // i1 is actually i2*nb2+i1, but the rows are contiguous - const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; - if (i1 >= pc.nrows) { + const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; + if (row >= pc.nrows) { return; } - rope_norm(i0, i1, pc); + const uint i3 = row / (pc.ne01*pc.ne02); + const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01; + const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01); + + rope_norm(i0, i1, i2, i3, pc); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl b/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl index 939cf3c51..ec6ceaca9 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_params.glsl @@ -5,24 +5,29 @@ struct rope_params { uint rope_mode; - uint ncols; uint nrows; uint n_dims; float freq_scale; - uint p_delta_rows; float freq_base; float ext_factor; float attn_factor; float corr_dims[2]; float theta_scale; uint has_ff; - uint ne02; - uint nb01; - uint nb02; int sections[4]; uint is_imrope; uint is_back; uint set_rows_stride; + + uint ne00; + uint ne01; + uint ne02; + uint nb01; + uint nb02; + uint nb03; + uint nb11; + uint nb12; + uint nb13; }; #endif // !defined(GGML_ROPE_PARAMS) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp index d93800b5e..ca71efb2f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp @@ -5,10 +5,13 @@ void main() { const uint i0 = 2*gl_GlobalInvocationID.y; - // i1 is actually i2*nb2+i1, but the rows are contiguous - const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; - if (i1 >= pc.nrows) { + const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z; + if (row >= pc.nrows) { return; } - rope_vision(i0, i1, pc); + const uint i3 = row / (pc.ne01*pc.ne02); + const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01; + const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01); + + rope_vision(i0, i1, i2, i3, pc); }