test-backend-ops: allow loading tests from file and parsing model operators into file (#19896)

* tests: allow loading test-backend-ops tests from json * add error threshold based on op * add error when file cannot be read * add graph operator json extraction tool * add nb parameter for non-contiguous input tensors * fix view check * only use view if non-contiguous/permuted, use C++ random instead of rand() * replace internal API calls with public llama_graph_reserve call * reduce test description length * fix nb[0] not getting set for view * add name to tests * fix inplace error * use text file instead of json * move llama_graph_reserve function to new llama-ext header, move export-graph-ops to tests/ * fix missing declaration * use pragma once * fix indent * fix Windows build
2026-05-07 17:22:04 +00:00 · 2026-03-12 13:26:00 +01:00 · 2026-03-12 13:26:00 +01:00 · 128142fe7d
commit 128142fe7d
parent 6de1bc631d
7 changed files with 529 additions and 14 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@ -7,6 +7,7 @@
 #include "llama-memory.h"
 #include "llama-mmap.h"
 #include "llama-model.h"
+#include "llama-ext.h"

 #include <cinttypes>
 #include <cmath>
@ -3129,6 +3130,19 @@ uint32_t llama_get_sampled_probs_count_ith(llama_context * ctx, int32_t i) {
    return static_cast<uint32_t>(ctx->get_sampled_probs_count(i));
 }

+struct ggml_cgraph * llama_graph_reserve(
+        struct llama_context * ctx,
+        uint32_t n_tokens,
+        uint32_t n_seqs,
+        uint32_t n_outputs) {
+    auto * memory = ctx->get_memory();
+    llama_memory_context_ptr mctx;
+    if (memory) {
+        mctx = memory->init_full();
+    }
+    return ctx->graph_reserve(n_tokens, n_seqs, n_outputs, mctx.get());
+}
+
 // llama adapter API

 int32_t llama_set_adapters_lora(