mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 17:22:04 +00:00
test-backend-ops: allow loading tests from file and parsing model operators into file (#19896)
* tests: allow loading test-backend-ops tests from json * add error threshold based on op * add error when file cannot be read * add graph operator json extraction tool * add nb parameter for non-contiguous input tensors * fix view check * only use view if non-contiguous/permuted, use C++ random instead of rand() * replace internal API calls with public llama_graph_reserve call * reduce test description length * fix nb[0] not getting set for view * add name to tests * fix inplace error * use text file instead of json * move llama_graph_reserve function to new llama-ext header, move export-graph-ops to tests/ * fix missing declaration * use pragma once * fix indent * fix Windows build
This commit is contained in:
parent
6de1bc631d
commit
128142fe7d
7 changed files with 529 additions and 14 deletions
|
|
@ -7,6 +7,7 @@
|
|||
#include "llama-memory.h"
|
||||
#include "llama-mmap.h"
|
||||
#include "llama-model.h"
|
||||
#include "llama-ext.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
|
|
@ -3129,6 +3130,19 @@ uint32_t llama_get_sampled_probs_count_ith(llama_context * ctx, int32_t i) {
|
|||
return static_cast<uint32_t>(ctx->get_sampled_probs_count(i));
|
||||
}
|
||||
|
||||
struct ggml_cgraph * llama_graph_reserve(
|
||||
struct llama_context * ctx,
|
||||
uint32_t n_tokens,
|
||||
uint32_t n_seqs,
|
||||
uint32_t n_outputs) {
|
||||
auto * memory = ctx->get_memory();
|
||||
llama_memory_context_ptr mctx;
|
||||
if (memory) {
|
||||
mctx = memory->init_full();
|
||||
}
|
||||
return ctx->graph_reserve(n_tokens, n_seqs, n_outputs, mctx.get());
|
||||
}
|
||||
|
||||
// llama adapter API
|
||||
|
||||
int32_t llama_set_adapters_lora(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue