llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)

2025-09-11 09:34:37 +00:00 · 2025-07-31 09:11:34 -07:00 · 2025-07-31 09:11:34 -07:00 · d6818d06a6
commit d6818d06a6
parent e08a98826b
5 changed files with 39 additions and 21 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -359,6 +359,7 @@ struct common_params {
    bool warmup            = true;  // warmup run
    bool check_tensors     = false; // validate tensor data
    bool no_op_offload     = false; // globally disable offload host tensor operations to device
+    bool no_extra_bufts    = false; // disable extra buffer types (used for weight repacking)

    bool single_turn       = false; // single turn chat conversation