llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)

This commit is contained in:
Diego Devesa 2025-07-31 09:11:34 -07:00 committed by GitHub
parent e08a98826b
commit d6818d06a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 39 additions and 21 deletions

View file

@@ -359,6 +359,7 @@ struct common_params {
bool warmup = true; // warmup run
bool check_tensors = false; // validate tensor data
bool no_op_offload = false; // globally disable offload host tensor operations to device
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
bool single_turn = false; // single turn chat conversation