diff --git a/common/common.cpp b/common/common.cpp index 2b80284e..a656aa00 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1012,7 +1012,7 @@ static bool assign_layers_to_device( }; // get valid factors - std::vector valid_k = cparams.n_cycles > 0 ? {(int)cparams.n_cycles} : find_factors(n_layer); + std::vector valid_k = cparams.n_cycles > 0 ? std::vector{(int)cparams.n_cycles} : find_factors(n_layer); // assign devices to sets M1, M2, M3, and M4 // M1: devices running on macOS without Metal, and with insufficient memory diff --git a/src/llama.cpp b/src/llama.cpp index 265435fd..88e13e5a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2574,6 +2574,7 @@ struct llama_cparams { uint32_t rank; uint32_t n_layer_window[32]; bool prefetch; + bool force; uint32_t n_ctx; // context size used during inference uint32_t n_batch; uint32_t n_ubatch; @@ -20343,6 +20344,7 @@ struct llama_context * llama_new_context_with_model( ctx->next_node_ip = params.next_node_ip; ctx->cparams.n_world = params.n_world; ctx->cparams.rank = params.rank; + ctx->cparams.force = params.force; return ctx; }