diff --git a/common/common.cpp b/common/common.cpp index 2b80284e..a656aa00 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1012,7 +1012,7 @@ static bool assign_layers_to_device( }; // get valid factors - std::vector valid_k = cparams.n_cycles > 0 ? {(int)cparams.n_cycles} : find_factors(n_layer); + std::vector valid_k = cparams.n_cycles > 0 ? std::vector{(int)cparams.n_cycles} : find_factors(n_layer); // assign devices to sets M1, M2, M3, and M4 // M1: devices running on macOS without Metal, and with insufficient memory diff --git a/src/llama.cpp b/src/llama.cpp index 265435fd..88e13e5a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2574,6 +2574,7 @@ struct llama_cparams { uint32_t rank; uint32_t n_layer_window[32]; bool prefetch; + bool force; uint32_t n_ctx; // context size used during inference uint32_t n_batch; uint32_t n_ubatch; @@ -20343,6 +20344,7 @@ struct llama_context * llama_new_context_with_model( ctx->next_node_ip = params.next_node_ip; ctx->cparams.n_world = params.n_world; ctx->cparams.rank = params.rank; + ctx->cparams.force = params.force; return ctx; }