merged, try to fix metal build

2025-09-10 17:14:36 +00:00 · 2024-03-14 11:15:50 +08:00 · 2024-03-14 11:15:50 +08:00 · ec5dea14d7
commit ec5dea14d7
parent 9f102b9db6 19885d205e
29 changed files with 1541 additions and 967 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -45,7 +45,8 @@ struct gpt_params {
    int32_t n_threads_batch_draft = -1;
    int32_t n_predict             = -1;    // new tokens to predict
    int32_t n_ctx                 = 512;   // context size
-    int32_t n_batch               = 512;   // batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_batch               = 2048;  // logical batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_ubatch              = 512;   // physical batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_keep                = 0;     // number of tokens to keep from initial prompt
    int32_t n_draft               = 5;     // number of tokens to draft during speculative decoding
    int32_t n_chunks              = -1;    // max number of chunks to process (-1 = unlimited)