Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/nix/package.nix
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	requirements.txt
#	scripts/LlamaConfig.cmake.in
commit 52f9911240
Author: Concedo
Date:   2024-05-21 19:05:52 +08:00

31 changed files with 10838 additions and 5366 deletions

```diff
@@ -103,7 +103,6 @@ struct slot_params {
     bool stream       = true;
     bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt

-    uint32_t seed      = -1; // RNG seed
     int32_t  n_keep    =  0; // number of tokens to keep from initial prompt
     int32_t  n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
     int32_t  n_predict = -1; // new tokens to predict
```
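
This hunk pairs with the one below: upstream moved the RNG seed out of `slot_params` and into the per-slot sampling parameters, so it is configured and reported through `slot.sparams` instead. A minimal sketch of the resulting split; struct and field names other than `seed` are illustrative stand-ins, not the exact llama.cpp definitions:

```cpp
#include <cstdint>

// Sketch only: the seed now travels with the sampling parameters.
// llama_sampling_params here is a stand-in for the real struct in
// common/sampling.h; only the seed field matters for this diff.
struct llama_sampling_params {
    uint32_t seed = -1;   // RNG seed, previously duplicated in slot_params
    float    temp = 0.8f; // illustrative sampling field
};

struct slot_params {
    bool    stream       = true;
    bool    cache_prompt = false;
    int32_t n_keep       = 0;
    int32_t n_predict    = -1;
    // no seed field anymore; read it from the slot's sparams instead
};
```
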
```diff
@@ -1265,7 +1264,7 @@ struct server_context {
         {"n_ctx", slot.n_ctx},
         {"n_predict", slot.n_predict},
         {"model", params.model_alias},
-        {"seed", slot.params.seed},
+        {"seed", slot.sparams.seed},
         {"temperature", slot.sparams.temp},
         {"dynatemp_range", slot.sparams.dynatemp_range},
         {"dynatemp_exponent", slot.sparams.dynatemp_exponent},
```
```diff
@@ -1983,8 +1982,7 @@ struct server_context {
         slot.state = SLOT_STATE_PROCESSING;
         slot.command = SLOT_COMMAND_NONE;
         slot.release();
-        slot.print_timings();
-        send_final_response(slot);
+        send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
         continue;
     }
 } else {
```
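
The last hunk changes how oversized prompts are handled: instead of sending timings plus a normal final response (which looked like a successful, empty completion), the server now reports an explicit error. A self-contained sketch of the flow; `send_error` and `ERROR_TYPE_SERVER` are the identifiers from the hunk itself, while the slot type, enum values, and the guard condition are assumptions made to keep the example compilable:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical, reduced model of the server's oversized-prompt guard.
enum slot_state   { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING };
enum slot_command { SLOT_COMMAND_NONE, SLOT_COMMAND_LOAD_PROMPT };
enum error_type   { ERROR_TYPE_SERVER };

struct server_slot {
    slot_state   state   = SLOT_STATE_IDLE;
    slot_command command = SLOT_COMMAND_LOAD_PROMPT;
    int32_t n_prompt_tokens = 0;
    void release() { state = SLOT_STATE_IDLE; }
};

// Stand-in for the server's error path; the real one builds a JSON error.
static void send_error(server_slot &, const char * msg, error_type) {
    std::fprintf(stderr, "error: %s\n", msg);
}

int main() {
    const int32_t n_ubatch = 512; // physical batch size (assumed value)
    server_slot slot;
    slot.n_prompt_tokens = 2048;

    if (slot.n_prompt_tokens > n_ubatch) {
        slot.release();
        // after this commit: an explicit error instead of an empty
        // "final response" with timings
        send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
    }
}
```
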