From 6b4e4bd582e457a47ee7cd498058495f47fee228 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 27 May 2026 14:52:47 +0300 Subject: [PATCH] common : fix env names to all have LLAMA_ARG_ prefix (#23778) --- .github/workflows/build-3rd-party.yml | 6 +++--- .github/workflows/build-android.yml | 6 +++--- .github/workflows/build-apple.yml | 6 +++--- .github/workflows/build-cann.yml | 6 +++--- .github/workflows/build-hip.yml | 6 +++--- .github/workflows/build-ibm.yml | 6 +++--- .github/workflows/build-msys.yml | 6 +++--- .github/workflows/build-opencl.yml | 6 +++--- .github/workflows/build-openvino.yml | 6 +++--- .github/workflows/build-riscv.yml | 6 +++--- .github/workflows/build-rpc.yml | 6 +++--- .github/workflows/build-sanitize.yml | 6 +++--- .github/workflows/build-self-hosted.yml | 6 +++--- .github/workflows/build-sycl.yml | 6 +++--- .github/workflows/build-vulkan.yml | 6 +++--- .github/workflows/build-webgpu.yml | 6 +++--- .github/workflows/build.yml | 6 +++--- .github/workflows/hip-quality-check.yml | 6 +++--- .github/workflows/server-sanitize.yml | 8 ++++---- .github/workflows/server-self-hosted.yml | 8 ++++---- .github/workflows/server.yml | 8 ++++---- .github/workflows/ui-self-hosted.yml | 8 ++++---- .github/workflows/ui.yml | 8 ++++---- ci/run.sh | 4 ++-- common/arg.cpp | 10 +++++----- docs/autoparser.md | 2 +- tools/cli/README.md | 11 ++++++----- tools/completion/README.md | 8 ++++---- tools/server/README.md | 11 ++++++----- 29 files changed, 98 insertions(+), 96 deletions(-) diff --git a/.github/workflows/build-3rd-party.yml b/.github/workflows/build-3rd-party.yml index 642d97864..82e53dbaf 100644 --- a/.github/workflows/build-3rd-party.yml +++ b/.github/workflows/build-3rd-party.yml @@ -22,9 +22,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ubuntu-24-llguidance: diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 99c3659b9..713ccdc7f 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -27,9 +27,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: android: diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index 3cc384850..d2c99d0d5 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -32,9 +32,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: macos-latest-arm64: diff --git a/.github/workflows/build-cann.yml b/.github/workflows/build-cann.yml index b6acbbf9e..6d76ed499 100644 --- a/.github/workflows/build-cann.yml +++ b/.github/workflows/build-cann.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: # TODO: this build is disabled to save Github Actions resources (https://github.com/ggml-org/llama.cpp/pull/23705) diff --git a/.github/workflows/build-hip.yml b/.github/workflows/build-hip.yml index edafdb939..ff8283ae7 100644 --- a/.github/workflows/build-hip.yml +++ b/.github/workflows/build-hip.yml @@ -31,9 +31,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: diff --git a/.github/workflows/build-ibm.yml b/.github/workflows/build-ibm.yml index 1742894b9..d2e4f3cda 100644 --- a/.github/workflows/build-ibm.yml +++ b/.github/workflows/build-ibm.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml index 57cec7c16..8214f2b8d 100644 --- a/.github/workflows/build-msys.yml +++ b/.github/workflows/build-msys.yml @@ -15,9 +15,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: windows-msys2: diff --git a/.github/workflows/build-opencl.yml b/.github/workflows/build-opencl.yml index 0f8cbe0fc..fccb06b88 100644 --- a/.github/workflows/build-opencl.yml +++ b/.github/workflows/build-opencl.yml @@ -30,9 +30,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: diff --git a/.github/workflows/build-openvino.yml b/.github/workflows/build-openvino.yml index fdf96356e..47e04869c 100644 --- a/.github/workflows/build-openvino.yml +++ b/.github/workflows/build-openvino.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ubuntu-24-openvino: diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml index d3c8f8ed3..c12aaa61f 100644 --- a/.github/workflows/build-riscv.yml +++ b/.github/workflows/build-riscv.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ubuntu-cpu-riscv64-native: diff --git a/.github/workflows/build-rpc.yml b/.github/workflows/build-rpc.yml index 3c52d7e96..c1ff98770 100644 --- a/.github/workflows/build-rpc.yml +++ b/.github/workflows/build-rpc.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: diff --git a/.github/workflows/build-sanitize.yml b/.github/workflows/build-sanitize.yml index 916560b84..29f7a2922 100644 --- a/.github/workflows/build-sanitize.yml +++ b/.github/workflows/build-sanitize.yml @@ -22,9 +22,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ctest: diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index 89ca03f27..d40de48d9 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -50,9 +50,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ggml-ci-nvidia-cuda: diff --git a/.github/workflows/build-sycl.yml b/.github/workflows/build-sycl.yml index f38533f3b..b0697f2f2 100644 --- a/.github/workflows/build-sycl.yml +++ b/.github/workflows/build-sycl.yml @@ -29,9 +29,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index eb6d02680..b44f08c6e 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -31,9 +31,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ubuntu-24-vulkan-llvmpipe: diff --git a/.github/workflows/build-webgpu.yml b/.github/workflows/build-webgpu.yml index ff6690ffd..c7056358c 100644 --- a/.github/workflows/build-webgpu.yml +++ b/.github/workflows/build-webgpu.yml @@ -30,9 +30,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: macos-latest-webgpu: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fbf64a3f9..69f3947a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,9 +52,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: build-cmake-pkg: diff --git a/.github/workflows/hip-quality-check.yml b/.github/workflows/hip-quality-check.yml index d00d30ed6..5d03b1772 100644 --- a/.github/workflows/hip-quality-check.yml +++ b/.github/workflows/hip-quality-check.yml @@ -28,9 +28,9 @@ concurrency: env: GGML_NLOOP: 3 GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: ubuntu-22-hip-quality-check: diff --git a/.github/workflows/server-sanitize.yml b/.github/workflows/server-sanitize.yml index 46e474672..c0817cbba 100644 --- a/.github/workflows/server-sanitize.yml +++ b/.github/workflows/server-sanitize.yml @@ -26,10 +26,10 @@ on: ] env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_VERBOSITY: 10 concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} diff --git a/.github/workflows/server-self-hosted.yml b/.github/workflows/server-self-hosted.yml index 91e065394..b9baede58 100644 --- a/.github/workflows/server-self-hosted.yml +++ b/.github/workflows/server-self-hosted.yml @@ -29,10 +29,10 @@ on: ] env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_VERBOSITY: 10 concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index dd3ff88f1..b30e33370 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -44,10 +44,10 @@ on: ] env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_VERBOSITY: 10 concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} diff --git a/.github/workflows/ui-self-hosted.yml b/.github/workflows/ui-self-hosted.yml index 64a4519c6..8a97a8284 100644 --- a/.github/workflows/ui-self-hosted.yml +++ b/.github/workflows/ui-self-hosted.yml @@ -30,10 +30,10 @@ on: ] env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_VERBOSITY: 10 concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} diff --git a/.github/workflows/ui.yml b/.github/workflows/ui.yml index b5e745718..b3712e450 100644 --- a/.github/workflows/ui.yml +++ b/.github/workflows/ui.yml @@ -26,10 +26,10 @@ on: ] env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + LLAMA_ARG_LOG_VERBOSITY: 10 concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} diff --git a/ci/run.sh b/ci/run.sh index 341008411..e4a34ff0a 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -701,8 +701,8 @@ function gg_sum_test_backend_ops_cpu { ## main -export LLAMA_LOG_PREFIX=1 -export LLAMA_LOG_TIMESTAMPS=1 +export LLAMA_ARG_LOG_PREFIX=1 +export LLAMA_ARG_LOG_TIMESTAMPS=1 if [ -z ${GG_BUILD_LOW_PERF} ]; then # Create symlink: ./llama.cpp/models-mnt -> $MNT/models diff --git a/common/arg.cpp b/common/arg.cpp index 3df8010a2..bdc2e9eb4 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3026,7 +3026,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.default_template_kwargs[item.key()] = item.value().dump(); } } - ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_CHAT_TEMPLATE_KWARGS")); + ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_CHAT_TEMPLATE_KWARGS")); add_opt(common_arg( {"-to", "--timeout"}, "N", string_format("server read/write timeout in seconds (default: %d)", params.timeout_read), @@ -3327,7 +3327,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params &, const std::string & value) { common_log_set_file(common_log_main(), value.c_str()); } - ).set_env("LLAMA_LOG_FILE")); + ).set_env("LLAMA_ARG_LOG_FILE")); add_opt(common_arg( {"--log-colors"}, "[on|off|auto]", "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n" @@ -3344,7 +3344,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex string_format("error: unknown value for --log-colors: '%s'\n", value.c_str())); } } - ).set_env("LLAMA_LOG_COLORS")); + ).set_env("LLAMA_ARG_LOG_COLORS")); add_opt(common_arg( {"-v", "--verbose", "--log-verbose"}, "Set verbosity level to infinity (i.e. log all messages, useful for debugging)", @@ -3359,7 +3359,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params) { params.offline = true; } - ).set_env("LLAMA_OFFLINE")); + ).set_env("LLAMA_ARG_OFFLINE")); add_opt(common_arg( {"-lv", "--verbosity", "--log-verbosity"}, "N", string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n" @@ -3374,7 +3374,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.verbosity = value; common_log_set_verbosity_thold(value); } - ).set_env("LLAMA_LOG_VERBOSITY")); + ).set_env("LLAMA_ARG_LOG_VERBOSITY")); add_opt(common_arg( {"--log-prefix"}, {"--no-log-prefix"}, diff --git a/docs/autoparser.md b/docs/autoparser.md index da2f1a3a0..33ede1a22 100644 --- a/docs/autoparser.md +++ b/docs/autoparser.md @@ -459,7 +459,7 @@ Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepend - Usage: `./bin/llama-template-analysis path/to/template.jinja` -**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2` +**Debug Logging**: Enable with `LLAMA_ARG_LOG_VERBOSITY=2` - Shows detailed analysis steps, pattern extraction results, and generated parser structure diff --git a/tools/cli/README.md b/tools/cli/README.md index add4021e2..04aef0188 100644 --- a/tools/cli/README.md +++ b/tools/cli/README.md @@ -88,11 +88,11 @@ | `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_FILE_V) | | `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)
(env: HF_TOKEN) | | `--log-disable` | Log disable | -| `--log-file FNAME` | Log to file
(env: LLAMA_LOG_FILE) | -| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_LOG_COLORS) | +| `--log-file FNAME` | Log to file
(env: LLAMA_ARG_LOG_FILE) | +| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_ARG_LOG_COLORS) | | `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) | -| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_OFFLINE) | -| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: debug
(default: 3)

(env: LLAMA_LOG_VERBOSITY) | +| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_ARG_OFFLINE) | +| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: trace (more info)
- 5: debug
(default: 3)

(env: LLAMA_ARG_LOG_VERBOSITY) | | `--log-prefix, --no-log-prefix` | Enable prefix in log messages
(env: LLAMA_ARG_LOG_PREFIX) | | `--log-timestamps, --no-log-timestamps` | Enable timestamps in log messages
(env: LLAMA_ARG_LOG_TIMESTAMPS) | | `--spec-draft-type-k, -ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model
allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
(default: f16)
(env: LLAMA_ARG_SPEC_DRAFT_CACHE_TYPE_K) | @@ -165,7 +165,7 @@ | `--image, --audio FILE` | path to an image or audio file. use with multimodal models, use comma-separated values for multiple files | | `--image-min-tokens N` | minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)
(env: LLAMA_ARG_IMAGE_MIN_TOKENS) | | `--image-max-tokens N` | maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)
(env: LLAMA_ARG_IMAGE_MAX_TOKENS) | -| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'
(env: LLAMA_CHAT_TEMPLATE_KWARGS) | +| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'
(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) | | `--jinja, --no-jinja` | whether to use jinja template engine for chat (default: enabled)
(env: LLAMA_ARG_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | | `-rea, --reasoning [on\|off\|auto]` | Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))
(env: LLAMA_ARG_REASONING) | @@ -194,6 +194,7 @@ | `--spec-draft-n-min N` | minimum number of draft tokens to use for speculative decoding (default: 0)
(env: LLAMA_ARG_SPEC_DRAFT_N_MIN) | | `--spec-draft-p-split, --draft-p-split P` | speculative decoding split probability (default: 0.10)
(env: LLAMA_ARG_SPEC_DRAFT_P_SPLIT) | | `--spec-draft-p-min, --draft-p-min P` | minimum speculative decoding probability (greedy) (default: 0.00)
(env: LLAMA_ARG_SPEC_DRAFT_P_MIN) | +| `--spec-draft-backend-sampling, --no-spec-draft-backend-sampling` | offload draft sampling to the backend (default: enabled)
(env: LLAMA_ARG_SPEC_DRAFT_BACKEND_SAMPLING) | | `--spec-draft-device, -devd, --device-draft ` | comma-separated list of devices to use for offloading the draft model (none = don't offload)
use --list-devices to see a list of available devices | | `--spec-draft-ngl, -ngld, --gpu-layers-draft, --n-gpu-layers-draft N` | max. number of draft model layers to store in VRAM, either an exact number, 'auto', or 'all' (default: auto)
(env: LLAMA_ARG_N_GPU_LAYERS_DRAFT) | | `--spec-draft-model, -md, --model-draft FNAME` | draft model for speculative decoding (default: unused)
(env: LLAMA_ARG_SPEC_DRAFT_MODEL) | diff --git a/tools/completion/README.md b/tools/completion/README.md index edb5da806..e8a1287f3 100644 --- a/tools/completion/README.md +++ b/tools/completion/README.md @@ -171,11 +171,11 @@ llama-completion.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 | `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_FILE_V) | | `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)
(env: HF_TOKEN) | | `--log-disable` | Log disable | -| `--log-file FNAME` | Log to file
(env: LLAMA_LOG_FILE) | -| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_LOG_COLORS) | +| `--log-file FNAME` | Log to file
(env: LLAMA_ARG_LOG_FILE) | +| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_ARG_LOG_COLORS) | | `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) | -| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_OFFLINE) | -| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: debug
(default: 3)

(env: LLAMA_LOG_VERBOSITY) | +| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_ARG_OFFLINE) | +| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: trace (more info)
- 5: debug
(default: 3)

(env: LLAMA_ARG_LOG_VERBOSITY) | | `--log-prefix, --no-log-prefix` | Enable prefix in log messages
(env: LLAMA_ARG_LOG_PREFIX) | | `--log-timestamps, --no-log-timestamps` | Enable timestamps in log messages
(env: LLAMA_ARG_LOG_TIMESTAMPS) | | `--spec-draft-type-k, -ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model
allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
(default: f16)
(env: LLAMA_ARG_SPEC_DRAFT_CACHE_TYPE_K) | diff --git a/tools/server/README.md b/tools/server/README.md index 0b7f9f994..0d20ced87 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -105,11 +105,11 @@ For the full list of features, please refer to [server's changelog](https://gith | `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_FILE_V) | | `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)
(env: HF_TOKEN) | | `--log-disable` | Log disable | -| `--log-file FNAME` | Log to file
(env: LLAMA_LOG_FILE) | -| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_LOG_COLORS) | +| `--log-file FNAME` | Log to file
(env: LLAMA_ARG_LOG_FILE) | +| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal
(env: LLAMA_ARG_LOG_COLORS) | | `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) | -| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_OFFLINE) | -| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: debug
(default: 3)

(env: LLAMA_LOG_VERBOSITY) | +| `--offline` | Offline mode: forces use of cache, prevents network access
(env: LLAMA_ARG_OFFLINE) | +| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:
- 0: generic output
- 1: error
- 2: warning
- 3: info
- 4: trace (more info)
- 5: debug
(default: 3)

(env: LLAMA_ARG_LOG_VERBOSITY) | | `--log-prefix, --no-log-prefix` | Enable prefix in log messages
(env: LLAMA_ARG_LOG_PREFIX) | | `--log-timestamps, --no-log-timestamps` | Enable timestamps in log messages
(env: LLAMA_ARG_LOG_TIMESTAMPS) | | `--spec-draft-type-k, -ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model
allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
(default: f16)
(env: LLAMA_ARG_SPEC_DRAFT_CACHE_TYPE_K) | @@ -204,7 +204,7 @@ For the full list of features, please refer to [server's changelog](https://gith | `--api-key-file FNAME` | path to file containing API keys (default: none) | | `--ssl-key-file FNAME` | path to file a PEM-encoded SSL private key
(env: LLAMA_ARG_SSL_KEY_FILE) | | `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate
(env: LLAMA_ARG_SSL_CERT_FILE) | -| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'
(env: LLAMA_CHAT_TEMPLATE_KWARGS) | +| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'
(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) | | `-to, --timeout N` | server read/write timeout in seconds (default: 600)
(env: LLAMA_ARG_TIMEOUT) | | `--threads-http N` | number of threads used to process HTTP requests (default: -1)
(env: LLAMA_ARG_THREADS_HTTP) | | `--cache-prompt, --no-cache-prompt` | whether to enable prompt caching (default: enabled)
(env: LLAMA_ARG_CACHE_PROMPT) | @@ -249,6 +249,7 @@ For the full list of features, please refer to [server's changelog](https://gith | `--spec-draft-n-min N` | minimum number of draft tokens to use for speculative decoding (default: 0)
(env: LLAMA_ARG_SPEC_DRAFT_N_MIN) | | `--spec-draft-p-split, --draft-p-split P` | speculative decoding split probability (default: 0.10)
(env: LLAMA_ARG_SPEC_DRAFT_P_SPLIT) | | `--spec-draft-p-min, --draft-p-min P` | minimum speculative decoding probability (greedy) (default: 0.00)
(env: LLAMA_ARG_SPEC_DRAFT_P_MIN) | +| `--spec-draft-backend-sampling, --no-spec-draft-backend-sampling` | offload draft sampling to the backend (default: enabled)
(env: LLAMA_ARG_SPEC_DRAFT_BACKEND_SAMPLING) | | `--spec-draft-device, -devd, --device-draft ` | comma-separated list of devices to use for offloading the draft model (none = don't offload)
use --list-devices to see a list of available devices | | `--spec-draft-ngl, -ngld, --gpu-layers-draft, --n-gpu-layers-draft N` | max. number of draft model layers to store in VRAM, either an exact number, 'auto', or 'all' (default: auto)
(env: LLAMA_ARG_N_GPU_LAYERS_DRAFT) | | `--spec-draft-model, -md, --model-draft FNAME` | draft model for speculative decoding (default: unused)
(env: LLAMA_ARG_SPEC_DRAFT_MODEL) |