Kt minimax (#1742)

[feat]: fp8 kernel and kt-cli support
This commit is contained in:
ErvinXie 2025-12-24 15:39:44 +08:00 committed by GitHub
parent e7d277d163
commit d8046e1bb4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 12111 additions and 2502 deletions

View file

@@ -0,0 +1 @@
"""Shell completion scripts for kt-cli."""

View file

@@ -0,0 +1,153 @@
#compdef kt
# Zsh completion for kt command
# This is a static completion script that doesn't require Python startup
#
# Layout: a single dispatcher function (_kt) registered via the #compdef tag
# above. Subcommand names and option specs are declared up-front as local
# arrays in _describe / _arguments spec syntax, then completion is routed by
# the $state set from the '->state' actions and by $words.
_kt() {
# Top-level subcommands as 'name:description' pairs for _describe.
local -a commands
commands=(
'version:Show version information'
'run:Start model inference server'
'chat:Interactive chat with running model'
'quant:Quantize model weights'
'bench:Run full benchmark'
'microbench:Run micro-benchmark'
'doctor:Diagnose environment issues'
'model:Manage models and storage paths'
'config:Manage configuration'
'sft:Fine-tuning with LlamaFactory'
)
# 'kt run' options, in '--opt[description]:message:action' spec syntax;
# an action in (...) offers those literal values as completions.
local -a run_opts
run_opts=(
'--host[Server host]:host:'
'--port[Server port]:port:'
'--gpu-experts[Number of GPU experts]:count:'
'--cpu-threads[Number of CPU threads]:count:'
'--tensor-parallel-size[Tensor parallel size]:size:'
'--kt-method[KT method]:method:(AMXINT4 FP8 RAWINT4)'
'--attention-backend[Attention backend]:backend:(triton flashinfer)'
'--max-total-tokens[Maximum total tokens]:tokens:'
'--dry-run[Show command without executing]'
'--help[Show help message]'
)
# 'kt chat' options; the _files action completes filesystem paths.
local -a chat_opts
chat_opts=(
'--host[Server host]:host:'
'--port[Server port]:port:'
'--model[Model name]:model:'
'--temperature[Sampling temperature]:temp:'
'--max-tokens[Maximum tokens]:tokens:'
'--system[System prompt]:prompt:'
'--save-history[Save conversation history]'
'--no-save-history[Do not save history]'
'--history-file[History file path]:path:_files'
'--stream[Enable streaming output]'
'--no-stream[Disable streaming output]'
'--help[Show help message]'
)
# 'kt model' subcommands.
local -a model_cmds
model_cmds=(
'download:Download a model from HuggingFace'
'list:List available models'
'path-list:List all model storage paths'
'path-add:Add a new model storage path'
'path-remove:Remove a model storage path'
'search:Search for models in the registry'
)
# 'kt config' subcommands.
# NOTE(review): the sibling bash completion additionally lists
# model-path-list / model-path-add / model-path-remove under 'config' —
# confirm which set matches the current CLI and align the two scripts.
local -a config_cmds
config_cmds=(
'show:Show all configuration'
'get:Get configuration value'
'set:Set configuration value'
'reset:Reset to defaults'
'path:Show configuration file path'
'init:Re-run first-time setup wizard'
)
# 'kt sft' subcommands.
local -a sft_cmds
sft_cmds=(
'train:Train model'
'chat:Chat with model'
'export:Export model'
)
# First positional word selects the command; everything after it is handed
# to the per-command branch below via the ->args state.
_arguments -C \
'1: :->command' \
'*::arg:->args'
case $state in
command)
# Offer the subcommand names plus the global flags.
_describe 'kt commands' commands
_arguments \
'--help[Show help message]' \
'--version[Show version]'
;;
args)
# '*::arg:->args' re-roots $words, so $words[1] is now the subcommand.
case $words[1] in
run)
# run takes its options plus one free-form positional model name.
_arguments $run_opts \
'1:model:'
;;
chat)
_arguments $chat_opts
;;
quant)
# _files -/ restricts completion to directories.
_arguments \
'--method[Quantization method]:method:' \
'--output[Output directory]:path:_files -/' \
'--help[Show help message]' \
'1:model:_files -/'
;;
bench|microbench)
_arguments \
'--model[Model name or path]:model:' \
'--config[Config file path]:path:_files' \
'--help[Show help message]'
;;
doctor)
_arguments \
'--verbose[Verbose output]' \
'--help[Show help message]'
;;
model)
# Nested dispatch: complete the model subcommand name in position 1.
_arguments \
'1: :->model_cmd' \
'*::arg:->model_args'
case $state in
model_cmd)
_describe 'model commands' model_cmds
;;
esac
;;
config)
_arguments \
'1: :->config_cmd' \
'*::arg:->config_args'
case $state in
config_cmd)
_describe 'config commands' config_cmds
;;
esac
;;
sft)
_arguments \
'1: :->sft_cmd' \
'*::arg:->sft_args'
case $state in
sft_cmd)
_describe 'sft commands' sft_cmds
;;
esac
;;
esac
;;
esac
}
# Call directly so the script also works when sourced or executed outside
# compinit's autoload mechanism.
_kt "$@"

View file

@@ -0,0 +1,73 @@
#!/bin/bash
# Bash completion for kt command
# This is a static completion script that doesn't require Python startup
# Programmable-completion entry point for the `kt` CLI.
#
# Reads the standard bash-completion variables COMP_WORDS / COMP_CWORD and
# fills COMPREPLY with candidate words.  All candidate lists are static
# strings, so completion never pays Python interpreter startup cost.
#
# Fixes vs. the previous revision:
#   - "$cur" is quoted when passed to compgen (SC2086: an unquoted word like
#     `*` would otherwise glob-expand against the cwd).
#   - COMPREPLY is filled via `mapfile -t` instead of unquoted $(...) word
#     splitting; safe even if IFS has been altered by the interactive shell.
#   - dropped the unused locals `prev` and `opts`.
_kt_completion() {
  local cur
  COMPREPLY=()
  cur="${COMP_WORDS[COMP_CWORD]}"

  # Main commands and global options.
  local commands="version run chat quant bench microbench doctor model config sft"
  local global_opts="--help --version"

  # First argument: suggest commands and global options.
  if [[ ${COMP_CWORD} -eq 1 ]]; then
    mapfile -t COMPREPLY < <(compgen -W "${commands} ${global_opts}" -- "${cur}")
    return 0
  fi

  # Later arguments: pick the candidate list for the command in position 1.
  # An unknown command leaves `words` empty, which yields no completions.
  local words=""
  case "${COMP_WORDS[1]}" in
    run)
      words="--host --port --gpu-experts --cpu-threads --tensor-parallel-size --kt-method --attention-backend --max-total-tokens --dry-run --help"
      ;;
    chat)
      words="--host --port --model --temperature --max-tokens --system --save-history --no-save-history --history-file --stream --no-stream --help"
      ;;
    quant)
      words="--method --output --help"
      ;;
    bench|microbench)
      words="--model --config --help"
      ;;
    doctor)
      words="--verbose --help"
      ;;
    model)
      words="download list path-list path-add path-remove search --help"
      ;;
    config)
      # NOTE(review): the zsh/fish completions list only six config
      # subcommands (no model-path-*) — confirm which set is current.
      words="show get set reset path init model-path-list model-path-add model-path-remove --help"
      ;;
    sft)
      words="train chat export --help"
      ;;
    version)
      words="--help"
      ;;
  esac
  mapfile -t COMPREPLY < <(compgen -W "${words}" -- "${cur}")
  return 0
}
complete -F _kt_completion kt

View file

@@ -0,0 +1,74 @@
# Fish completion for kt command
# This is a static completion script that doesn't require Python startup
#
# Each `complete` line registers one candidate for the `kt` command:
#   -f                   suppress default filename completion for that entry
#   -n "<predicate>"     only offer the entry when the predicate succeeds
#   -a / -l              add a positional candidate / a long option
#   -d                   human-readable description shown in the pager
# `__fish_use_subcommand` is true while no subcommand has been typed yet;
# `__fish_seen_subcommand_from X ...` is true once one of X... appears on
# the command line.
# Main commands
complete -c kt -f -n "__fish_use_subcommand" -a "version" -d "Show version information"
complete -c kt -f -n "__fish_use_subcommand" -a "run" -d "Start model inference server"
complete -c kt -f -n "__fish_use_subcommand" -a "chat" -d "Interactive chat with running model"
complete -c kt -f -n "__fish_use_subcommand" -a "quant" -d "Quantize model weights"
complete -c kt -f -n "__fish_use_subcommand" -a "bench" -d "Run full benchmark"
complete -c kt -f -n "__fish_use_subcommand" -a "microbench" -d "Run micro-benchmark"
complete -c kt -f -n "__fish_use_subcommand" -a "doctor" -d "Diagnose environment issues"
complete -c kt -f -n "__fish_use_subcommand" -a "model" -d "Manage models and storage paths"
complete -c kt -f -n "__fish_use_subcommand" -a "config" -d "Manage configuration"
complete -c kt -f -n "__fish_use_subcommand" -a "sft" -d "Fine-tuning with LlamaFactory"
# Global options (no -n predicate: offered in every position)
complete -c kt -l help -d "Show help message"
complete -c kt -l version -d "Show version"
# Run command options
complete -c kt -f -n "__fish_seen_subcommand_from run" -l host -d "Server host"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l port -d "Server port"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l gpu-experts -d "Number of GPU experts"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l cpu-threads -d "Number of CPU threads"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l tensor-parallel-size -d "Tensor parallel size"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l kt-method -d "KT method"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l attention-backend -d "Attention backend"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l max-total-tokens -d "Maximum total tokens"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l dry-run -d "Show command without executing"
# Chat command options
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l host -d "Server host"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l port -d "Server port"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l model -d "Model name"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l temperature -d "Sampling temperature"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l max-tokens -d "Maximum tokens"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l system -d "System prompt"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l save-history -d "Save conversation history"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l no-save-history -d "Do not save history"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l history-file -d "History file path"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l stream -d "Enable streaming output"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l no-stream -d "Disable streaming output"
# Quant command options
complete -c kt -f -n "__fish_seen_subcommand_from quant" -l method -d "Quantization method"
complete -c kt -f -n "__fish_seen_subcommand_from quant" -l output -d "Output directory"
# Bench command options (shared between bench and microbench)
complete -c kt -f -n "__fish_seen_subcommand_from bench microbench" -l model -d "Model name or path"
complete -c kt -f -n "__fish_seen_subcommand_from bench microbench" -l config -d "Config file path"
# Doctor command options
complete -c kt -f -n "__fish_seen_subcommand_from doctor" -l verbose -d "Verbose output"
# Model subcommands — the "; and not __fish_seen_subcommand_from ..." guard
# stops offering subcommand names once one has already been typed.
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "download" -d "Download a model from HuggingFace"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "list" -d "List available models"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-list" -d "List all model storage paths"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-add" -d "Add a new model storage path"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-remove" -d "Remove a model storage path"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "search" -d "Search for models in the registry"
# Config subcommands
# NOTE(review): the bash completion additionally lists model-path-list /
# model-path-add / model-path-remove under 'config' — confirm which set is
# current and align the scripts.
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "show" -d "Show all configuration"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "get" -d "Get configuration value"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "set" -d "Set configuration value"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "reset" -d "Reset to defaults"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "path" -d "Show configuration file path"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "init" -d "Re-run first-time setup wizard"
# SFT subcommands
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "train" -d "Train model"
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "chat" -d "Chat with model"
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "export" -d "Export model"