Kt minimax (#1742)

[feat]: fp8 kernel and kt-cli support
This commit is contained in:
ErvinXie 2025-12-24 15:39:44 +08:00 committed by GitHub
parent e7d277d163
commit d8046e1bb4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 12111 additions and 2502 deletions

View file

@@ -0,0 +1 @@
"""Shell completion scripts for kt-cli."""

View file

@@ -0,0 +1,153 @@
#compdef kt
# Zsh completion for kt command
# This is a static completion script that doesn't require Python startup
#
# Layout: a single dispatcher function (_kt) registered via the #compdef tag
# above. Subcommand names and option specs are declared up-front as local
# arrays in _describe / _arguments spec syntax, then completion is routed by
# the $state set from the '->state' actions and by $words.
_kt() {
# Top-level subcommands as 'name:description' pairs for _describe.
local -a commands
commands=(
'version:Show version information'
'run:Start model inference server'
'chat:Interactive chat with running model'
'quant:Quantize model weights'
'bench:Run full benchmark'
'microbench:Run micro-benchmark'
'doctor:Diagnose environment issues'
'model:Manage models and storage paths'
'config:Manage configuration'
'sft:Fine-tuning with LlamaFactory'
)
# 'kt run' options, in '--opt[description]:message:action' spec syntax;
# an action in (...) offers those literal values as completions.
local -a run_opts
run_opts=(
'--host[Server host]:host:'
'--port[Server port]:port:'
'--gpu-experts[Number of GPU experts]:count:'
'--cpu-threads[Number of CPU threads]:count:'
'--tensor-parallel-size[Tensor parallel size]:size:'
'--kt-method[KT method]:method:(AMXINT4 FP8 RAWINT4)'
'--attention-backend[Attention backend]:backend:(triton flashinfer)'
'--max-total-tokens[Maximum total tokens]:tokens:'
'--dry-run[Show command without executing]'
'--help[Show help message]'
)
# 'kt chat' options; the _files action completes filesystem paths.
local -a chat_opts
chat_opts=(
'--host[Server host]:host:'
'--port[Server port]:port:'
'--model[Model name]:model:'
'--temperature[Sampling temperature]:temp:'
'--max-tokens[Maximum tokens]:tokens:'
'--system[System prompt]:prompt:'
'--save-history[Save conversation history]'
'--no-save-history[Do not save history]'
'--history-file[History file path]:path:_files'
'--stream[Enable streaming output]'
'--no-stream[Disable streaming output]'
'--help[Show help message]'
)
# 'kt model' subcommands.
local -a model_cmds
model_cmds=(
'download:Download a model from HuggingFace'
'list:List available models'
'path-list:List all model storage paths'
'path-add:Add a new model storage path'
'path-remove:Remove a model storage path'
'search:Search for models in the registry'
)
# 'kt config' subcommands.
# NOTE(review): the sibling bash completion additionally lists
# model-path-list / model-path-add / model-path-remove under 'config' —
# confirm which set matches the current CLI and align the two scripts.
local -a config_cmds
config_cmds=(
'show:Show all configuration'
'get:Get configuration value'
'set:Set configuration value'
'reset:Reset to defaults'
'path:Show configuration file path'
'init:Re-run first-time setup wizard'
)
# 'kt sft' subcommands.
local -a sft_cmds
sft_cmds=(
'train:Train model'
'chat:Chat with model'
'export:Export model'
)
# First positional word selects the command; everything after it is handed
# to the per-command branch below via the ->args state.
_arguments -C \
'1: :->command' \
'*::arg:->args'
case $state in
command)
# Offer the subcommand names plus the global flags.
_describe 'kt commands' commands
_arguments \
'--help[Show help message]' \
'--version[Show version]'
;;
args)
# '*::arg:->args' re-roots $words, so $words[1] is now the subcommand.
case $words[1] in
run)
# run takes its options plus one free-form positional model name.
_arguments $run_opts \
'1:model:'
;;
chat)
_arguments $chat_opts
;;
quant)
# _files -/ restricts completion to directories.
_arguments \
'--method[Quantization method]:method:' \
'--output[Output directory]:path:_files -/' \
'--help[Show help message]' \
'1:model:_files -/'
;;
bench|microbench)
_arguments \
'--model[Model name or path]:model:' \
'--config[Config file path]:path:_files' \
'--help[Show help message]'
;;
doctor)
_arguments \
'--verbose[Verbose output]' \
'--help[Show help message]'
;;
model)
# Nested dispatch: complete the model subcommand name in position 1.
_arguments \
'1: :->model_cmd' \
'*::arg:->model_args'
case $state in
model_cmd)
_describe 'model commands' model_cmds
;;
esac
;;
config)
_arguments \
'1: :->config_cmd' \
'*::arg:->config_args'
case $state in
config_cmd)
_describe 'config commands' config_cmds
;;
esac
;;
sft)
_arguments \
'1: :->sft_cmd' \
'*::arg:->sft_args'
case $state in
sft_cmd)
_describe 'sft commands' sft_cmds
;;
esac
;;
esac
;;
esac
}
# Call directly so the script also works when sourced or executed outside
# compinit's autoload mechanism.
_kt "$@"

View file

@@ -0,0 +1,73 @@
#!/bin/bash
# Bash completion for kt command
# This is a static completion script that doesn't require Python startup
# Programmable-completion entry point for the `kt` CLI.
#
# Reads the standard bash-completion variables COMP_WORDS / COMP_CWORD and
# fills COMPREPLY with candidate words.  All candidate lists are static
# strings, so completion never pays Python interpreter startup cost.
#
# Fixes vs. the previous revision:
#   - "$cur" is quoted when passed to compgen (SC2086: an unquoted word like
#     `*` would otherwise glob-expand against the cwd).
#   - COMPREPLY is filled via `mapfile -t` instead of unquoted $(...) word
#     splitting; safe even if IFS has been altered by the interactive shell.
#   - dropped the unused locals `prev` and `opts`.
_kt_completion() {
  local cur
  COMPREPLY=()
  cur="${COMP_WORDS[COMP_CWORD]}"

  # Main commands and global options.
  local commands="version run chat quant bench microbench doctor model config sft"
  local global_opts="--help --version"

  # First argument: suggest commands and global options.
  if [[ ${COMP_CWORD} -eq 1 ]]; then
    mapfile -t COMPREPLY < <(compgen -W "${commands} ${global_opts}" -- "${cur}")
    return 0
  fi

  # Later arguments: pick the candidate list for the command in position 1.
  # An unknown command leaves `words` empty, which yields no completions.
  local words=""
  case "${COMP_WORDS[1]}" in
    run)
      words="--host --port --gpu-experts --cpu-threads --tensor-parallel-size --kt-method --attention-backend --max-total-tokens --dry-run --help"
      ;;
    chat)
      words="--host --port --model --temperature --max-tokens --system --save-history --no-save-history --history-file --stream --no-stream --help"
      ;;
    quant)
      words="--method --output --help"
      ;;
    bench|microbench)
      words="--model --config --help"
      ;;
    doctor)
      words="--verbose --help"
      ;;
    model)
      words="download list path-list path-add path-remove search --help"
      ;;
    config)
      # NOTE(review): the zsh/fish completions list only six config
      # subcommands (no model-path-*) — confirm which set is current.
      words="show get set reset path init model-path-list model-path-add model-path-remove --help"
      ;;
    sft)
      words="train chat export --help"
      ;;
    version)
      words="--help"
      ;;
  esac
  mapfile -t COMPREPLY < <(compgen -W "${words}" -- "${cur}")
  return 0
}
complete -F _kt_completion kt

View file

@@ -0,0 +1,74 @@
# Fish completion for kt command
# This is a static completion script that doesn't require Python startup
#
# Each `complete` line registers one candidate for the `kt` command:
#   -f                   suppress default filename completion for that entry
#   -n "<predicate>"     only offer the entry when the predicate succeeds
#   -a / -l              add a positional candidate / a long option
#   -d                   human-readable description shown in the pager
# `__fish_use_subcommand` is true while no subcommand has been typed yet;
# `__fish_seen_subcommand_from X ...` is true once one of X... appears on
# the command line.
# Main commands
complete -c kt -f -n "__fish_use_subcommand" -a "version" -d "Show version information"
complete -c kt -f -n "__fish_use_subcommand" -a "run" -d "Start model inference server"
complete -c kt -f -n "__fish_use_subcommand" -a "chat" -d "Interactive chat with running model"
complete -c kt -f -n "__fish_use_subcommand" -a "quant" -d "Quantize model weights"
complete -c kt -f -n "__fish_use_subcommand" -a "bench" -d "Run full benchmark"
complete -c kt -f -n "__fish_use_subcommand" -a "microbench" -d "Run micro-benchmark"
complete -c kt -f -n "__fish_use_subcommand" -a "doctor" -d "Diagnose environment issues"
complete -c kt -f -n "__fish_use_subcommand" -a "model" -d "Manage models and storage paths"
complete -c kt -f -n "__fish_use_subcommand" -a "config" -d "Manage configuration"
complete -c kt -f -n "__fish_use_subcommand" -a "sft" -d "Fine-tuning with LlamaFactory"
# Global options (no -n predicate: offered in every position)
complete -c kt -l help -d "Show help message"
complete -c kt -l version -d "Show version"
# Run command options
complete -c kt -f -n "__fish_seen_subcommand_from run" -l host -d "Server host"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l port -d "Server port"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l gpu-experts -d "Number of GPU experts"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l cpu-threads -d "Number of CPU threads"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l tensor-parallel-size -d "Tensor parallel size"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l kt-method -d "KT method"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l attention-backend -d "Attention backend"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l max-total-tokens -d "Maximum total tokens"
complete -c kt -f -n "__fish_seen_subcommand_from run" -l dry-run -d "Show command without executing"
# Chat command options
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l host -d "Server host"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l port -d "Server port"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l model -d "Model name"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l temperature -d "Sampling temperature"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l max-tokens -d "Maximum tokens"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l system -d "System prompt"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l save-history -d "Save conversation history"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l no-save-history -d "Do not save history"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l history-file -d "History file path"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l stream -d "Enable streaming output"
complete -c kt -f -n "__fish_seen_subcommand_from chat" -l no-stream -d "Disable streaming output"
# Quant command options
complete -c kt -f -n "__fish_seen_subcommand_from quant" -l method -d "Quantization method"
complete -c kt -f -n "__fish_seen_subcommand_from quant" -l output -d "Output directory"
# Bench command options (shared between bench and microbench)
complete -c kt -f -n "__fish_seen_subcommand_from bench microbench" -l model -d "Model name or path"
complete -c kt -f -n "__fish_seen_subcommand_from bench microbench" -l config -d "Config file path"
# Doctor command options
complete -c kt -f -n "__fish_seen_subcommand_from doctor" -l verbose -d "Verbose output"
# Model subcommands — the "; and not __fish_seen_subcommand_from ..." guard
# stops offering subcommand names once one has already been typed.
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "download" -d "Download a model from HuggingFace"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "list" -d "List available models"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-list" -d "List all model storage paths"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-add" -d "Add a new model storage path"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "path-remove" -d "Remove a model storage path"
complete -c kt -f -n "__fish_seen_subcommand_from model; and not __fish_seen_subcommand_from download list path-list path-add path-remove search" -a "search" -d "Search for models in the registry"
# Config subcommands
# NOTE(review): the bash completion additionally lists model-path-list /
# model-path-add / model-path-remove under 'config' — confirm which set is
# current and align the scripts.
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "show" -d "Show all configuration"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "get" -d "Get configuration value"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "set" -d "Set configuration value"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "reset" -d "Reset to defaults"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "path" -d "Show configuration file path"
complete -c kt -f -n "__fish_seen_subcommand_from config; and not __fish_seen_subcommand_from show get set reset path init" -a "init" -d "Re-run first-time setup wizard"
# SFT subcommands
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "train" -d "Train model"
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "chat" -d "Chat with model"
complete -c kt -f -n "__fish_seen_subcommand_from sft; and not __fish_seen_subcommand_from train chat export" -a "export" -d "Export model"