mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 13:55:27 +00:00
Add balance-serve, support concurrency
This commit is contained in:
parent
8d0292aa44
commit
25cee5810e
196 changed files with 22077 additions and 565 deletions
|
@ -1,6 +1,6 @@
|
|||
import argparse
|
||||
from ktransformers.server.backend.args import ConfigArgs, default_args
|
||||
|
||||
from ktransformers.util.utils import get_free_ports
|
||||
|
||||
class ArgumentParser:
|
||||
def __init__(self, cfg):
|
||||
|
@ -16,20 +16,18 @@ class ArgumentParser:
|
|||
parser.add_argument("--web", type=bool, default=self.cfg.mount_web)
|
||||
parser.add_argument("--model_name", type=str, default=self.cfg.model_name)
|
||||
parser.add_argument("--model_dir", type=str)
|
||||
parser.add_argument("--model_path", type=str)
|
||||
parser.add_argument("--model_path", type=str, default=self.cfg.model_path)
|
||||
parser.add_argument(
|
||||
"--device", type=str, default=self.cfg.model_device, help="Warning: Abandoning this parameter"
|
||||
)
|
||||
parser.add_argument("--gguf_path", type=str, default=self.cfg.gguf_path)
|
||||
parser.add_argument("--optimize_config_path", default=self.cfg.optimize_config_path, type=str, required=False)
|
||||
parser.add_argument("--optimize_config_path", default=None, type=str, required=False)
|
||||
parser.add_argument("--cpu_infer", type=int, default=self.cfg.cpu_infer)
|
||||
parser.add_argument("--type", type=str, default=self.cfg.backend_type)
|
||||
parser.add_argument("--chunk_prefill_size", type=int, default=8192)
|
||||
parser.add_argument("--backend_type", type=str, default=self.cfg.backend_type)
|
||||
parser.add_argument("--chunk_size", type=int, default=self.cfg.chunk_size)
|
||||
|
||||
# model configs
|
||||
# parser.add_argument("--model_cache_lens", type=int, default=self.cfg.cache_lens) # int?
|
||||
parser.add_argument("--paged", type=bool, default=self.cfg.paged)
|
||||
parser.add_argument("--total_context", type=int, default=self.cfg.total_context)
|
||||
parser.add_argument("--max_batch_size", type=int, default=self.cfg.max_batch_size)
|
||||
parser.add_argument("--max_new_tokens", type=int, default=self.cfg.max_new_tokens)
|
||||
parser.add_argument("--json_mode", type=bool, default=self.cfg.json_mode)
|
||||
|
@ -62,7 +60,6 @@ class ArgumentParser:
|
|||
parser.add_argument("--repetition_penalty", type=float, default=self.cfg.repetition_penalty)
|
||||
parser.add_argument("--frequency_penalty", type=float, default=self.cfg.frequency_penalty)
|
||||
parser.add_argument("--presence_penalty", type=float, default=self.cfg.presence_penalty)
|
||||
parser.add_argument("--max_response_tokens", type=int, default=self.cfg.max_response_tokens)
|
||||
parser.add_argument("--response_chunk", type=int, default=self.cfg.response_chunk)
|
||||
parser.add_argument("--no_code_formatting", type=bool, default=self.cfg.no_code_formatting)
|
||||
parser.add_argument("--cache_8bit", type=bool, default=self.cfg.cache_8bit)
|
||||
|
@ -103,6 +100,18 @@ class ArgumentParser:
|
|||
# local chat
|
||||
parser.add_argument("--prompt_file", type=str, default=self.cfg.prompt_file)
|
||||
|
||||
|
||||
# async server
|
||||
parser.add_argument("--sched_strategy", type=str, default=self.cfg.sched_strategy)
|
||||
# parser.add_argument("--sched_port", type=int, default=self.cfg.sched_port)
|
||||
# parser.add_argument("--sched_metrics_port", type=int, default=self.cfg.sched_metrics_port)
|
||||
# parser.add_argument("--kvc2_metrics_port", type=int, default=self.cfg.kvc2_metrics_port)
|
||||
parser.add_argument("--page_size", type=str, default=self.cfg.page_size)
|
||||
parser.add_argument("--memory_gpu_only", type=str, default=self.cfg.memory_gpu_only)
|
||||
parser.add_argument("--utilization_percentage", type=str, default=self.cfg.utilization_percentage)
|
||||
parser.add_argument("--cpu_memory_size_GB", type=str, default=self.cfg.cpu_memory_size_GB)
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
if (args.model_dir is not None or args.model_path is not None):
|
||||
if (args.model_path is not None):
|
||||
|
@ -123,6 +132,15 @@ class ArgumentParser:
|
|||
self.cfg.mount_web = args.web
|
||||
self.cfg.server_ip = args.host
|
||||
self.cfg.server_port = args.port
|
||||
self.cfg.backend_type = args.type
|
||||
self.cfg.user_force_think = args.force_think
|
||||
|
||||
args.gpu_memory_size = args.cache_lens*2*576*61
|
||||
self.cfg.gpu_memory_size = args.gpu_memory_size
|
||||
free_ports = get_free_ports(3, [args.port])
|
||||
args.sched_port = free_ports[0]
|
||||
args.sched_metrics_port = free_ports[1]
|
||||
args.kvc2_metrics_port = free_ports[2]
|
||||
self.cfg.sched_port = free_ports[0]
|
||||
self.cfg.sched_metrics_port = free_ports[1]
|
||||
self.cfg.kvc2_metrics_port = free_ports[2]
|
||||
return args
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue