support qwen3, don't speak human language

djw 2025-04-28 08:44:47 +00:00
parent f3d842a0ca
commit 3f9bbf1181
30 changed files with 3696 additions and 290 deletions

@@ -20,6 +20,7 @@ class ArgumentParser:
         parser.add_argument(
             "--device", type=str, default=self.cfg.model_device, help="Warning: Abandoning this parameter"
         )
+        parser.add_argument("--architectures", type=str, default=self.cfg.model_name)
         parser.add_argument("--gguf_path", type=str, default=self.cfg.gguf_path)
         parser.add_argument("--optimize_config_path", default=None, type=str, required=False)
         parser.add_argument("--cpu_infer", type=int, default=self.cfg.cpu_infer)
@@ -93,6 +94,7 @@ class ArgumentParser:
         parser.add_argument("--user_algorithm", type=str, default=self.cfg.user_algorithm)
         parser.add_argument("--force_think", action=argparse.BooleanOptionalAction, type=bool, default=self.cfg.user_force_think)
         parser.add_argument("--use_cuda_graph", action=argparse.BooleanOptionalAction, type=bool, default=self.cfg.use_cuda_graph)
+        # parser.add_argument("--use_cuda_graph", action=argparse.BooleanOptionalAction, type=bool, default=False)
         # web config
         parser.add_argument("--web_cross_domain", type=bool, default=self.cfg.web_cross_domain)
@@ -137,7 +139,7 @@ class ArgumentParser:
         self.cfg.server_port = args.port
         self.cfg.user_force_think = args.force_think
-        args.gpu_memory_size = args.cache_lens*2*576*61
+        args.gpu_memory_size = 4*1024*1024*1024 # TODO: set this to the actual GPU memory size
         self.cfg.gpu_memory_size = args.gpu_memory_size
         free_ports = get_free_ports(3, [args.port])
         args.sched_port = free_ports[0]
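
The changed line drops the cache_lens-based estimate and pins gpu_memory_size to a hard-coded 4 GiB, leaving a TODO to use the actual GPU memory size. A hedged sketch of one way that TODO could be resolved with PyTorch's CUDA API; this is not code from the commit:

import torch

def detect_gpu_memory_size(fallback: int = 4 * 1024 * 1024 * 1024) -> int:
    # torch.cuda.mem_get_info() returns (free_bytes, total_bytes) for the current device.
    if torch.cuda.is_available():
        _free_bytes, total_bytes = torch.cuda.mem_get_info()
        return total_bytes
    return fallback  # keep the commit's 4 GiB value when no CUDA device is present

args_gpu_memory_size = detect_gpu_memory_size()  # hypothetical wiring; the commit assigns args.gpu_memory_size directly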