mirror of https://github.com/kvcache-ai/ktransformers.git (synced 2025-09-06 04:30:03 +00:00)
fix-cache-lens
This commit is contained in:
parent 8ba7e5d4b8
commit 7adb7281f4

1 changed file with 6 additions and 1 deletion
@@ -1,6 +1,7 @@
 import argparse
 from ktransformers.server.backend.args import ConfigArgs, default_args
 from ktransformers.util.utils import get_free_ports
+from transformers import AutoConfig

 class ArgumentParser:
     def __init__(self, cfg):
@@ -138,7 +139,11 @@ class ArgumentParser:
         self.cfg.server_port = args.port
         self.cfg.user_force_think = args.force_think

-        args.gpu_memory_size = 4*1024*1024*1024 # TODO: set this to the actual GPU memory size
+        model_config = AutoConfig.from_pretrained(args.model_dir, trust_remote_code=True)
+        if args.architectures == "Qwen3MoeForCausalLM" or args.architectures == "Qwen2MoeForCausalLM":
+            args.gpu_memory_size = args.cache_lens*2*2*model_config.num_hidden_layers*model_config.num_key_value_heads*model_config.head_dim
+        else:
+            args.gpu_memory_size = args.cache_lens*2*576*61
         self.cfg.gpu_memory_size = args.gpu_memory_size
         free_ports = get_free_ports(3, [args.port])
         args.sched_port = free_ports[0]
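
The commit replaces the fixed 4 GiB placeholder with a cache size derived from the model geometry: cache capacity in tokens (cache_lens), times K and V tensors, times bytes per element, times the layer and KV-head dimensions. Below is a minimal standalone sketch of the two branches; the function names and example numbers are illustrative, not part of the commit, and the 2-byte element size is an assumption consistent with the hard-coded factors in the diff.

def kv_cache_bytes_gqa(cache_lens: int, num_hidden_layers: int,
                       num_key_value_heads: int, head_dim: int,
                       bytes_per_elem: int = 2) -> int:
    # Standard GQA cache: K and V tensors (factor 2), each storing
    # cache_lens * num_key_value_heads * head_dim elements per layer.
    return cache_lens * 2 * bytes_per_elem * num_hidden_layers * num_key_value_heads * head_dim

def kv_cache_bytes_mla(cache_lens: int, num_hidden_layers: int = 61,
                       kv_lora_rank: int = 512, qk_rope_head_dim: int = 64,
                       bytes_per_elem: int = 2) -> int:
    # MLA cache (DeepSeek-style): a single compressed latent of
    # kv_lora_rank + qk_rope_head_dim (= 576) elements per token per layer.
    return cache_lens * bytes_per_elem * (kv_lora_rank + qk_rope_head_dim) * num_hidden_layers

# Qwen3-MoE-like geometry (illustrative values): 32768*2*2*48*4*128 = 3 GiB
print(kv_cache_bytes_gqa(32768, num_hidden_layers=48, num_key_value_heads=4, head_dim=128))
# Defaults reproduce the hard-coded 2*576*61 fallback
print(kv_cache_bytes_mla(32768))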
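The fallback constant appears to encode DeepSeek-V3/R1's MLA cache layout: 61 hidden layers and a per-token latent of kv_lora_rank (512) + qk_rope_head_dim (64) = 576 elements, so at cache_lens = 32768 the reservation works out to 32768 * 2 * 576 * 61 ≈ 2.14 GiB. Note that any architecture other than the two Qwen MoE models named in the condition falls through to this DeepSeek-shaped estimate.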