mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 23:34:35 +00:00
[ADD] support multi-gpu qlen>1 q5_k
This commit is contained in:
parent
f293803156
commit
f5f79f5c0e
63 changed files with 3271 additions and 1285 deletions
|
@ -1,6 +1,6 @@
|
|||
'''
|
||||
Description :
|
||||
Author : Boxin Zhang
|
||||
Author : Boxin Zhang, Azure-Tang
|
||||
Version : 0.1.0
|
||||
Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
|
||||
'''
|
||||
|
@ -15,6 +15,7 @@ from transformers.configuration_utils import PretrainedConfig
|
|||
from ktransformers.util.custom_gguf import GGUFLoader, translate_name_to_gguf
|
||||
from ktransformers.util.utils import set_module, load_weights
|
||||
import itertools
|
||||
import copy
|
||||
|
||||
def inject(module, local_optimization_dict, model_config:AutoConfig ,gguf_loader:GGUFLoader, prefix=''):
|
||||
for name, child in module._modules.items():
|
||||
|
@ -22,18 +23,20 @@ def inject(module, local_optimization_dict, model_config:AutoConfig ,gguf_loader
|
|||
child_prefix = prefix + name
|
||||
if child_prefix in local_optimization_dict:
|
||||
inject_module_meta=local_optimization_dict[child_prefix]
|
||||
if isinstance(inject_module_meta, Mapping):
|
||||
if inject_module_meta["class"] != "default":
|
||||
import_path = inject_module_meta["class"].split(".")
|
||||
import_module_name = ".".join(import_path[:-1])
|
||||
gguf_loader.tensor_device_map[inject_module_meta["key"]] = inject_module_meta["kwargs"] if "kwargs" in inject_module_meta else dict()
|
||||
import_class_name = import_path[-1]
|
||||
module_cls=getattr(__import__(import_module_name, fromlist=[""]), import_class_name)
|
||||
print(f"Injecting {child_prefix} as", import_module_name, ".", import_class_name)
|
||||
inject_module=module_cls(key = inject_module_meta["key"], gguf_loader = gguf_loader, config = model_config, orig_module=child, device = inject_module_meta["device"], **inject_module_meta["kwargs"])
|
||||
inject_module=module_cls(key = inject_module_meta["key"], gguf_loader = gguf_loader, config = model_config, orig_module=child, **inject_module_meta["kwargs"])
|
||||
set_module(module, name, inject_module)
|
||||
elif isinstance(inject_module_meta, str):
|
||||
assert inject_module_meta=="default", "for str inject_module_meta, only support \"default\"."
|
||||
elif inject_module_meta["class"] == "default":
|
||||
print(f"Injecting {child_prefix} as default")
|
||||
gguf_loader.tensor_device_map[inject_module_meta["key"]] = inject_module_meta["kwargs"] if "kwargs" in inject_module_meta else dict()
|
||||
else:
|
||||
raise Exception("inject_module_meta must be a dict or str")
|
||||
raise Exception("inject_module_meta[\"class\"] must be \"default\" or a class path")
|
||||
child_prefix += "."
|
||||
child_optimization_dict = {k: v for k, v in local_optimization_dict.items() if k.startswith(child_prefix)}
|
||||
inject(child, child_optimization_dict, model_config, gguf_loader, child_prefix)
|
||||
|
@ -57,6 +60,8 @@ def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list: List, p
|
|||
for rule in rule_list:
|
||||
#print(rule)
|
||||
match_meta = rule["match"]
|
||||
if "class" not in match_meta and "name" not in match_meta:
|
||||
raise Exception("match must have at least one of \"class\" and \"name\"")
|
||||
if "class" in match_meta:
|
||||
import_path = match_meta["class"].split(".")
|
||||
import_module_name = ".".join(import_path[:-1])
|
||||
|
@ -67,16 +72,29 @@ def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list: List, p
|
|||
if "name" in match_meta:
|
||||
if re.search(match_meta["name"], module_name) is None:
|
||||
continue
|
||||
replace_meta = rule["replace"]
|
||||
out_data[module_name]={"key": translated_name,
|
||||
"class": replace_meta["class"],
|
||||
"device": replace_meta["device"] if "device" in replace_meta else default_device,
|
||||
"kwargs": replace_meta["kwargs"] if "kwargs" in replace_meta else dict()}
|
||||
if "replace" not in rule:
|
||||
raise Exception("replace must be in rule")
|
||||
if "replace" in rule:
|
||||
replace_meta = rule["replace"]
|
||||
if module_name not in out_data:
|
||||
out_data[module_name]={"key": translated_name,
|
||||
"class": replace_meta["class"] if "class" in replace_meta else "default",
|
||||
# "device": replace_meta["device"] if "device" in replace_meta else default_device,
|
||||
"kwargs": copy.deepcopy(replace_meta["kwargs"]) if "kwargs" in replace_meta else dict()}
|
||||
else:
|
||||
if out_data[module_name]["class"] == "default":
|
||||
out_data[module_name]["class"] = replace_meta["class"] if "class" in replace_meta else "default"
|
||||
out_data[module_name]["kwargs"].update(copy.deepcopy(replace_meta["kwargs"]) if "kwargs" in replace_meta else dict())
|
||||
if "recursive" in rule:
|
||||
recursive = bool(rule["recursive"])
|
||||
|
||||
if module_name not in out_data:
|
||||
out_data[module_name]="default"
|
||||
out_data[module_name]= {
|
||||
"class": "default",
|
||||
"key": translated_name,
|
||||
"kwargs": {"generate_device": default_device,
|
||||
"prefill_device": default_device}
|
||||
}
|
||||
|
||||
#print(out_data[module_name])
|
||||
#input()
|
||||
|
@ -88,6 +106,14 @@ def gen_optimize_config(module: nn.Module, out_data: Mapping, rule_list: List, p
|
|||
gen_optimize_config(child, out_data, rule_list, child_prefix)
|
||||
|
||||
|
||||
def translate_model_config(model_config: PretrainedConfig):
|
||||
# for supporting some special model
|
||||
if model_config.model_type == "mixtral":
|
||||
model_config.moe_intermediate_size = model_config.intermediate_size
|
||||
|
||||
return model_config
|
||||
|
||||
|
||||
def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path: str, model_config: PretrainedConfig, default_device: str = "cuda:0"):
|
||||
with open(rule_file, 'r', encoding='utf-8') as f:
|
||||
rule_list = yaml.load(f.read(), Loader=yaml.FullLoader)
|
||||
|
@ -95,8 +121,11 @@ def optimize_and_load_gguf(module: nn.Module, rule_file: str, gguf_path: str, mo
|
|||
optimize_config = dict()
|
||||
gen_optimize_config(module, optimize_config, rule_list, default_device = default_device)
|
||||
|
||||
model_config = translate_model_config(model_config)
|
||||
|
||||
gguf_loader=GGUFLoader(gguf_path)
|
||||
with torch.device("meta"):
|
||||
inject(module, optimize_config, model_config, gguf_loader)
|
||||
load_weights(module, gguf_loader)
|
||||
model_config.gguf_loader = gguf_loader
|
||||
del_meta(module)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue