mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 13:55:27 +00:00
Add fp8 linear kernel; add empty cache to fit in 16G VRAM. By 'wkGCaSS - 知乎 https://zhuanlan.zhihu.com/p/25491611225'
This commit is contained in:
parent
b4fb633991
commit
7b7c6a657d
5 changed files with 331 additions and 2 deletions
|
@@ -127,6 +127,7 @@ GGML_BLOCK_SIZES = {
|
|||
"Q5_K": 2 + 2 + 12 + 256 // 8 + 256 // 2,
|
||||
"Q6_K": 256 // 2 + 256 // 4 + 256 // 16 + 2,
|
||||
"IQ4_XS": 2 + 2 + 256 // 2 + 256 // 64,
|
||||
"FP8": 1,
|
||||
}
|
||||
|
||||
GGML_ELEMENTS_PER_BLOCK = {
|
||||
|
@@ -142,6 +143,7 @@ GGML_ELEMENTS_PER_BLOCK = {
|
|||
"Q5_K": 256,
|
||||
"Q6_K": 256,
|
||||
"IQ4_XS": 256,
|
||||
"FP8": 1,
|
||||
}
|
||||
|
||||
DATA_TYPES = {
|
||||
|
@@ -158,6 +160,7 @@ DATA_TYPES = {
|
|||
"uint64": 10,
|
||||
"int64": 11,
|
||||
"float64": 12,
|
||||
"FP8": 13,
|
||||
}
|
||||
|
||||
class GGUFLoader:
|
||||
|
@@ -393,6 +396,9 @@ def read_value(f, data_type):
|
|||
elem_type, count = struct.unpack("<IQ", f.read(4 + 8))
|
||||
return [read_value(f, elem_type) for _ in range(count)]
|
||||
|
||||
elif data_type == DATA_TYPES["FP8"]:
|
||||
return struct.unpack("<B", f.read(1))[0]
|
||||
|
||||
else:
|
||||
raise NotImplementedError(f"Data type {data_type} not implemented")
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue