mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
Rewrite history to fix bad vulkan shader commits without increasing repo size
added dpe colab (+8 squashed commit) Squashed commit: [b8362da4] updated lite [ed6c037d] move nsigma into the regular sampler stack [ac5f61c6] relative filepath fixed [05fe96ab] export template [ed0a5a3e] nix_example.md: refactor (#1401) * nix_example.md: add override example * nix_example.md: drop graphics example, already basic nixos knowledge * nix_example.md: format * nix_example.md: Vulkan is disabled on macOS Disabled in:1ccd253acc
* nix_examples.md: nixpkgs.config.cuda{Arches -> Capabilities} Fixes: https://github.com/LostRuins/koboldcpp/issues/1367 [675c62f7] AutoGuess: Phi 4 (mini) (#1402) [4bf56982
] phrasing [b8c0df04
] Add Rep Pen to Top N Sigma sampler chain (#1397) - place after nsigma and before xtc (+3 squashed commit) Squashed commit: [87c52b97
] disable VMM from HIP [ee8906f3
] edit description [e85c0e69
] Remove Unnecessary Rep Counting (#1394) * stop counting reps * fix range-based initializer * strike that - reverse it
This commit is contained in:
parent
50eae1ffeb
commit
6b7d2349a7
114 changed files with 6666 additions and 2642 deletions
|
@ -20,22 +20,15 @@ logger = logging.getLogger("gguf-convert-endian")
|
|||
|
||||
|
||||
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
|
||||
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
|
||||
# Host is little endian
|
||||
host_endian = "little"
|
||||
swapped_endian = "big"
|
||||
file_endian = reader.endianess.name
|
||||
if reader.byte_order == 'S':
|
||||
host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
|
||||
else:
|
||||
# Sorry PDP or other weird systems that don't use BE or LE.
|
||||
host_endian = "big"
|
||||
swapped_endian = "little"
|
||||
if reader.byte_order == "S":
|
||||
file_endian = swapped_endian
|
||||
else:
|
||||
file_endian = host_endian
|
||||
order = host_endian if args.order == "native" else args.order
|
||||
logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
|
||||
host_endian = file_endian
|
||||
order = host_endian if args.order == "native" else args.order.upper()
|
||||
logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
|
||||
if file_endian == order:
|
||||
logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
|
||||
logger.info(f"* File is already {order} endian. Nothing to do.")
|
||||
sys.exit(0)
|
||||
logger.info("* Checking tensors for conversion compatibility")
|
||||
for tensor in reader.tensors:
|
||||
|
@ -43,9 +36,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
|
|||
gguf.GGMLQuantizationType.F32,
|
||||
gguf.GGMLQuantizationType.F16,
|
||||
gguf.GGMLQuantizationType.Q8_0,
|
||||
gguf.GGMLQuantizationType.Q4_K,
|
||||
gguf.GGMLQuantizationType.Q6_K,
|
||||
):
|
||||
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
|
||||
logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
|
||||
logger.info(f"* Preparing to convert from {file_endian} to {order}")
|
||||
if args.dry_run:
|
||||
return
|
||||
logger.warning("*** Warning *** Warning *** Warning **")
|
||||
|
@ -96,6 +91,59 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
|
|||
if block_num % 100000 == 0:
|
||||
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
|
||||
|
||||
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q4_K:
|
||||
# Handle Q4_K tensor blocks (block_q4_k)
|
||||
# Specific handling of block_q4_k is required.
|
||||
# Each block_q4_k consists of 2 f16 values followed by 140 int8 values.
|
||||
|
||||
# first flatten structure
|
||||
newshape = 1
|
||||
for i in tensor.data.shape:
|
||||
newshape *= i
|
||||
|
||||
tensor.data.resize(newshape)
|
||||
|
||||
block_size = 144
|
||||
n_blocks = len(tensor.data) // block_size
|
||||
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
|
||||
block_offs = block_num * block_size
|
||||
|
||||
# Byte-Swap f16 sized fields
|
||||
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
|
||||
delta.byteswap(inplace=True)
|
||||
|
||||
delta = tensor.data[block_offs + 2:block_offs + 4].view(dtype=np.uint16)
|
||||
delta.byteswap(inplace=True)
|
||||
|
||||
# Byte-Swap
|
||||
if block_num % 100000 == 0:
|
||||
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
|
||||
|
||||
elif tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K:
|
||||
# Handle Q6_K tensor blocks (block_q6_k)
|
||||
# Specific handling of block_q6_k is required.
|
||||
# Each block_q6_k consists of 208 int8 values followed by 1 f16 value.
|
||||
|
||||
# first flatten structure
|
||||
newshape = 1
|
||||
for i in tensor.data.shape:
|
||||
newshape *= i
|
||||
|
||||
tensor.data.resize(newshape)
|
||||
|
||||
block_size = 210
|
||||
n_blocks = len(tensor.data) // block_size
|
||||
for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
|
||||
block_offs = block_num * block_size
|
||||
|
||||
# Byte-Swap f16 sized field
|
||||
delta = tensor.data[block_offs + 208:block_offs + 210].view(dtype=np.uint16)
|
||||
delta.byteswap(inplace=True)
|
||||
|
||||
# Byte-Swap
|
||||
if block_num % 100000 == 0:
|
||||
inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]")
|
||||
|
||||
else:
|
||||
# Handle other tensor types
|
||||
tensor.data.byteswap(inplace=True)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue