mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
ace converter
This commit is contained in:
parent
ac8f12f259
commit
adebf63877
2 changed files with 290 additions and 0 deletions
3
Makefile
3
Makefile
|
|
@ -915,6 +915,9 @@ quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp g
|
|||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
# quantize_clip: compile tools/mtmd/clip.cpp and tools/quantclip.cpp and link
# them with the listed ggml/llama objects into the quantize_clip executable.
quantize_clip: tools/mtmd/clip.cpp tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
# quantize_ace: compile otherarch/acestep/quantize-acestep.cpp (plus
# tools/mtmd/clip.cpp) and link with the listed ggml/llama objects into the
# quantize_ace executable.
quantize_ace: otherarch/acestep/quantize-acestep.cpp tools/mtmd/clip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
|
||||
#window simple clinfo
|
||||
simplecpuinfo: simplecpuinfo.cpp
|
||||
|
|
|
|||
287
otherarch/acestep/acestep_convert.py
Normal file
287
otherarch/acestep/acestep_convert.py
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
#!/usr/bin/env python3
|
||||
# acestep_convert.py: safetensors to GGUF for ACE-Step (LM, DiT, TextEncoder, VAE)
|
||||
# Reads from checkpoints/, writes GGUF to models/
|
||||
# Each GGUF is self-contained: weights + config + tokenizer + silence_latent
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import struct
|
||||
import zipfile
|
||||
import numpy as np
|
||||
import gguf
|
||||
|
||||
# All paths are anchored at this script's own directory, not the caller's CWD.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Input root: main() scans its sub-directories, one per model checkpoint.
CHECKPOINT_DIR = os.path.join(SCRIPT_DIR, "checkpoints")
# Output root: main() writes the converted "<name>-BF16.gguf" files here.
OUTPUT_DIR = os.path.join(SCRIPT_DIR, "models")

# GGML type tag passed as raw_dtype when bfloat16 tensors are added.
BF16 = gguf.GGMLQuantizationType.BF16
|
||||
|
||||
def log(tag, msg):
    """Write one "[tag] msg" diagnostic line to stderr and flush it at once."""
    line = f"[{tag}] {msg}"
    print(line, file=sys.stderr, flush=True)
|
||||
|
||||
# Safetensors reader
|
||||
def read_sf_header(path):
    """Parse the JSON header at the start of a safetensors file.

    The file begins with an 8-byte little-endian unsigned length, followed by
    that many bytes of JSON.

    Returns:
        (meta, data_start): ``meta`` is the decoded header dict with the
        "__metadata__" entry removed; ``data_start`` is the byte offset of the
        first byte after the header (8 + header length).
    """
    with open(path, "rb") as fh:
        (header_len,) = struct.unpack("<Q", fh.read(8))
        header = json.loads(fh.read(header_len))
    # "__metadata__" is not a tensor entry, so drop it before returning.
    header.pop("__metadata__", None)
    return header, header_len + 8
|
||||
|
||||
def find_sf_files(model_dir):
    """Return the safetensors files that make up the model in *model_dir*.

    Layouts are probed in this order; the first one found wins:
      1. single file:   model.safetensors
      2. sharded:       model.safetensors.index.json (its "weight_map" values
                        name the shard files; returned de-duplicated, sorted)
      3. diffusers VAE: diffusion_pytorch_model.safetensors

    Returns:
        A list of paths, or an empty list when none of the layouts is present.
    """
    single = os.path.join(model_dir, "model.safetensors")
    if os.path.exists(single):
        return [single]

    index = os.path.join(model_dir, "model.safetensors.index.json")
    if os.path.exists(index):
        # Context manager so the index file handle is closed deterministically.
        with open(index, "r", encoding="utf-8") as fh:
            weight_map = json.load(fh)["weight_map"]
        shards = sorted(set(weight_map.values()))
        return [os.path.join(model_dir, shard) for shard in shards]

    diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors")
    if os.path.exists(diffusers):
        return [diffusers]

    return []
|
||||
|
||||
# Model classification
|
||||
# Model kind (as produced by classify) -> architecture string given to the
# GGUF writer. Every architecture name is "acestep-" followed by the kind.
ARCHS = {kind: "acestep-" + kind for kind in ("lm", "dit", "text-enc", "vae")}
|
||||
|
||||
def classify(name):
    """Map a checkpoint directory name to its model kind.

    Returns "lm", "dit", "text-enc" or "vae", or None when the name matches
    none of the known patterns.
    """
    # The VAE directory is matched by exact name, the others by prefix.
    if name == "vae":
        return "vae"
    prefix_kinds = (
        ("acestep-5Hz-lm", "lm"),
        ("acestep-v15", "dit"),
        ("Qwen3-Embedding", "text-enc"),
    )
    for prefix, kind in prefix_kinds:
        if name.startswith(prefix):
            return kind
    return None
|
||||
|
||||
# GGUF metadata from config.json
|
||||
def add_metadata(w, cfg, model_type):
    """Copy hyper-parameters from a parsed config.json into GGUF key/values.

    Args:
        w: GGUF writer; only its ``add_*`` methods are called.
        cfg: dict decoded from the model's config.json.
        model_type: model kind ("lm", "dit", ...); selects the extra
            "acestep.*" keys that are written.

    A key that is absent, or present with a JSON ``null`` value, is treated as
    unset and skipped, so ``None`` is never handed to a typed writer method.
    The full config is always stored, compactly serialized, under
    "acestep.config_json".
    """
    # config.json key -> writer method, for the standard GGUF fields.
    standard_fields = (
        ("num_hidden_layers", w.add_block_count),
        ("hidden_size", w.add_embedding_length),
        ("intermediate_size", w.add_feed_forward_length),
        ("num_attention_heads", w.add_head_count),
        ("num_key_value_heads", w.add_head_count_kv),
        ("head_dim", w.add_key_length),
        ("vocab_size", w.add_vocab_size),
        ("max_position_embeddings", w.add_context_length),
        ("rms_norm_eps", w.add_layer_norm_rms_eps),
    )
    for key, emit in standard_fields:
        value = cfg.get(key)
        if value is not None:
            emit(value)

    rope = cfg.get("rope_theta")
    if rope:
        w.add_rope_freq_base(float(rope))

    if model_type == "lm":
        if cfg.get("tie_word_embeddings"):
            w.add_bool("acestep.tie_word_embeddings", True)

    if model_type == "dit":
        dit_uint_keys = (
            "in_channels", "audio_acoustic_hidden_dim", "patch_size",
            "sliding_window", "fsq_dim", "text_hidden_dim", "timbre_hidden_dim",
            "num_lyric_encoder_hidden_layers", "num_timbre_encoder_hidden_layers",
            "num_audio_decoder_hidden_layers", "num_attention_pooler_hidden_layers",
        )
        for key in dit_uint_keys:
            value = cfg.get(key)
            if value is not None:
                w.add_uint32("acestep.%s" % key, value)
        if cfg.get("is_turbo"):
            w.add_bool("acestep.is_turbo", True)
        levels = cfg.get("fsq_input_levels")
        if levels:
            w.add_array("acestep.fsq_input_levels", levels)

    # Keep the complete config so values not mapped above remain available.
    w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":")))
|
||||
|
||||
# Tensor packing from safetensors
|
||||
def add_tensors_from_sf(w, sf_path, tag):
    """Add every supported tensor of one safetensors file to the GGUF writer.

    Tensors are processed in sorted name order. Names that do not already
    start with "model." get that prefix. BF16, F16 and F32 are supported; any
    other dtype is logged and skipped without reading its data.

    Args:
        w: GGUF writer; ``add_tensor`` is called once per accepted tensor.
        sf_path: path of the safetensors file.
        tag: log tag used for skip messages.

    Returns:
        (count, total): number of tensors added and their summed byte size.

    Raises:
        ValueError: if the file ends before a tensor's declared byte range.
    """
    # safetensors dtype -> (numpy dtype used to view the bytes, add_tensor kwargs).
    # BF16 bytes are viewed as uint16 and tagged through raw_dtype instead.
    layouts = {
        "BF16": (np.uint16, {"raw_dtype": BF16}),
        "F16": (np.float16, {}),
        "F32": (np.float32, {}),
    }

    meta, hdr_size = read_sf_header(sf_path)
    count = 0
    total = 0

    # Context manager: the handle is closed even if a tensor fails to load.
    with open(sf_path, "rb") as f:
        for name in sorted(meta):
            info = meta[name]
            dtype_str = info["dtype"]
            layout = layouts.get(dtype_str)
            if layout is None:
                log(tag, " skip %s: dtype %s" % (name, dtype_str))
                continue
            np_dtype, extra = layout

            # data_offsets are relative to the end of the header.
            off0, off1 = info["data_offsets"]
            nbytes = off1 - off0
            f.seek(hdr_size + off0)
            raw = f.read(nbytes)
            if len(raw) != nbytes:
                raise ValueError(
                    "%s: tensor %s truncated (expected %d bytes, got %d)"
                    % (sf_path, name, nbytes, len(raw)))

            arr = np.frombuffer(raw, dtype=np_dtype).reshape(info["shape"])
            gguf_name = name if name.startswith("model.") else f"model.{name}"
            w.add_tensor(gguf_name, arr, **extra)

            count += 1
            total += nbytes

    return count, total
|
||||
|
||||
# silence_latent.pt reader (replaces pt2bin C++ tool)
|
||||
# PyTorch .pt is a ZIP with entry "*/data/0" containing f32 [64, 15000]
|
||||
# We transpose to [15000, 64] (ggml layout: 64 contiguous per frame)
|
||||
def read_silence_latent(model_dir):
    """Load silence_latent.pt from *model_dir* as a transposed float32 array.

    The .pt file is opened as a ZIP archive; the first entry whose name ends
    in "/data/0" is read as raw float32, reshaped to (64, 15000), and returned
    as a transposed copy of shape (15000, 64).

    Returns None when the file, or such an entry, does not exist.
    """
    pt_path = os.path.join(model_dir, "silence_latent.pt")
    if not os.path.exists(pt_path):
        return None

    with zipfile.ZipFile(pt_path) as archive:
        entry = next(
            (member for member in archive.namelist() if member.endswith("/data/0")),
            None,
        )
        if entry is None:
            return None
        payload = archive.read(entry)

    latent = np.frombuffer(payload, dtype=np.float32).reshape(64, 15000)
    # .copy() materializes the transposed view as an independent array.
    return latent.T.copy()
|
||||
|
||||
# BPE tokenizer embedding (vocab.json + merges.txt -> GGUF KV)
|
||||
def add_bpe_tokenizer(w, model_dir, tag):
    """Embed a BPE tokenizer (vocab.json + merges.txt) into the GGUF writer.

    Args:
        w: GGUF writer; receives the tokenizer model name ("gpt2"), the token
            list and the merge list.
        model_dir: directory expected to hold vocab.json and merges.txt.
        tag: log tag for the summary line.

    Returns:
        False when either file is missing (nothing is written), True otherwise.
    """
    vocab_path = os.path.join(model_dir, "vocab.json")
    merges_path = os.path.join(model_dir, "merges.txt")
    if not (os.path.exists(vocab_path) and os.path.exists(merges_path)):
        return False

    # Context manager so the vocab file handle is closed deterministically.
    with open(vocab_path, "r", encoding="utf-8") as f:
        vocab = json.load(f)

    # Token table indexed by id; ids that never appear keep the "" placeholder.
    tokens = [""] * len(vocab)
    dropped = 0
    for tok_str, tok_id in vocab.items():
        if 0 <= tok_id < len(tokens):
            tokens[tok_id] = tok_str
        else:
            dropped += 1
    if dropped:
        # These ids do not fit the table; report them instead of hiding it.
        log(tag, " WARNING: %d token id(s) outside [0, %d) ignored" % (
            dropped, len(tokens)))

    with open(merges_path, "r", encoding="utf-8") as f:
        stripped = (raw.rstrip("\n\r") for raw in f)
        # Blank lines and the "#version:" header are not merge rules.
        merges = [
            line for line in stripped
            if line and not line.startswith("#version:")
        ]

    w.add_tokenizer_model("gpt2")
    w.add_token_list(tokens)
    w.add_token_merges(merges)

    log(tag, " tokenizer: %d vocab, %d merges" % (len(tokens), len(merges)))
    return True
|
||||
|
||||
# Main conversion
|
||||
def convert_model(name, model_dir, output_path, model_type):
    """Convert one checkpoint directory into a single GGUF file.

    Writes config metadata, the BPE tokenizer (for "lm" and "text-enc"), all
    safetensors weights and, for "dit", the silence_latent tensor.

    Args:
        name: checkpoint name, stored as the GGUF model name and used in logs.
        model_dir: directory holding config.json and the safetensors file(s).
        output_path: destination .gguf path.
        model_type: key into ARCHS ("lm", "dit", "text-enc" or "vae").

    Returns:
        True when the file was written, False when the checkpoint was skipped
        because config.json or the safetensors files are missing.

    If anything fails after the writer is created, the writer is closed, any
    partially written output file is removed, and the exception propagates.
    A leftover partial file would otherwise look like a finished conversion
    to a later run that only checks for the file's existence.
    """
    tag = "GGUF"
    cfg_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(cfg_path):
        log(tag, "skip %s: no config.json" % name)
        return False

    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    sf_files = find_sf_files(model_dir)
    if not sf_files:
        log(tag, "skip %s: no safetensors" % name)
        return False

    arch = ARCHS[model_type]
    log(tag, "%s (%s, %d shard%s) -> %s" % (
        name, arch, len(sf_files), "" if len(sf_files) == 1 else "s",
        os.path.basename(output_path)))

    w = gguf.GGUFWriter(output_path, arch, use_temp_file=True)
    finished = False
    try:
        w.add_name(name)
        add_metadata(w, cfg, model_type)

        # BPE tokenizer for LM and text encoder
        if model_type in ("lm", "text-enc"):
            add_bpe_tokenizer(w, model_dir, tag)

        # Model weights
        n_tensors = 0
        n_bytes = 0
        for sf in sf_files:
            c, b = add_tensors_from_sf(w, sf, tag)
            n_tensors += c
            n_bytes += b
            if len(sf_files) > 1:
                log(tag, " %s: %d tensors" % (os.path.basename(sf), c))

        # silence_latent for DiT (read .pt, transpose, embed as f32 tensor)
        if model_type == "dit":
            sl = read_silence_latent(model_dir)
            if sl is not None:
                w.add_tensor("silence_latent", sl)
                n_tensors += 1
                n_bytes += sl.nbytes
                log(tag, " silence_latent: [%d, %d] f32 (%.1f MB)" % (
                    sl.shape[0], sl.shape[1], sl.nbytes / (1 << 20)))
            else:
                log(tag, " WARNING: no silence_latent.pt found")

        log(tag, " total: %d tensors, %.1f GB" % (n_tensors, n_bytes / (1 << 30)))

        w.write_header_to_file()
        w.write_kv_data_to_file()
        w.write_tensors_to_file(progress=True)
        finished = True
    finally:
        # Always release the writer; drop the output if it is incomplete.
        w.close()
        if not finished and os.path.exists(output_path):
            os.remove(output_path)

    out_mb = os.path.getsize(output_path) / (1 << 20)
    log(tag, " wrote %.0f MB -> %s" % (out_mb, output_path))
    return True
|
||||
|
||||
def main():
    """Convert every recognized checkpoint under CHECKPOINT_DIR to GGUF.

    Exits with status 1 when CHECKPOINT_DIR does not exist. Sub-directories
    are visited in sorted order; those classify() does not recognize are
    collected and reported. A checkpoint whose "<name>-BF16.gguf" already
    exists in OUTPUT_DIR is not converted again but still counts toward the
    final total.
    """
    if not os.path.isdir(CHECKPOINT_DIR):
        log("GGUF", "checkpoints/ not found, run checkpoints.sh first")
        sys.exit(1)

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    done = 0
    unknown = []
    for name in sorted(os.listdir(CHECKPOINT_DIR)):
        src_dir = os.path.join(CHECKPOINT_DIR, name)
        if not os.path.isdir(src_dir):
            continue

        kind = classify(name)
        if kind is None:
            unknown.append(name)
            continue

        target = os.path.join(OUTPUT_DIR, "%s-BF16.gguf" % name)
        if os.path.exists(target):
            log("GGUF", "skip %s: %s exists" % (name, os.path.basename(target)))
            done += 1
        elif convert_model(name, src_dir, target, kind):
            done += 1

    if unknown:
        log("GGUF", "skipped (unknown): %s" % ", ".join(unknown))
    log("GGUF", "done: %d model(s) in %s" % (done, OUTPUT_DIR))
|
||||
|
||||
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue