ace converter

Concedo 2026-02-26 19:53:02 +08:00
parent ac8f12f259
commit adebf63877
2 changed files with 290 additions and 0 deletions


@@ -915,6 +915,9 @@ quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp g
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_clip: tools/mtmd/clip.cpp tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_ace: otherarch/acestep/quantize-acestep.cpp tools/mtmd/clip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
#window simple clinfo
simplecpuinfo: simplecpuinfo.cpp


@@ -0,0 +1,287 @@
#!/usr/bin/env python3
# convert.py: safetensors to GGUF for ACE-Step (LM, DiT, TextEncoder, VAE)
# Reads from checkpoints/, writes GGUF to models/
# Each GGUF is self-contained: weights + config + tokenizer + silence_latent
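# Usage: run "python3 convert.py" from this directory once checkpoints.sh
# has populated checkpoints/; existing GGUFs in models/ are left untouched.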
import os
import sys
import json
import struct
import zipfile
import numpy as np
import gguf
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
CHECKPOINT_DIR = os.path.join(SCRIPT_DIR, "checkpoints")
OUTPUT_DIR = os.path.join(SCRIPT_DIR, "models")
BF16 = gguf.GGMLQuantizationType.BF16
def log(tag, msg):
    print("[%s] %s" % (tag, msg), file=sys.stderr, flush=True)
# Safetensors reader
def read_sf_header(path):
    """Parse the JSON header of a .safetensors file.
    Returns (tensor metadata dict, byte offset where tensor data begins)."""
    with open(path, "rb") as f:
        n = struct.unpack("<Q", f.read(8))[0]
        meta = json.loads(f.read(n))
    meta.pop("__metadata__", None)
    return meta, 8 + n
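# For reference, the safetensors layout parsed above (the tensor name and
# shape in the example are illustrative, not from a real checkpoint):
#   bytes 0..8    little-endian uint64 N = length of the JSON header
#   bytes 8..8+N  JSON, e.g. {"model.embed_tokens.weight":
#                   {"dtype": "BF16", "shape": [151936, 1024],
#                    "data_offsets": [0, 311164928]}}
#   bytes 8+N..   raw tensor data, addressed by the per-tensor data_offsets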
def find_sf_files(model_dir):
    """Return list of safetensors paths (single, sharded, or diffusers VAE)."""
    single = os.path.join(model_dir, "model.safetensors")
    if os.path.exists(single):
        return [single]
    index = os.path.join(model_dir, "model.safetensors.index.json")
    if os.path.exists(index):
        with open(index, "r", encoding="utf-8") as f:
            idx = json.load(f)
        shards = sorted(set(idx["weight_map"].values()))
        return [os.path.join(model_dir, s) for s in shards]
    diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors")
    if os.path.exists(diffusers):
        return [diffusers]
    return []
# Model classification
ARCHS = {
    "lm": "acestep-lm",
    "dit": "acestep-dit",
    "text-enc": "acestep-text-enc",
    "vae": "acestep-vae",
}
def classify(name):
    if name.startswith("acestep-5Hz-lm"):
        return "lm"
    if name.startswith("acestep-v15"):
        return "dit"
    if name.startswith("Qwen3-Embedding"):
        return "text-enc"
    if name == "vae":
        return "vae"
    return None
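# Only checkpoint directories matching these prefixes are converted; anything
# else is reported as skipped. Example layout (directory names illustrative):
#   checkpoints/acestep-5Hz-lm-0.6B/   -> lm
#   checkpoints/acestep-v15-turbo/     -> dit
#   checkpoints/Qwen3-Embedding-0.6B/  -> text-enc
#   checkpoints/vae/                   -> vae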
# GGUF metadata from config.json
def add_metadata(w, cfg, model_type):
    if "num_hidden_layers" in cfg:
        w.add_block_count(cfg["num_hidden_layers"])
    if "hidden_size" in cfg:
        w.add_embedding_length(cfg["hidden_size"])
    if "intermediate_size" in cfg:
        w.add_feed_forward_length(cfg["intermediate_size"])
    if "num_attention_heads" in cfg:
        w.add_head_count(cfg["num_attention_heads"])
    if "num_key_value_heads" in cfg:
        w.add_head_count_kv(cfg["num_key_value_heads"])
    if "head_dim" in cfg:
        w.add_key_length(cfg["head_dim"])
    if "vocab_size" in cfg:
        w.add_vocab_size(cfg["vocab_size"])
    if "max_position_embeddings" in cfg:
        w.add_context_length(cfg["max_position_embeddings"])
    if "rms_norm_eps" in cfg:
        w.add_layer_norm_rms_eps(cfg["rms_norm_eps"])
    rope = cfg.get("rope_theta")
    if rope:
        w.add_rope_freq_base(float(rope))
    if model_type == "lm":
        if cfg.get("tie_word_embeddings"):
            w.add_bool("acestep.tie_word_embeddings", True)
    if model_type == "dit":
        for key in [
            "in_channels", "audio_acoustic_hidden_dim", "patch_size",
            "sliding_window", "fsq_dim", "text_hidden_dim", "timbre_hidden_dim",
            "num_lyric_encoder_hidden_layers", "num_timbre_encoder_hidden_layers",
            "num_audio_decoder_hidden_layers", "num_attention_pooler_hidden_layers",
        ]:
            if key in cfg:
                w.add_uint32("acestep.%s" % key, cfg[key])
        if cfg.get("is_turbo"):
            w.add_bool("acestep.is_turbo", True)
        levels = cfg.get("fsq_input_levels")
        if levels:
            w.add_array("acestep.fsq_input_levels", levels)
    w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":")))
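# (The gguf-py helpers above write the standard GGUF keys, e.g.
#  add_block_count -> "<arch>.block_count" and add_head_count ->
#  "<arch>.attention.head_count"; on top of that, the full config.json is
#  kept verbatim under acestep.config_json so the GGUF stays self-contained.)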
# Tensor packing from safetensors
def add_tensors_from_sf(w, sf_path, tag):
    meta, hdr_size = read_sf_header(sf_path)
    names = sorted(meta.keys())
    count = 0
    total = 0
    with open(sf_path, "rb") as f:
        for name in names:
            info = meta[name]
            dtype_str = info["dtype"]
            shape = info["shape"]
            off0, off1 = info["data_offsets"]
            nbytes = off1 - off0
            f.seek(hdr_size + off0)
            raw = f.read(nbytes)
            # normalize tensor names to the "model." prefix
            tname = name if name.startswith("model.") else f"model.{name}"
            if dtype_str == "BF16":
                # numpy has no bf16 dtype: pass raw u16 words with raw_dtype=BF16
                arr = np.frombuffer(raw, dtype=np.uint16).reshape(shape)
                w.add_tensor(tname, arr, raw_dtype=BF16)
            elif dtype_str == "F16":
                arr = np.frombuffer(raw, dtype=np.float16).reshape(shape)
                w.add_tensor(tname, arr)
            elif dtype_str == "F32":
                arr = np.frombuffer(raw, dtype=np.float32).reshape(shape)
                w.add_tensor(tname, arr)
            else:
                log(tag, " skip %s: dtype %s" % (name, dtype_str))
                continue
            count += 1
            total += nbytes
    return count, total
# silence_latent.pt reader (replaces pt2bin C++ tool)
# PyTorch .pt is a ZIP with entry "*/data/0" containing f32 [64, 15000]
# We transpose to [15000, 64] (ggml layout: 64 contiguous per frame)
def read_silence_latent(model_dir):
    pt_path = os.path.join(model_dir, "silence_latent.pt")
    if not os.path.exists(pt_path):
        return None
    with zipfile.ZipFile(pt_path) as z:
        for entry in z.namelist():
            if entry.endswith("/data/0"):
                raw = z.read(entry)
                src = np.frombuffer(raw, dtype=np.float32).reshape(64, 15000)
                return src.T.copy()
    return None
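# Roughly equivalent (for cross-checking only; torch is deliberately not a
# dependency of this converter):
#   torch.load("silence_latent.pt", map_location="cpu").numpy().T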
# BPE tokenizer embedding (vocab.json + merges.txt -> GGUF KV)
def add_bpe_tokenizer(w, model_dir, tag):
    vocab_path = os.path.join(model_dir, "vocab.json")
    merges_path = os.path.join(model_dir, "merges.txt")
    if not os.path.exists(vocab_path) or not os.path.exists(merges_path):
        return False
    with open(vocab_path, "r", encoding="utf-8") as f:
        vocab = json.load(f)
    tokens = [""] * len(vocab)
    for tok_str, tok_id in vocab.items():
        if 0 <= tok_id < len(tokens):
            tokens[tok_id] = tok_str
    with open(merges_path, "r", encoding="utf-8") as f:
        merges = []
        for line in f:
            line = line.rstrip("\n\r")
            if not line:
                continue
            if line.startswith("#version:"):
                continue
            merges.append(line)
    w.add_tokenizer_model("gpt2")
    w.add_token_list(tokens)
    w.add_token_merges(merges)
    log(tag, " tokenizer: %d vocab, %d merges" % (len(tokens), len(merges)))
    return True
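# merges.txt holds one space-separated pair per line in priority order
# (e.g. "Ġ t"); each pair is stored verbatim under tokenizer.ggml.merges,
# and the "gpt2" tokenizer model selects byte-level BPE in the loader.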
# Main conversion
def convert_model(name, model_dir, output_path, model_type):
    tag = "GGUF"
    cfg_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(cfg_path):
        log(tag, "skip %s: no config.json" % name)
        return False
    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    sf_files = find_sf_files(model_dir)
    if not sf_files:
        log(tag, "skip %s: no safetensors" % name)
        return False
    arch = ARCHS[model_type]
    log(tag, "%s (%s, %d shard%s) -> %s" % (
        name, arch, len(sf_files), "" if len(sf_files) == 1 else "s",
        os.path.basename(output_path)))
    w = gguf.GGUFWriter(output_path, arch, use_temp_file=True)
    w.add_name(name)
    add_metadata(w, cfg, model_type)
    # BPE tokenizer for LM and text encoder
    if model_type in ("lm", "text-enc"):
        add_bpe_tokenizer(w, model_dir, tag)
    # Model weights
    n_tensors = 0
    n_bytes = 0
    for sf in sf_files:
        c, b = add_tensors_from_sf(w, sf, tag)
        n_tensors += c
        n_bytes += b
        if len(sf_files) > 1:
            log(tag, " %s: %d tensors" % (os.path.basename(sf), c))
    # silence_latent for DiT (read .pt, transpose, embed as f32 tensor)
    if model_type == "dit":
        sl = read_silence_latent(model_dir)
        if sl is not None:
            w.add_tensor("silence_latent", sl)
            n_tensors += 1
            n_bytes += sl.nbytes
            log(tag, " silence_latent: [%d, %d] f32 (%.1f MB)" % (
                sl.shape[0], sl.shape[1], sl.nbytes / (1 << 20)))
        else:
            log(tag, " WARNING: no silence_latent.pt found")
    log(tag, " total: %d tensors, %.1f GB" % (n_tensors, n_bytes / (1 << 30)))
    w.write_header_to_file()
    w.write_kv_data_to_file()
    w.write_tensors_to_file(progress=True)
    w.close()
    out_mb = os.path.getsize(output_path) / (1 << 20)
    log(tag, " wrote %.0f MB -> %s" % (out_mb, output_path))
    return True
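# Expected console output for one model, per the log calls above (model name
# and sizes illustrative):
#   [GGUF] acestep-v15-turbo (acestep-dit, 1 shard) -> acestep-v15-turbo-BF16.gguf
#   [GGUF]  silence_latent: [15000, 64] f32 (3.7 MB)
#   [GGUF]  total: 1234 tensors, 3.2 GB
#   [GGUF]  wrote 3276 MB -> .../models/acestep-v15-turbo-BF16.gguf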
def main():
    if not os.path.isdir(CHECKPOINT_DIR):
        log("GGUF", "checkpoints/ not found, run checkpoints.sh first")
        sys.exit(1)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    entries = sorted(os.listdir(CHECKPOINT_DIR))
    converted = 0
    skipped = []
    for name in entries:
        model_dir = os.path.join(CHECKPOINT_DIR, name)
        if not os.path.isdir(model_dir):
            continue
        model_type = classify(name)
        if model_type is None:
            skipped.append(name)
            continue
        output_path = os.path.join(OUTPUT_DIR, "%s-BF16.gguf" % name)
        if os.path.exists(output_path):
            log("GGUF", "skip %s: %s exists" % (name, os.path.basename(output_path)))
            converted += 1
            continue
        if convert_model(name, model_dir, output_path, model_type):
            converted += 1
    if skipped:
        log("GGUF", "skipped (unknown): %s" % ", ".join(skipped))
    log("GGUF", "done: %d model(s) in %s" % (converted, OUTPUT_DIR))
if __name__ == "__main__":
    main()