arranged files, updated kobold lite, modified makefile for extra link args on linux, started RWKV implementation

2025-09-12 09:59:41 +00:00 · 2023-04-17 17:31:45 +08:00 · 2023-04-17 17:31:45 +08:00 · 763ad172c0
commit 763ad172c0
parent 9581171a9f
21 changed files with 13597 additions and 46 deletions
--- a/otherarch/tools/convert_hf_gpt2.py
+++ b/otherarch/tools/convert_hf_gpt2.py
@ -0,0 +1,183 @@
+# Convert Cerebras models to ggml format
+#
+# ref: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/
+#
+
+import sys
+import struct
+import json
+import torch
+import numpy as np
+import re
+
+from transformers import GPTJForCausalLM, AutoModelForCausalLM
+
+# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
+def bytes_to_unicode():
+    """
+    Returns list of utf-8 byte and a corresponding list of unicode strings.
+    The reversible bpe codes work on unicode strings.
+    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
+    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
+    This is a signficant percentage of your normal, say, 32K bpe vocab.
+    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+    And avoids mapping to whitespace/control characters the bpe code barfs on.
+    """
+    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
+    cs = bs[:]
+    n = 0
+    for b in range(2**8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2**8+n)
+            n += 1
+    cs = [chr(n) for n in cs]
+    return dict(zip(bs, cs))
+
+if len(sys.argv) < 2:
+    print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
+    sys.exit(1)
+
+# output in the same directory as the model
+dir_model = sys.argv[1]
+fname_out = sys.argv[1] + "/ggml-model-f16.bin"
+
+with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f:
+    encoder = json.load(f)
+
+with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
+    hparams = json.load(f)
+
+# use 16-bit or 32-bit floats
+use_f16 = True
+if len(sys.argv) > 2:
+    use_f16 = False
+    fname_out = sys.argv[1] + "/ggml-model-f32.bin"
+
+model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True)
+#print (model)
+
+list_vars = model.state_dict()
+#print (list_vars)
+
+print(hparams)
+
+fout = open(fname_out, "wb")
+
+fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
+fout.write(struct.pack("i", hparams["vocab_size"]))
+fout.write(struct.pack("i", hparams["n_positions"]))
+fout.write(struct.pack("i", hparams["n_embd"]))
+fout.write(struct.pack("i", hparams["n_head"]))
+fout.write(struct.pack("i", hparams["n_layer"]))
+fout.write(struct.pack("i", use_f16))
+
+byte_encoder = bytes_to_unicode()
+byte_decoder = {v:k for k, v in byte_encoder.items()}
+
+fout.write(struct.pack("i", len(encoder)))
+
+for key in encoder:
+    text = bytearray([byte_decoder[c] for c in key])
+    fout.write(struct.pack("i", len(text)))
+    fout.write(text)
+
+for name in list_vars.keys():
+    data = list_vars[name].squeeze().numpy()
+    print("Processing variable: " + name + " with shape: ", data.shape)
+
+    # rename headers to keep compatibility
+    if name == "transformer.ln_f.weight":
+        name = "model/ln_f/g"
+    elif name == "transformer.ln_f.bias":
+        name = "model/ln_f/b"
+    elif name == "transformer.wte.weight":
+        name = "model/wte"
+    elif name == "transformer.wpe.weight":
+        name = "model/wpe"
+    elif name == "lm_head.weight":
+        name = "model/lm_head"
+    elif re.match(r"transformer.h\.\d+\.ln_1\.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/ln_1/g"
+    elif re.match(r"transformer.h\.\d+\.ln_1\.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/ln_1/b"
+    elif re.match(r"transformer.h\.\d+\.attn\.c_attn\.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/attn/c_attn/w"
+    elif re.match(r"transformer.h\.\d+\.attn\.c_attn\.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/attn/c_attn/b"
+    elif re.match(r"transformer.h\.\d+\.attn\.c_proj\.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/attn/c_proj/w"
+    elif re.match(r"transformer.h.\d+.attn.c_proj.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/attn/c_proj/b"
+    elif re.match(r"transformer.h.\d+.ln_2.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/ln_2/g"
+    elif re.match(r"transformer.h.\d+.ln_2.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/ln_2/b"
+    elif re.match(r"transformer.h.\d+.mlp.c_fc.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/mlp/c_fc/w"
+    elif re.match(r"transformer.h.\d+.mlp.c_fc.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/mlp/c_fc/b"
+    elif re.match(r"transformer.h.\d+.mlp.c_proj.weight", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/mlp/c_proj/w"
+    elif re.match(r"transformer.h.\d+.mlp.c_proj.bias", name):
+        i = re.findall("\d+", name)[0]
+        name = f"model/h{i}/mlp/c_proj/b"
+    else:
+        print("Unrecognized variable name. %s", name)
+
+    # we don't need these
+    if name.endswith("attn.masked_bias") or name.endswith(".attn.bias"):
+        print("  Skipping variable: " + name)
+        continue
+
+    n_dims = len(data.shape);
+
+    # ftype == 0 -> float32, ftype == 1 -> float16
+    ftype = 0;
+    if use_f16:
+        if (name == "model/wte" or name == "model/lm_head" or name[-2:] == "/g" or name[-2:] == "/w") and n_dims == 2:
+            print("  Converting to float16")
+            data = data.astype(np.float16)
+            ftype = 1
+        else:
+            print("  Converting to float32")
+            data = data.astype(np.float32)
+            ftype = 0
+
+    # for efficiency - transpose the projection matrices
+    # "model/h.*/attn/c_attn/w"
+    # "model/h.*/attn/c_proj/w"
+    # "model/h.*/mlp/c_fc/w"
+    # "model/h.*/mlp/c_proj/w"
+    if name[-14:] == "/attn/c_attn/w" or \
+       name[-14:] == "/attn/c_proj/w" or \
+       name[-11:] == "/mlp/c_fc/w" or \
+       name[-13:] == "/mlp/c_proj/w":
+        print("  Transposing")
+        data = data.transpose()
+
+    # header
+    str = name.encode('utf-8')
+    fout.write(struct.pack("iii", n_dims, len(str), ftype))
+    for i in range(n_dims):
+        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
+    fout.write(str);
+
+    # data
+    data.tofile(fout)
+
+fout.close()
+
+print("Done. Output file: " + fname_out)
+print("")