mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
examples : add model conversion tool/example (#15455)
* examples : add model conversion tool/example This commit adds an "example/tool" that is intended to help in the process of converting models to GGUF. Currently it supports normal causal models and embedding models. The readme contains instructions and command to guide through the process. The motivation for this to have a structured and repeatable process for model conversions and hopefully with time improve upon it to make the process easier and more reliable. We have started to use this for new model conversions internally and will continue doing so and improve it as we go along. Perhaps with time this should be placed in a different directory than the examples directory, but for now it seems like a good place to keep it while we are still developing it. * squash! examples : add model conversion tool/example Remove dependency on scikit-learn in model conversion example. * squash! examples : add model conversion tool/example Update transformer dep to use non-dev version. And also import `AutoModelForCausalLM` instead of `AutoModel` to ensure compatibility with the latest version. * squash! examples : add model conversion tool/example Remove the logits requirements file from the all requirements file.
This commit is contained in:
parent
b108e42904
commit
2758fa10da
33 changed files with 2230 additions and 0 deletions
42
examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh
Executable file
42
examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh
Executable file
|
@ -0,0 +1,42 @@
|
|||
#/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
MODEL_PATH="${1:-"$EMBEDDING_MODEL_PATH"}"
|
||||
MODEL_NAME="${2:-$(basename "$MODEL_PATH")}"
|
||||
|
||||
if [ -t 0 ]; then
|
||||
CPP_EMBEDDINGS="data/llamacpp-${MODEL_NAME}-embeddings.bin"
|
||||
else
|
||||
# Process piped JSON data and convert to binary (matching logits.cpp format)
|
||||
TEMP_FILE=$(mktemp /tmp/tmp.XXXXXX.binn)
|
||||
python3 -c "
|
||||
import json
|
||||
import sys
|
||||
import struct
|
||||
|
||||
data = json.load(sys.stdin)
|
||||
|
||||
# Flatten all embeddings completely
|
||||
flattened = []
|
||||
for item in data:
|
||||
embedding = item['embedding']
|
||||
for token_embedding in embedding:
|
||||
flattened.extend(token_embedding)
|
||||
|
||||
print(f'Total embedding values: {len(flattened)}', file=sys.stderr)
|
||||
|
||||
# Write as binary floats - matches logitc.cpp fwrite format
|
||||
with open('$TEMP_FILE', 'wb') as f:
|
||||
for value in flattened:
|
||||
f.write(struct.pack('f', value))
|
||||
"
|
||||
CPP_EMBEDDINGS="$TEMP_FILE"
|
||||
trap "rm -f $TEMP_FILE" EXIT
|
||||
fi
|
||||
|
||||
python scripts/utils/semantic_check.py --model-path $MODEL_PATH \
|
||||
--python-embeddings data/pytorch-${MODEL_NAME}-embeddings.bin \
|
||||
--cpp-embeddings $CPP_EMBEDDINGS \
|
||||
--prompt "Hello world today"
|
||||
|
22
examples/model-conversion/scripts/embedding/convert-model.sh
Executable file
22
examples/model-conversion/scripts/embedding/convert-model.sh
Executable file
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
MODEL_NAME="${MODEL_NAME:-$(basename "$EMBEDDING_MODEL_PATH")}"
|
||||
OUTPUT_DIR="${OUTPUT_DIR:-../../models}"
|
||||
TYPE="${OUTTYPE:-f16}"
|
||||
METADATA_OVERRIDE="${METADATA_OVERRIDE:-}"
|
||||
CONVERTED_MODEL="${OUTPUT_DIR}/${MODEL_NAME}.gguf"
|
||||
|
||||
echo "Model path: ${EMBEDDING_MODEL_PATH}"
|
||||
echo "Model name: ${MODEL_NAME}"
|
||||
echo "Data type: ${TYPE}"
|
||||
echo "Converted model path:: ${CONVERTED_MODEL}"
|
||||
python ../../convert_hf_to_gguf.py --verbose \
|
||||
${EMBEDDING_MODEL_PATH} \
|
||||
--outfile ${CONVERTED_MODEL} \
|
||||
--outtype ${TYPE}
|
||||
|
||||
echo ""
|
||||
echo "The environment variable CONVERTED_EMBEDDING MODEL can be set to this path using:"
|
||||
echo "export CONVERTED_EMBEDDING_MODEL=$(realpath ${CONVERTED_MODEL})"
|
20
examples/model-conversion/scripts/embedding/run-converted-model.sh
Executable file
20
examples/model-conversion/scripts/embedding/run-converted-model.sh
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# First try command line argument, then environment variable, then file
|
||||
CONVERTED_MODEL="${1:-"$CONVERTED_EMBEDDING_MODEL"}"
|
||||
|
||||
# Final check if we have a model path
|
||||
if [ -z "$CONVERTED_MODEL" ]; then
|
||||
echo "Error: Model path must be provided either as:" >&2
|
||||
echo " 1. Command line argument" >&2
|
||||
echo " 2. CONVERTED_EMBEDDING_MODEL environment variable" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo $CONVERTED_MODEL
|
||||
|
||||
cmake --build ../../build --target llama-logits -j8
|
||||
|
||||
../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "Hello world today"
|
116
examples/model-conversion/scripts/embedding/run-original-model.py
Executable file
116
examples/model-conversion/scripts/embedding/run-original-model.py
Executable file
|
@ -0,0 +1,116 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import numpy as np
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
from transformers import AutoTokenizer, AutoConfig, AutoModel
|
||||
import torch
|
||||
|
||||
unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')
|
||||
|
||||
parser = argparse.ArgumentParser(description='Process model with specified path')
|
||||
parser.add_argument('--model-path', '-m', help='Path to the model')
|
||||
args = parser.parse_args()
|
||||
|
||||
model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path)
|
||||
if model_path is None:
|
||||
parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable")
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
||||
|
||||
if unreleased_model_name:
|
||||
model_name_lower = unreleased_model_name.lower()
|
||||
unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
|
||||
class_name = f"{unreleased_model_name}Model"
|
||||
print(f"Importing unreleased model module: {unreleased_module_path}")
|
||||
|
||||
try:
|
||||
model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
|
||||
model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained
|
||||
except (ImportError, AttributeError) as e:
|
||||
print(f"Failed to import or load model: {e}")
|
||||
exit(1)
|
||||
else:
|
||||
model = AutoModel.from_pretrained(model_path)
|
||||
print(f"Model class: {type(model)}")
|
||||
#print(f"Model file: {type(model).__module__}")
|
||||
config = AutoConfig.from_pretrained(model_path)
|
||||
|
||||
model_name = os.path.basename(model_path)
|
||||
|
||||
texts = [ "Hello world today" ]
|
||||
|
||||
encoded = tokenizer(
|
||||
texts,
|
||||
padding=True,
|
||||
truncation=True,
|
||||
return_tensors="pt"
|
||||
)
|
||||
|
||||
tokens = encoded['input_ids'][0]
|
||||
token_strings = tokenizer.convert_ids_to_tokens(tokens)
|
||||
for i, (token_id, token_str) in enumerate(zip(tokens, token_strings)):
|
||||
print(f"{token_id:6d} -> '{token_str}'")
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model(**encoded)
|
||||
hidden_states = outputs.last_hidden_state # Shape: [batch_size, seq_len, hidden_size]
|
||||
|
||||
# Extract embeddings for each token (matching LLAMA_POOLING_TYPE_NONE behavior)
|
||||
all_embeddings = hidden_states[0].cpu().numpy() # Shape: [seq_len, hidden_size]
|
||||
|
||||
print(f"Hidden states shape: {hidden_states.shape}")
|
||||
print(f"All embeddings shape: {all_embeddings.shape}")
|
||||
print(f"Embedding dimension: {all_embeddings.shape[1]}")
|
||||
|
||||
# Print embeddings exactly like embedding.cpp does for LLAMA_POOLING_TYPE_NONE
|
||||
n_embd = all_embeddings.shape[1]
|
||||
n_embd_count = all_embeddings.shape[0]
|
||||
|
||||
print() # Empty line to match C++ output
|
||||
|
||||
for j in range(n_embd_count):
|
||||
embedding = all_embeddings[j]
|
||||
print(f"embedding {j}: ", end="")
|
||||
|
||||
# Print first 3 values
|
||||
for i in range(min(3, n_embd)):
|
||||
print(f"{embedding[i]:9.6f} ", end="")
|
||||
|
||||
print(" ... ", end="")
|
||||
|
||||
# Print last 3 values
|
||||
for i in range(n_embd - 3, n_embd):
|
||||
print(f"{embedding[i]:9.6f} ", end="")
|
||||
|
||||
print() # New line
|
||||
|
||||
print() # Final empty line to match C++ output
|
||||
|
||||
data_dir = Path("data")
|
||||
data_dir.mkdir(exist_ok=True)
|
||||
bin_filename = data_dir / f"pytorch-{model_name}-embeddings.bin"
|
||||
txt_filename = data_dir / f"pytorch-{model_name}-embeddings.txt"
|
||||
|
||||
# Save all embeddings flattened (matching what embedding.cpp would save if it did)
|
||||
flattened_embeddings = all_embeddings.flatten()
|
||||
flattened_embeddings.astype(np.float32).tofile(bin_filename)
|
||||
|
||||
with open(txt_filename, "w") as f:
|
||||
f.write(f"# Model class: {model_name}\n")
|
||||
f.write(f"# Tokens: {token_strings}\n")
|
||||
f.write(f"# Shape: {all_embeddings.shape}\n")
|
||||
f.write(f"# n_embd_count: {n_embd_count}, n_embd: {n_embd}\n\n")
|
||||
|
||||
for j in range(n_embd_count):
|
||||
f.write(f"# Token {j} ({token_strings[j]}):\n")
|
||||
for i, value in enumerate(all_embeddings[j]):
|
||||
f.write(f"{j}_{i}: {value:.6f}\n")
|
||||
f.write("\n")
|
||||
print(f"Total values: {len(flattened_embeddings)} ({n_embd_count} tokens × {n_embd} dimensions)")
|
||||
print("")
|
||||
print(f"Saved bin embeddings to: {bin_filename}")
|
||||
print(f"Saved txt embeddings to: {txt_filename}")
|
Loading…
Add table
Add a link
Reference in a new issue