cleanup unwanted stuff

2026-05-10 04:00:53 +00:00 · 2025-08-23 11:41:10 +08:00 · 2025-08-23 11:41:10 +08:00 · eca39d9823
commit eca39d9823
parent 8b8396c30c
80 changed files with 0 additions and 33791 deletions
--- a/examples/model-conversion/scripts/utils/check-nmse.py
+++ b/examples/model-conversion/scripts/utils/check-nmse.py
@ -1,174 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np
-import sys
-import os
-import argparse
-from pathlib import Path
-
-def calculate_nmse(reference, test):
-    """Compute the normalized mean squared error of ``test`` against ``reference``.
-
-    NMSE is the mean squared error divided by the variance of the reference.
-
-    Args:
-        reference: Array of reference values.
-        test: Array of values to evaluate against ``reference``.
-
-    Returns:
-        Tuple ``(nmse, mse, ref_var)``. When the reference has zero variance
-        the ratio is undefined, so ``nmse`` is ``0.0`` for an exact match and
-        ``inf`` otherwise.
-    """
-    mse = np.mean((test - reference) ** 2)
-    ref_var = np.var(reference)
-    if ref_var == 0:
-        # Constant reference: avoid dividing by zero and report the sentinel
-        # NMSE (not the raw MSE) in the first slot of the result.
-        nmse = float('inf') if mse > 0 else 0.0
-        return nmse, mse, ref_var
-
-    return mse / ref_var, mse, ref_var
-
-def load_logits(file_path):
-    """Load logits from ``file_path`` as a NumPy array.
-
-    The format is chosen from the file suffix:
-
-    * ``.npy``: loaded with ``np.load``.
-    * ``.bin``: read as raw float32 values with ``np.fromfile``.
-    * anything else: parsed as text with one value per line, either a bare
-      number or ``index: value``. Blank lines are ignored. If a line cannot
-      be parsed this way, the file is handed to ``np.loadtxt`` instead.
-
-    Args:
-        file_path: Path to the file, as a ``str`` or ``pathlib.Path``.
-
-    Returns:
-        The loaded array (float32 for the ``.bin`` and text formats).
-
-    Raises:
-        FileNotFoundError: If ``file_path`` does not exist.
-    """
-    # Accept plain strings as well as Path objects; ``.suffix`` needs a Path.
-    file_path = Path(file_path)
-    if not file_path.exists():
-        raise FileNotFoundError(f"File not found: {file_path}")
-
-    if file_path.suffix == '.npy':
-        return np.load(file_path)
-    if file_path.suffix == '.bin':
-        return np.fromfile(file_path, dtype=np.float32)
-
-    try:
-        data = []
-        with open(file_path, 'r') as f:
-            for line in f:
-                line = line.strip()
-                if not line:
-                    # A trailing or separating blank line is not data.
-                    continue
-                # "index: value" lines carry the value after the first colon.
-                value_text = line.split(':')[1] if ':' in line else line
-                data.append(float(value_text))
-        return np.array(data, dtype=np.float32)
-    except ValueError:
-        # Not one-number-per-line; let NumPy's text parser try.
-        return np.loadtxt(file_path, dtype=np.float32)
-
-def interpret_nmse(nmse):
-    """Return a ``(description, emoji)`` pair describing an NMSE value."""
-    if nmse == 0:
-        return "Perfect match", "🎉"
-
-    # Exclusive upper bounds, checked in ascending order; the first band
-    # whose bound exceeds the value wins.
-    bands = (
-        (1e-6, "Essentially identical", "✅"),
-        (1e-4, "Excellent match", "✅"),
-        (1e-3, "Very good match", "👍"),
-        (1e-2, "Good match", "👍"),
-        (0.1, "Acceptable match", "⚠️"),
-        (1.0, "Poor match", "❌"),
-    )
-    for upper_bound, description, emoji in bands:
-        if nmse < upper_bound:
-            return description, emoji
-
-    return "Very poor match (worse than noise)", "❌"
-
-def main():
-    """Compare PyTorch and llama.cpp logits for one model and report NMSE.
-
-    Reads ``data/pytorch-<model>.bin`` as the reference and
-    ``data/llamacpp-<model>.bin`` as the test data, where ``<model>`` is the
-    base name of ``--model-path`` with its extension removed. Prints the
-    metrics, an interpretation and guidance, then exits with status 0 when
-    NMSE is below 1e-2 and status 1 otherwise (including on a shape mismatch
-    or any error raised while loading or comparing).
-    """
-    parser = argparse.ArgumentParser(description='Validate model logits')
-    parser.add_argument('-m', '--model-path', required=True,  help='Path to the model directory')
-    args = parser.parse_args()
-
-    # Only the base name of the model path is used, to locate the logits files.
-    model_name = os.path.splitext(os.path.basename(args.model_path))[0]
-    data_dir = Path("data")
-
-    pytorch_file = data_dir / f"pytorch-{model_name}.bin"
-    llamacpp_file = data_dir / f"llamacpp-{model_name}.bin"
-
-    print(f"Model name: {model_name}")
-    print(f"PyTorch logits file: {pytorch_file}")
-    print(f"llama.cpp logits file: {llamacpp_file}")
-
-    # The PyTorch output is the ground truth; llama.cpp is what gets evaluated.
-    reference_file = pytorch_file
-    test_file = llamacpp_file
-
-    print("📊 NMSE Check for Model Comparison")
-    print("=" * 50)
-    print(f"Reference (ground truth): {reference_file}")
-    print(f"Test (to evaluate):       {test_file}")
-    print()
-
-    try:
-        print("Loading reference logits...")
-        reference = load_logits(reference_file)
-        print(f"  Shape: {reference.shape}, Type: {reference.dtype}")
-
-        print("Loading test logits...")
-        test = load_logits(test_file)
-        print(f"  Shape: {test.shape}, Type: {test.dtype}")
-
-        # Check shapes match
-        if reference.shape != test.shape:
-            print(f"\n❌ Error: Shape mismatch!")
-            print(f"  Reference: {reference.shape}")
-            print(f"  Test: {test.shape}")
-            sys.exit(1)
-
-        print(f"\n✅ Shapes match: {reference.shape}")
-
-        nmse, mse, ref_var = calculate_nmse(reference, test)
-
-        # Additional metrics
-        max_abs_error = np.max(np.abs(test - reference))
-        mean_abs_error = np.mean(np.abs(test - reference))
-
-        # Results
-        print(f"\n📈 METRICS")
-        print("=" * 30)
-        print(f"MSE (Mean Squared Error):     {mse:.6e}")
-        print(f"Reference Variance:           {ref_var:.6e}")
-        print(f"NMSE:                         {nmse:.6e}")
-        print(f"Max Absolute Error:           {max_abs_error:.6f}")
-        print(f"Mean Absolute Error:          {mean_abs_error:.6f}")
-
-        # NMSE in dB (common in signal processing)
-        # Skipped for nmse == 0, where the logarithm is undefined.
-        if nmse > 0:
-            nmse_db = 10 * np.log10(nmse)
-            print(f"NMSE (dB):                    {nmse_db:.2f} dB")
-
-        # Interpretation
-        interpretation, emoji = interpret_nmse(nmse)
-        print(f"\n🎯 INTERPRETATION")
-        print("=" * 30)
-        print(f"{emoji} {interpretation}")
-
-        # Detailed guidance
-        print(f"\n📋 GUIDANCE")
-        print("=" * 30)
-        if nmse < 1e-3:
-            print("✅ EXCELLENT: Your GGML conversion is working very well!")
-            print("   The differences are negligible for practical use.")
-        elif nmse < 1e-2:
-            print("👍 GOOD: Your GGML conversion is working well.")
-            print("   Small differences are likely due to precision/quantization.")
-        elif nmse < 0.1:
-            print("⚠️  ACCEPTABLE: Conversion is working but with some differences.")
-            print("   Check if you're using quantization (Q4, Q8, etc.)")
-            print("   Test generation quality to see if it's acceptable.")
-        else:
-            print("❌ PROBLEMATIC: Large differences detected.")
-            print("   Check your conversion process for potential issues.")
-            print("   Verify you're using the same model weights.")
-
-        # NMSE benchmarks
-        print(f"\n📚 NMSE BENCHMARKS")
-        print("=" * 30)
-        print("< 1e-6:  Essentially identical")
-        print("< 1e-4:  Excellent (typical for good conversions)")
-        print("< 1e-3:  Very good")
-        print("< 1e-2:  Good (acceptable for most use cases)")
-        print("< 0.1:   Acceptable (may need verification)")
-        print("> 1.0:   Poor (worse than random)")
-
-        # Exit code based on NMSE
-        if nmse < 1e-2:
-            print(f"\n✅ RESULT: PASS (NMSE = {nmse:.2e})")
-            sys.exit(0)
-        else:
-            print(f"\n❌ RESULT: NEEDS REVIEW (NMSE = {nmse:.2e})")
-            sys.exit(1)
-
-    # sys.exit() raises SystemExit, which is not an Exception subclass, so the
-    # explicit exits above are not intercepted by this handler.
-    except Exception as e:
-        print(f"❌ Error: {e}")
-        sys.exit(1)
-
-if __name__ == "__main__":
-    main()
--- a/examples/model-conversion/scripts/utils/create-collection-add-model.sh
+++ b/examples/model-conversion/scripts/utils/create-collection-add-model.sh
@ -1,6 +0,0 @@
-
-# Create a Hugging Face collection and then add a model to it, handing the
-# slug printed by the first command to the second one.
-# NOTE(review): the commands below call ./create_collection.py and
-# add_model_to_collection.py with a positional slug/model; confirm these file
-# names and arguments match the scripts that actually live in this directory.
-COLLECTION_SLUG=$(python ./create_collection.py --return-slug)
-echo "Created collection: $COLLECTION_SLUG"
-
-# Use it in the next command
-python add_model_to_collection.py "$COLLECTION_SLUG" "username/my-model"
--- a/examples/model-conversion/scripts/utils/hf-add-model-to-collection.py
+++ b/examples/model-conversion/scripts/utils/hf-add-model-to-collection.py
@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-
-from huggingface_hub import HfApi
-import argparse
-import sys
-
-def add_model_to_collection(collection_slug, model_id, note=""):
-    """
-    Attach a model repository to an existing Hugging Face collection.
-
-    The model is looked up first, so a missing or inaccessible repository is
-    reported before any change to the collection is attempted.
-
-    Args:
-        collection_slug: Slug of the target collection (e.g., "username/collection-name-12345")
-        model_id: Repository ID of the model (e.g., "username/model-name")
-        note: Optional note stored with the collection item
-
-    Returns:
-        True when the model was added, False on any failure
-    """
-    hub = HfApi()
-
-    try:
-        account = hub.whoami()
-        print(f"✅ Authenticated as: {account['name']}")
-
-        print(f"🔍 Checking if model exists: {model_id}")
-        try:
-            # Only the success of the lookup matters; the result is not used.
-            hub.model_info(model_id)
-        except Exception as lookup_error:
-            print(f"❌ Model not found or not accessible: {model_id}")
-            print(f"Error: {lookup_error}")
-            return False
-
-        print("📚 Adding model to collection...")
-        hub.add_collection_item(
-            collection_slug=collection_slug,
-            item_id=model_id,
-            item_type="model",
-            note=note
-        )
-
-        print("✅ Model added to collection successfully!")
-        print(f"🔗 Collection URL: https://huggingface.co/collections/{collection_slug}")
-    except Exception as error:
-        print(f"❌ Error adding model to collection: {error}")
-        return False
-
-    return True
-
-def main():
-    """Parse the command line and add one model to a collection.
-
-    Exits with status 1 when the model could not be added.
-    """
-    # This script requires that the environment variable HF_TOKEN is set with your
-    # Hugging Face API token.
-    parser = argparse.ArgumentParser(description='Add model to a Huggingface Collection')
-    parser.add_argument('--collection', '-c', help='The collection slug username/collection-hash', required=True)
-    parser.add_argument('--model', '-m', help='The model to add to the Collection', required=True)
-    parser.add_argument('--note', '-n', help='An optional note/description', required=False)
-    args = parser.parse_args()
-
-    success = add_model_to_collection(
-        collection_slug=args.collection,
-        model_id=args.model,
-        note=args.note
-    )
-
-    if success:
-        print("\n🎉 Model added successfully!")
-    else:
-        print("\n❌ Failed to add model to collection")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
--- a/examples/model-conversion/scripts/utils/hf-create-collection.py
+++ b/examples/model-conversion/scripts/utils/hf-create-collection.py
@ -1,106 +0,0 @@
-#!/usr/bin/env python3
-
-from huggingface_hub import HfApi
-import argparse
-import os
-import sys
-
-
-def create_collection(title, description, private=False, namespace=None, return_slug=False):
-    """
-    Create a new collection on Hugging Face
-
-    Args:
-        title: Collection title
-        description: Collection description
-        private: Whether the collection should be private (default: False)
-        namespace: Optional namespace (defaults to your username)
-        return_slug: When True, suppress the progress output so that a caller
-            can print nothing but the slug on stdout (default: False)
-
-    Returns:
-        Collection object if successful, None if failed
-    """
-
-    # Error messages go to stderr so they never end up in the captured slug
-    # when the script is used as $(... --return-slug).
-    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
-    if not token:
-        print("❌ No HF_TOKEN or HUGGINGFACE_HUB_TOKEN found in environment variables", file=sys.stderr)
-        print("Please set your Hugging Face token as an environment variable", file=sys.stderr)
-        return None
-
-    # Initialize API
-    api = HfApi()
-
-    try:
-        # Test authentication first
-        user_info = api.whoami()
-        if not return_slug:
-            print(f"✅ Authenticated as: {user_info['name']}")
-            print(f"📚 Creating collection: '{title}'...")
-
-        collection = api.create_collection(
-            title=title,
-            description=description,
-            private=private,
-            namespace=namespace
-        )
-
-        if not return_slug:
-            print("✅ Collection created successfully!")
-            print(f"📋 Collection slug: {collection.slug}")
-            print(f"🔗 Collection URL: https://huggingface.co/collections/{collection.slug}")
-
-        return collection
-
-    except Exception as e:
-        print(f"❌ Error creating collection: {e}", file=sys.stderr)
-        return None
-
-def main():
-    """Parse the command line and create a Hugging Face collection.
-
-    With ``--return-slug`` only the slug of the new collection is written to
-    stdout. Exits with status 1 when the collection could not be created.
-    """
-    # This script requires that the environment variable HF_TOKEN is set with your
-    # Hugging Face API token.
-    parser = argparse.ArgumentParser(description='Create a Huggingface Collection')
-    parser.add_argument('--name', '-n', help='The name/title of the Collection', required=True)
-    parser.add_argument('--description', '-d', help='The description for the Collection', required=True)
-    parser.add_argument('--namespace', '-ns', help='The namespace to add the Collection to', required=True)
-    parser.add_argument('--private', '-p', help='Create a private Collection', action='store_true')
-    parser.add_argument('--return-slug', '-s', help='Only output the collection slug', action='store_true')
-
-    args = parser.parse_args()
-
-    name = args.name
-    description = args.description
-    private = args.private
-    namespace = args.namespace
-    return_slug = args.return_slug
-
-    if not return_slug:
-        print("🚀 Creating Hugging Face Collection")
-        print(f"Title: {name}")
-        print(f"Description: {description}")
-        print(f"Namespace: {namespace}")
-        print(f"Private: {private}")
-
-    collection = create_collection(
-        title=name,
-        description=description,
-        private=private,
-        namespace=namespace,
-        return_slug=return_slug
-    )
-
-    if collection:
-        if return_slug:
-            print(collection.slug)
-        else:
-            print("\n🎉 Collection created successfully!")
-            print(f"Use this slug to add models: {collection.slug}")
-    else:
-        # stderr keeps stdout empty on failure, so a shell capturing the slug
-        # does not receive this message instead.
-        print("\n❌ Failed to create collection", file=sys.stderr)
-        sys.exit(1)
-
-if __name__ == "__main__":
-    main()
--- a/examples/model-conversion/scripts/utils/hf-create-model.py
+++ b/examples/model-conversion/scripts/utils/hf-create-model.py
@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-
-from huggingface_hub import HfApi
-import argparse
-
-# This script requires that the environment variable HF_TOKEN is set with your
-# Hugging Face API token.
-api = HfApi()
-
-def load_template_and_substitute(template_path, **kwargs):
-    """Read a UTF-8 template file and fill its ``{name}`` fields from ``kwargs``.
-
-    Returns the rendered text, or None (after printing a message) when the
-    file does not exist or the template names a field missing from ``kwargs``.
-    """
-    try:
-        with open(template_path, 'r', encoding='utf-8') as template_file:
-            raw_template = template_file.read()
-    except FileNotFoundError:
-        print(f"Template file '{template_path}' not found!")
-        return None
-
-    try:
-        rendered = raw_template.format(**kwargs)
-    except KeyError as missing_field:
-        print(f"Missing template variable: {missing_field}")
-        return None
-
-    return rendered
-
-# Command line: the model name and namespace are mandatory and together form
-# the repository ID below.
-parser = argparse.ArgumentParser(description='Create a new Hugging Face model repository')
-parser.add_argument('--model-name', '-m', help='Name for the model', required=True)
-parser.add_argument('--namespace', '-ns', help='Namespace to add the model to', required=True)
-parser.add_argument('--org-base-model', '-b', help='Original Base model name', default="")
-parser.add_argument('--no-card', action='store_true', help='Skip creating model card')
-parser.add_argument('--private', '-p', action='store_true', help='Create private model')
-
-args = parser.parse_args()
-
-# The repository is always named "<namespace>/<model-name>-GGUF".
-repo_id = f"{args.namespace}/{args.model_name}-GGUF"
-print("Repository ID: ", repo_id)
-
-# exist_ok=False is passed explicitly.
-# NOTE(review): presumably this makes create_repo fail for an existing
-# repository; confirm against the huggingface_hub documentation.
-repo_url = api.create_repo(
-    repo_id=repo_id,
-    repo_type="model",
-    private=args.private,
-    exist_ok=False
-)
-
-if not args.no_card:
-    # The template path is relative to the current working directory.
-    template_path = "scripts/readme.md.template"
-    model_card_content = load_template_and_substitute(
-        template_path,
-        model_name=args.model_name,
-        namespace=args.namespace,
-        base_model=args.org_base_model,
-    )
-
-    # A missing template or template variable yields None; the repository is
-    # still reported as created below, just without a README.
-    if model_card_content:
-        api.upload_file(
-            path_or_fileobj=model_card_content.encode('utf-8'),
-            path_in_repo="README.md",
-            repo_id=repo_id
-        )
-        print("Model card created successfully.")
-    else:
-        print("Failed to create model card.")
-
-print(f"Repository created: {repo_url}")
-
-
--- a/examples/model-conversion/scripts/utils/hf-upload-gguf-model.py
+++ b/examples/model-conversion/scripts/utils/hf-upload-gguf-model.py
@ -1,58 +0,0 @@
-#!/usr/bin/env python3
-
-from huggingface_hub import HfApi
-import argparse
-import os
-
-def upload_gguf_file(local_file_path, repo_id, filename_in_repo=None):
-    """
-    Upload a GGUF file to a Hugging Face model repository
-
-    Args:
-        local_file_path: Path to your local GGUF file
-        repo_id: Your repository ID (e.g., "username/model-name")
-        filename_in_repo: Optional custom name for the file in the repo;
-            None or "" means "use the local file's base name"
-
-    Returns:
-        True if the upload succeeded, False if the local file is missing or
-        the upload raised an error
-    """
-
-    if not os.path.exists(local_file_path):
-        print(f"❌ File not found: {local_file_path}")
-        return False
-
-    # One check covers both None and the empty string.
-    if not filename_in_repo:
-        filename_in_repo = os.path.basename(local_file_path)
-
-    print(f"📤 Uploading {local_file_path} to {repo_id}/{filename_in_repo}")
-
-    api = HfApi()
-
-    try:
-        api.upload_file(
-            path_or_fileobj=local_file_path,
-            path_in_repo=filename_in_repo,
-            repo_id=repo_id,
-            repo_type="model",
-            commit_message=f"Upload {filename_in_repo}"
-        )
-
-        print("✅ Upload successful!")
-        print(f"🔗 File available at: https://huggingface.co/{repo_id}/blob/main/{filename_in_repo}")
-        return True
-
-    except Exception as e:
-        print(f"❌ Upload failed: {e}")
-        return False
-
-# This script requires that the environment variable HF_TOKEN is set with your
-# Hugging Face API token.
-parser = argparse.ArgumentParser(description='Upload a GGUF model to a Huggingface model repository')
-parser.add_argument('--gguf-model-path', '-m', help='The GGUF model file to upload', required=True)
-parser.add_argument('--repo-id', '-r', help='The repository to upload to', required=True)
-parser.add_argument('--name', '-o', help='The name in the model repository', required=False)
-args = parser.parse_args()
-
-# Exit non-zero when the upload fails so that callers can detect it.
-if not upload_gguf_file(args.gguf_model_path, args.repo_id, args.name):
-    raise SystemExit(1)
--- a/examples/model-conversion/scripts/utils/inspect-converted-model.sh
+++ b/examples/model-conversion/scripts/utils/inspect-converted-model.sh
@ -1,14 +0,0 @@
-#!/bin/bash
-
-# Dump the metadata of a converted GGUF model with gguf_dump.py.
-# The model path is taken from the first command line argument, falling back
-# to the CONVERTED_MODEL environment variable.
-CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
-
-# Final check if we have a model path
-if [ -z "$CONVERTED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. CONVERTED_MODEL environment variable" >&2
-    exit 1
-fi
-
-# Quoted so that paths containing spaces or glob characters stay one argument.
-../../gguf-py/gguf/scripts/gguf_dump.py "$CONVERTED_MODEL"
--- a/examples/model-conversion/scripts/utils/inspect-org-model.py
+++ b/examples/model-conversion/scripts/utils/inspect-org-model.py
@ -1,67 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import os
-import json
-from safetensors import safe_open
-from collections import defaultdict
-
-# List every tensor (name, shape, dtype) stored in a safetensors model
-# directory, handling both sharded and single-file checkpoints.
-parser = argparse.ArgumentParser(description='Process model with specified path')
-parser.add_argument('--model-path', '-m', help='Path to the model')
-args = parser.parse_args()
-
-# An explicit --model-path wins; MODEL_PATH is only the fallback.
-model_path = args.model_path or os.environ.get('MODEL_PATH')
-if not model_path:
-    parser.error("Model path must be specified either via --model-path argument or MODEL_PATH environment variable")
-
-# Check if there's an index file (multi-file model)
-index_path = os.path.join(model_path, "model.safetensors.index.json")
-single_file_path = os.path.join(model_path, "model.safetensors")
-
-if os.path.exists(index_path):
-    # Multi-file model
-    print("Multi-file model detected")
-
-    with open(index_path, 'r') as f:
-        index_data = json.load(f)
-
-    # Get the weight map (tensor_name -> file_name)
-    weight_map = index_data.get("weight_map", {})
-
-    # Group tensors by file so that each shard is opened only once
-    file_tensors = defaultdict(list)
-    for tensor_name, file_name in weight_map.items():
-        file_tensors[file_name].append(tensor_name)
-
-    print("Tensors in model:")
-
-    # Process each shard file
-    for file_name, tensor_names in file_tensors.items():
-        file_path = os.path.join(model_path, file_name)
-        print(f"\n--- From {file_name} ---")
-
-        with safe_open(file_path, framework="pt") as f:
-            for tensor_name in sorted(tensor_names):
-                tensor = f.get_tensor(tensor_name)
-                print(f"- {tensor_name} : shape = {tensor.shape}, dtype = {tensor.dtype}")
-
-elif os.path.exists(single_file_path):
-    # Single file model
-    print("Single-file model detected")
-
-    with safe_open(single_file_path, framework="pt") as f:
-        print("Tensors in model:")
-        for key in sorted(f.keys()):
-            tensor = f.get_tensor(key)
-            print(f"- {key} : shape = {tensor.shape}, dtype = {tensor.dtype}")
-
-else:
-    print(f"Error: Neither 'model.safetensors.index.json' nor 'model.safetensors' found in {model_path}")
-    print("Available files:")
-    if os.path.exists(model_path):
-        for item in sorted(os.listdir(model_path)):
-            print(f"  {item}")
-    else:
-        print(f"  Directory {model_path} does not exist")
-    # SystemExit works even where the interactive exit() helper is unavailable.
-    raise SystemExit(1)
--- a/examples/model-conversion/scripts/utils/perplexity-gen.sh
+++ b/examples/model-conversion/scripts/utils/perplexity-gen.sh
@ -1,35 +0,0 @@
-#!/bin/bash
-
-# Run llama-perplexity on a converted model over the wikitext-2 test set and
-# save its logits as the base file for a later KL-divergence comparison.
-# The model path is taken from the first command line argument, falling back
-# to the CONVERTED_MODEL environment variable.
-
-set -e
-
-CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
-
-# Final check if we have a model path
-if [ -z "$CONVERTED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. CONVERTED_MODEL environment variable" >&2
-    exit 1
-fi
-
-# Check if ppl/wikitext-2-raw directory exists
-if [ ! -d "ppl/wikitext-2-raw" ]; then
-    echo "ppl/wikitext-2-raw directory does not exist. Downloading..." >&2
-    mkdir -p ppl
-    pushd ppl
-    ./../../../scripts/get-wikitext-2.sh
-    popd
-fi
-
-mkdir -p ppl
-# Expansions are quoted so that paths containing spaces stay one argument.
-OUTPUTFILE="ppl/$(basename "$CONVERTED_MODEL").kld"
-echo "Model: $CONVERTED_MODEL"
-
-cmake --build ../../build --target llama-perplexity -j8
-
-../.././build/bin/llama-perplexity -m "$CONVERTED_MODEL" \
-    -f ppl/wikitext-2-raw/wiki.test.raw \
-    --kl-divergence-base "$OUTPUTFILE"
-
-echo "Generated logits in $OUTPUTFILE"
-
--- a/examples/model-conversion/scripts/utils/perplexity-run-simple.sh
+++ b/examples/model-conversion/scripts/utils/perplexity-run-simple.sh
@ -1,27 +0,0 @@
-#!/bin/bash
-
-# Run llama-perplexity on a quantized model over the wikitext-2 test set.
-# The model path is taken from the first command line argument, falling back
-# to the QUANTIZED_MODEL environment variable.
-
-set -e
-
-QUANTIZED_MODEL="${1:-"$QUANTIZED_MODEL"}"
-
-if [ -z "$QUANTIZED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. QUANTIZED_MODEL environment variable" >&2
-    exit 1
-fi
-
-# Check if ppl/wikitext-2-raw directory exists
-if [ ! -d "ppl/wikitext-2-raw" ]; then
-    echo "ppl/wikitext-2-raw directory does not exist. Downloading..." >&2
-    mkdir -p ppl
-    pushd ppl
-    ./../../../scripts/get-wikitext-2.sh
-    popd
-fi
-
-cmake --build ../../build --target llama-perplexity -j8
-
-# Quoted so that a model path containing spaces stays one argument.
-../.././build/bin/llama-perplexity -m "$QUANTIZED_MODEL" -f ppl/wikitext-2-raw/wiki.test.raw
-
-
--- a/examples/model-conversion/scripts/utils/perplexity-run.sh
+++ b/examples/model-conversion/scripts/utils/perplexity-run.sh
@ -1,28 +0,0 @@
-#!/bin/bash
-
-# Compare a quantized model against previously generated base logits using
-# llama-perplexity's KL-divergence mode.
-#
-# Usage: perplexity-run.sh [QUANTIZED_MODEL] [LOGITS_FILE]
-# Each positional argument falls back to the environment variable of the
-# same name.
-
-set -e
-
-QUANTIZED_MODEL="${1:-"$QUANTIZED_MODEL"}"
-# The logits file is the SECOND argument; the first one is the model.
-LOGITS_FILE="${2:-"$LOGITS_FILE"}"
-
-if [ -z "$QUANTIZED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. QUANTIZED_MODEL environment variable" >&2
-    exit 1
-fi
-
-# Quoted so that an empty value fails the test instead of collapsing the
-# expression to "[ ! -f ]", which is always false.
-if [ ! -f "${LOGITS_FILE}" ]; then
-    echo "Error: logits file '${LOGITS_FILE}' was not found" >&2
-    echo "Did you run the perplexity-gen.sh script?" >&2
-    exit 1
-fi
-
-echo "Model: $QUANTIZED_MODEL"
-echo "Data file: $LOGITS_FILE"
-
-cmake --build ../../build --target llama-perplexity -j8
-
-../.././build/bin/llama-perplexity -m "$QUANTIZED_MODEL" \
-    --kl-divergence-base "$LOGITS_FILE" \
-    --kl-divergence
--- a/examples/model-conversion/scripts/utils/quantize.sh
+++ b/examples/model-conversion/scripts/utils/quantize.sh
@ -1,34 +0,0 @@
-#!/bin/bash
-
-# Quantize a converted GGUF model with llama-quantize.
-#
-# Usage: quantize.sh [CONVERTED_MODEL] [QUANTIZED_TYPE]
-# Each positional argument falls back to the environment variable of the
-# same name. The output is written next to the input as
-# "<name>-<QUANTIZED_TYPE>.gguf".
-
-set -e
-
-CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
-QUANTIZED_TYPE="${2:-"$QUANTIZED_TYPE"}"
-QUANTIZED_MODEL=$CONVERTED_MODEL
-
-# Final check if we have a model path
-if [ -z "$CONVERTED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. CONVERTED_MODEL environment variable" >&2
-    exit 1
-fi
-
-# Without a type the output name would end in "-.gguf" and llama-quantize
-# would be called with a missing argument.
-if [ -z "$QUANTIZED_TYPE" ]; then
-    echo "Error: Quantization type must be provided either as:" >&2
-    echo "  1. Second command line argument" >&2
-    echo "  2. QUANTIZED_TYPE environment variable" >&2
-    exit 1
-fi
-
-echo "$CONVERTED_MODEL"
-
-# Process the quantized model filename
-if [[ "$QUANTIZED_MODEL" == *.gguf ]]; then
-    # Remove .gguf suffix, add quantized type, then add .gguf back
-    BASE_NAME="${QUANTIZED_MODEL%.gguf}"
-    QUANTIZED_MODEL="${BASE_NAME}-${QUANTIZED_TYPE}.gguf"
-else
-    echo "Error: QUANTIZED_MODEL must end with .gguf extension" >&2
-    exit 1
-fi
-
-
-cmake --build ../../build --target llama-quantize -j8
-
-# Quoted so that paths containing spaces stay one argument each.
-../../build/bin/llama-quantize "$CONVERTED_MODEL" "$QUANTIZED_MODEL" "$QUANTIZED_TYPE"
-
-echo "Quantized model saved to: $QUANTIZED_MODEL"
--- a/examples/model-conversion/scripts/utils/run-embedding-server.sh
+++ b/examples/model-conversion/scripts/utils/run-embedding-server.sh
@ -1,22 +0,0 @@
-#!/bin/bash
-
-# Build and start llama-server in embedding mode (no pooling) for a
-# converted model. The model path is taken from the first command line
-# argument, falling back to the CONVERTED_MODEL environment variable.
-
-set -e
-
-CONVERTED_MODEL="${1:-"$CONVERTED_MODEL"}"
-
-# Final check if we have a model path
-if [ -z "$CONVERTED_MODEL" ]; then
-    echo "Error: Model path must be provided either as:" >&2
-    echo "  1. Command line argument" >&2
-    echo "  2. CONVERTED_MODEL environment variable" >&2
-    exit 1
-fi
-
-echo "$CONVERTED_MODEL"
-
-cmake --build ../../build --target llama-server
-
-# Quoted so that a model path containing spaces stays one argument.
-../../build/bin/llama-server -m "$CONVERTED_MODEL" \
-    --embedding \
-    --pooling none
--- a/examples/model-conversion/scripts/utils/semantic_check.py
+++ b/examples/model-conversion/scripts/utils/semantic_check.py
@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-
-import numpy as np
-import argparse
-import os
-import importlib
-
-from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModel
-
-unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')
-
-def cosine_similarity(a, b=None):
-    """Return the matrix of cosine similarities between the rows of ``a`` and ``b``.
-
-    A 1-D input is treated as a single row. When ``b`` is omitted the rows
-    of ``a`` are compared with each other. Rows with zero norm are divided
-    by 1e-8 instead of 0, so they yield a similarity of 0 rather than NaN.
-    """
-    def _unit_rows(values):
-        # Promote vectors to one-row matrices, then scale each row to unit length.
-        matrix = np.asarray(values)
-        if matrix.ndim == 1:
-            matrix = matrix.reshape(1, -1)
-        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
-        return matrix / np.where(norms == 0, 1e-8, norms)
-
-    left = _unit_rows(a)
-    right = left if b is None else _unit_rows(b)
-    return np.dot(left, right.T)
-
-def load_embeddings_from_file(filename, n_tokens, n_embd):
-    """Read raw float32 values from ``filename`` and shape them as ``(n_tokens, n_embd)``."""
-    flat_values = np.fromfile(filename, dtype=np.float32)
-    return flat_values.reshape((n_tokens, n_embd))
-
-def test_single_prompt_similarity(python_emb, cpp_emb, tokens, prompt):
-    """Print a per-token comparison of two embedding matrices for one prompt.
-
-    Args:
-        python_emb: Embeddings from the PyTorch model, indexed by token position.
-        cpp_emb: Embeddings from llama.cpp, indexed by token position.
-        tokens: Token strings of the prompt; its length sets how many rows are compared.
-        prompt: The prompt text, used for display only.
-
-    Returns:
-        Dict with the per-token cross-model cosine similarities
-        (``cross_model_similarities``), the absolute difference between the
-        two within-model similarity matrices (``similarity_matrix_diff``) and
-        its ``max_diff``, ``mean_diff`` and ``rms_diff``.
-    """
-    np.set_printoptions(suppress=True, precision=6)
-    print("pytorch embeddings:")
-    print(python_emb)
-    print("llama.cpp embeddings:")
-    print(cpp_emb)
-    print(f"\n=== Prompt: '{prompt}' ===")
-    print(f"Tokens: {tokens}")
-    print(f"Embeddings shape: Python {python_emb.shape}, llama.cpp {cpp_emb.shape}")
-
-    n_tokens = len(tokens)
-
-    # 1. Direct embedding comparison
-    print("\n1. Raw Embedding Magnitude Comparison:")
-    # Compare the distance of each token embedding from the origin to see
-    # whether the vectors lie on the same "sphere". This says nothing about
-    # direction (meaning of the token embedding), just magnitude.
-    for i in range(n_tokens):
-        py_mag = np.linalg.norm(python_emb[i])
-        cpp_mag = np.linalg.norm(cpp_emb[i])
-        ratio = py_mag / cpp_mag if cpp_mag > 0 else float('inf')
-        print(f"   Token {i} ({tokens[i]}): Python={py_mag:.3f}, llama.cpp={cpp_mag:.3f}, ratio={ratio:.3f}")
-
-    # 2. Cosine similarity between tokens within each model
-    # Here the direction of the token embeddings is compared: the cosine
-    # similarity of every pair of token embeddings within each model.
-    print("\n2. Within-Model Token Similarities:")
-    print("   Python model:")
-    for i in range(n_tokens):
-        for j in range(i+1, n_tokens):
-            sim = cosine_similarity([python_emb[i]], [python_emb[j]])[0][0]
-            print(f"     {tokens[i]} ↔ {tokens[j]}: {sim:.4f}")
-
-    print("   llama.cpp model:")
-    for i in range(n_tokens):
-        for j in range(i+1, n_tokens):
-            sim = cosine_similarity([cpp_emb[i]], [cpp_emb[j]])[0][0]
-            print(f"     {tokens[i]} ↔ {tokens[j]}: {sim:.4f}")
-
-    # 3. Cross-model similarity (same token position)
-    # Computed once and reused for both the report and the return value.
-    print("\n3. Cross-Model Same-Token Similarities:")
-    cross_model_similarities = [
-        cosine_similarity([python_emb[i]], [cpp_emb[i]])[0][0] for i in range(n_tokens)
-    ]
-    for i, sim in enumerate(cross_model_similarities):
-        print(f"   Token {i} ({tokens[i]}): {sim:.4f}")
-
-    # 4. Similarity matrix comparison
-    print("\n4. Similarity Matrix Differences:")
-    py_sim_matrix = cosine_similarity(python_emb)
-    cpp_sim_matrix = cosine_similarity(cpp_emb)
-    diff_matrix = np.abs(py_sim_matrix - cpp_sim_matrix)
-    max_diff = np.max(diff_matrix)
-    mean_diff = np.mean(diff_matrix)
-    rms_diff = np.sqrt(np.mean(diff_matrix**2))
-
-    print(f"   Max difference: {max_diff:.4f}")
-    print(f"   Mean difference: {mean_diff:.4f}")
-    print(f"   RMS difference: {rms_diff:.4f}")
-
-    return {
-        'cross_model_similarities': cross_model_similarities,
-        'similarity_matrix_diff': diff_matrix,
-        'max_diff': max_diff,
-        'mean_diff': mean_diff,
-        'rms_diff': rms_diff
-    }
-
-def main():
-    """Compare saved PyTorch and llama.cpp embeddings for one prompt.
-
-    Loads the tokenizer and the original model from ``--model-path`` to get
-    the prompt's tokens and the hidden size, reads both embedding files as
-    ``(n_tokens, hidden_size)`` float32 matrices, prints a detailed
-    comparison and finishes with a quality rating based on the average
-    cross-model cosine similarity.
-    """
-    parser = argparse.ArgumentParser(description='Test semantic similarity between Python and llama.cpp embeddings')
-    parser.add_argument('--model-path', '-m', required=True, help='Path to the original Python model')
-    # Both embedding files are read unconditionally below, so they are mandatory.
-    parser.add_argument('--python-embeddings', '-pe', required=True, help='Path to pytorch embeddings "logits" binary file')
-    parser.add_argument('--cpp-embeddings', '-ce', required=True, help='Path to llama.cpp embeddings "logits" binary file')
-    parser.add_argument('--causal', '-c', default=False, help='if the model is causal (default: false)', action='store_true')
-    parser.add_argument('--prompt', '-p', default='Hello world today', help='Test prompt')
-
-    args = parser.parse_args()
-
-    print("Semantic Similarity Test Between Python and llama.cpp Embedding Models")
-    print("=" * 70)
-
-    # Single prompt detailed comparison
-    print(f"\nTesting with prompt: '{args.prompt}'")
-
-    # Load the python model to get configuration information and also to load the tokenizer.
-    print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
-    tokenizer = AutoTokenizer.from_pretrained(args.model_path)
-
-    if unreleased_model_name:
-        # UNRELEASED_MODEL_NAME selects a model class directly from its
-        # transformers module instead of going through the Auto* classes.
-        model_name_lower = unreleased_model_name.lower()
-        unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
-        if args.causal:
-            class_name = f"{unreleased_model_name}ForCausalLM"
-        else:
-            class_name = f"{unreleased_model_name}Model"
-        print(f"Model class: {class_name}")
-        print(f"Importing unreleased model module: {unreleased_module_path}")
-
-        try:
-            model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
-            model = model_class.from_pretrained(args.model_path)
-        except (ImportError, AttributeError) as e:
-            print(f"Failed to import or load model: {e}")
-            raise SystemExit(1)
-    else:
-        if args.causal:
-            model = AutoModelForCausalLM.from_pretrained(args.model_path)
-        else:
-            model = AutoModel.from_pretrained(args.model_path)
-
-    encoded = tokenizer(args.prompt, return_tensors="pt")
-    tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])
-    n_tokens = len(tokens)
-    print(f"n_tokens: {n_tokens}")
-    print(f"hidden_size: {model.config.hidden_size}")
-
-    # Load the binary embeddings; each file must hold exactly
-    # n_tokens * hidden_size float32 values or the reshape fails.
-    llamacpp_embeddings = load_embeddings_from_file(args.cpp_embeddings, n_tokens, model.config.hidden_size)
-    python_embeddings = load_embeddings_from_file(args.python_embeddings, n_tokens, model.config.hidden_size)
-
-    # Run comparison
-    results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, args.prompt)
-
-    # Summary
-    print("\n=== SUMMARY ===")
-    avg_cross_sim = np.mean(results['cross_model_similarities'])
-    print(f"Average cross-model similarity: {avg_cross_sim:.4f}")
-    print(f"Similarity matrix RMS difference: {results['rms_diff']:.4f}")
-
-    # Quality assessment
-    if avg_cross_sim > 0.95:
-        print("✅ EXCELLENT: Models are highly similar")
-    elif avg_cross_sim > 0.90:
-        print("✅ VERY GOOD: Models are very similar")
-    elif avg_cross_sim > 0.80:
-        print("⚠️  GOOD: Models are reasonably similar")
-    elif avg_cross_sim > 0.70:
-        print("⚠️  FAIR: Models have some differences")
-    else:
-        print("❌ POOR: Models are significantly different")
-
-if __name__ == "__main__":
-    main()