From b7b3e0d2a734cbbd83a1e1c6b8b5f3b838956c03 Mon Sep 17 00:00:00 2001
From: kallewoof
Date: Fri, 25 Jul 2025 23:14:18 +0900
Subject: [PATCH] add adapter tests for autoguess (#1654)

---
 .github/workflows/test-autoguess.yaml |  2 +-
 tests/test_autoguess.py               | 86 +++++++++++++++++++++++----
 2 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/test-autoguess.yaml b/.github/workflows/test-autoguess.yaml
index 84e339cf1..0777fa1ce 100644
--- a/.github/workflows/test-autoguess.yaml
+++ b/.github/workflows/test-autoguess.yaml
@@ -23,7 +23,7 @@ jobs:
     - name: Install dependencies
       run: |
        python -m pip install --upgrade pip
-        pip install requests
+        pip install requests transformers jinja2 tiktoken protobuf blobfile sentencepiece
        git clone https://github.com/kallewoof/gated-tokenizers.git tests/gated-tokenizers

    - name: Run AutoGuess tests
diff --git a/tests/test_autoguess.py b/tests/test_autoguess.py
index a956d2e13..1ea9ed70b 100644
--- a/tests/test_autoguess.py
+++ b/tests/test_autoguess.py
@@ -4,8 +4,10 @@ Also checks that every template is being tested so that when new AutoGuess addit
 """
 import os
 import sys
+import jinja2
 import requests
 import json
+from transformers import AutoTokenizer


 # Map an AutoGuess name to a HuggingFace model ID
@@ -15,10 +17,10 @@ AUTOGUESS_MAPPING = {
     "ChatML (Qwen 2.5 based)": "Qwen/Qwen2.5-0.5B-Instruct",
     "ChatML (Kimi)": "moonshotai/Kimi-K2-Instruct",
     "Google Gemma 2": "Efficient-Large-Model/gemma-2-2b-it",
-    "Google Gemma 3": "scb10x/typhoon2.1-gemma3-12b",
+    "Google Gemma 3": "google/gemma-3-4b-it",
     "Google Gemma 3n": "lmstudio-community/gemma-3n-E4B-it-MLX-bf16",
     "Llama 3.x": "Steelskull/L3.3-Shakudo-70b",
-    "Llama 4": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "Llama 4": "nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",
     "Mistral V7 (with system prompt)": "Doctor-Shotgun/MS3.2-24B-Magnum-Diamond",
     "Mistral V3": "mistralai/Mistral-7B-Instruct-v0.3",
     "GLM-4": "THUDM/glm-4-9b-chat-hf",
@@ -33,6 +35,11 @@ AUTOGUESS_MAPPING = {
     "ChatML (Generic)": "NewEden/Gemma-27B-chatml",
 }

+AUTOGUESS_SKIP_ADAPTER_TESTS = {
+    "Mistral V3": {"system"},         # Poor system support
+    "Mistral (Generic)": {"system"},  # Poor system support
+}
+
 # User may be running this test from ./ or from ../ -- we want to be in ./ (i.e. tests)
 if os.path.exists("tests"):
     os.chdir("tests")
@@ -46,6 +53,11 @@ def get_tokenizer_config_for_huggingface_model_id(huggingface_model_id: str):
         with open(fname) as f:
             return json.load(f)

+    fname = f"gated-tokenizers/tokenizer_configs/{huggingface_model_id.replace('/','_')}/tokenizer_config.json"
+    if os.path.exists(fname):
+        with open(fname) as f:
+            return json.load(f)
+
     for filename in ["tokenizer_config.json", "chat_template.json"]:
         url = f"https://huggingface.co/{huggingface_model_id}/resolve/main/{filename}"
         response = requests.get(url)
@@ -55,7 +67,13 @@ def get_tokenizer_config_for_huggingface_model_id(huggingface_model_id: str):
                 return v
     raise ValueError(f"Failed to fetch tokenizer config for {huggingface_model_id}.")

-def match_chat_template_to_adapter(chat_template: str|list) -> tuple[str, str|None]|None:
+def get_tokenizer_for_huggingface_model_id(huggingface_model_id: str):
+    dname = f"gated-tokenizers/tokenizer_configs/{huggingface_model_id.replace('/','_')}"
+    if os.path.exists(dname):
+        return AutoTokenizer.from_pretrained(dname, trust_remote_code=True)
+    return AutoTokenizer.from_pretrained(huggingface_model_id, trust_remote_code=True)
+
+def match_chat_template_to_adapter(chat_template: str|list) -> tuple[dict, str|None]|None:
     # Additional code in tester not present in application: support for multiple chat templates, and use default if present
     sub_template: str|None = None
     if isinstance(chat_template, list):
@@ -74,7 +92,48 @@ def match_chat_template_to_adapter(chat_template: str|list) -> tuple[str, str|No
     if chat_template != "":
         for entry in autoguess:
             if all(s in chat_template for s in entry['search']):
-                return entry['name'], sub_template
+                return entry, sub_template
+
+def test_tokenizer_with_adapter(tokenizer, adapter: dict[str, str], skip: set) -> tuple[bool, str|None]:
+    """
+    See if the adapter correctly reflects the tokenizer chat template.
+    """
+    def adapter_wrap(role, content):
+        return adapter[f"{role}_start"] + content + adapter[f"{role}_end"]
+    def system(content): return adapter_wrap("system", content)
+    def user(content): return adapter_wrap("user", content)
+    def assistant(content): return adapter_wrap("assistant", content)
+    def templ(rolelist):
+        return tokenizer.apply_chat_template(rolelist, tokenize=False)
+
+    try:
+        # We skip system checks if user and system are identical, or if in skip
+        if "system" not in skip and user("x") != system("x"):
+            # Test system
+            expect = system("SyS-tEm")
+            templated = templ([{"role": "system", "content": "SyS-tEm"}, {"role": "user", "content": "user"}])
+            if expect not in templated:
+                return False, f"system role missing expected fragment {expect.replace("\n", "\\n")}: {templated.replace("\n", "\\n")}"
+
+        # Test user/asst/user
+        expect = [
+            user("user_1"),
+            assistant("asst_1"),
+            user("user_2")
+        ]
+        templated = templ([
+            {"role":"user", "content": "user_1"},
+            {"role":"assistant", "content": "asst_1"},
+            {"role":"user", "content": "user_2"},
+        ])
+        rem = templated
+        for sub in expect:
+            if sub not in rem:
+                return False, f"missing expected fragment {sub.replace("\n", "\\n")}: {rem.replace("\n", "\\n")}"
+            rem = rem.split(sub, 1)[1]
+    except jinja2.exceptions.TemplateError as e:
+        return False, f"template error: {e}"
+    return True, None

 failures = 0
 seen = set()
@@ -87,14 +146,21 @@ for name, huggingface_model_id in AUTOGUESS_MAPPING.items():
         continue
     tokenizer_config = get_tokenizer_config_for_huggingface_model_id(huggingface_model_id)
     assert 'chat_template' in tokenizer_config
-    matched = match_chat_template_to_adapter(tokenizer_config['chat_template'])
-    if matched is None:
-        matched, sub_template = "MISSING MAPPING", None
+    match = match_chat_template_to_adapter(tokenizer_config['chat_template'])
+    if match is None:
+        matched, sub_template, adapter = "MISSING", None, None
     else:
-        matched, sub_template = matched
+        match, sub_template = match
+        matched = match['name']
+        adapter = match['adapter']
     sub_template = f"[{sub_template}]" if sub_template else ""
-    print(namefmt.format(name=name) + " = " + namefmt.format(name=matched) + " : " + ("OK " if name == matched else "FAILURE") + " " + hmifmt.format(huggingface_model_id=huggingface_model_id) + " " + sub_template)
-    failures += name != matched
+    adaptercheck, reason = False, '?'
+    if name == matched:
+        assert adapter
+        tokenizer = get_tokenizer_for_huggingface_model_id(huggingface_model_id)
+        adaptercheck, reason = test_tokenizer_with_adapter(tokenizer, adapter, AUTOGUESS_SKIP_ADAPTER_TESTS.get(name, set()))
+    print(namefmt.format(name=name) + " = " + namefmt.format(name=matched) + " : " + ("OK " if adaptercheck and name == matched else reason if not adaptercheck else "FAILURE") + " " + hmifmt.format(huggingface_model_id=huggingface_model_id) + " " + sub_template)
+    failures += name != matched or not adaptercheck

 for entry in autoguess:
     if entry['name'] not in seen:
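
For illustration (not part of the patch), a minimal sketch of the check that test_tokenizer_with_adapter performs: every adapter fragment must appear in the rendered chat template, and in order. The ChatML-style adapter strings and the hand-written rendering below are assumptions for the example, not values taken from the AutoGuess config.

    # Sketch only: mirrors the fragment-order check in test_tokenizer_with_adapter,
    # using a hand-written ChatML rendering instead of a real HuggingFace tokenizer.
    # Adapter values here are assumed examples, not the shipped AutoGuess adapter.
    adapter = {
        "user_start": "<|im_start|>user\n", "user_end": "<|im_end|>\n",
        "assistant_start": "<|im_start|>assistant\n", "assistant_end": "<|im_end|>\n",
    }
    rendered = (
        "<|im_start|>user\nuser_1<|im_end|>\n"
        "<|im_start|>assistant\nasst_1<|im_end|>\n"
        "<|im_start|>user\nuser_2<|im_end|>\n"
    )
    expect = [
        adapter["user_start"] + "user_1" + adapter["user_end"],
        adapter["assistant_start"] + "asst_1" + adapter["assistant_end"],
        adapter["user_start"] + "user_2" + adapter["user_end"],
    ]
    rem = rendered
    for sub in expect:
        assert sub in rem, f"missing fragment: {sub!r}"
        rem = rem.split(sub, 1)[1]  # continue searching after the match, so order matters
    print("adapter fragments found in order")

Cutting the rendering down with rem.split(sub, 1)[1] after each hit is what enforces ordering rather than mere presence, which is why a fragment appearing out of order is reported as missing.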