⚡ add humaneval support

2025-09-10 06:14:58 +00:00 · 2025-03-04 20:54:49 +08:00 · 2025-03-04 20:54:49 +08:00 · dc10480ef6
commit dc10480ef6
parent 216a63b858
7 changed files with 130 additions and 4 deletions
--- a/ktransformers/tests/humaneval/eval_api.py
+++ b/ktransformers/tests/humaneval/eval_api.py
@ -0,0 +1,89 @@
+# adapt from https://github.com/abacaj/code-eval?tab=readme-ov-file
+import argparse
+import os
+import requests
+from human_eval.data import write_jsonl, read_problems
+import tqdm
+
+from evaluation import filter_code, fix_indents
+from prompts import instruct_prompt
+
+def generate_text(api_url,question , model_name, stream=False, auth_token=None):
+    headers = {
+        'accept': 'application/json',
+        'Content-Type': 'application/json',
+        # 添加 API Key
+        'Authorization' : 'Bearer ' + auth_token
+    }
+    question = instruct_prompt(question)
+    data = {
+        "messages": [{"content": question, "role": "user"}],
+        "model": model_name,
+        "stream": stream,
+        "temperature": 0.6
+    }
+    print(f"content: {question}")
+    response = requests.post(api_url, headers=headers, json=data,verify=False)
+    if response.status_code == 200:
+        result = response.json()
+        results = result.get('choices', [{}])[0].get('message', {}).get('content', '')
+        return [filter_code(fix_indents(results))]
+    else:
+        print(f"API Request failed with status code {response.status_code}")
+        return None
+
+def run_eval_api(
+    api_url: str,
+    model_name: str,
+    out_path: str,
+    format_tabs: bool = False,
+    auth_token: str = None,
+    problem_file: str = None,
+    append: bool = False
+):
+    if(problem_file is None):
+        problems = read_problems()
+    else:
+        problems = read_problems(problem_file)
+    samples = []
+    pbar = tqdm.tqdm(total=len(problems) * 1)
+    try:
+        for task_id in problems:
+            if format_tabs:
+                prompt = problems[task_id]["prompt"].replace("    ", "\t")
+            else:
+                prompt = problems[task_id]["prompt"]
+            completion = generate_text(api_url, prompt, model_name, auth_token=auth_token)
+            # samples.append({"task_id": task_id, "completion": completion})
+            for sample in completion:
+                result = dict(
+                    task_id=task_id,
+                    completion=sample,
+                )
+                samples += [result]
+                if append:
+                    write_jsonl(out_path, [result],append=append)
+            pbar.update(1)
+        if not append:
+            write_jsonl(out_path, samples,append=append)
+    except Exception as e:
+        write_jsonl(out_path, samples,append=append)
+        print(f"Error: {e}")
+
+def main(output_path, api_url, model_name, auth_token, format_tabs,problem_file, append):
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    run_eval_api(api_url, model_name, output_path, format_tabs, auth_token, problem_file,append)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="API Generate Tester")
+    parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
+    parser.add_argument("--model_name", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model Name")
+    parser.add_argument("--out_path", type=str, default="results/api/eval.jsonl", help="Output Path")
+    parser.add_argument("--auth_token", type=str, default=None, help="Auth Token")
+    parser.add_argument("--format_tabs", action="store_true", help="Format Tabs")
+    parser.add_argument("--problem_file", type=str, default=None, help="Evalset File")
+    parser.add_argument("--no_append", action="store_false", help="Append to existing file")
+    args = parser.parse_args()
+    # api_url = "https://api.siliconflow.cn/v1/chat/completions"
+    main(args.out_path, args.api_url, args.model_name, args.auth_token, args.format_tabs, args.problem_file, args.no_append)