# kvcache-ai-ktransformers/ktransformers/tests/humaneval/eval_api.py
# Adapted from https://github.com/abacaj/code-eval?tab=readme-ov-file
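"""Generate HumanEval completions through an OpenAI-compatible
chat-completions API and write them to a JSONL file for scoring
with the human_eval harness."""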
import argparse
import os
import requests
from human_eval.data import write_jsonl, read_problems
import tqdm
from evaluation import filter_code, fix_indents
from prompts import instruct_prompt

def generate_text(api_url, question, model_name, stream=False, auth_token=None):
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
        # Attach the API key when one is provided
        'Authorization': 'Bearer ' + auth_token if auth_token else ''
    }
    question = instruct_prompt(question)
    data = {
        "messages": [{"content": question, "role": "user"}],
        "model": model_name,
        "stream": stream,
        "temperature": 0.6
    }
    print(f"content: {question}")
    response = requests.post(api_url, headers=headers, json=data, verify=False)
    if response.status_code == 200:
        result = response.json()
        content = result.get('choices', [{}])[0].get('message', {}).get('content', '')
        return [filter_code(fix_indents(content))]
    else:
        print(f"API request failed with status code {response.status_code}")
        return None
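
# For reference, a sketch of the response shape this parser expects from an
# OpenAI-compatible /chat/completions endpoint (values are illustrative, not
# from a real run):
#
#   {
#       "choices": [
#           {"message": {"role": "assistant", "content": "def add(a, b): ..."}}
#       ]
#   }
#
# generate_text() drills into choices[0].message.content, defaulting missing
# keys to '' (an empty "choices" list would still raise an IndexError).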

def run_eval_api(
    api_url: str,
    model_name: str,
    out_path: str,
    format_tabs: bool = False,
    auth_token: str = None,
    problem_file: str = None,
    append: bool = False,
    skip: int = 0,
):
    if problem_file is None:
        problems = read_problems()
    else:
        problems = read_problems(problem_file)
    samples = []
    pbar = tqdm.tqdm(total=len(problems))
    pbar.update(skip)
    try:
        for task_id in problems:
            # Skip the first `skip` problems, e.g. when resuming an interrupted run
            if skip > 0:
                skip -= 1
                continue
            if format_tabs:
                prompt = problems[task_id]["prompt"].replace("    ", "\t")
            else:
                prompt = problems[task_id]["prompt"]
            completion = generate_text(api_url, prompt, model_name, auth_token=auth_token)
            if completion is None:
                raise RuntimeError(f"generation failed for {task_id}")
            for sample in completion:
                result = dict(
                    task_id=task_id,
                    completion=sample,
                )
                samples.append(result)
                if append:
                    # Persist each record as soon as it is generated
                    write_jsonl(out_path, [result], append=append)
            pbar.update(1)
        if not append:
            write_jsonl(out_path, samples, append=append)
    except Exception as e:
        # Flush whatever was collected before the failure
        if not append:
            write_jsonl(out_path, samples, append=append)
        print(f"Error: {e}")

def main(output_path, api_url, model_name, auth_token, format_tabs, problem_file, append, skip):
    # `or "."` guards against a bare filename, where dirname() returns ''
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    run_eval_api(api_url, model_name, output_path, format_tabs, auth_token, problem_file, append, skip)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="API Generate Tester")
    parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
    parser.add_argument("--model_name", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model Name")
    parser.add_argument("--out_path", type=str, default="results/api/eval_b.jsonl", help="Output Path")
    parser.add_argument("--auth_token", type=str, default=None, help="Auth Token")
    parser.add_argument("--format_tabs", action="store_true", help="Convert 4-space indents in prompts to tabs")
    parser.add_argument("--problem_file", type=str, default=None, help="Evalset File")
    # store_false: appending per record is the default; --no_append switches to
    # a single batch write at the end of the run
    parser.add_argument("--no_append", action="store_false", help="Write all results in one batch at the end instead of appending per record")
    parser.add_argument("--skip", type=int, default=0, help="Skip first n problems")
    args = parser.parse_args()
    main(args.out_path, args.api_url, args.model_name, args.auth_token, args.format_tabs, args.problem_file, args.no_append, args.skip)
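
# Example invocation (the endpoint and model below are just the script's
# defaults; the token is a placeholder):
#
#   python eval_api.py \
#       --api_url https://api.siliconflow.cn/v1/chat/completions \
#       --model_name Pro/deepseek-ai/DeepSeek-V3 \
#       --out_path results/api/eval_b.jsonl \
#       --auth_token sk-...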