# kvcache-ai-ktransformers/ktransformers/tests/humaneval/eval_api.py
# Adapted from https://github.com/abacaj/code-eval?tab=readme-ov-file
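"""Generate HumanEval completions through an OpenAI-compatible
chat-completions API and write them to a JSONL file for scoring
with the human_eval harness."""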
import argparse
import os
import requests
from human_eval.data import write_jsonl, read_problems
import tqdm
from evaluation import filter_code, fix_indents
from prompts import instruct_prompt

def generate_text(api_url, question, model_name, stream=False, auth_token=None):
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json',
        # Attach the API key when one is provided
        'Authorization': 'Bearer ' + auth_token if auth_token else ''
    }
    question = instruct_prompt(question)
    data = {
        "messages": [{"content": question, "role": "user"}],
        "model": model_name,
        "stream": stream,
        "temperature": 0.6
    }
    print(f"content: {question}")
    response = requests.post(api_url, headers=headers, json=data, verify=False)
    if response.status_code == 200:
        result = response.json()
        content = result.get('choices', [{}])[0].get('message', {}).get('content', '')
        return [filter_code(fix_indents(content))]
    else:
        print(f"API request failed with status code {response.status_code}")
        return None
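
# For reference, a sketch of the response shape this parser expects from an
# OpenAI-compatible /chat/completions endpoint (values are illustrative, not
# from a real run):
#
#   {
#       "choices": [
#           {"message": {"role": "assistant", "content": "def add(a, b): ..."}}
#       ]
#   }
#
# generate_text() drills into choices[0].message.content, defaulting missing
# keys to '' (an empty "choices" list would still raise an IndexError).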

def run_eval_api(
    api_url: str,
    model_name: str,
    out_path: str,
    format_tabs: bool = False,
    auth_token: str = None,
    problem_file: str = None,
    append: bool = False,
    skip: int = 0,
):
    if problem_file is None:
        problems = read_problems()
    else:
        problems = read_problems(problem_file)
    samples = []
    pbar = tqdm.tqdm(total=len(problems))
    pbar.update(skip)
    try:
        for task_id in problems:
            # Skip the first `skip` problems, e.g. when resuming an interrupted run
            if skip > 0:
                skip -= 1
                continue
            if format_tabs:
                prompt = problems[task_id]["prompt"].replace("    ", "\t")
            else:
                prompt = problems[task_id]["prompt"]
            completion = generate_text(api_url, prompt, model_name, auth_token=auth_token)
            if completion is None:
                raise RuntimeError(f"generation failed for {task_id}")
            for sample in completion:
                result = dict(
                    task_id=task_id,
                    completion=sample,
                )
                samples.append(result)
                if append:
                    # Persist each record as soon as it is generated
                    write_jsonl(out_path, [result], append=append)
            pbar.update(1)
        if not append:
            write_jsonl(out_path, samples, append=append)
    except Exception as e:
        # Flush whatever was collected before the failure
        if not append:
            write_jsonl(out_path, samples, append=append)
        print(f"Error: {e}")

def main(output_path, api_url, model_name, auth_token, format_tabs, problem_file, append, skip):
    # `or "."` guards against a bare filename, where dirname() returns ''
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    run_eval_api(api_url, model_name, output_path, format_tabs, auth_token, problem_file, append, skip)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="API Generate Tester")
    parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
    parser.add_argument("--model_name", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model Name")
    parser.add_argument("--out_path", type=str, default="results/api/eval_b.jsonl", help="Output Path")
    parser.add_argument("--auth_token", type=str, default=None, help="Auth Token")
    parser.add_argument("--format_tabs", action="store_true", help="Convert 4-space indents in prompts to tabs")
    parser.add_argument("--problem_file", type=str, default=None, help="Evalset File")
    # store_false: appending per record is the default; --no_append switches to
    # a single batch write at the end of the run
    parser.add_argument("--no_append", action="store_false", help="Write all results in one batch at the end instead of appending per record")
    parser.add_argument("--skip", type=int, default=0, help="Skip first n problems")
    args = parser.parse_args()
    main(args.out_path, args.api_url, args.model_name, args.auth_token, args.format_tabs, args.problem_file, args.no_append, args.skip)
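
# Example invocation (the endpoint and model below are just the script's
# defaults; the token is a placeholder):
#
#   python eval_api.py \
#       --api_url https://api.siliconflow.cn/v1/chat/completions \
#       --model_name Pro/deepseek-ai/DeepSeek-V3 \
#       --out_path results/api/eval_b.jsonl \
#       --auth_token sk-...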