Mirror of https://github.com/kvcache-ai/ktransformers.git (synced 2025-09-05 20:19:51 +00:00)
Commit dc10480ef6 (parent 216a63b858): ⚡ add humaneval support
7 changed files with 130 additions and 4 deletions
Makefile (+7 lines)
@@ -19,6 +19,13 @@ dev_install:
    echo "Installing ktransformers"
    KTRANSFORMERS_FORCE_BUILD=TRUE pip install -e . -v --no-build-isolation
    echo "Installation completed successfully"
clean:
    rm -rf build
    rm -rf *.egg-info
    rm -rf ktransformers/ktransformers_ext/build
    rm -rf ktransformers/ktransformers_ext/cuda/build
    rm -rf ktransformers/ktransformers_ext/cuda/dist
    rm -rf ktransformers/ktransformers_ext/cuda/*.egg-info
install_numa:
    USE_NUMA=1 make dev_install
install_no_numa:
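The new `install_numa` target is shorthand for `USE_NUMA=1 make dev_install`, forcing a NUMA-aware build of the extension, while the expanded `clean` target now also removes the CUDA extension's build, dist, and egg-info directories.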
ktransformers/tests/.gitignore (vendored, new file, +1 line)
@@ -0,0 +1 @@
humaneval/results
ktransformers/tests/humaneval/eval_api.py (new file, +89 lines)
@@ -0,0 +1,89 @@
# adapted from https://github.com/abacaj/code-eval
import argparse
import os

import requests
import tqdm
from human_eval.data import write_jsonl, read_problems

from evaluation import filter_code, fix_indents
from prompts import instruct_prompt


def generate_text(api_url, question, model_name, stream=False, auth_token=None):
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
        # attach the API key
        "Authorization": "Bearer " + auth_token,
    }
    question = instruct_prompt(question)
    data = {
        "messages": [{"content": question, "role": "user"}],
        "model": model_name,
        "stream": stream,
        "temperature": 0.6,
    }
    print(f"content: {question}")
    response = requests.post(api_url, headers=headers, json=data, verify=False)
    if response.status_code == 200:
        result = response.json()
        content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
        return [filter_code(fix_indents(content))]
    else:
        print(f"API request failed with status code {response.status_code}")
        return None


def run_eval_api(
    api_url: str,
    model_name: str,
    out_path: str,
    format_tabs: bool = False,
    auth_token: str = None,
    problem_file: str = None,
    append: bool = False,
):
    # default to the bundled HumanEval problems unless an eval set is given
    if problem_file is None:
        problems = read_problems()
    else:
        problems = read_problems(problem_file)
    samples = []
    pbar = tqdm.tqdm(total=len(problems))
    try:
        for task_id in problems:
            if format_tabs:
                prompt = problems[task_id]["prompt"].replace("    ", "\t")
            else:
                prompt = problems[task_id]["prompt"]
            completion = generate_text(api_url, prompt, model_name, auth_token=auth_token)
            for sample in completion:
                result = dict(task_id=task_id, completion=sample)
                samples += [result]
                if append:
                    # write each sample to disk as soon as it is generated
                    write_jsonl(out_path, [result], append=append)
            pbar.update(1)
        if not append:
            write_jsonl(out_path, samples, append=append)
    except Exception as e:
        if not append:
            # flush whatever was collected before the failure
            # (in append mode everything is already on disk)
            write_jsonl(out_path, samples, append=append)
        print(f"Error: {e}")


def main(output_path, api_url, model_name, auth_token, format_tabs, problem_file, append):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    run_eval_api(api_url, model_name, output_path, format_tabs, auth_token, problem_file, append)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="API Generate Tester")
    parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
    parser.add_argument("--model_name", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model Name")
    parser.add_argument("--out_path", type=str, default="results/api/eval.jsonl", help="Output Path")
    parser.add_argument("--auth_token", type=str, default=None, help="Auth Token")
    parser.add_argument("--format_tabs", action="store_true", help="Replace 4-space indents with tabs in prompts")
    parser.add_argument("--problem_file", type=str, default=None, help="Evalset File")
    parser.add_argument("--no_append", action="store_false", help="Write all samples once at the end instead of appending incrementally")
    args = parser.parse_args()
    main(args.out_path, args.api_url, args.model_name, args.auth_token, args.format_tabs, args.problem_file, args.no_append)
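For orientation, here is a minimal sketch of driving the evaluator programmatically instead of through the CLI. The endpoint URL, model name, and token below are placeholders, not values from this commit; note that `generate_text` builds the header as `"Bearer " + auth_token`, so some non-None token must be supplied even for servers that ignore it:

    # a minimal usage sketch, run from ktransformers/tests/humaneval/;
    # the URL, model name, and token are hypothetical placeholders
    from eval_api import run_eval_api

    run_eval_api(
        api_url="http://localhost:10002/v1/chat/completions",  # assumed local OpenAI-compatible server
        model_name="DeepSeek-V3",
        out_path="results/api/eval.jsonl",
        format_tabs=False,
        auth_token="dummy-key",  # required: header is "Bearer " + auth_token
        problem_file=None,       # None falls back to the bundled HumanEval problems
        append=True,             # write each completion to disk as it arrives
    )

The resulting JSONL can then be scored with the `evaluate_functional_correctness` tool that ships with the human-eval package.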
ktransformers/tests/humaneval/evaluation.py (new file, +15 lines)
@@ -0,0 +1,15 @@
# reference: https://github.com/declare-lab/instruct-eval/blob/main/human_eval/main.py#L35
def filter_code(completion: str) -> str:
    # The model tends to over-generate, so keep only the first function
    completion = completion.lstrip("\n")
    # strip markdown code fences (```python ... ```)
    completion = completion.replace("```python\n", "").replace("```", "")
    # drop trailing test harnesses and usage examples
    if 'if __name__ == "__main__":' in completion:
        completion = completion.split('if __name__ == "__main__":')[0]
    if "# Example usage" in completion:
        completion = completion.split("# Example usage")[0]
    return completion.split("\n\n")[0]


def fix_indents(text: str) -> str:
    return text.replace("\t", "    ")
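To make the post-processing concrete, a small illustration on a fabricated model reply (the sample string is invented; the functions are the ones above):

    # run from ktransformers/tests/humaneval/
    from evaluation import filter_code, fix_indents

    raw = (
        "```python\n"
        "def add(a, b):\n"
        "\treturn a + b\n"
        "```\n"
        "\n"
        "# Example usage\n"
        "print(add(1, 2))\n"
    )
    cleaned = filter_code(fix_indents(raw))
    print(cleaned)
    # def add(a, b):
    #     return a + b

The fences, the tab indent, and the trailing usage example are all stripped, leaving just the first function block.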
ktransformers/tests/humaneval/prompts.py (new file, +14 lines)
@@ -0,0 +1,14 @@
def instruct_prompt(prompt: str) -> str:
    return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nComplete the following Python code without any tests or explanation\n{prompt}\n\n### Response:"""


def standard_prompt(prompt: str) -> str:
    return f"""Complete the following Python code without any tests or explanation\n{prompt}"""


def write_prompt(prompt: str) -> str:
    return f"""Write a python program to complete the following code:\n{prompt}"""


def replit_glaive_prompt(prompt: str) -> str:
    return f"""Below is an instruction that describes a task, paired with an input that provides further context.\n Write a response that appropriately completes the request.\n\n ### Instruction:\nWrite a program to perform the given task.\n\n Input:\n{prompt}\n\n### Response:"""
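Only `instruct_prompt` is imported by `eval_api.py`; the others appear to be alternative templates kept for experimentation. Rendered on an invented HumanEval-style stub, `instruct_prompt` produces:

    from prompts import instruct_prompt

    # a fabricated function stub in the HumanEval style
    stub = 'def incr(x):\n    """Return x + 1."""\n'
    print(instruct_prompt(stub))
    # Below is an instruction that describes a task. Write a response that appropriately completes the request.
    #
    # ### Instruction:
    # Complete the following Python code without any tests or explanation
    # def incr(x):
    #     """Return x + 1."""
    #
    #
    # ### Response: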
@@ -59,8 +59,8 @@ class DataEvaluator:
         :param text: The raw prediction string.
         :return: Processed prediction string.
         """
-        text = text.lstrip('\n').split('\n')[0]
-        return text[:1]
+        text = text.lstrip('\n').split('\n')[-1]
+        return text[-1:]

     def score(self, pred, answers):
         """
@ -59,8 +59,8 @@ class DataEvaluator:
|
|||
:param text: The raw prediction string.
|
||||
:return: Processed prediction string.
|
||||
"""
|
||||
text = text.lstrip('\n').split('\n')[0]
|
||||
return text[:1]
|
||||
text = text.lstrip('\n').split('\n')[-1]
|
||||
return text[-1:]
|
||||
|
||||
def score(self, pred, answers):
|
||||
"""
|
||||
|
|
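This hunk appears twice, evidently because two of the seven changed test files carry the identical `DataEvaluator` fix (their file names are not shown here). The change inverts the answer-extraction heuristic: instead of the first character of the first line of the prediction, it now takes the last character of the last line, which matches replies that end with the chosen option. A minimal sketch of the new behavior (the method name `post_processing` is assumed; only the two changed lines come from the diff):

    # sketch of the updated extraction step; "post_processing" is a hypothetical name
    def post_processing(text: str) -> str:
        """Reduce a raw prediction string to a single answer character."""
        text = text.lstrip('\n').split('\n')[-1]  # last line instead of first
        return text[-1:]                          # last character instead of first

    print(post_processing("Let's reason step by step.\nThe answer is B"))  # -> "B"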