diff --git a/Makefile b/Makefile
index 1602eef..8349809 100644
--- a/Makefile
+++ b/Makefile
@@ -19,6 +19,13 @@ dev_install:
 	echo "Installing ktransformers"
 	KTRANSFORMERS_FORCE_BUILD=TRUE pip install -e . -v --no-build-isolation
 	echo "Installation completed successfully"
+clean:
+	rm -rf build
+	rm -rf *.egg-info
+	rm -rf ktransformers/ktransformers_ext/build
+	rm -rf ktransformers/ktransformers_ext/cuda/build
+	rm -rf ktransformers/ktransformers_ext/cuda/dist
+	rm -rf ktransformers/ktransformers_ext/cuda/*.egg-info
 install_numa:
 	USE_NUMA=1 make dev_install
 install_no_numa:
diff --git a/ktransformers/tests/.gitignore b/ktransformers/tests/.gitignore
new file mode 100644
index 0000000..dbc6609
--- /dev/null
+++ b/ktransformers/tests/.gitignore
@@ -0,0 +1 @@
+humaneval/results
\ No newline at end of file
diff --git a/ktransformers/tests/humaneval/eval_api.py b/ktransformers/tests/humaneval/eval_api.py
new file mode 100644
index 0000000..72b9790
--- /dev/null
+++ b/ktransformers/tests/humaneval/eval_api.py
@@ -0,0 +1,89 @@
+# adapt from https://github.com/abacaj/code-eval?tab=readme-ov-file
+import argparse
+import os
+import requests
+from human_eval.data import write_jsonl, read_problems
+import tqdm
+
+from evaluation import filter_code, fix_indents
+from prompts import instruct_prompt
+
+def generate_text(api_url,question , model_name, stream=False, auth_token=None):
+    headers = {
+        'accept': 'application/json',
+        'Content-Type': 'application/json',
+        # Attach the API key as a Bearer token
+        'Authorization' : 'Bearer ' + auth_token
+    }
+    question = instruct_prompt(question)
+    data = {
+        "messages": [{"content": question, "role": "user"}],
+        "model": model_name,
+        "stream": stream,
+        "temperature": 0.6
+    }
+    print(f"content: {question}")
+    response = requests.post(api_url, headers=headers, json=data,verify=False)
+    if response.status_code == 200:
+        result = response.json()
+        results = result.get('choices', [{}])[0].get('message', {}).get('content', '')
+        return 
[filter_code(fix_indents(results))]
+    else:
+        print(f"API Request failed with status code {response.status_code}")
+        return None
+
+def run_eval_api(
+    api_url: str,
+    model_name: str,
+    out_path: str,
+    format_tabs: bool = False,
+    auth_token: str = None,
+    problem_file: str = None,
+    append: bool = False
+):
+    if(problem_file is None):
+        problems = read_problems()
+    else:
+        problems = read_problems(problem_file)
+    samples = []
+    pbar = tqdm.tqdm(total=len(problems) * 1)
+    try:
+        for task_id in problems:
+            if format_tabs:
+                prompt = problems[task_id]["prompt"].replace("    ", "\t")
+            else:
+                prompt = problems[task_id]["prompt"]
+            completion = generate_text(api_url, prompt, model_name, auth_token=auth_token)
+            # samples.append({"task_id": task_id, "completion": completion})
+            for sample in completion:
+                result = dict(
+                    task_id=task_id,
+                    completion=sample,
+                )
+                samples += [result]
+            if append:
+                write_jsonl(out_path, [result],append=append)
+            pbar.update(1)
+        if not append:
+            write_jsonl(out_path, samples,append=append)
+    except Exception as e:
+        write_jsonl(out_path, samples,append=append)
+        print(f"Error: {e}")
+
+def main(output_path, api_url, model_name, auth_token, format_tabs,problem_file, append):
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    run_eval_api(api_url, model_name, output_path, format_tabs, auth_token, problem_file,append)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="API Generate Tester")
+    parser.add_argument("--api_url", type=str, default="https://api.siliconflow.cn/v1/chat/completions", help="API URL")
+    parser.add_argument("--model_name", type=str, default="Pro/deepseek-ai/DeepSeek-V3", help="Model Name")
+    parser.add_argument("--out_path", type=str, default="results/api/eval.jsonl", help="Output Path")
+    parser.add_argument("--auth_token", type=str, default=None, help="Auth Token")
+    parser.add_argument("--format_tabs", action="store_true", help="Format Tabs")
+    
parser.add_argument("--problem_file", type=str, default=None, help="Evalset File")
+    parser.add_argument("--no_append", action="store_false", help="Disable appending; buffer all results and write them once at the end")
+    args = parser.parse_args()
+    # api_url = "https://api.siliconflow.cn/v1/chat/completions"
+    main(args.out_path, args.api_url, args.model_name, args.auth_token, args.format_tabs, args.problem_file, args.no_append)
\ No newline at end of file
diff --git a/ktransformers/tests/humaneval/evaluation.py b/ktransformers/tests/humaneval/evaluation.py
new file mode 100644
index 0000000..9d93a20
--- /dev/null
+++ b/ktransformers/tests/humaneval/evaluation.py
@@ -0,0 +1,15 @@
+# reference: https://github.com/declare-lab/instruct-eval/blob/main/human_eval/main.py#L35
+def filter_code(completion: str) -> str:
+    # The program tends to overwrite, we only take the first function
+    completion = completion.lstrip("\n")
+    # we also remove ```python\n and ```
+    completion = completion.replace("```python\n", "").replace("```", "")
+    if 'if __name__ == "__main__":' in completion:
+        completion = completion.split('if __name__ == "__main__":')[0]
+    if "# Example usage" in completion:
+        completion = completion.split("# Example usage")[0]
+    return completion.split("\n\n")[0]
+
+
+def fix_indents(text: str) -> str:
+    return text.replace("\t", "    ")
diff --git a/ktransformers/tests/humaneval/prompts.py b/ktransformers/tests/humaneval/prompts.py
new file mode 100644
index 0000000..694bac8
--- /dev/null
+++ b/ktransformers/tests/humaneval/prompts.py
@@ -0,0 +1,14 @@
+def instruct_prompt(prompt: str) -> str:
+    return f"""Below is an instruction that describes a task. 
Write a response that appropriately completes the request.\n\n### Instruction:\nComplete the following Python code without any tests or explanation\n{prompt}\n\n### Response:"""
+
+
+def standard_prompt(prompt: str) -> str:
+    return f"""Complete the following Python code without any tests or explanation\n{prompt}"""
+
+
+def write_prompt(prompt: str) -> str:
+    return f"""Write a python program to complete the following code:\n{prompt}"""
+
+
+def replit_glaive_prompt(prompt: str) -> str:
+    return f"""Below is an instruction that describes a task, paired with an input that provides further context.\n Write a response that appropriately completes the request.\n\n ### Instruction:\nWrite a program to perform the given task.\n\n Input:\n{prompt}\n\n### Response:"""
diff --git a/ktransformers/tests/mmlu_pro_test.py b/ktransformers/tests/mmlu_pro_test.py
index dffe296..c0db4a5 100644
--- a/ktransformers/tests/mmlu_pro_test.py
+++ b/ktransformers/tests/mmlu_pro_test.py
@@ -59,8 +59,8 @@ class DataEvaluator:
         :param text: The raw prediction string.
         :return: Processed prediction string.
         """
-        text = text.lstrip('\n').split('\n')[0]
-        return text[:1]
+        text = text.lstrip('\n').split('\n')[-1]
+        return text[-1:]
 
     def score(self, pred, answers):
         """
diff --git a/ktransformers/tests/mmlu_test.py b/ktransformers/tests/mmlu_test.py
index 296d81a..452cbbf 100644
--- a/ktransformers/tests/mmlu_test.py
+++ b/ktransformers/tests/mmlu_test.py
@@ -59,8 +59,8 @@ class DataEvaluator:
         :param text: The raw prediction string.
         :return: Processed prediction string.
         """
-        text = text.lstrip('\n').split('\n')[0]
-        return text[:1]
+        text = text.lstrip('\n').split('\n')[-1]
+        return text[-1:]
 
     def score(self, pred, answers):
         """