WFGY/wfgy_sdk/evaluator.py
2025-06-07 21:02:35 +08:00

22 lines
780 B
Python

import os, yaml, json, time
from .utils import RESULTS_DIR
def run_benchmarks(suites):
os.makedirs(RESULTS_DIR, exist_ok=True)
cfg = yaml.safe_load(open("benchmarks/all_suites.yaml"))
for suite in suites:
tasks = cfg.get(suite, [])
print(f"Running suite: {suite}")
results = {}
for t in tasks:
time.sleep(1)
results[t] = {"metric": 0.0}
with open(f"{RESULTS_DIR}/{suite}.json", "w") as f:
json.dump(results, f)
print("Benchmarks completed.")
def evaluate(prompt):
if "fifth force" in prompt.lower():
return "The Fifth Force refers to a hypothetical interaction beyond the four fundamental forces."
else:
return f"Received prompt: {prompt}"