# File: blt/apps/main/configs/eval.yaml
# Timestamp shown by the repository viewer: 2024-12-12 23:32:30 +00:00
# Evaluation configuration: a named run with generator settings, a list of
# harness tasks, and validation settings.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped -- confirm it against the consumer's schema.
name: "debug_evals"

# Placeholders that presumably must be uncommented and set before use.
# When filling them in, quote the value or replace it with a plain path:
# a plain scalar starting with `!` is parsed as a YAML tag, not a string.
# ckpt_dir: !!CHANGETHIS!!
# dump_dir: !!CHANGETHIS!!

# Generator settings (presumably text-generation/sampling options -- verify
# the accepted keys and value ranges against the consumer).
generator:
  max_tokens: 8192
  dtype: bf16
  temperature: 1.0
  top_p: 0.95

# Harness task list. An entry is either a bare task name or a mapping with a
# `task` key plus per-task overrides.
# NOTE(review): the names look like lm-evaluation-harness task ids -- confirm.
harness:
  tasks:
    - hellaswag
    - task: boolq
      # NOTE(review): trust_remote_code presumably allows dataset-repository
      # code to run at load time -- confirm this is intended.
      dataset_kwargs:
        trust_remote_code: true
    - task: nq_open
      num_fewshot: 5
    - piqa
    - task: social_iqa
      # NOTE(review): same trust_remote_code consideration as for boolq.
      dataset_kwargs:
        trust_remote_code: true
    - triviaqa
    - winogrande
    - openbookqa
    - arc_easy
    - arc_challenge
    - race
    - commonsense_qa
    # Disabled task; left commented out as in the original.
    # - coqa
    - copa
    - gsm8k
    - bbh
    - mmlu
    - mmlu_pro

# Validation settings.
# NOTE(review): `validation` is assumed to be a top-level key rather than a
# child of `harness` -- confirm.
validation:
  max_steps: 1000