# blt/apps/main/configs/eval.yaml
# 2024-12-12 15:32:30 -08:00
#
# 36 lines
# 604 B
# YAML

# Evaluation configuration with three sections: `generator` (generation
# settings), `harness` (the list of evaluation tasks) and `validation`.
# Nested keys are indented two spaces under their parent; the nesting is
# significant and must be preserved when editing.

# Label for this configuration (presumably the run name -- confirm against
# the code that loads this file).
name: "debug_evals"

# Placeholders: uncomment and replace `!!CHANGETHIS!!` with real directories.
# The placeholder is not a usable value as written (a leading `!` is a YAML
# tag indicator), so the whole value has to be replaced, not just edited.
# ckpt_dir: !!CHANGETHIS!!
# dump_dir: !!CHANGETHIS!!

# Generation settings.
generator:
  max_tokens: 8192
  dtype: bf16
  temperature: 1.0
  top_p: 0.95

# Evaluation tasks. An entry is either a bare task name or a mapping with a
# `task` key plus per-task options (`dataset_kwargs`, `num_fewshot`).
harness:
  tasks:
    - hellaswag
    - task: boolq
      dataset_kwargs:
        trust_remote_code: true
    - task: nq_open
      num_fewshot: 5
    - piqa
    - task: social_iqa
      dataset_kwargs:
        trust_remote_code: true
    - triviaqa
    - winogrande
    - openbookqa
    - arc_easy
    - arc_challenge
    - race
    - commonsense_qa
    # - coqa
    - copa
    - gsm8k
    - bbh
    - mmlu
    - mmlu_pro

# Validation settings.
# NOTE(review): `max_steps` presumably caps the number of validation steps --
# confirm against the consumer of this section.
validation:
  max_steps: 1000