# Label for this configuration. How it is used (e.g. run or log naming) is
# not visible in this file.
name: "debug_evals"
# The two keys below are commented-out placeholders. Presumably both must be
# uncommented and pointed at real paths before running - confirm against the
# code that loads this file. Replace the whole `!!CHANGETHIS!!` marker when
# doing so: a value that starts with `!!` is read by YAML as a tag, not as a
# plain string.
# ckpt_dir: !!CHANGETHIS!!
# dump_dir: !!CHANGETHIS!!
# Options grouped under `generator`. The key names suggest text-generation
# settings (token budget, numeric precision, sampling), but the consumer is
# not visible here - TODO confirm the exact semantics there.
generator:
  # NOTE(review): unclear from this file whether this is a per-sequence or a
  # per-batch token limit - confirm.
  max_tokens: 8192
  # Presumably selects bfloat16 - confirm the spellings the consumer accepts.
  dtype: bf16
  # NOTE(review): temperature 1.0 together with top_p 0.95 looks like
  # stochastic sampling rather than greedy decoding. If so, scores on
  # generative tasks may vary between runs unless a seed is fixed - confirm.
  temperature: 1.0
  top_p: 0.95
# Benchmark task selection under `harness`. The task names look like
# lm-evaluation-harness task identifiers - TODO confirm against the consumer.
harness:
  # Entries take two forms: a bare task name, or a mapping with a `task` key
  # plus per-task options (`dataset_kwargs`, `num_fewshot`).
  tasks:
    - hellaswag
    - task: boolq
      # NOTE(review): presumably forwarded to dataset loading, where
      # `trust_remote_code` would permit running code shipped with the
      # dataset - confirm, and keep it only for datasets you trust.
      dataset_kwargs:
        trust_remote_code: true
    - task: nq_open
      # The only entry in this list with an explicit few-shot count; the
      # others presumably fall back to the consumer's default - confirm.
      num_fewshot: 5
    - piqa
    - task: social_iqa
      # Same per-task option as on the boolq entry.
      dataset_kwargs:
        trust_remote_code: true
    - triviaqa
    - winogrande
    - openbookqa
    - arc_easy
    - arc_challenge
    - race
    - commonsense_qa
    # coqa is commented out, so it is not part of the task list.
    # - coqa
    - copa
    - gsm8k
    - bbh
    - mmlu
    - mmlu_pro
# Options grouped under `validation`.
validation:
  # NOTE(review): presumably an upper bound on the number of validation
  # steps/batches - confirm the unit against the consumer.
  max_steps: 1000