# blt/apps/main/configs/llama_7B.yaml
# python -m lingua.stool config=apps/main/configs/llama2_7B.yaml nodes=32 account=fair_amaia_cw_codegen qos=lowest
# dump_dir: !!!CHANGE_THIS!!!
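
# Run setup: 100k optimizer steps with no gradient accumulation and a fixed
# seed; probe_freq likely controls how often model-internal probes are logged.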
name: "7b_baseline"
steps: 100_000
grad_acc_steps: 1
probe_freq: 100
seed: 777
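
# Optimizer: peak LR 1e-3 with a 2000-step warmup; lr_min_ratio is presumably
# the decay floor, expressed as a fraction of the peak LR.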
optim:
  lr: 1.0e-3
  weight_decay: 0.1
  warmup: 2000
  lr_min_ratio: 0.000001
  clip: 1.0
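
# Parallelism and precision: fully sharded data parallel (FSDP), torch.compile
# enabled, bf16 weights, TF32 matmuls disabled, no tensor parallelism.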
distributed:
  fsdp_type: full_shard
  compile: true
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1
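
# Llama-style ~7B transformer: dim/n_layers/n_heads match Llama-2 7B, though
# rope_theta is 100k rather than Llama-2's 10k; multiple_of rounds the FFN
# hidden size up to a multiple of 256.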
model:
  dim: 4096
  n_layers: 32
  n_heads: 32
  rope_theta: 100_000
  ffn_dim_multiplier: 1.0
  multiple_of: 256
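
# Data pipeline: sources maps dataset name to sampling weight (here 100%
# DCLM-baseline); batch_size is presumably per GPU, with 4096-token sequences.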
data:
  root_dir: data/shuffled
  sources:
    dclm_baseline_1.0: 1.0
  batch_size: 2
  prefetch_size: 1024
  seq_len: 4096
  n_views: 2
  load_async: true
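  # tiktoken BPE tokenizer; the filename suggests a 128k-entry vocabulary.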
  tokenizer:
    name: tiktoken
    path: tokenizers/cl_toplang_128k.tiktoken
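
# Profiling: a 4-step memory snapshot taken immediately (mem_warmup: 0), then
# a 4-step trace after 100 warmup steps.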
profiling:
  run: true
  mem_warmup: 0
  mem_steps: 4
  profile_warmup: 100
  profile_steps: 4
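
# Checkpoints: full dumps every 10k steps, all retained (keep: -1); eval
# checkpoints every 1k steps, keeping only the 3 most recent.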
checkpoint:
  dump:
    every: 10000
    keep: -1
  eval:
    every: 1000
    keep: 3
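
# Log metrics every step; async_eval_gpus presumably reserves 8 GPUs to run
# evals concurrently with training.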
logging:
  freq: 1
async_eval_gpus: 8
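
# Downstream evals (lm-eval-harness-style task list); tasks that ship custom
# loading code must opt into trust_remote_code.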
eval:
  dataset_dir: datasets/eval
  harness:
    tasks:
      - hellaswag
      - task: boolq
        dataset_kwargs:
          trust_remote_code: true
      - piqa
      - task: social_iqa
        dataset_kwargs:
          trust_remote_code: true
      - winogrande
      - openbookqa
      - arc_easy
      - arc_challenge
      - race
      - commonsense_qa
      # - coqa
      - copa
      - mmlu
      - mmlu_pro
      # - task: nq_open
      #   num_fewshot: 5
      # - triviaqa
      # - gsm8k
      # - bbh
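
  # Validation is capped at 1000 steps; the generator, presumably used by the
  # harness for generative tasks, runs in bf16 with an 8192-token budget.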
  validation:
    max_steps: 1000
  generator:
    max_tokens: 8192
    dtype: bf16