# Source: blt/apps/main/configs/llama_1B.yaml
# Retrieved: 2024-12-12 15:32:30 -08:00
# (original: 88 lines, 1.4 KiB, YAML)
# Top-level run settings.
# dump_dir must be set per-run before launch:
# dump_dir: !!!CHANGE_THIS!!!
name: large_lm
steps: 60_000  # total training steps (underscore separator; YAML 1.1 parses as 60000)
probe_freq: null  # probing disabled
seed: 777
# Optimizer hyperparameters. Children re-indented under `optim`: in the
# flattened paste they parsed as top-level keys, leaving `optim` null.
optim:
  lr: 3e-3
  weight_decay: 0.033
  warmup: 5000  # warmup steps — presumably linear LR warmup; TODO confirm
  lr_min_ratio: 0.000001  # floor for the LR schedule, as a fraction of `lr`
  clip: 1.0  # gradient clipping threshold — NOTE(review): presumably global norm; confirm
# Distributed-training settings. Children re-indented under `distributed`
# (flat keys would otherwise parse at top level and leave `distributed` null).
distributed:
  fsdp_type: full_shard
  compile: true  # torch.compile the model — TODO confirm against trainer
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1  # tensor-parallel degree; 1 = disabled
# Model architecture (~1B-parameter scale per the filename).
# Children re-indented under `model`.
model:
  dim: 2048
  n_layers: 25
  n_heads: 16
# Data-loading settings. Two nesting levels restored: `sources` and
# `tokenizer` are sub-mappings of `data`. In the flattened paste the
# tokenizer `name:` key also collided with the top-level run `name:`.
data:
  root_dir: data/shuffled
  sources:
    # source -> sampling weight — presumably relative mixture weight; confirm
    dclm_baseline_1.0: 100.0
  batch_size: 4  # per-rank micro-batch size — TODO confirm
  prefetch_size: 1024
  seq_len: 4096
  n_views: 2
  load_async: true
  add_bos: true
  add_eos: true
  tokenizer:
    name: tiktoken
    path: tokenizers/cl_toplang_128k.tiktoken
# Profiler settings (memory snapshot + trace). Children re-indented
# under `profiling`.
profiling:
  run: true
  mem_warmup: 0  # steps before memory profiling starts
  mem_steps: 4  # steps of memory profiling to capture
  profile_warmup: 100  # steps before the trace profiler starts
  profile_steps: 4  # steps of trace to capture
# Checkpointing. Nesting restored: `dump` and `eval` are separate
# sub-mappings, each with its own `every`/`keep`. Flattened, these were
# duplicate top-level keys — most parsers silently keep only the last pair.
checkpoint:
  dump:
    every: 2500  # save a checkpoint every N steps
    keep: 3  # retain only the 3 most recent dumps
  eval:
    every: 5000  # eval-checkpoint cadence
    keep: -1  # -1 — presumably "keep all"; TODO confirm
# Logging cadence. `freq` re-indented under `logging`.
logging:
  freq: 1  # log metrics every step

# NOTE(review): kept at top level (a trainer arg, not a logging option) —
# confirm against the consuming config schema.
async_eval_gpus: 8
# Evaluation settings. Structure restored: `harness.tasks` is a sequence
# whose items are either a bare task name or a mapping with per-task
# options; `validation` is a sibling of `harness`.
eval:
  harness:
    tasks:
      - hellaswag
      - task: boolq
        dataset_kwargs:
          trust_remote_code: true  # dataset runs remote code — review before enabling elsewhere
      - piqa
      - task: social_iqa
        dataset_kwargs:
          trust_remote_code: true
      - winogrande
      - openbookqa
      - arc_easy
      - arc_challenge
      - race
      - commonsense_qa
      - copa
      # Disabled tasks, kept for reference:
      # - coqa
      # - task: nq_open
      #   num_fewshot: 5
      # - triviaqa
  validation:
    max_steps: 1000  # cap on validation batches — TODO confirm unit (steps vs samples)
# Generation settings used during eval. Children re-indented under `generator`.
generator:
  max_tokens: 16384
  dtype: bf16