# Training configuration: "large_lm".
# Provenance: mirror of https://github.com/facebookresearch/blt.git
# (synced 2025-01-19 17:07:45 +00:00).

# dump_dir: !!!CHANGE_THIS!!!
name: large_lm
steps: 60_000
probe_freq: null
seed: 777

# Optimizer / LR schedule.
optim:
  # NOTE(review): "3e-3" without a decimal point is parsed as a *string* by
  # strict YAML 1.1 loaders (e.g. PyYAML); the consuming config loader
  # presumably coerces it to a float — confirm before reformatting.
  lr: 3e-3
  weight_decay: 0.033
  warmup: 5000
  lr_min_ratio: 0.000001
  clip: 1.0

# Parallelism / precision settings.
distributed:
  fsdp_type: full_shard
  compile: true
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1

# Model architecture.
model:
  dim: 2048
  n_layers: 25
  n_heads: 16

# Dataset and dataloader settings.
data:
  root_dir: data/shuffled
  sources:
    # source name -> sampling weight
    dclm_baseline_1.0: 100.0
  batch_size: 4
  prefetch_size: 1024
  seq_len: 4096
  n_views: 2
  load_async: true
  add_bos: true
  add_eos: true
  tokenizer:
    name: tiktoken
    path: tokenizers/cl_toplang_128k.tiktoken

# Memory / compute profiler.
profiling:
  run: true
  mem_warmup: 0
  mem_steps: 4
  profile_warmup: 100
  profile_steps: 4

checkpoint:
  dump:
    every: 2500
    keep: 3
  eval:
    every: 5000
    keep: -1  # presumably -1 means "keep all" — confirm in the trainer

logging:
  freq: 1

async_eval_gpus: 8
eval:
  harness:
    tasks:
      - hellaswag
      - task: boolq
        dataset_kwargs:
          trust_remote_code: true
      - piqa
      - task: social_iqa
        dataset_kwargs:
          trust_remote_code: true
      - winogrande
      - openbookqa
      - arc_easy
      - arc_challenge
      - race
      - commonsense_qa
      - copa
      # - coqa
      # - task: nq_open
      #   num_fewshot: 5
      # - triviaqa
  validation:
    max_steps: 1000
  # NOTE(review): nesting of "generator" under "eval" is inferred from the
  # original key order (indentation was lost in extraction) — confirm
  # against the consumer's config schema.
  generator:
    max_tokens: 16384
    dtype: bf16