# mirror of https://github.com/facebookresearch/blt.git (synced 2025-01-19 00:47:44 +00:00)
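# Training configuration for a 7B Llama-2-style baseline (lingua/BLT training stack).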
#python -m lingua.stool config=apps/main/configs/llama2_7B.yaml nodes=32 account=fair_amaia_cw_codegen qos=lowest
# dump_dir: !!!CHANGE_THIS!!!
name: "7b_baseline"
steps: 100_000
grad_acc_steps: 1
probe_freq: 100

seed: 777
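# Optimizer: peak LR, weight decay, warmup steps, the floor of the LR schedule
# as a fraction of the peak (lr_min_ratio), and gradient-norm clipping.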
optim:
    lr: 1.0e-3
    weight_decay: 0.1
    warmup: 2000
    lr_min_ratio: 0.000001
    clip: 1.0

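# Distributed setup: fully sharded FSDP, compiled model (presumably torch.compile),
# bf16 weights, TF32 matmuls off, no selective activation checkpointing, and no
# tensor parallelism (tp_size: 1).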
distributed:
    fsdp_type: full_shard
    compile: true
    model_dtype: bf16
    matmul_allow_tf32: false
    selective_activation_checkpointing: false
    tp_size: 1

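# Architecture: Llama-2-7B-like transformer (4096-dim, 32 layers, 32 heads);
# ffn_dim_multiplier and multiple_of shape the FFN hidden size.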
model:
    dim: 4096
    n_layers: 32
    n_heads: 32
    rope_theta: 100_000
    ffn_dim_multiplier: 1.0
    multiple_of: 256

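# Data: a single DCLM-baseline source with sampling weight 1.0, 4096-token
# sequences, and a tiktoken tokenizer; batch_size is presumably per GPU.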
data:
    root_dir: data/shuffled
    sources:
        dclm_baseline_1.0: 1.0
    batch_size: 2
    prefetch_size: 1024
    seq_len: 4096
    n_views: 2
    load_async: true
    tokenizer:
        name: tiktoken
        path: tokenizers/cl_toplang_128k.tiktoken

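# Profiling: memory snapshots and profiler traces, each captured for a few steps
# after their respective warmups.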
profiling:
    run: true
    mem_warmup: 0
    mem_steps: 4
    profile_warmup: 100
    profile_steps: 4

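# Checkpointing: full dumps every 10k steps (keep: -1 presumably keeps them all)
# and evaluation checkpoints every 1k steps, keeping the last 3.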
checkpoint:
    dump:
        every: 10000
        keep: -1
    eval:
        every: 1000
        keep: 3

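# Logging: metrics emitted every step.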
logging:
    freq: 1

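# Evaluation: 8 GPUs reserved for asynchronous evals; downstream tasks run through
# an lm-eval-harness-style interface, plus held-out validation and a generation pass.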
async_eval_gpus: 8
eval:
    dataset_dir: datasets/eval
    harness:
        tasks:
            - hellaswag
            - task: boolq
              dataset_kwargs:
                  trust_remote_code: true
            - piqa
            - task: social_iqa
              dataset_kwargs:
                  trust_remote_code: true
            - winogrande
            - openbookqa
            - arc_easy
            - arc_challenge
            - race
            - commonsense_qa
            # - coqa
            - copa
            - mmlu
            - mmlu_pro
            # - task: nq_open
            #   num_fewshot: 5
            # - triviaqa
            # - gsm8k
            # - bbh
    validation:
        max_steps: 1000
    generator:
        max_tokens: 8192
        dtype: bf16
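
# Launch on SLURM via the stool command at the top of this file (after setting
# dump_dir); individual fields can likely be overridden from the command line as
# OmegaConf dot-list arguments (e.g. optim.lr=3.0e-4), assuming lingua's
# OmegaConf-based CLI.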