# Template config; you need to change dump_dir, data.root_dir and tokenizer.path
# Evals can be activated by uncommenting the eval config
# python -m launchers.stool config=apps/main/configs/debug.yaml nodes=8 account=fair_amaia_cw_codegen qos=lowest

dump_dir: /tmp/
name: "debug"
steps: 100_000
probe_freq: null
seed: 777

optim:
  lr: 4e-04
  warmup: 500
  lr_min_ratio: 0.1
  clip: 10.0

distributed:
  fsdp_type: full_shard
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1

train_entropy_model: true
model: null
entropy_model:
  dim: 768
  n_layers: 14
  n_heads: 12
  max_seqlen: 8192
  # vocab_size: -1
  # 256 byte values plus special tokens
  vocab_size: 260
  ffn_dim_multiplier: 1.0
  sliding_window: 512
  attn_bias_type: "local_block_causal"
  attn_impl: "xformers"

data:
  s3_profile: blt
  root_dir: ???
  sources:
    dclm_baseline_1.0: 1.0
  batch_size: 2
  prefetch_size: 64
  # seq_len is in terms of patches and
  # max_encoder_seq_length is in terms of bytes.
  # For the entropy model these are the same, since 1 patch = 1 byte.
  seq_len: 8192
  max_encoder_seq_length: 8192
  load_async: true
  preprocess_dir: ???
  # We don't need patches for this model
  add_patches: false
  patcher_args:
    # This doesn't matter since the byte entropy model doesn't use patching,
    # so pick the most efficient mode
    patching_mode: byte
  tokenizer_args:
    name: bytes

profiling:
  run: false

checkpoint:
  dump:
    every: 500
    keep: 3
  eval:
    every: 1000
    keep: -1

logging:
  freq: 10

eval_on_gpus: 8
eval: null
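
# The header says evals can be activated by uncommenting their config, but this
# template ships no commented-out eval block. The sketch below is only an
# illustrative assumption of what such a block could look like; the field names
# (dataset_dir, generator.*, harness.tasks) are hypothetical and should be
# checked against the repo's actual eval configs before uncommenting.
# eval:
#   dataset_dir: ???
#   generator:
#     max_gen_len: 512
#     temperature: 0.0
#   harness:
#     tasks: ["hellaswag", "piqa"]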