blt/bytelatent/tokenizers/constants.py
2024-12-12 15:32:30 -08:00

13 lines
185 B
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# Separator string: a single space character.
SEP: str = " "

# Special token IDs, listed in ascending numeric order. The expansions of the
# abbreviations are inferred from the names only -- TODO confirm against the
# tokenizer implementations that import these values.
PAD_ID: int = -1  # presumably padding; the only negative ID in this module
BOE_ID: int = 0  # NOTE(review): expansion of "BOE" is not shown in this file
BOS_ID: int = 1  # presumably beginning-of-sequence
EOS_ID: int = 2  # presumably end-of-sequence
BPE_ID: int = 3  # NOTE(review): role of "BPE" here is not shown in this file

# OFFSET equals the count of non-negative special IDs above (0 through 3).
# Presumably raw byte values are shifted by it to avoid colliding with those
# IDs -- verify against callers.
OFFSET: int = 4

# 256 is the number of distinct values an 8-bit byte can take.
BYTE_UNITS: int = 256