mirror of
https://github.com/facebookresearch/blt.git
synced 2025-01-18 16:37:46 +00:00
20 lines
526 B
Python
20 lines
526 B
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
import abc
|
|
|
|
|
|
class Tokenizer(abc.ABC):
|
|
@abc.abstractmethod
|
|
def encode(self, text: str, add_bos: bool, add_eos: bool):
|
|
pass
|
|
|
|
@abc.abstractmethod
|
|
def decode(self, tokens: list[int]):
|
|
pass
|
|
|
|
@abc.abstractmethod
|
|
def get_token_offsets(
|
|
self, text: str, tokens: list[int] | None = None
|
|
) -> tuple[list[str], list[int]]:
|
|
"""Return the offsets of the tokens in the original text. Only used for evaluation."""
|
|
pass
|