mirror of
https://github.com/illian64/llm-translate.git
synced 2026-04-28 20:00:12 +00:00
init
This commit is contained in:
commit
8557624008
29 changed files with 2560 additions and 0 deletions
86
plugins/plugin_nllb_200_ctranslate2.py
Normal file
86
plugins/plugin_nllb_200_ctranslate2.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import os
|
||||
|
||||
import ctranslate2
|
||||
from ctranslate2 import Translator
|
||||
from tqdm import tqdm
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from app import cuda, struct
|
||||
from app.app_core import AppCore
|
||||
from app.lang_dict import lang_2_chars_to_nllb_lang
|
||||
from app.struct import TranslateStruct, tp
|
||||
|
||||
# Plugin identifier: this file's base name with the trailing ".py" removed.
modname = os.path.basename(__file__)[: -len(".py")]

# Shared CTranslate2 translator; declared here and assigned in init().
model: Translator

# Tokenizer cache, filled lazily by translate() and keyed by the NLLB
# source-language code.
tokenizers: dict = {}
|
||||
|
||||
|
||||
def start(core: AppCore):
    """Build and return the plugin manifest.

    The manifest carries the plugin name and version, registers the
    ``(init, translate)`` pair under the ``nllb_200_ctranslate2`` key and
    provides the default option values. ``core`` is not used here.
    """
    text_split_defaults = {
        "split_by_sentences_only": True,
    }

    default_options = {
        "model": "models/nllb-200-3.3B-ct2-float16",  # model
        "compute_type": "bfloat16",
        "cuda": True,  # False to run on CPU, True to run on CUDA
        "cuda_device_index": 0,  # GPU index (if there is more than one GPU)
        "max_batch_size": 16,
        "text_split_params": text_split_defaults,
    }

    # First function initializes the engine, second one translates.
    translators = {
        "nllb_200_ctranslate2": (init, translate),
    }

    return {
        "name": "NLLB 200 CTranslate2",  # name
        "version": "1.0",  # version
        "translate": translators,
        "default_options": default_options,
    }
|
||||
|
||||
|
||||
def start_with_options(core: AppCore, manifest: dict):
    """Run ``struct.read_plugin_params`` on the manifest and return it.

    ``core`` is accepted but not used by this function.
    """
    struct.read_plugin_params(manifest)
    return manifest
|
||||
|
||||
|
||||
def init(core: AppCore):
    """Create the shared CTranslate2 translator from the plugin options.

    Reads this plugin's options from ``core`` and stores the resulting
    ``ctranslate2.Translator`` in the module-level ``model``.

    Options used:
        model: path of the converted CTranslate2 model directory.
        cuda_device_index: device index passed to the translator.
        compute_type: computation type passed to the translator. The
            manifest declares this option, so it is honored here; when it
            is absent the CTranslate2 value ``"default"`` is used.

    The device itself is chosen by ``cuda.get_device(options)``.

    Returns:
        The plugin module name (``modname``).
    """
    global model

    options = core.plugin_options(modname)

    model = ctranslate2.Translator(
        options["model"],
        device=cuda.get_device(options),
        device_index=options["cuda_device_index"],
        # Previously the configured compute type was silently ignored.
        compute_type=options.get("compute_type", "default"),
    )

    return modname
|
||||
|
||||
|
||||
def translate(core: AppCore, ts: TranslateStruct):
    """Translate every part of ``ts`` that needs translation.

    The two request language codes are mapped to NLLB codes through
    ``lang_2_chars_to_nllb_lang`` (a missing code raises ``KeyError``).
    A tokenizer is loaded once per source language and cached in the
    module-level ``tokenizers`` dict. Each translated text is written to
    ``part.translate``.

    Returns:
        The same ``ts`` object, with translated parts filled in.
    """
    options = core.plugin_options(modname)

    from_lang = lang_2_chars_to_nllb_lang[ts.req.from_lang]
    to_lang = lang_2_chars_to_nllb_lang[ts.req.to_lang]

    # Load the tokenizer for this source language only on first use.
    tokenizer = tokenizers.get(from_lang)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(options["model"], src_lang=from_lang)
        tokenizers[from_lang] = tokenizer

    # Parts are translated one at a time. This use of translate_batch is not
    # optimal, but real batch processing (as in madlab_ctranslate2) ran into
    # problems.
    for part in tqdm(ts.parts, unit=tp.unit, ascii=tp.ascii, desc=tp.desc):
        if not part.need_to_translate():
            continue

        tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(part.text))

        translate_results = model.translate_batch(
            [tokens],
            max_batch_size=options["max_batch_size"],
            beam_size=1,
            return_scores=False,
            disable_unk=False,
            target_prefix=[[to_lang]],
            batch_type="tokens",
        )
        output_tokens = translate_results[0].hypotheses[0]

        # The hypothesis starts with the target prefix (the language code).
        # Drop that single token here rather than string-replacing the code
        # in the decoded text, which would also delete any literal occurrence
        # of the code inside the translation itself.
        if output_tokens and output_tokens[0] == to_lang:
            output_tokens = output_tokens[1:]

        decoded_text = tokenizer.decode(tokenizer.convert_tokens_to_ids(output_tokens))
        part.translate = decoded_text.lstrip()

    return ts
|
||||
Loading…
Add table
Add a link
Reference in a new issue