mirror of
https://github.com/illian64/llm-translate.git
synced 2026-04-30 21:00:10 +00:00
Files processing - books
* book translate * files processing * files processing * files processing * files processing --------- Co-authored-by: APodoinikov <APodoynikov@detmir.ru>
This commit is contained in:
parent
e9e0e647f7
commit
0a70da3b98
39 changed files with 1737 additions and 493 deletions
|
|
@@ -5,12 +5,12 @@ from ctranslate2 import Translator
|
|||
from tqdm import tqdm
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from app import cuda, struct
|
||||
from app import cuda, params
|
||||
from app.app_core import AppCore
|
||||
from app.dto import TranslatePluginInitInfo, TranslateStruct
|
||||
from app.lang_dict import lang_2_chars_to_nllb_lang
|
||||
from app.struct import TranslateStruct, tp
|
||||
|
||||
modname = os.path.basename(__file__)[:-3]
|
||||
plugin_name = os.path.basename(__file__)[:-3]
|
||||
|
||||
model: Translator
|
||||
tokenizers:dict = {}
|
||||
|
|
@@ -42,24 +42,24 @@ def start(core: AppCore):
|
|||
|
||||
|
||||
def start_with_options(core: AppCore, manifest:dict):
|
||||
struct.read_plugin_params(manifest)
|
||||
params.read_plugin_translate_params(manifest)
|
||||
|
||||
return manifest
|
||||
|
||||
|
||||
def init(core:AppCore):
|
||||
options = core.plugin_options(modname)
|
||||
def init(core:AppCore) -> TranslatePluginInitInfo:
|
||||
options = core.plugin_options(plugin_name)
|
||||
|
||||
global model
|
||||
|
||||
model = ctranslate2.Translator(options["model"],
|
||||
model = ctranslate2.Translator(options["model"], compute_type=options["compute_type"],
|
||||
device=cuda.get_device(options), device_index=options["cuda_device_index"])
|
||||
|
||||
return modname
|
||||
return TranslatePluginInitInfo(plugin_name=plugin_name, model_name=f'{options["model"]}__{options["compute_type"]}')
|
||||
|
||||
|
||||
def translate(core: AppCore, ts: TranslateStruct):
|
||||
options = core.plugin_options(modname)
|
||||
options = core.plugin_options(plugin_name)
|
||||
|
||||
from_lang = lang_2_chars_to_nllb_lang[ts.req.from_lang]
|
||||
to_lang = lang_2_chars_to_nllb_lang[ts.req.to_lang]
|
||||
|
|
@@ -68,7 +68,7 @@ def translate(core: AppCore, ts: TranslateStruct):
|
|||
tokenizer = tokenizers[from_lang]
|
||||
|
||||
# translate_batch is not optimal here, but attempts to implement batch processing like in madlab_ctranslate2 ran into problems
|
||||
for part in tqdm(ts.parts, unit=tp.unit, ascii=tp.ascii, desc=tp.desc):
|
||||
for part in tqdm(ts.parts, unit=params.tp.unit, ascii=params.tp.ascii, desc=params.tp.desc):
|
||||
if part.need_to_translate():
|
||||
input_text = part.text
|
||||
tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue