Files processing - books

* book translate

* files processing

* files processing

* files processing

* files processing

---------

Co-authored-by: APodoinikov <APodoynikov@detmir.ru>
This commit is contained in:
illian64 2025-09-04 11:09:29 +07:00 committed by GitHub
parent e9e0e647f7
commit 0a70da3b98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 1737 additions and 493 deletions

View file

@@ -5,12 +5,12 @@ from ctranslate2 import Translator
from tqdm import tqdm
from transformers import AutoTokenizer
from app import cuda, struct
from app import cuda, params
from app.app_core import AppCore
from app.dto import TranslatePluginInitInfo, TranslateStruct
from app.lang_dict import lang_2_chars_to_nllb_lang
from app.struct import TranslateStruct, tp
modname = os.path.basename(__file__)[:-3]
plugin_name = os.path.basename(__file__)[:-3]
model: Translator
tokenizers:dict = {}
@@ -42,24 +42,24 @@ def start(core: AppCore):
def start_with_options(core: AppCore, manifest:dict):
struct.read_plugin_params(manifest)
params.read_plugin_translate_params(manifest)
return manifest
def init(core:AppCore):
options = core.plugin_options(modname)
def init(core:AppCore) -> TranslatePluginInitInfo:
options = core.plugin_options(plugin_name)
global model
model = ctranslate2.Translator(options["model"],
model = ctranslate2.Translator(options["model"], compute_type=options["compute_type"],
device=cuda.get_device(options), device_index=options["cuda_device_index"])
return modname
return TranslatePluginInitInfo(plugin_name=plugin_name, model_name=f'{options["model"]}__{options["compute_type"]}')
def translate(core: AppCore, ts: TranslateStruct):
options = core.plugin_options(modname)
options = core.plugin_options(plugin_name)
from_lang = lang_2_chars_to_nllb_lang[ts.req.from_lang]
to_lang = lang_2_chars_to_nllb_lang[ts.req.to_lang]
@@ -68,7 +68,7 @@ def translate(core: AppCore, ts: TranslateStruct):
tokenizer = tokenizers[from_lang]
# translate_batch not optimal, but there are problems with try to implement batch processing like madlab_ctranslate2
for part in tqdm(ts.parts, unit=tp.unit, ascii=tp.ascii, desc=tp.desc):
for part in tqdm(ts.parts, unit=params.tp.unit, ascii=params.tp.ascii, desc=params.tp.desc):
if part.need_to_translate():
input_text = part.text
tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))