diff --git a/.gitignore b/.gitignore index a8edfe2..40c0e33 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ test/_trial_temp /.idea/ /options/ /models/ -venv \ No newline at end of file +venv +__pycache__ \ No newline at end of file diff --git a/app.py b/app.py index f24b1aa..15d4894 100644 --- a/app.py +++ b/app.py @@ -1,17 +1,16 @@ -from contextlib import asynccontextmanager -from fastapi import FastAPI -import uvicorn import logging +from contextlib import asynccontextmanager +import uvicorn +from fastapi import FastAPI from starlette.staticfiles import StaticFiles from app.app_core import AppCore from app.cuda import cuda_info -from app.dto import TranslateReq -from app.struct import Request +from app.dto import TranslateReq, TranslateCommonRequest, TranslateResp, ProcessingFileDirReq, ProcessingFileDirResp, \ + ProcessingFileDirListResp from app.properties import Properties - core: AppCore logger = logging.getLogger('uvicorn') @@ -22,7 +21,7 @@ async def lifespan(fast_api: FastAPI): logger.info("Starting llm-translate") global core core = AppCore() - core.init_with_plugins() + core.init_with_translate_plugins() yield logger.info("Stopping llm-translate") @@ -33,9 +32,10 @@ properties = Properties() @app.get("/translate") -async def translate_get(text: str, from_lang: str = "", to_lang: str = "", translator_plugin: str = ""): +async def translate_get(text: str, from_lang: str = "", to_lang: str = "", + translator_plugin: str = "") -> TranslateResp: """ - Return translation + Translate text. :param str text: text to translate @@ -48,26 +48,34 @@ async def translate_get(text: str, from_lang: str = "", to_lang: str = "", trans :param str translator_plugin: to use. If blank, default will be used. 
If not initialized (not in "default_translate_plugin" and not in "init_on_start" from options - throw error) - :param str api_key: api key for access (if service setup in security mode with api keys) - :return: dict (result: text) - """ + """ - request = Request(text, from_lang, to_lang, translator_plugin) + request = TranslateCommonRequest(text, from_lang, to_lang, translator_plugin) return core.translate(request) @app.post("/translate") -async def translate_post(req: TranslateReq): - request = Request(req.text, req.from_lang, req.to_lang, req.translator_plugin) +async def translate_post(req: TranslateReq) -> TranslateResp: + request = TranslateCommonRequest(req.text, req.from_lang, req.to_lang, req.translator_plugin) return core.translate(request) +@app.get("/process-files-list") +async def process_files_list(recursive_sub_dirs: bool) -> ProcessingFileDirListResp: + return core.process_files_list(recursive_sub_dirs) + + +@app.post("/process-files") +async def process_files(req: ProcessingFileDirReq) -> ProcessingFileDirResp: + return core.process_files(req) + + if __name__ == "__main__": log_config = uvicorn.config.LOGGING_CONFIG log_config["formatters"]["access"]["fmt"] = "%(asctime)s %(levelname)s %(message)s" log_config["formatters"]["default"]["fmt"] = "%(asctime)s %(levelname)s %(message)s" app.mount('/', StaticFiles(directory='static', html=True), name='static') - uvicorn.run(app, host="127.0.0.1", port=properties.port, log_level="info", log_config=log_config, use_colors=False) \ No newline at end of file + uvicorn.run(app, host="127.0.0.1", port=properties.port, log_level="info", log_config=log_config, use_colors=False) diff --git a/app/app_core.py b/app/app_core.py index a421fce..2bf5084 100644 --- a/app/app_core.py +++ b/app/app_core.py @@ -1,11 +1,16 @@ import logging +import os +import time import traceback +from os import walk -from app import text_splitter +from app import text_splitter, file_processor from app.cache import Cache -from app.dto import 
TranslateResp -from app.struct import TranslateStruct, TranslationParams, TextSplitParams, TextProcessParams, Request, Part, \ - CacheParams +from app.dto import TranslateResp, ProcessingFileDirReq, \ + ProcessingFileDirResp, TranslatePluginInitInfo, Part, TranslateStruct, FileProcessingPluginInitInfo, \ + TranslateCommonRequest, ProcessingFileResp, ProcessingFileStruct, ProcessingFileStatus, ProcessingFileDirListResp, \ + ProcessingFileDirListItemIn, ProcessingFileDirListItemOut +from app.params import TranslationParams, TextSplitParams, TextProcessParams, CacheParams, FileProcessingParams from app.text_processor import pre_process from jaa import JaaCore @@ -21,22 +26,39 @@ class AppCore(JaaCore): self.init_on_start = "" self.translation_params = TranslationParams("", "") - self.text_split_params: TextSplitParams = None - self.text_process_params: TextProcessParams = None - self.cache_params: CacheParams = None + self.text_split_params: TextSplitParams | None = None + self.text_process_params: TextProcessParams | None = None + self.cache_params: CacheParams | None = None + self.file_processing_params: FileProcessingParams | None = None self.translators: dict = {} - self.initialized_translator_engines = dict() - self.cache: Cache = None + self.initialized_translator_engines: dict[str, TranslatePluginInitInfo] = dict() + self.cache: Cache | None = None + + self.files_ext_to_processors: dict[str, list[FileProcessingPluginInitInfo]] = dict() + self.sleep_after_translate: float = 0.0 def process_plugin_manifest(self, modname, manifest): - if "translate" in manifest: # process commands + if "translate" in manifest: # collect translate plugins for cmd in manifest["translate"].keys(): self.translators[cmd] = manifest["translate"][cmd] + if "file_processing" in manifest and manifest["options"]["enabled"]: # collect file processing plugins + for cmd in manifest["file_processing"].keys(): + init_info: FileProcessingPluginInitInfo = manifest["file_processing"][cmd][0](self) 
# init call + init_info.name = cmd + init_info.processing_function = manifest["file_processing"][cmd][1] + init_info.processed_file_name_function = manifest["file_processing"][cmd][2] + logger.info("Init file processing plugin '%s' for next file extensions: %s", + init_info.name, init_info.supported_extensions) + for ext in init_info.supported_extensions: + ext_list = self.files_ext_to_processors.get(ext, list()) + ext_list.append(init_info) + self.files_ext_to_processors[ext] = ext_list + return manifest - def init_with_plugins(self): + def init_with_translate_plugins(self) -> None: self.init_plugins(["core"]) self.cache = Cache(self.cache_params) @@ -44,68 +66,81 @@ class AppCore(JaaCore): self.init_translator_engine(self.default_translate_plugin) - init_on_start_list = self.init_on_start.replace(" ", "").split(",") + init_on_start_list = self.init_on_start.replace(" ", "").split(",") # TODO to array for translator in init_on_start_list: if translator != "": self.init_translator_engine(translator) logger.info("Found translation engines: %s", ", ".join(str(key) for key in self.translators.keys())) - def init_translator_engine(self, translator_engine: str): + def init_translator_engine(self, translator_engine: str) -> None: if translator_engine in self.initialized_translator_engines: # already inited return try: logger.info("Try to init translation plugin '%s'...", translator_engine) - modname = self.translators[translator_engine][0](self) - self.initialized_translator_engines[translator_engine] = modname + model_init_info: TranslatePluginInitInfo = self.translators[translator_engine][0](self) + self.initialized_translator_engines[translator_engine] = model_init_info logger.info("Success init translation plugin: '%s'.", translator_engine) except Exception as e: logger.error("Error init translation plugin '%s'...", translator_engine, e) - def get_plugin_options(self, translator_engine: str): - modname = self.initialized_translator_engines[translator_engine] - return 
self.plugin_options(modname) - - def get_translation_params(self, translator_engine: str): - options = self.get_plugin_options(translator_engine) - if options['translation_params_struct']: - return options['translation_params_struct'] + def get_translation_params(self, plugin_name: str) -> TranslationParams: + options = self.plugin_options(plugin_name) + if options and options.get('translation_params_struct'): + return options.get('translation_params_struct') else: return self.translation_params - def get_text_split_params(self, translator_engine: str): - options = self.get_plugin_options(translator_engine) - if options['text_split_params_struct']: - return options['text_split_params_struct'] + def get_text_split_params(self, plugin_name: str) -> TextSplitParams: + options = self.plugin_options(plugin_name) + if options and options.get('text_split_params_struct'): + return options.get('text_split_params_struct') else: return self.text_split_params - def get_text_process_params(self, translator_engine: str): - options = self.get_plugin_options(translator_engine) - if options['text_process_params_struct']: - return options['text_process_params_struct'] + def get_text_process_params(self, plugin_name: str) -> TextProcessParams: + options = self.plugin_options(plugin_name) + if options and options.get('text_process_params_struct'): + return options.get('text_process_params_struct') else: return self.text_process_params - def translate(self, req: Request): + def get_translator_plugin(self, req_plugin: str) -> str: + translator_plugin: str + if not req_plugin or req_plugin == "": + translator_plugin = self.default_translate_plugin + else: + translator_plugin = req_plugin + + if translator_plugin not in self.initialized_translator_engines: + raise ValueError("This translate_plugin not in initialized: " + translator_plugin) + + return translator_plugin + + def get_from_language(self, req_lang: str, plugin_name: str) -> str: + if req_lang == "" or req_lang == "--": + return 
self.get_translation_params(plugin_name).default_from_lang + else: + return req_lang + + def get_to_language(self, req_lang: str, plugin_name: str) -> str: + if req_lang == "" or req_lang == "--": + return self.get_translation_params(plugin_name).default_to_lang + else: + return req_lang + + def translate(self, req: TranslateCommonRequest) -> TranslateResp: if req.text == '': return TranslateResp(result='', parts=[], error=None) try: - if not req.translator_plugin or req.translator_plugin == "": - req.translator_plugin = self.default_translate_plugin - - if req.translator_plugin not in self.initialized_translator_engines: - raise ValueError("This translate_plugin not in initialized: " + req.translator_plugin) - - if req.from_lang == "": - req.from_lang = self.get_translation_params(req.translator_plugin).default_from_lang - - if req.to_lang == "": - req.to_lang = self.get_translation_params(req.translator_plugin).default_to_lang + req.translator_plugin = self.get_translator_plugin(req.translator_plugin) + plugin_info = self.initialized_translator_engines[req.translator_plugin] + req.from_lang = self.get_from_language(req.from_lang, plugin_info.plugin_name) + req.to_lang = self.get_to_language(req.to_lang, plugin_info.plugin_name) processed_text: str if self.get_text_process_params(req.translator_plugin).apply_for_request: @@ -116,12 +151,18 @@ class AppCore(JaaCore): text_parts: list[Part] = text_splitter.split_text(processed_text, self.get_text_split_params(req.translator_plugin), req.from_lang) - self.cache_read(req, text_parts) + for text_part in text_parts: + if not text_part.need_to_translate(): + text_part.translate = text_part.text + + self.cache.cache_read(req, text_parts, self.cache_params, plugin_info.model_name) translate_struct = TranslateStruct(req=req, processed_text=processed_text, parts=text_parts) - - translate_struct: TranslateStruct = self.translators[req.translator_plugin][1](self, translate_struct) - self.cache_write(req, 
translate_struct.parts) + if translate_struct.need_to_translate(): + translate_struct: TranslateStruct = self.translators[req.translator_plugin][1](self, translate_struct) + self.cache.cache_write(req, translate_struct.parts, self.cache_params, plugin_info.model_name) + if self.sleep_after_translate > 0: + time.sleep(self.sleep_after_translate) (translate_text, translate_parts) = text_splitter.join_text(translate_struct.parts) @@ -137,19 +178,143 @@ class AppCore(JaaCore): traceback.print_tb(e.__traceback__, limit=10) return TranslateResp(result=None, parts=None, error=getattr(e, 'message', repr(e))) - def cache_read(self, req: Request, parts: list[Part]): - if self.cache_params.enabled and req.translator_plugin not in self.cache_params.disable_for_plugins: - for part in parts: - if part.need_to_translate(): - cached_translate = self.cache.get(req, part.text) - if cached_translate: - part.cache_found = True - part.translate = cached_translate - else: - part.cache_found = False + def process_files_list(self, recursive_sub_dirs: bool) -> ProcessingFileDirListResp: + files_in: list[ProcessingFileDirListItemIn] = [] + for root, dirs, file_names in os.walk(self.file_processing_params.directory_in): + for file_name in file_names: + name, extension = os.path.splitext(file_name) + extension = extension.lower().replace(".", "") + processor_name = None + file_processor_error = None + try: + processor = self.get_file_processor(extension, None) + if processor: + processor_name = processor.name + except ValueError as ve: + file_processor_error = "error: " + ve.args[0] - def cache_write(self, req: Request, parts: list[Part]): - if self.cache_params.enabled and req.translator_plugin not in self.cache_params.disable_for_plugins: - for part in parts: - if part.need_to_translate() and not part.cache_found: - self.cache.put(req, part.text, part.translate) + files_in.append(ProcessingFileDirListItemIn( + file_with_path=file_processor.get_file_with_path_for_list( + 
self.file_processing_params.directory_in, root.replace(os.sep, "/"), file_name), + file_processor=processor_name, file_processor_error=file_processor_error)) + + if not recursive_sub_dirs: + break + + # output directory files list + files_out: list[ProcessingFileDirListItemOut] = [] + for root, dirs, file_names in walk(self.file_processing_params.directory_out): + for file_name in file_names: + files_out.append(ProcessingFileDirListItemOut( + file_with_path=file_processor.get_file_with_path_for_list(self.file_processing_params.directory_out, + root.replace(os.sep, "/"), file_name))) + if not recursive_sub_dirs: + break + + return ProcessingFileDirListResp(files_in=files_in, files_out=files_out, + directory_in=self.file_processing_params.directory_in, + directory_out=self.file_processing_params.directory_out, + error=None) + + def process_files(self, req: ProcessingFileDirReq) -> ProcessingFileDirResp: + try: + req.translator_plugin = self.get_translator_plugin(req.translator_plugin) + plugin_name = self.initialized_translator_engines[req.translator_plugin].plugin_name + req.from_lang = self.get_from_language(req.from_lang, plugin_name) + req.to_lang = self.get_to_language(req.to_lang, plugin_name) + + if not req.directory_in or req.directory_in == "": + req.directory_in = self.file_processing_params.directory_in + if not req.directory_out or req.directory_out == "": + req.directory_out = self.file_processing_params.directory_out + if req.preserve_original_text is None: + req.preserve_original_text = self.file_processing_params.preserve_original_text + if req.overwrite_processed_files is None: + req.overwrite_processed_files = self.file_processing_params.overwrite_processed_files + + files: list[ProcessingFileResp] = [] + for root, dirs, file_names in walk(req.directory_in): + for file_name in file_names: + files.append(self.process_file(req, root, file_name)) + if not req.recursive_sub_dirs: + break + + return ProcessingFileDirResp(files, "") + except ValueError as 
ve: + return ProcessingFileDirResp(files=list(), error=ve.args[0]) + except Exception as e: + traceback.print_tb(e.__traceback__, limit=10) + return ProcessingFileDirResp(files=list(), error=getattr(e, 'message', repr(e))) + + def process_file(self, req: ProcessingFileDirReq, root: str, file_name: str) -> ProcessingFileResp: + try: + name, extension = os.path.splitext(file_name) + + # try to find processor + extension = extension.lower().replace(".", "") + req_processor = req.file_processors.get(extension) if req.file_processors else None + processor = self.get_file_processor(extension, req_processor) + if processor is None: + return ProcessingFileResp(file_in=file_name, file_out="", + path_file_in=f'{root}/{file_name}'.replace(os.sep, "/"), + path_file_out=None, status=ProcessingFileStatus.TYPE_NOT_SUPPORT, + file_processor="", message=None) + + # calculate output path and validate file exists (depend on request) + path_out = root.replace(req.directory_in, req.directory_out) + file_struct = ProcessingFileStruct( + path_in=root, path_out=path_out, file_name=name, + file_ext=extension, file_name_ext=file_name, file_processor=processor.name) + + processed_file_name = processor.processed_file_name_function(self, file_struct, req) + + if (not req.overwrite_processed_files + and os.path.isfile(f'{path_out}/{processed_file_name}')): + return file_processor.get_processing_file_resp(file_struct=file_struct, file_out=processed_file_name, + file_processor=processor.name, + status=ProcessingFileStatus.TRANSLATE_ALREADY_EXISTS) + else: + logger.info("Start processing file %s/%s", root.replace(os.sep, "/"), file_name) + os.makedirs(file_struct.path_out, exist_ok=True) # make output directory structure + + return processor.processing_function(self, file_struct, req) + + except ValueError as ve: + return file_processor.get_processing_file_resp_error(file_in=file_name, path_in=root, error_msg=ve.args[0]) + except Exception as e: + traceback.print_tb(e.__traceback__, limit=10) + 
return file_processor.get_processing_file_resp_error(file_in=file_name, path_in=root, error_msg=repr(e)) + + def get_file_processor(self, extension: str, req_processor: str | None) -> FileProcessingPluginInitInfo | None: + if not extension or extension == "": # skip files without extension + return None + + processors: list[FileProcessingPluginInitInfo] = self.files_ext_to_processors.get(extension, None) + if not processors: + return None + + if req_processor: # try to find processor by name from request (if set) + for processor in processors: + if processor.name == req_processor: + return processor + if req_processor: + raise ValueError(f'Not found processor with name from request: {req_processor} for extension {extension}') + + if len(processors) == 1: # only one processor found - ok, return it + return processors[0] + + # try to find default processor + default_processors_list: list[FileProcessingPluginInitInfo] = [] + for processor in processors: + options = self.plugin_options(processor.plugin_name) + if options and options.get('default_extension_processor'): + default_processors_list.append(processor) + + if len(default_processors_list) == 1: # only one default processor found - return it + return default_processors_list[0] + elif len(default_processors_list) > 1: # find more than one default processors - error + processor_names = map(lambda p: p.name, default_processors_list) + raise ValueError(f'Found more than one default processor {processor_names} for extension: {extension}') + + processor_names = map(lambda p: p.name, processors) # find more than one processor, without default - error + raise ValueError(f'Found more than one not default processors {processor_names} for extension: {extension}') diff --git a/app/book_epub_translate.py b/app/book_epub_translate.py new file mode 100644 index 0000000..3ba30fb --- /dev/null +++ b/app/book_epub_translate.py @@ -0,0 +1,54 @@ +import logging + +import ebooklib +from app.struct import TranslateBook, Request +from 
bs4 import BeautifulSoup +from ebooklib import epub +from tqdm import tqdm + +from app.app_core import AppCore +from app.dto import TranslateBookItemStatus +from app.params import tp + +logger = logging.getLogger('uvicorn') +tag_headers = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] +tag_text = ['p'] + + +class BookEpubTranslate: + def translate_book(self, translate_func, req: TranslateBook, output_file_name: str) -> TranslateBookItemStatus: + book = epub.read_epub(req.file) + for item in book.get_items(): + logger.info("Translate item with id %s", item.get_id()) + if item.get_type() == ebooklib.ITEM_DOCUMENT and item.get_id() == "item_1": + content = BeautifulSoup(item.get_content(), features="xml") + + for child in tqdm(content.descendants, unit=tp.unit, ascii=tp.ascii, desc=tp.desc): + if child and child.text and child.parent: + if child.parent.name and child.parent.string and (child.parent.name in tag_text or child.parent.name in tag_headers): + text = child.parent.string + translated_text = self.translate_text(core, req, text) + + if child.parent.name in tag_text: + if req.preserve_original_text: + translate_tag = content.new_tag(child.parent.name) + translate_tag.string = translated_text + child.insert_after(translate_tag) + else: + child.parent.string = translated_text + + if child.parent.name in tag_headers: + if req.preserve_original_text: + child.parent.string = f'{child.parent.string} / {translated_text}' + else: + child.parent.string = translated_text + + item.set_content(content.encode()) + + epub.write_epub(file[:len(file) - 4] + "__translate.epub", book, {}) + + def translate_text(self, core: AppCore, req: TranslateBook, text: str) -> str: + translate_result = core.translate(Request(text=text, from_lang=req.from_lang, to_lang=req.to_lang, + translator_plugin=req.translator_plugin)) + + return translate_result.result diff --git a/app/books_translate.py b/app/books_translate.py new file mode 100644 index 0000000..86efb8b --- /dev/null +++ 
b/app/books_translate.py @@ -0,0 +1,50 @@ +import logging +import os +from os import walk + +from app.dto import TranslateBookDirReq, TranslateBookDirResp, TranslateBookItem, TranslateBookItemStatus + +logger = logging.getLogger('uvicorn') + + +class BookDirectoryTranslate: + supported_extensions = ['epub'] + overwrite_exists_translated_books = True + + def __init__(self, translate_func): + self.translate_func = translate_func + + def translate(self, req: TranslateBookDirReq) -> TranslateBookDirResp: + filenames: list[str] = [] + for dir_path, dir_names, filenames in walk(req.directory_in): + break + + if not filenames: + return TranslateBookDirResp([], "") + + books: list[TranslateBookItem] = [] + for filename in filenames: + books.append(self.process_file(req, filename)) + + + def process_file(self, req: TranslateBookDirReq, filename: str) -> TranslateBookItem: + name, extension = os.path.splitext(filename) + if extension in self.supported_extensions: + translate_book_file_name = self.get_translate_book_file_name(req, name, extension) + if not self.overwrite_exists_translated_books and os.path.isfile(f'{req.directory_out}/{translate_book_file_name}'): + return TranslateBookItem(f'{req.directory_in}/{filename}', "", TranslateBookItemStatus.translate_already_exists) + else: + if extension == 'epub': + pass #TODO fix + + else: + return TranslateBookItem(f'{req.directory_in}/{name}.{extension}', "", TranslateBookItemStatus.type_not_support) + + + def get_translate_book_file_name(self, req: TranslateBookDirReq, name: str, extension: str) -> str: + from_lang_part = "_" + req.from_lang if req.preserve_original_text else "" + + return f'{name}__{from_lang_part}_{req.to_lang}.{extension}' + + + diff --git a/app/cache.py b/app/cache.py index 05e4f68..0ef8a24 100644 --- a/app/cache.py +++ b/app/cache.py @@ -1,7 +1,8 @@ import logging import sqlite3 -from app.struct import CacheParams, Request +from app.dto import TranslateCommonRequest, Part +from app.params import 
CacheParams logger = logging.getLogger('uvicorn') @@ -9,11 +10,9 @@ logger = logging.getLogger('uvicorn') class Cache: cache_table_name = "cache_translate" params: CacheParams - connection: sqlite3.Connection def __init__(self, params: CacheParams): self.params = params - self.connection = self.get_connection() self.init() def get_connection(self): @@ -23,53 +22,73 @@ class Cache: if not self.params.enabled: return None - cursor = self.connection.cursor() + connection = self.get_connection() + cursor = connection.cursor() cursor.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='{0}'".format(self.cache_table_name)) table_exists = cursor.fetchall() + cursor.connection.commit() if len(table_exists) == 0: logger.info("Init cache table: %s, file db: %s", self.cache_table_name, self.params.file) create_table = """ CREATE TABLE IF NOT EXISTS {0} (key TEXT NOT NULL, created TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - from_lang TEXT NOT NULL, to_lang TEXT NOT NULL, plugin TEXT NOT NULL, value TEXT NOT NULL) + from_lang TEXT NOT NULL, to_lang TEXT NOT NULL, plugin TEXT NOT NULL, + model TEXT NOT NULL, value TEXT NOT NULL) """.format(self.cache_table_name) create_idx_translate_cols = ('CREATE UNIQUE INDEX IF NOT EXISTS idx_translate_cols ' - 'ON {0} (key, from_lang, to_lang, plugin)').format(self.cache_table_name) + 'ON {0} (key, from_lang, to_lang, plugin, model)').format(self.cache_table_name) create_idx_created = ('CREATE INDEX IF NOT EXISTS idx_created ' 'ON {0} (created)').format(self.cache_table_name) - with cursor: - cursor.execute(create_table) - cursor.execute(create_idx_translate_cols) - cursor.execute(create_idx_created) + cursor.execute(create_table) + cursor.execute(create_idx_translate_cols) + cursor.execute(create_idx_created) else: if (self.params.expire_days > 0): delete_expired_values = "DELETE FROM {0} WHERE created < date('now', '-{1} day')".format( self.cache_table_name, self.params.expire_days) cursor.execute(delete_expired_values) - 
self.connection.commit() + connection.commit() - def get(self, req: Request, text: str): - select = "SELECT value FROM {0} WHERE key = ? AND from_lang = ? AND to_lang = ? AND plugin = ?".format( + def get(self, req: TranslateCommonRequest, text: str, model_name: str): + select = ("SELECT value FROM {0} " + "WHERE key = ? AND from_lang = ? AND to_lang = ? AND plugin = ? AND model = ?").format( self.cache_table_name) - cursor = self.connection.cursor() - cursor.execute(select, (text, req.from_lang, req.to_lang, req.translator_plugin)) + cursor = self.get_connection().cursor() + cursor.execute(select, (text, req.from_lang, req.to_lang, req.translator_plugin, model_name)) value = cursor.fetchone() if value: return value[0] else: return None - def put(self, req: Request, text: str, value: str): + def put(self, req: TranslateCommonRequest, text: str, value: str, model_name: str): try: insert_connection = self.get_connection() cursor = insert_connection.cursor() - cursor.execute('INSERT INTO {0} (KEY, from_lang, to_lang, plugin, VALUE) VALUES (?, ?, ?, ?, ?)'.format( - self.cache_table_name),(text, req.from_lang, req.to_lang, req.translator_plugin, value)) + insert = 'INSERT INTO {0} (KEY, from_lang, to_lang, plugin, model, VALUE) VALUES (?, ?, ?, ?, ?, ?)'.format(self.cache_table_name) + cursor.execute(insert,(text, req.from_lang, req.to_lang, req.translator_plugin, model_name, value)) insert_connection.commit() insert_connection.close() except Exception as e: logger.error("Error save cache entry, text = %s, req = %s, error=%s", text, req, e) + + def cache_read(self, req: TranslateCommonRequest, parts: list[Part], params: CacheParams, model_name: str): + if params.enabled and req.translator_plugin not in params.disable_for_plugins: + for part in parts: + if part.need_to_translate(): + cached_translate = self.get(req, part.text, model_name) + if cached_translate: + part.cache_found = True + part.translate = cached_translate + else: + part.cache_found = False + + def 
cache_write(self, req: TranslateCommonRequest, parts: list[Part], params: CacheParams, model_name: str): + if params.enabled and req.translator_plugin not in params.disable_for_plugins: + for part in parts: + if part.need_to_translate() and not part.cache_found: + self.put(req, part.text, part.translate, model_name) diff --git a/app/cuda.py b/app/cuda.py index 3991eb1..d2312e8 100644 --- a/app/cuda.py +++ b/app/cuda.py @@ -5,7 +5,7 @@ import torch logger = logging.getLogger('uvicorn') -def cuda_info(): +def cuda_info() -> None: cuda_is_available = torch.cuda.is_available() device_count = torch.cuda.device_count() current_device = torch.cuda.current_device() @@ -17,14 +17,15 @@ def cuda_info(): logger.info("GPU #%d: %s", i, torch.cuda.get_device_name(i)) -def get_device(options: dict): +def get_device(options: dict) -> str: cuda_opt = options["cuda"] if cuda_opt: return "cuda" else: return "cpu" -def get_device_with_gpu_num(options: dict): + +def get_device_with_gpu_num(options: dict) -> str: cuda_opt = options["cuda"] if cuda_opt: return "cuda:{0}".format(options["cuda_device_index"]) diff --git a/app/dto.py b/app/dto.py index 4fc1c2d..036c67b 100644 --- a/app/dto.py +++ b/app/dto.py @@ -1,4 +1,7 @@ +import enum +import os from dataclasses import dataclass +from typing import Callable, Any from pydantic import BaseModel @@ -10,6 +13,46 @@ class TranslateReq(BaseModel): translator_plugin: str | None = "" +@dataclass +class TranslateCommonRequest: + text: str + from_lang: str | None + to_lang: str | None + translator_plugin: str | None + + +class ProcessingFileDirReq(BaseModel): + from_lang: str | None = "" + to_lang: str | None = "" + translator_plugin: str | None = "" + preserve_original_text: bool + directory_in: str | None = None + directory_out: str | None = None + file_processors: dict[str, str] | None + overwrite_processed_files: bool | None + recursive_sub_dirs: bool + + def translate_req(self, text: str) -> TranslateCommonRequest: + return 
TranslateCommonRequest(text=text, from_lang=self.from_lang, to_lang=self.to_lang, + translator_plugin=self.translator_plugin) + + +@dataclass +class ProcessingFileStruct: + path_in: str + path_out: str + file_name: str + file_ext: str + file_name_ext: str + file_processor: str + + def path_file_in(self) -> str: + return f'{self.path_in}{os.sep}{self.file_name_ext}' + + def path_file_out(self, out_file_name_ext: str) -> str: + return f'{self.path_out}{os.sep}{out_file_name_ext}' + + @dataclass class TranslatePart: text: str @@ -23,3 +66,107 @@ class TranslateResp: parts: list[TranslatePart] | None error: str | None + +class ProcessingFileStatus(enum.Enum): + OK = "OK" + ERROR = "ERROR" + TRANSLATE_ALREADY_EXISTS = "TRANSLATE_ALREADY_EXISTS" + TYPE_NOT_SUPPORT = "TYPE_NOT_SUPPORT" + + +@dataclass +class ProcessingFileResp: + file_in: str + file_out: str | None + + path_file_in: str + path_file_out: str | None + + status: ProcessingFileStatus + file_processor: str | None + message: str | None + + +@dataclass +class ProcessingFileDirResp: + files: list[ProcessingFileResp] | None + error: str | None + + +@dataclass +class ProcessingFileDirListItemIn: + file_with_path: str + file_processor: str | None + file_processor_error: str | None + + +@dataclass +class ProcessingFileDirListItemOut: + file_with_path: str + + +@dataclass +class ProcessingFileDirListResp: + files_in: list[ProcessingFileDirListItemIn] + files_out: list[ProcessingFileDirListItemOut] + directory_in: str + directory_out: str + + error: str | None + + +@dataclass +class TranslatePluginInitInfo: + plugin_name: str + model_name: str + # todo translate_function: Callable[[...], ...] 
+
+
+@dataclass
+class FileProcessingPluginInitInfo:
+    """Describe a file-processing plugin: its names, callbacks and supported file extensions.
+
+    The hand-written ``__init__`` assigns only ``plugin_name`` and ``supported_extensions``.
+    NOTE(review): ``name`` and both callables are presumably assigned later by the plugin
+    loader - confirm. Until they are set, the dataclass-generated ``__repr__``/``__eq__``
+    (which read every field) raise ``AttributeError``.
+    """
+    name: str
+    plugin_name: str
+    processing_function: Callable[[Any, ProcessingFileStruct, ProcessingFileDirReq], ProcessingFileResp]
+    processed_file_name_function: Callable[[Any, ProcessingFileStruct, ProcessingFileDirReq], str]
+    supported_extensions: set[str]  # lower case
+
+    def __init__(self, plugin_name: str, supported_extensions: set[str]):
+        self.plugin_name = plugin_name
+        self.supported_extensions = supported_extensions
+
+
+@dataclass
+class Part:
+    """One fragment of the text to translate together with its translation state."""
+    text: str
+    translate: str
+    paragraph_end: bool
+    cache_found: bool
+
+    def is_contains_alpha(self) -> bool:
+        """Return True if the text contains at least one alphabetic character."""
+        return any(letter.isalpha() for letter in self.text)
+
+    def need_to_translate(self) -> bool:
+        """Return True when the part was not found in cache, is non-empty and contains letters."""
+        # bool(): the bare and-chain would return '' (not False) for an empty text.
+        return bool(not self.cache_found and self.text and self.is_contains_alpha())
+
+    def __init__(self, text: str, paragraph_end: bool):
+        self.text = text
+        self.translate = ""
+        self.paragraph_end = paragraph_end
+        self.cache_found = False
+
+
+@dataclass
+class TranslateStruct:
+    """A translate request with its pre-processed text and the parts it was split into."""
+    req: TranslateCommonRequest
+    processed_text: str
+    parts: list[Part]
+
+    def need_to_translate(self) -> bool:
+        """Return True if at least one part still has to be translated."""
+        return any(part.need_to_translate() for part in self.parts)
diff --git a/app/file_processor.py b/app/file_processor.py
new file mode 100644
index 0000000..7d3e3ce
--- /dev/null
+++ b/app/file_processor.py
@@ -0,0 +1,75 @@
+import logging
+import os
+
+import chardet
+
+from app.dto import ProcessingFileStruct, ProcessingFileDirReq, ProcessingFileResp, ProcessingFileStatus
+
+logger = logging.getLogger('uvicorn')
+
+
+def processed_file_name_def(file_struct: ProcessingFileStruct, req: ProcessingFileDirReq) -> str:
+    """
+    Build the default output file name.
+
+    The result is ``<file_name>__`` followed by ``_<from_lang>`` (only when the original text is
+    preserved), then ``_<to_lang>.<file_ext>``.
+
+    :param file_struct: struct with file info
+    :param req: file process request
+    :return: output file name
+    """
+    from_lang_part = "_" + req.from_lang if req.preserve_original_text else ""
+
+    return f'{file_struct.file_name}__{from_lang_part}_{req.to_lang}.{file_struct.file_ext}'
+
+
+def file_name_from_template(file_struct: ProcessingFileStruct, req: ProcessingFileDirReq, options: dict) -> str:
+    """
+    Generate output file name from template. Template in options, for preserve original and not.
+    Special parameters in template:
+    %source% - original file name
+    %from_lang% - source language
+    %to_lang% - target language
+
+    :param file_struct: struct with file info
+    :param req: file process request
+    :param options: plugin options; ``options["output_file_name_template"]`` holds the templates
+        under the keys ``"preserve_original"`` and ``"without_original"``
+    :return: output file name
+    """
+    template_dict = options["output_file_name_template"]
+    template = template_dict["preserve_original"] if req.preserve_original_text else template_dict["without_original"]
+    return ((template.replace("%source%", file_struct.file_name)
+             .replace("%from_lang%", req.from_lang)
+             .replace("%to_lang%", req.to_lang))
+            + "." + file_struct.file_ext)
+
+
+def get_file_with_path_for_list(init_dir: str, root: str, file_name: str) -> str:
+    """
+    Return ``file_name`` with its directory relative to ``init_dir``, joined by "/".
+
+    :param init_dir: directory the listing started from; removed from ``root``
+    :param root: directory that contains the file
+    :param file_name: file name without directory
+    :return: relative path for the file list
+    """
+    file_with_path = root.replace(init_dir, "") + "/" + file_name
+    # The first character is the separator left over after removing init_dir.
+    return file_with_path[1:]
+
+
+def get_processing_file_resp(file_struct: ProcessingFileStruct, file_out: str, file_processor: str,
+                             status: ProcessingFileStatus, message: str | None = None) -> ProcessingFileResp:
+    """
+    Build the response for one processed file; paths are reported with "/" separators.
+
+    :param file_struct: struct with file info
+    :param file_out: output file name
+    :param file_processor: name of the processor that handled the file
+    :param status: processing status
+    :param message: optional additional message
+    :return: response item for the file
+    """
+    return ProcessingFileResp(
+        file_in=file_struct.file_name, file_out=file_out,
+        path_file_in=file_struct.path_file_in().replace(os.sep, "/"),
+        path_file_out=file_struct.path_file_out(file_out).replace(os.sep, "/"),
+        status=status, file_processor=file_processor, message=message
+    )
+
+
+def get_processing_file_resp_ok(file_struct: ProcessingFileStruct, file_out: str) -> ProcessingFileResp:
+    """Build the response for a successfully processed file (status ``OK``)."""
+    return get_processing_file_resp(
+        file_struct=file_struct, file_out=file_out,
+        status=ProcessingFileStatus.OK, file_processor=file_struct.file_processor
+    )
+
+
+def get_processing_file_resp_error(file_in: str, path_in: str, error_msg: str) -> ProcessingFileResp:
+    """
+    Build the response for a file that failed to process (status ``ERROR``).
+
+    :param file_in: input file name
+    :param path_in: directory of the input file
+    :param error_msg: error description returned in ``message``
+    :return: response item without output file information
+    """
+    # Normalise separators to "/" like get_processing_file_resp does.
+    path_file_in = f'{path_in}{os.sep}{file_in}'.replace(os.sep, "/")
+    return ProcessingFileResp(
+        file_in=file_in, path_file_in=path_file_in, file_out=None, path_file_out=None,
+        file_processor=None, status=ProcessingFileStatus.ERROR, message=error_msg
+    )
+
+
+def read_file_with_fix_encoding(path_file: str) -> str:
+    """
+    Read a file and decode it with the encoding detected by chardet.
+
+    :param path_file: path to the file
+    :return: decoded file content
+    """
+    with open(path_file, "rb") as file:
+        content_raw = file.read()
+
+    encoding = chardet.detect(content_raw)['encoding']
+    if encoding is None:
+        # chardet reports None when it cannot detect anything (e.g. an empty file);
+        # calling .lower() on it would raise AttributeError.
+        logger.info("Charset encoding in file %s not detected, decoding as utf-8", path_file)
+        return content_raw.decode(encoding="utf-8", errors='ignore')
+
+    if encoding.lower() != "utf-8":
+        logger.info("Charset encoding in file %s: %s", path_file, encoding)
+        return content_raw.decode(encoding=encoding, errors='ignore')
+
+    return content_raw.decode(encoding="utf-8")
+
diff --git a/app/file_processor_html.py b/app/file_processor_html.py
new file mode 100644
index 0000000..63aea81
--- /dev/null
+++ b/app/file_processor_html.py
@@ -0,0 +1,113 @@
+from typing import Iterator
+
+from bs4 import BeautifulSoup, PageElement, Tag, NavigableString
+
+from app.app_core import AppCore
+from app.dto import ProcessingFileDirReq
+
+
+class FileProcessorHtml:
+    """Translate the text of header and text tags of a parsed HTML document in place."""
+
+    # Marker attributes: set on a processed source paragraph / on an inserted translation.
+    attribute_source = "data-src"
+    attribute_translate = "data-tr"
+
+    def __init__(self, core: AppCore, options: dict):
+        """
+        :param core: application core used to translate text
+        :param options: plugin options; reads ``header_tags``, ``text_tags`` and the ``text_format``
+            section (``original_tag``, ``translate_tag``, ``header_delimiter``)
+        """
+        self.core = core
+        self.options = options
+        self.header_tags = options["header_tags"]
+        self.text_tags = options["text_tags"]
+        self.original_tag: str = options["text_format"]["original_tag"]
+        self.translate_tag: str = options["text_format"]["translate_tag"]
+        self.header_delimiter: str = options["text_format"]["header_delimiter"]
+
+    def get_translate_element(self, soup: BeautifulSoup, child: PageElement, translate_txt: str) -> Tag:
+        """
+        Create a tag named like ``child``'s parent that holds the translated text.
+
+        The tag is marked with ``attribute_translate``; when ``translate_tag`` is not empty the
+        text is wrapped into that additional tag.
+        """
+        translate_element = soup.new_tag(child.parent.name)
+        translate_element[self.attribute_translate] = "t"
+        if self.translate_tag == "":
+            translate_element.string = translate_txt
+        else:
+            additional_tag_element = soup.new_tag(self.translate_tag)
+            additional_tag_element.string = translate_txt
+            translate_element.append(additional_tag_element)
+
+        return translate_element
+
+    def get_original_element(self, soup: BeautifulSoup, child: PageElement, original_text: str) -> None | Tag:
+        """
+        Create a tag named like ``child``'s parent with the original text wrapped in ``original_tag``.
+
+        :return: the new tag, or None when ``original_tag`` is empty (original is left as is)
+        """
+        if self.original_tag == "":
+            return None
+
+        original_element = soup.new_tag(child.parent.name)
+        additional_tag_element = soup.new_tag(self.original_tag)
+        additional_tag_element.string = original_text
+        original_element.append(additional_tag_element)
+        return original_element
+
+    def process(self, req: ProcessingFileDirReq, soup: BeautifulSoup, body_tag: str | None = None) -> None:
+        """
+        Translate the text of every header/text tag of ``soup`` in place.
+
+        When the option ``translate_only_first_paragraphs`` is greater than 0, processing stops
+        after that many paragraphs have been translated and written.
+
+        :param req: file processing request; supplies ``translate_req()`` and ``preserve_original_text``
+        :param soup: parsed document, modified in place
+        :param body_tag: if set, only descendants of the first tag with this name are processed
+        """
+        translate_only_first_paragraphs: int = self.options.get("translate_only_first_paragraphs", 0)
+        children: Iterator[PageElement] = soup.find(body_tag).descendants if body_tag else soup.descendants
+        translated_paragraphs = 0
+        for child in children:
+            if not (child and child.text and child.parent):
+                continue
+            parent = child.parent
+            # Skip paragraphs already processed and translations inserted by this method.
+            if parent.get(self.attribute_source) is not None or parent.get(self.attribute_translate) is not None:
+                continue
+            child_tag = parent.name
+            if not (child_tag and parent.text and (child_tag in self.text_tags or child_tag in self.header_tags)):
+                continue
+
+            # A paragraph may mix tags and plain strings, for example <p><b>1</b>2<i>3</i></p> has
+            # 3 items (1, 3 - tags, 2 - simple string); the text of the whole paragraph is translated.
+            # contents = parent.contents - for translate with save format within paragraph
+            parent[self.attribute_source] = "1"
+            original_text = parent.text
+
+            translate_req = req.translate_req(original_text)
+            translate_txt = self.core.translate(translate_req).result
+            translated_paragraphs += 1
+
+            if child_tag in self.text_tags:
+                translate_element = self.get_translate_element(soup, child, translate_txt)
+                if req.preserve_original_text:
+                    parent.insert_after(translate_element)
+                    original_element = self.get_original_element(soup, child, original_text)
+                    if original_element:
+                        child.replace_with(original_element)
+                else:
+                    child.replace_with(translate_element)
+
+            elif child_tag in self.header_tags:
+                if req.preserve_original_text:
+                    parent.string = f'{original_text}{self.header_delimiter}{translate_txt}'
+                else:
+                    parent.string = translate_txt
+
+            # Check the limit only after the translation was written; breaking right after
+            # translate() would throw away the last translated paragraph.
+            if 0 < translate_only_first_paragraphs <= translated_paragraphs:
+                break
+
+
+    def process1(self, req: ProcessingFileDirReq, soup: BeautifulSoup, body_tag: str | None = None) -> None:
+        """
+        Variant of ``process`` that translates only plain-string children of header/text tags.
+
+        A child is handled when it is a ``NavigableString`` and its parent exposes a ``.string``.
+        When the option ``translate_only_first_paragraphs`` is greater than 0, processing stops
+        after that many paragraphs have been translated and written.
+        NOTE(review): unlike ``process`` this method does not mark the source paragraph, so a
+        rewritten header string may be visited again - confirm before using it.
+
+        :param req: file processing request; supplies ``translate_req()`` and ``preserve_original_text``
+        :param soup: parsed document, modified in place
+        :param body_tag: if set, only descendants of the first tag with this name are processed
+        """
+        translate_only_first_paragraphs: int = self.options.get("translate_only_first_paragraphs", 0)
+        children: Iterator[PageElement] = soup.find(body_tag).descendants if body_tag else soup.descendants
+        translated_paragraphs = 0
+        for child in children:
+            if not (child and child.text and child.parent):
+                continue
+            parent = child.parent
+            # The class defines attribute_translate; the name translated_attribute does not exist.
+            if parent.get(self.attribute_translate) is not None:
+                continue
+            child_tag = parent.name
+            is_simple_string = isinstance(child, NavigableString)
+            if not (is_simple_string and child_tag and parent.string
+                    and (child_tag in self.text_tags or child_tag in self.header_tags)):
+                continue
+
+            original_text = parent.string
+
+            translate_req = req.translate_req(original_text)
+            translate_txt = self.core.translate(translate_req).result
+            translated_paragraphs += 1
+
+            if child_tag in self.text_tags:
+                translate_element = self.get_translate_element(soup, child, translate_txt)
+                if req.preserve_original_text:
+                    parent.insert_after(translate_element)
+                    original_element = self.get_original_element(soup, child, original_text)
+                    if original_element:
+                        child.replace_with(original_element)
+                else:
+                    child.replace_with(translate_element)
+
+            elif child_tag in self.header_tags:
+                if req.preserve_original_text:
+                    parent.string = f'{original_text}{self.header_delimiter}{translate_txt}'
+                else:
+                    parent.string = translate_txt
+
+            # Check the limit only after the translation was written; breaking right after
+            # translate() would throw away the last translated paragraph.
+            if 0 < translate_only_first_paragraphs <= translated_paragraphs:
+                break
diff --git a/app/log.py b/app/log.py
new file mode 100644
index 0000000..9d58cff
--- /dev/null
+++ b/app/log.py
@@ -0,0 +1,11 @@
+import logging
+import traceback
+
+
+def logger():
+    """Return the logger registered under the name ``uvicorn``."""
+    uvicorn_logger = logging.getLogger('uvicorn')
+    return uvicorn_logger
+
+
+def log_exception(message: str, e: Exception) -> None:
+    """
+    Print a short traceback of ``e`` and log ``message`` with the exception text as an error.
+
+    :param message: description of what failed; may contain one ``%s`` placeholder that is
+        replaced by the exception text, otherwise the exception text is appended
+    :param e: exception to report
+    """
+    traceback.print_tb(e.__traceback__, limit=10)
+    try:
+        # Messages written for the old "message % error" contract keep their exact output.
+        text = message % (str(e),)
+    except (TypeError, ValueError):
+        # No (or an incompatible) placeholder: passing the error as a logging argument would
+        # make the logging module report a formatting error instead of the message.
+        text = f"{message} {e}"
+    # Use the same 'uvicorn' logger as the rest of the application instead of the root logger.
+    logging.getLogger('uvicorn').error(text)
\ No newline at end of file
diff --git a/app/struct.py b/app/params.py
similarity index 68%
rename from app/struct.py
rename to app/params.py
index a0ddb6d..22cbf12 100644
--- a/app/struct.py
+++ b/app/params.py
@@ -1,50 +1,4 @@
-from dataclasses import dataclass, field
-
-
-# dict_field: dict = field(default_factory=lambda: {})
-@dataclass
-class Request:
- text: str
- from_lang: str | None
- to_lang: str | None
- translator_plugin: str | None
-
-
-@dataclass
-class Sentence:
- text: str
-
-
-@dataclass
-class Part:
- text: str
- translate: str
- paragraph_end: bool
- cache_found: bool
-
- def is_numeric_or_empty(self):
- processed_text = (self.text
- .replace(" ", "")
- .replace(",", "")
- .replace(".", ""))
-
- return processed_text.isnumeric() or len(processed_text) == 0
-
- def need_to_translate(self):
- return not self.cache_found and self.text and self.text != "" and not self.is_numeric_or_empty()
-
- def __init__(self, text: str, paragraph_end: bool):
- self.text = text
- self.translate = ""
- self.paragraph_end = paragraph_end
- self.cache_found = False
-
-
-@dataclass
-class TranslateStruct:
- req: Request
- processed_text: str
- parts: list[Part]
+from dataclasses import dataclass
@dataclass
@@ -65,7 +19,7 @@ class TextSplitParams:
# pysbd (default) / blingfire
sentence_splitter: str
- def split_enabled(self):
+ def split_enabled(self) -> bool:
return (self.split_by_paragraphs_only or self.split_by_paragraphs_and_length
or self.split_by_sentences_and_length or self.split_by_sentences_only)
@@ -96,6 +50,14 @@ class CacheParams:
expire_days: int
+@dataclass
+class FileProcessingParams:
+    """Values of the ``file_processing_params`` options section (see ``read_file_processing_params``)."""
+    directory_in: str
+    directory_out: str
+    preserve_original_text: bool
+    overwrite_processed_files: bool
+
+
@dataclass
class TranslateProgress:
unit: str
@@ -103,16 +65,31 @@ class TranslateProgress:
desc: str
-tp: TranslateProgress = TranslateProgress(unit="part", ascii=True, desc="translate parts: ")
+@dataclass
+class FileProcessingTextFormat:
+    """Prefix and postfix strings placed around original and translated text."""
+    original_prefix: str
+    original_postfix: str
+    translate_prefix: str
+    translate_postfix: str
+
+    def original_text(self, text: str) -> str:
+        """Return ``text`` enclosed in the original prefix and postfix."""
+        pieces = (self.original_prefix, text, self.original_postfix)
+        return "".join(pieces)
+
+    def translate_text(self, text: str) -> str:
+        """Return ``text`` enclosed in the translate prefix and postfix."""
+        pieces = (self.translate_prefix, text, self.translate_postfix)
+        return "".join(pieces)
-def read_plugin_params(manifest: dict):
+def read_plugin_translate_params(manifest: dict):
manifest["options"]["translation_params_struct"] = read_translation_params(manifest)
manifest["options"]["text_split_params_struct"] = read_text_split_params(manifest)
manifest["options"]["text_process_params_struct"] = read_text_process_params(manifest)
-def read_translation_params(manifest: dict):
+def read_plugin_file_processing_params(manifest: dict):
+    """Store the parsed translation params in ``manifest["options"]["translation_params_struct"]``."""
+    translation_params = read_translation_params(manifest)
+    manifest["options"]["translation_params_struct"] = translation_params
+
+
+def read_translation_params(manifest: dict) -> TranslationParams | None:
options = manifest["options"]
if "translation_params" not in options:
return None
@@ -123,7 +100,7 @@ def read_translation_params(manifest: dict):
)
-def read_text_split_params(manifest: dict):
+def read_text_split_params(manifest: dict) -> TextSplitParams | None:
options = manifest["options"]
if "text_split_params" not in options:
@@ -141,7 +118,7 @@ def read_text_split_params(manifest: dict):
)
-def read_text_process_params(manifest: dict):
+def read_text_process_params(manifest: dict) -> TextProcessParams | None:
options = manifest["options"]
if "text_processing_params" not in options:
@@ -165,7 +142,7 @@ def read_text_process_params(manifest: dict):
)
-def read_cache_params(manifest: dict):
+def read_cache_params(manifest: dict) -> CacheParams:
options = manifest["options"]
return CacheParams(
@@ -174,3 +151,28 @@ def read_cache_params(manifest: dict):
disable_for_plugins=options["cache_params"]["disable_for_plugins"],
expire_days=options["cache_params"]["expire_days"],
)
+
+
+def read_file_processing_params(manifest: dict) -> FileProcessingParams | None:
+    """
+    Build ``FileProcessingParams`` from the ``file_processing_params`` section of the options.
+
+    :param manifest: plugin manifest with an ``options`` dict
+    :return: parsed params, or None when the options have no ``file_processing_params`` section
+    """
+    options = manifest["options"]
+    if "file_processing_params" in options:
+        field_names = ("directory_in", "directory_out", "preserve_original_text", "overwrite_processed_files")
+        return FileProcessingParams(**{
+            field_name: options["file_processing_params"][field_name] for field_name in field_names
+        })
+
+    return None
+
+
+def read_plugin_file_processing_text_format(options: dict):
+    """Build ``FileProcessingTextFormat`` from the ``text_format`` section of ``options``."""
+    field_names = ("original_prefix", "original_postfix", "translate_prefix", "translate_postfix")
+    return FileProcessingTextFormat(**{
+        field_name: options["text_format"][field_name] for field_name in field_names
+    })
+
+
+tp: TranslateProgress = TranslateProgress(unit="part", ascii=True, desc="translate parts: ")
diff --git a/app/text_processor.py b/app/text_processor.py
index baed6de..10cf2dd 100644
--- a/app/text_processor.py
+++ b/app/text_processor.py
@@ -1,13 +1,12 @@
import logging
import re
-from app.struct import TextProcessParams
-
+from app.params import TextProcessParams
logger = logging.getLogger('uvicorn')
-def pre_process(params: TextProcessParams, original_text: str):
+def pre_process(params: TextProcessParams, original_text: str) -> str:
processed_text = replace_text_from_to(original_text, params.replace_text_from_to)
if params.replace_non_standard_new_lines_chars:
@@ -28,7 +27,7 @@ def pre_process(params: TextProcessParams, original_text: str):
return processed_text
-def replace_not_text_chars(text: str, allowed_chars_ignoring_replace: set, replace_not_text_target_char: str):
+def replace_not_text_chars(text: str, allowed_chars_ignoring_replace: set, replace_not_text_target_char: str) -> str:
result = ""
replaced_chars = []
for char in text:
@@ -45,7 +44,7 @@ def replace_not_text_chars(text: str, allowed_chars_ignoring_replace: set, repla
return result
-def replace_non_standard_new_lines_chars(text: str):
+def replace_non_standard_new_lines_chars(text: str) -> str:
return text.replace("\r\n", "\n").replace("\n\r", "\n").replace("\r", "\n")
@@ -57,14 +56,14 @@ def remove_identical_characters(text: str,
return re.sub(regexp, r'\1' * remove_identical_characters_max_repeats, text)
-def remove_multiple_spaces(text: str):
+def remove_multiple_spaces(text: str) -> str:
while ' ' in text:
text = text.replace(' ', ' ')
return text
-def replace_text_from_to(text: str, from_to: dict | None):
+def replace_text_from_to(text: str, from_to: dict | None) -> str:
if from_to and len(from_to) > 0:
for key, value in from_to.items():
text = text.replace(key, value)
diff --git a/app/text_splitter.py b/app/text_splitter.py
index a5e4bf7..37ebe60 100644
--- a/app/text_splitter.py
+++ b/app/text_splitter.py
@@ -1,8 +1,8 @@
import pysbd
from blingfire import text_to_sentences
-from app.dto import TranslatePart
-from app.struct import TextSplitParams, Part
+from app.dto import TranslatePart, Part
+from app.params import TextSplitParams
def is_arr_fin(arr: list, i):
diff --git a/files_processing/in/_source_files b/files_processing/in/_source_files
new file mode 100644
index 0000000..e69de29
diff --git a/files_processing/out/_processed_files b/files_processing/out/_processed_files
new file mode 100644
index 0000000..e69de29
diff --git a/jaa.py b/jaa.py
index facf319..f9ff48c 100644
--- a/jaa.py
+++ b/jaa.py
@@ -40,8 +40,8 @@ main.init_plugins()
Python 3.5+ (due to dict mix in final_options calc), can be relaxed
"""
-import os
import json
+import os
# here we trying to use termcolor to highlight plugin info and errors during load
try:
@@ -195,8 +195,8 @@ class JaaCore:
return self.plugin_manifests[pluginname]
return {}
- def plugin_options(self, pluginname):
- manifest = self.plugin_manifest(pluginname)
+ def plugin_options(self, plugin_name):
+ manifest = self.plugin_manifest(plugin_name)
if "options" in manifest:
return manifest["options"]
return None
diff --git a/plugins/core.py b/plugins/core.py
index 4f65f19..337ea0e 100644
--- a/plugins/core.py
+++ b/plugins/core.py
@@ -1,65 +1,72 @@
+from app import params
from app.app_core import AppCore
-from app.struct import TranslationParams, read_text_split_params, \
- read_text_process_params, read_translation_params, read_cache_params
+
+manifest = {
+ "name": "Core plugin",
+ "version": "1.0",
+
+ # this is DEFAULT options
+ # ACTUAL options is in options/LLM Translate File processing
+
+
+ Input directory content:
+
+
+
+ File name File processor
+
+ Output directory content:
+
+
+
+ File name
+
+
+
+
+
+ Source File Result File Status "
+ }
+ for (const fileItem of data.files_out) {
+ processFilesTableOut.innerHTML += "" + fileItem.file_with_path
+ + " " + file_processor + " "
+ }
+ directoryIn.innerHTML = data.directory_in;
+ directoryOut.innerHTML = data.directory_out;
+ }
+
+ return "";
+}
+
+/**
+ * Send the form values to POST /process-files and render one result row per file
+ * (source file, result file, status) into the element `process_files_table_result`.
+ * The submit button is disabled and the progress indicator is shown while the request runs.
+ * Errors are written to the element `errorText`.
+ */
+async function process_files() {
+    const elProgress = document.getElementById('progress');
+    const submit = document.getElementById('submit');
+    const errorText = document.getElementById('errorText');
+    submit.disabled = true;
+    elProgress.style.display = 'inline';
+    // Do not keep showing the error of a previous run.
+    errorText.textContent = "";
+
+    const preserve_original_text = document.getElementById('preserve_original_text').checked;
+    const overwrite_processed_files = document.getElementById('overwrite_processed_files').checked;
+    const recursiveSubDirs = document.getElementById('recursive_sub_dirs').checked;
+    const fromLang = document.getElementById('from_lang_select').value;
+    const toLang = document.getElementById('to_lang_select').value;
+    const plugin = document.getElementById('plugin').value;
+
+    const reqBody = JSON.stringify({
+        from_lang: fromLang, to_lang: toLang, translator_plugin: plugin,
+        preserve_original_text: preserve_original_text, overwrite_processed_files: overwrite_processed_files,
+        recursive_sub_dirs: recursiveSubDirs, file_processors: null
+    });
+    const reqParam = {
+        method: 'POST',
+        body: reqBody,
+        signal: AbortSignal.timeout(600000),
+        headers: {
+            'Accept': 'application/json',
+            'Content-Type': 'application/json',
+        }
+    }
+    try {
+        const response = await fetch(`/process-files`, reqParam);
+        const data = await response.json();
+        if (data.error) {
+            // textContent: the message comes from the server and must not be parsed as HTML.
+            errorText.textContent = data.error;
+            return "";
+        }
+        if (!response.ok || !Array.isArray(data.files)) {
+            // Without this check a non-OK answer fails later with "data.files is not iterable".
+            errorText.textContent = "Request failed with status " + response.status;
+            return "";
+        }
+
+        const processFilesTableResult = document.getElementById('process_files_table_result');
+        processFilesTableResult.innerHTML = "";
+        for (const fileItem of data.files) {
+            let file_class = "";
+            if (fileItem.status === 'ERROR') {
+                file_class = "text-error";
+            } else if (fileItem.status === 'OK') {
+                file_class = "text-primary text-bold";
+            }
+
+            let status = fileItem.status;
+            switch (fileItem.status) {
+                case "ERROR":
+                    status = "Error";
+                    break;
+                case "TYPE_NOT_SUPPORT":
+                    status = "Type not support";
+                    break;
+                case "TRANSLATE_ALREADY_EXISTS":
+                    status = "Translate already exists";
+                    break;
+            }
+
+            const pathFileOut = fileItem.path_file_out ? fileItem.path_file_out : "";
+
+            // Build the row with DOM nodes so file names are inserted as text, not as markup.
+            const row = document.createElement('tr');
+            row.className = file_class;
+            for (const cellText of [fileItem.path_file_in, pathFileOut, status]) {
+                const cell = document.createElement('td');
+                cell.textContent = cellText;
+                row.appendChild(cell);
+            }
+            processFilesTableResult.appendChild(row);
+        }
+
+        return "";
+    } catch (error) {
+        errorText.textContent = error.message;
+        console.error(error.message);
+    } finally {
+        elProgress.style.display = 'none';
+        submit.disabled = false;
+    }
+}
+
+// Wire up the page: reload the file list when the "recursive" checkbox changes, start
+// processing on submit, fill the language selects and show the initial file list.
+window.onload = () => {
+    const recursiveSubDirs = document.getElementById('recursive_sub_dirs');
+    recursiveSubDirs.onchange = () => {
+        load_file_list();
+    }
+    const submit = document.getElementById('submit');
+    submit.onmouseup = async () => {
+        // process_files() is async: wait for it, otherwise the list is reloaded
+        // before the processed files exist.
+        await process_files();
+        load_file_list();
+    };
+
+    fill_language_select_elements();
+
+    load_file_list();
+}
+
diff --git a/static/index.html b/static/index.html
index c3a9320..f83e553 100644
--- a/static/index.html
+++ b/static/index.html
@@ -5,33 +5,21 @@
" + fileItem.path_file_in
+ + " " + pathFileOut + " " + status + " LLM Translate
+ LLM Translate File processing