Cache migration script; unified log messages.

Co-authored-by: APodoinikov <APodoynikov@detmir.ru>
This commit is contained in:
illian64 2025-09-21 15:44:04 +07:00 committed by GitHub
parent 8ae83f6495
commit 85a0d0b538
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 178 additions and 124 deletions

View file

@ -1,10 +1,8 @@
import logging
import os
import time
import traceback
from os import walk
from app import text_splitter, file_processor
from app import text_splitter, file_processor, log
from app.cache import Cache
from app.dto import TranslateResp, ProcessingFileDirReq, \
ProcessingFileDirResp, TranslatePluginInitInfo, Part, TranslateStruct, FileProcessingPluginInitInfo, \
@ -14,7 +12,7 @@ from app.params import TranslationParams, TextSplitParams, TextProcessParams, Ca
from app.text_processor import pre_process
from jaa import JaaCore
logger = logging.getLogger('uvicorn')
logger = log.logger()
version = "0.1.0"
@ -174,7 +172,7 @@ class AppCore(JaaCore):
except ValueError as ve:
return TranslateResp(result=None, parts=None, error=ve.args[0])
except Exception as e:
traceback.print_tb(e.__traceback__, limit=10)
log.log_exception(f"Translate error {repr(req)}", e)
return TranslateResp(result=None, parts=None, error=getattr(e, 'message', repr(e)))
def process_files_list(self, recursive_sub_dirs: bool) -> ProcessingFileDirListResp:
@ -242,7 +240,7 @@ class AppCore(JaaCore):
except ValueError as ve:
return ProcessingFileDirResp(files=list(), error=ve.args[0])
except Exception as e:
traceback.print_tb(e.__traceback__, limit=10)
log.log_exception("Error proces files: ", e)
return ProcessingFileDirResp(files=list(), error=getattr(e, 'message', repr(e)))
def process_file(self, req: ProcessingFileDirReq, root: str, file_name: str) -> ProcessingFileResp:
@ -276,12 +274,18 @@ class AppCore(JaaCore):
logger.info("Start processing file %s/%s", root.replace(os.sep, "/"), file_name)
os.makedirs(file_struct.path_out, exist_ok=True) # make output directory structure
return processor.processing_function(self, file_struct, req)
try:
return processor.processing_function(self, file_struct, req)
except Exception as e:
log.log_exception(f'Error with processing file {file_struct.file_name_ext}', e)
return file_processor.get_processing_file_resp_error(
file_in=file_struct.file_name_ext, path_in=file_struct.path_in, error_msg=str(e))
except ValueError as ve:
return file_processor.get_processing_file_resp_error(file_in=file_name, path_in=root, error_msg=ve.args[0])
except Exception as e:
traceback.print_tb(e.__traceback__, limit=10)
log.log_exception(f'Error with processing file {file_name}', e)
return file_processor.get_processing_file_resp_error(file_in=file_name, path_in=root, error_msg=repr(e))
def get_file_processor(self, extension: str, req_processor: str | None) -> FileProcessingPluginInitInfo | None:

View file

@ -1,62 +1,54 @@
import logging
import os
import sqlite3
import pyway.info
import pyway.migrate
import pyway.validate
from app import log
from app.dto import TranslateCommonRequest, Part
from app.params import CacheParams
logger = logging.getLogger('uvicorn')
logger = log.logger()
class Cache:
cache_table_name = "cache_translate"
params: CacheParams
def __init__(self, params: CacheParams):
    """Create the cache: store params, apply schema migrations, purge expired rows.

    :param params: cache configuration (db file path, expiry, migration dir).
    """
    self.params = params
    # The old self.init() (which created tables inline) was replaced by the
    # pyway-based migration; calling both would raise AttributeError since
    # init() no longer exists after the rename.
    self.init_pybase_migration()
    self.init_delete_expired_values()
def get_connection(self):
    """Open and return a new sqlite3 connection to the configured cache DB file."""
    return sqlite3.connect(self.params.file)
def init(self):
def init_pybase_migration(self):
    """Point pyway at the cache database via environment variables and apply
    any pending schema migrations, logging the result."""
    # pyway is configured through the environment; fall back to the bundled
    # migration directory when none was supplied in params.
    target_dir = self.params.migration_path if self.params.migration_path else "cache/migrations"
    os.environ["PYWAY_TYPE"] = "sqlite"
    os.environ["PYWAY_TABLE"] = "pyway_migrations"
    os.environ["PYWAY_DATABASE_NAME"] = self.params.file
    os.environ["PYWAY_DATABASE_MIGRATION_DIR"] = target_dir
    runner = pyway.migrate.Migrate(pyway.migrate.ConfigFile())
    logger.info("Result apply migrations: %s", runner.run())
def init_delete_expired_values(self) -> None:
    """Delete cache rows older than params.expire_days; no-op when the cache
    is disabled or expiry is not configured (expire_days <= 0).

    Table creation is no longer handled here: the schema is managed by the
    pyway migration (cache/migrations), so only the expiry cleanup remains —
    the old inline CREATE TABLE/INDEX branch was removed-diff residue.
    """
    if not self.params.enabled:
        return None
    connection = self.get_connection()
    cursor = connection.cursor()
    if self.params.expire_days > 0:
        delete_expired_values = "DELETE FROM cache_translate WHERE created < date('now', '-{0} day')".format(
            self.params.expire_days)
        cursor.execute(delete_expired_values)
    connection.commit()
    # Close explicitly, matching put(); the previous version leaked the connection.
    connection.close()
def get(self, req: TranslateCommonRequest, text: str, model_name: str):
select = ("SELECT value FROM {0} "
"WHERE key = ? AND from_lang = ? AND to_lang = ? AND plugin = ? AND model = ?").format(
self.cache_table_name)
select = ("SELECT value FROM cache_translate "
"WHERE key = ? AND from_lang = ? AND to_lang = ? AND plugin = ? AND model = ?")
cursor = self.get_connection().cursor()
cursor.execute(select, (text, req.from_lang, req.to_lang, req.translator_plugin, model_name))
value = cursor.fetchone()
@ -69,12 +61,12 @@ class Cache:
try:
insert_connection = self.get_connection()
cursor = insert_connection.cursor()
insert = 'INSERT INTO {0} (KEY, from_lang, to_lang, plugin, model, VALUE) VALUES (?, ?, ?, ?, ?, ?)'.format(self.cache_table_name)
insert = 'INSERT INTO cache_translate (KEY, from_lang, to_lang, plugin, model, VALUE) VALUES (?, ?, ?, ?, ?, ?)'
cursor.execute(insert,(text, req.from_lang, req.to_lang, req.translator_plugin, model_name, value))
insert_connection.commit()
insert_connection.close()
except Exception as e:
logger.error("Error save cache entry, text = %s, req = %s, error=%s", text, req, e)
log.log_exception("Error save cache entry, text = {0}, req = {1}".format(text, req), e)
def cache_read(self, req: TranslateCommonRequest, parts: list[Part], params: CacheParams, model_name: str):
if params.enabled and req.translator_plugin not in params.disable_for_plugins:

View file

@ -1,11 +1,11 @@
import logging
import os
import chardet
from app import log
from app.dto import ProcessingFileStruct, ProcessingFileDirReq, ProcessingFileResp, ProcessingFileStatus
logger = logging.getLogger('uvicorn')
logger = log.logger()
def processed_file_name_def(file_struct: ProcessingFileStruct, req: ProcessingFileDirReq) -> str:
from_lang_part = "_" + req.from_lang if req.preserve_original_text else ""

View file

@ -1,5 +1,4 @@
import logging
import traceback
def logger():
@ -7,5 +6,4 @@ def logger():
def log_exception(message: str, e: Exception) -> None:
    """Log *message* and the exception at ERROR level, including its traceback.

    :param message: context describing what failed.
    :param e: the caught exception to report.
    """
    # exc_info=e attaches e's own traceback even when called outside an
    # except block; logger().exception() would rely on the *active* exception
    # and the old logging.error(message, str(e)) misused %-style arguments.
    logger().error(message + ": " + str(e), exc_info=e)

View file

@ -50,6 +50,7 @@ class CacheParams:
file: str
disable_for_plugins: list[str]
expire_days: int
migration_path: str | None
@dataclass
@ -154,6 +155,7 @@ def read_cache_params(manifest: dict) -> CacheParams:
file=options["cache_params"]["file"],
disable_for_plugins=options["cache_params"]["disable_for_plugins"],
expire_days=options["cache_params"]["expire_days"],
migration_path=None
)

View file

@ -1,9 +1,9 @@
import logging
import re
from app import log
from app.params import TextProcessParams
logger = logging.getLogger('uvicorn')
logger = log.logger()
def pre_process(params: TextProcessParams, original_text: str) -> str:
@ -70,7 +70,7 @@ def replace_text_from_to(text: str, from_to: dict | None) -> str:
return text
def remove_repeated_words1(text: str, remove_identical_words_max_repeats) -> str:
def remove_repeated_words(text: str, remove_identical_words_max_repeats) -> str:
pattern = r'(\b\w+\b)(?:\s*[^\w\s]*\s*\1){' + str(remove_identical_words_max_repeats) + ',}'
replacement = ' '.join([r'\1'] * remove_identical_words_max_repeats)

15
cache/migrations/V01_01__cache_init.sql vendored Normal file
View file

@ -0,0 +1,15 @@
-- Initial cache schema: one translated text fragment per row.
-- A row is identified by the source text (key) plus language pair,
-- translator plugin and model; `created` drives time-based expiry.
CREATE TABLE IF NOT EXISTS cache_translate (
key TEXT NOT NULL,
created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
from_lang TEXT NOT NULL,
to_lang TEXT NOT NULL,
plugin TEXT NOT NULL,
model TEXT NOT NULL,
value TEXT NOT NULL
);
-- Uniqueness over the full lookup key: one cached value per
-- (text, language pair, plugin, model) combination.
CREATE UNIQUE INDEX IF NOT EXISTS idx_translate_cols
ON cache_translate (key, from_lang, to_lang, plugin, model);
-- Supports the expiry cleanup query (DELETE ... WHERE created < ...).
CREATE INDEX IF NOT EXISTS idx_created
ON cache_translate (created);

16
main.py
View file

@ -1,16 +1,16 @@
import logging
import sys
from contextlib import asynccontextmanager
import uvicorn
from fastapi import FastAPI
from starlette.staticfiles import StaticFiles
from app import dto
from app import dto, log
from app.app_core import AppCore
from app.cuda import cuda_info
core: AppCore
logger = logging.getLogger('uvicorn')
logger = log.logger()
@asynccontextmanager
@ -20,9 +20,13 @@ async def lifespan(fast_api: FastAPI):
app.mount('/', StaticFiles(directory='static', html=True), name='static')
cuda_info()
global core
core = AppCore()
core.init_with_translate_plugins()
try:
global core
core = AppCore()
core.init_with_translate_plugins()
except Exception as e:
log.log_exception("Error init app", e)
sys.exit(-1)
yield
logger.info("Stopping llm-translate")

View file

@ -1,19 +1,17 @@
import logging
import os
import traceback
import ebooklib
from bs4 import BeautifulSoup
from ebooklib import epub
from natsort import os_sorted
from app import file_processor
from app import file_processor, log
from app.app_core import AppCore
from app.dto import ProcessingFileDirReq, ProcessingFileResp, FileProcessingPluginInitInfo, ProcessingFileStruct
from app.file_processor_html import FileProcessorHtml
plugin_name = os.path.basename(__file__)[:-3] # calculating modname
logger = logging.getLogger('uvicorn')
logger = log.logger()
def start(core: AppCore):
@ -62,42 +60,36 @@ def file_processing(core: AppCore, file_struct: ProcessingFileStruct, req: Proce
html_processor = FileProcessorHtml(core=core, options=options)
book_documents_ids: list[str] = []
try:
book = epub.read_epub(file_struct.path_file_in())
book = epub.read_epub(file_struct.path_file_in())
docs_count = 0
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
book_documents_ids.append(item.id)
docs_count = docs_count + 1
docs_count = 0
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
book_documents_ids.append(item.id)
docs_count = docs_count + 1
book_documents_ids_set: set[str]
if translate_only_first_chapters_amount > 0:
book_documents_ids = os_sorted(book_documents_ids)
book_documents_ids_set = set(book_documents_ids[:translate_only_first_chapters_amount])
else:
book_documents_ids_set = set(book_documents_ids)
book_documents_ids_set: set[str]
if translate_only_first_chapters_amount > 0:
book_documents_ids = os_sorted(book_documents_ids)
book_documents_ids_set = set(book_documents_ids[:translate_only_first_chapters_amount])
else:
book_documents_ids_set = set(book_documents_ids)
processed_count = 0
log_limit_info = f"(limit: {translate_only_first_chapters_amount})" if translate_only_first_chapters_amount > 0 else ""
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT and item.id in book_documents_ids_set:
processed_count = processed_count + 1
logger.info("Translate file %s, item with id %s, item %s / %s %s",
file_struct.file_name_ext, item.get_id(), processed_count, docs_count, log_limit_info)
soup = BeautifulSoup(item.get_content(), features="xml")
html_processor.process(req=req, soup=soup)
item.set_content(soup.encode())
processed_count = 0
log_limit_info = f"(limit: {translate_only_first_chapters_amount})" if translate_only_first_chapters_amount > 0 else ""
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT and item.id in book_documents_ids_set:
processed_count = processed_count + 1
logger.info("Translate file %s, item with id %s, item %s / %s %s",
file_struct.file_name_ext, item.get_id(), processed_count, docs_count, log_limit_info)
soup = BeautifulSoup(item.get_content(), features="xml")
html_processor.process(req=req, soup=soup)
item.set_content(soup.encode())
out_file_name = processed_file_name(core=core, file_struct=file_struct, req=req)
epub.write_epub(file_struct.path_file_out(out_file_name), book, {})
out_file_name = processed_file_name(core=core, file_struct=file_struct, req=req)
epub.write_epub(file_struct.path_file_out(out_file_name), book, {})
return file_processor.get_processing_file_resp_ok(file_struct=file_struct, file_out=out_file_name)
except Exception as e:
traceback.print_tb(e.__traceback__, limit=10)
logging.error("Error with processing file %s: %s", file_struct.file_name_ext, str(e))
return file_processor.get_processing_file_resp_error(
file_in=file_struct.file_name_ext, path_in=file_struct.path_in, error_msg=str(e))
return file_processor.get_processing_file_resp_ok(file_struct=file_struct, file_out=out_file_name)
def processed_file_name(core: AppCore, file_struct: ProcessingFileStruct, req: ProcessingFileDirReq) -> str:

View file

@ -1,16 +1,14 @@
import logging
import os
import traceback
from bs4 import BeautifulSoup
from app import file_processor
from app import file_processor, log
from app.app_core import AppCore
from app.dto import ProcessingFileDirReq, ProcessingFileResp, FileProcessingPluginInitInfo, ProcessingFileStruct
from app.file_processor_html import FileProcessorHtml
plugin_name = os.path.basename(__file__)[:-3] # calculating modname
logger = logging.getLogger('uvicorn')
logger = log.logger()
def start(core: AppCore):
@ -57,23 +55,17 @@ def file_processing(core: AppCore, file_struct: ProcessingFileStruct, req: Proce
options = core.plugin_options(plugin_name)
html_processor = FileProcessorHtml(core=core, options=options)
try:
fb2_content = file_processor.read_file_with_fix_encoding(file_struct.path_file_in())
fb2_content = file_processor.read_file_with_fix_encoding(file_struct.path_file_in())
soup = BeautifulSoup(fb2_content, features="xml")
html_processor.process(req, soup, "body")
soup = BeautifulSoup(fb2_content, features="xml")
html_processor.process(req, soup, "body")
out_file_name = processed_file_name(core=core, file_struct=file_struct, req=req)
out_file_name = processed_file_name(core=core, file_struct=file_struct, req=req)
with open(file_struct.path_file_out(out_file_name), 'w+', encoding='utf-8') as fb2_put_file:
fb2_put_file.write(soup.decode())
with open(file_struct.path_file_out(out_file_name), 'w+', encoding='utf-8') as fb2_put_file:
fb2_put_file.write(soup.decode())
return file_processor.get_processing_file_resp_ok(file_struct=file_struct, file_out=out_file_name)
except Exception as e:
traceback.print_tb(e.__traceback__, limit=10)
logging.error("Error with processing file %s: %s", file_struct.file_name_ext, str(e))
return file_processor.get_processing_file_resp_error(
file_in=file_struct.file_name_ext, path_in=file_struct.path_in, error_msg=str(e))
return file_processor.get_processing_file_resp_ok(file_struct=file_struct, file_out=out_file_name)
def processed_file_name(core: AppCore, file_struct: ProcessingFileStruct, req: ProcessingFileDirReq) -> str:

View file

@ -9,6 +9,7 @@ fastapi == 0.115.12
termcolor == 3.1.0
natsort == 8.4.0
chardet == 5.2.0
pyway == 0.3.32
transformers == 4.53.1
ctranslate2 == 4.6.0

View file

@ -1,15 +0,0 @@
from unittest import TestCase
from app.dto import Part
class StructTest(TestCase):
    """Checks Part.is_numeric_or_empty classification of text fragments."""

    def test_is_numeric_or_empty(self):
        # A fragment containing a letter is not numeric-or-empty.
        self.assertEqual(False, Part("1 000 000 c", False).is_numeric_or_empty())
        # Numbers in any separator style, whitespace, empty and
        # punctuation-only fragments all count as numeric-or-empty.
        for text in ("1.23", "1,23", " ", "1 000 000", "", "..."):
            self.assertEqual(True, Part(text, False).is_numeric_or_empty())

0
tests/__init__.py Normal file
View file

54
tests/test_cache.py Normal file
View file

@ -0,0 +1,54 @@
import os
from unittest import TestCase
from app.cache import Cache
from app.dto import TranslateCommonRequest
from app.params import CacheParams
class CacheTest(TestCase):
    """Exercises Cache end-to-end against a throwaway sqlite database file."""

    # Shared fixture config: 20-day expiry, migrations loaded from the repo tree.
    params = CacheParams(enabled=True, file="../files/test.db", disable_for_plugins=[], expire_days=20,
                         migration_path="../../cache/migrations")
    req = TranslateCommonRequest(text="all text", from_lang="fr", to_lang="to", translator_plugin="plugin1")

    def test_operations(self):
        """put/get round-trip: the same key text holds one value per model."""
        print("Absolute path: " + os.path.abspath(self.params.migration_path))
        under_test = Cache(self.params)
        under_test.put(req=self.req, text="part1 text 1", value="translate 1", model_name="model 1")
        under_test.put(req=self.req, text="part1 text 1", value="translate 2", model_name="model 2")
        self.assertEqual("translate 1", under_test.get(req=self.req, text="part1 text 1", model_name="model 1"))
        self.assertEqual("translate 2", under_test.get(req=self.req, text="part1 text 1", model_name="model 2"))
        os.remove("../files/test.db")

    def test_delete_expired_values(self):
        """Rows older than expire_days are purged when the cache is re-opened."""
        under_test = Cache(self.params)
        under_test.put(req=self.req, text="part1 text 1", value="translate 1", model_name="model 1")
        under_test.put(req=self.req, text="part1 text 1", value="translate 2", model_name="model 2")
        conn = under_test.get_connection()
        cur = conn.cursor()
        # Backdate the rows: model 1 beyond the 20-day expiry, model 2 within it.
        cur.execute("UPDATE cache_translate SET created = date('now', '-30 day') WHERE model='model 1'")
        cur.execute("UPDATE cache_translate SET created = date('now', '-10 day') WHERE model='model 2'")
        conn.commit()
        conn.close()
        under_test = Cache(self.params)
        self.assertEqual(None, under_test.get(req=self.req, text="part1 text 1", model_name="model 1"))
        self.assertEqual("translate 2", under_test.get(req=self.req, text="part1 text 1", model_name="model 2"))
        os.remove("../files/test.db")

15
tests/test_struct.py Normal file
View file

@ -0,0 +1,15 @@
from unittest import TestCase
from app.dto import Part
class StructTest(TestCase):
    """Checks Part.is_contains_alpha detection of alphabetic characters."""

    def test_is_contains_alpha(self):
        # Renamed from test_is_numeric_or_empty: the method under test is
        # is_contains_alpha, so the copied-over name was misleading.
        self.assertEqual(True, Part("1 000 000 c", False).is_contains_alpha())
        # Digits, separators, whitespace, empty and punctuation-only
        # fragments contain no alphabetic characters.
        for text in ("1.23", "1,23", " ", "1 000 000", "", "..."):
            self.assertEqual(False, Part(text, False).is_contains_alpha())