Update indexing configuration: reduce batch size to 1 and set batch delay to 0.1 seconds; enhance logging for CPU usage and batch settings in index_files function.

This commit is contained in:
Dmitriy Kazimirov 2025-04-06 17:06:58 +00:00
parent 3c73c62092
commit 9e70543654
3 changed files with 10 additions and 4 deletions

View file

@@ -71,8 +71,8 @@ ELASTICSEARCH_USERNAME = admin
# Elastic password
ELASTICSEARCH_PASSWORD = password
# Indexing batch size (smaller batches use less memory)
INDEXING_BATCH_SIZE=3
# Indexing batch size (smaller batches use less memory), old default - 100
INDEXING_BATCH_SIZE=1
# Delay between batches in seconds (helps prevent memory spikes)
INDEXING_BATCH_DELAY=0
# Delay between batches in seconds (helps prevent memory spikes), old default - 0.5
INDEXING_BATCH_DELAY=0.1

View file

@@ -210,6 +210,9 @@ def count_directory_stats(dir_path):
'token_count': dir_token_count
}
# What does this function do?
# It clears old cache entries from the directory cache
# Clear directory cache periodically (every hour)
def clear_old_cache_entries():
with directory_cache_lock:

View file

@@ -464,6 +464,9 @@ def index_files(directory):
available_cpus = multiprocessing.cpu_count()
used_cpus = int(float(cpu_limit)) if cpu_limit else max(1, available_cpus - 1)
logging.info(f"Using {used_cpus} CPUs for indexing (of {available_cpus} available")
logging.info(f"Batch size: {batch_size}, Batch delay: {batch_delay} seconds")
with progress_lock:
indexing_progress = {
'total_files': 0,