Update indexing configuration: reduce batch size to 1 and set batch delay to 0.1 seconds; enhance logging for CPU usage and batch settings in index_files function.
This commit is contained in:
parent
3c73c62092
commit
9e70543654
3 changed files with 10 additions and 4 deletions
|
@ -71,8 +71,8 @@ ELASTICSEARCH_USERNAME = admin
|
|||
# Elastic password
|
||||
ELASTICSEARCH_PASSWORD = password
|
||||
|
||||
# Indexing batch size (smaller batches use less memory)
|
||||
INDEXING_BATCH_SIZE=3
|
||||
# Indexing batch size (smaller batches use less memory), old default - 100
|
||||
INDEXING_BATCH_SIZE=1
|
||||
|
||||
# Delay between batches in seconds (helps prevent memory spikes)
|
||||
INDEXING_BATCH_DELAY=0
|
||||
# Delay between batches in seconds (helps prevent memory spikes), old default - 0.5
|
||||
INDEXING_BATCH_DELAY=0.1
|
||||
|
|
|
@ -210,6 +210,9 @@ def count_directory_stats(dir_path):
|
|||
'token_count': dir_token_count
|
||||
}
|
||||
|
||||
# What does this function do?
|
||||
# It clears old cache entries from the directory cache
|
||||
|
||||
# Clear directory cache periodically (every hour)
|
||||
def clear_old_cache_entries():
|
||||
with directory_cache_lock:
|
||||
|
|
|
@ -464,6 +464,9 @@ def index_files(directory):
|
|||
available_cpus = multiprocessing.cpu_count()
|
||||
used_cpus = int(float(cpu_limit)) if cpu_limit else max(1, available_cpus - 1)
|
||||
|
||||
logging.info(f"Using {used_cpus} CPUs for indexing (of {available_cpus} available")
|
||||
logging.info(f"Batch size: {batch_size}, Batch delay: {batch_delay} seconds")
|
||||
|
||||
with progress_lock:
|
||||
indexing_progress = {
|
||||
'total_files': 0,
|
||||
|
|
Loading…
Add table
Reference in a new issue