Fix for Windows model unloading not releasing memory (#569)

* Run model processing in a separate process so it can be killed when unloading, which releases its memory on Windows

* Fix from Henky
This commit is contained in:
ebolam 2023-12-19 02:55:41 -05:00 committed by GitHub
parent 4c274dc2fd
commit 6948da5a0d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 26 deletions

View file

@ -13,6 +13,7 @@ import os
import argparse
import json, sys, http.server, time, asyncio, socket, threading
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
sampler_order_max = 7
stop_token_max = 16
@ -2330,6 +2331,25 @@ def main(launch_args,start_server=True):
else:
print(f"Server was not started, main function complete. Idling.")
def run_in_queue(launch_args, input_queue, output_queue):
    """Run ``main`` in this process, then serve generation requests from a queue.

    This is the target of the worker process created by
    ``start_in_seperate_process``. It calls ``main`` with
    ``start_server=False``, announces completion on ``output_queue`` and then
    loops forever handling messages from ``input_queue``.

    Message protocol (all messages are dicts with a ``'command'`` key):

    * sent once after ``main`` returns: ``{'command': 'complete'}``
    * received: ``{'command': 'generate', 'data': (args, kwargs)}``
    * sent in reply: ``{'command': 'generated text', 'data': <generate result>}``

    Messages carrying any other command are ignored.

    Args:
        launch_args: Passed through unchanged as the first argument of ``main``.
        input_queue: Queue this function reads request messages from.
        output_queue: Queue this function writes status and results to.

    This function never returns; the caller is expected to stop it by ending
    the process it runs in.
    """
    main(launch_args, start_server=False)
    output_queue.put({'command': 'complete'})
    while True:
        # A blocking get() replaces the former empty()/sleep(0.2) polling loop:
        # multiprocessing documents Queue.empty() as unreliable, and polling
        # added up to 200 ms of latency to every request while idling.
        data = input_queue.get()
        if data['command'] == 'generate':
            args, kwargs = data['data']
            output_queue.put({'command': 'generated text', 'data': generate(*args, **kwargs)})
def start_in_seperate_process(launch_args):
    """Start ``run_in_queue`` in a new process and return its communication handles.

    Two ``multiprocessing.Queue`` objects are created and handed to the worker:
    one it reads requests from and one it writes results to.

    Args:
        launch_args: Forwarded as the first argument of ``run_in_queue``.

    Returns:
        A tuple ``(output_queue, input_queue, process)``. Note the order: the
        queue the worker writes to comes first, the queue it reads from
        second, and the already-started ``multiprocessing.Process`` last.
    """
    requests = multiprocessing.Queue()
    responses = multiprocessing.Queue()
    worker_args = (launch_args, requests, responses)
    worker = multiprocessing.Process(target=run_in_queue, args=worker_args)
    worker.start()
    return (responses, requests, worker)
if __name__ == '__main__':
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
# print("Python version: " + sys.version)