mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Fix for Windows model unloading not releasing memory (#569)
* Run the model in a separate process so that it can be killed when unloading, which releases its memory on Windows * Fix from Henky
This commit is contained in:
parent
4c274dc2fd
commit
6948da5a0d
2 changed files with 65 additions and 26 deletions
20
koboldcpp.py
20
koboldcpp.py
|
@ -13,6 +13,7 @@ import os
|
|||
import argparse
|
||||
import json, sys, http.server, time, asyncio, socket, threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import multiprocessing
|
||||
|
||||
sampler_order_max = 7
|
||||
stop_token_max = 16
|
||||
|
@ -2330,6 +2331,25 @@ def main(launch_args,start_server=True):
|
|||
else:
|
||||
print(f"Server was not started, main function complete. Idling.")
|
||||
|
||||
def run_in_queue(launch_args, input_queue, output_queue):
    """Run main() without the server, then serve commands from a queue forever.

    After ``main(launch_args, start_server=False)`` returns, a
    ``{'command': 'complete'}`` message is put on ``output_queue``.  The
    function then loops indefinitely: it drains ``input_queue`` and, for
    each message whose ``'command'`` is ``'generate'``, unpacks
    ``(args, kwargs)`` from ``message['data']``, calls
    ``generate(*args, **kwargs)`` and puts the result on ``output_queue``
    as ``{'command': 'generated text', 'data': <result>}``.  Messages with
    any other command are discarded.

    This function never returns.
    """
    main(launch_args, start_server=False)
    output_queue.put({'command': 'complete'})

    while True:
        # Handle everything that is currently pending before sleeping again.
        while not input_queue.empty():
            request = input_queue.get()
            if request['command'] != 'generate':
                continue
            call_args, call_kwargs = request['data']
            result = generate(*call_args, **call_kwargs)
            output_queue.put({'command': 'generated text', 'data': result})
        # Pause between polls of the input queue.
        time.sleep(0.2)
|
||||
|
||||
def start_in_seperate_process(launch_args):
    """Start ``run_in_queue`` in a new process and return its queues and handle.

    Two ``multiprocessing.Queue`` objects are created: one that the child
    reads commands from and one that the child writes results to.  A
    ``multiprocessing.Process`` targeting ``run_in_queue`` is started with
    ``launch_args`` and both queues.

    Returns a 3-tuple ``(output_queue, input_queue, process)``: the queue the
    child writes to, the queue the child reads from, and the started process
    object.
    """
    request_queue = multiprocessing.Queue()   # passed to the child as input_queue
    response_queue = multiprocessing.Queue()  # passed to the child as output_queue
    worker = multiprocessing.Process(
        target=run_in_queue,
        args=(launch_args, request_queue, response_queue),
    )
    worker.start()
    return (response_queue, request_queue, worker)
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
||||
# print("Python version: " + sys.version)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue