From 4538bdae977a83f29c0c16d3e4b372efe0cdbb96 Mon Sep 17 00:00:00 2001 From: wangkuigang-yewu-cmss Date: Sun, 13 Apr 2025 16:12:18 +0800 Subject: [PATCH] prevent rpc process from crashing on long prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当prompt超过cache_len的时候,rpc进程会crash掉,导致整体不可用。 这里增加一个检查,让过长的prompt在请求早期就被提前过滤掉 --- ktransformers/server/backend/interfaces/balance_serve.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ktransformers/server/backend/interfaces/balance_serve.py b/ktransformers/server/backend/interfaces/balance_serve.py index 2722dfd..2a9d5fd 100644 --- a/ktransformers/server/backend/interfaces/balance_serve.py +++ b/ktransformers/server/backend/interfaces/balance_serve.py @@ -374,6 +374,10 @@ class BalanceServeInterface(BackendInterfaceBase): top_p = 0.0001 query_add.sample_options.top_p = top_p query_add.estimated_length = min(self.args.cache_lens, query_length+self.args.max_new_tokens) + + if query_add.estimated_length < query_add.query_length: + raise Exception(f'query too long: estimated_length={query_add.estimated_length} < query_length={query_add.query_length}') + query_id = self.sched_client.add_query(query_add) queue = asyncio.Queue(maxsize=self.args.max_new_tokens) self.queue_map[query_id] = queue