feat: add prefix cache for server

2025-09-10 15:29:39 +00:00 · 2025-02-17 00:10:55 +08:00 · 2025-02-17 00:10:55 +08:00 · bb0ccc7b1a
commit bb0ccc7b1a
parent c515cc49a5
5 changed files with 132 additions and 55 deletions
--- a/ktransformers/server/main.py
+++ b/ktransformers/server/main.py
@@ -105,6 +105,10 @@ def custom_openapi(app):

 def main():
    cfg = Config()
+
+    # Temporarily disable cuda graph by default because of a bug in the prefix cache.
+    cfg.use_cuda_graph = False
+
    arg_parser = ArgumentParser(cfg)

    # 初始化消息