mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 15:29:39 +00:00
feat: add prefix cache for server
This commit is contained in:
parent
c515cc49a5
commit
bb0ccc7b1a
5 changed files with 132 additions and 55 deletions
|
@ -105,6 +105,10 @@ def custom_openapi(app):
|
|||
|
||||
def main():
|
||||
cfg = Config()
|
||||
|
||||
# Temporarily disable CUDA graph by default because of a bug in the prefix cache.
|
||||
cfg.use_cuda_graph = False
|
||||
|
||||
arg_parser = ArgumentParser(cfg)
|
||||
|
||||
# 初始化消息 (initialize messages)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue