feat: add prefix cache for server

This commit is contained in:
ceerrep 2025-02-17 00:10:55 +08:00
parent c515cc49a5
commit bb0ccc7b1a
5 changed files with 132 additions and 55 deletions

View file

@@ -105,6 +105,10 @@ def custom_openapi(app):
def main():
cfg = Config()
# Temporarily disable cuda graph by default because of a bug in the prefix cache.
cfg.use_cuda_graph = False
arg_parser = ArgumentParser(cfg)
# 初始化消息