mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-05 07:10:41 +00:00
llama: use FA + max. GPU layers by default (#15434)
* llama: use max. GPU layers by default, auto -fa
* ggml-backend: abort instead of segfault
This commit is contained in:
parent
38ad381f9f
commit
e81b8e4b7f
19 changed files with 235 additions and 72 deletions
|
|
@@ -14,6 +14,7 @@ def create_server():
|
|||
server.model_draft = download_file(MODEL_DRAFT_FILE_URL)
|
||||
server.draft_min = 4
|
||||
server.draft_max = 8
|
||||
server.fa = "off"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue