mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
llama: use FA + max. GPU layers by default (#15434)
* llama: use max. GPU layers by default, auto -fa
* ggml-backend: abort instead of segfault
This commit is contained in:
parent
38ad381f9f
commit
e81b8e4b7f
19 changed files with 235 additions and 72 deletions
|
|
@ -25,6 +25,18 @@
|
|||
// interface implementation
|
||||
//
|
||||
|
||||
const char * llama_flash_attn_type_name(enum llama_flash_attn_type flash_attn_type) {
|
||||
switch (flash_attn_type) {
|
||||
case LLAMA_FLASH_ATTN_TYPE_AUTO:
|
||||
return "auto";
|
||||
case LLAMA_FLASH_ATTN_TYPE_DISABLED:
|
||||
return "disabled";
|
||||
case LLAMA_FLASH_ATTN_TYPE_ENABLED:
|
||||
return "enabled";
|
||||
}
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
struct llama_sampler_chain_params llama_sampler_chain_default_params() {
|
||||
struct llama_sampler_chain_params result = {
|
||||
/*.no_perf =*/ true,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue