Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	ggml.c
#	scripts/compare-llama-bench.py
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2024-05-08 18:19:28 +08:00
commit 165a56088b
13 changed files with 1291 additions and 87 deletions

View file

@ -151,6 +151,8 @@ for model in models:
# print the "pre_tokenizer" content from the tokenizer.json # print the "pre_tokenizer" content from the tokenizer.json
with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f: with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
cfg = json.load(f) cfg = json.load(f)
normalizer = cfg["normalizer"]
logger.info("normalizer: " + json.dumps(normalizer, indent=4))
pre_tokenizer = cfg["pre_tokenizer"] pre_tokenizer = cfg["pre_tokenizer"]
logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4)) logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))

View file

@ -575,7 +575,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
GGML_ASSERT(tokens_input->type == GGML_TYPE_I32); GGML_ASSERT(tokens_input->type == GGML_TYPE_I32);
auto add_to_f32 = [] (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { auto add_to_f32 = [] (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
if (ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16) { if (ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16 || a->type == GGML_TYPE_BF16) {
return ggml_add_cast(ctx, a, b, GGML_TYPE_F32); return ggml_add_cast(ctx, a, b, GGML_TYPE_F32);
} else if (a->type == GGML_TYPE_F32) { } else if (a->type == GGML_TYPE_F32) {
return ggml_add(ctx, a, b); return ggml_add(ctx, a, b);

View file

@ -47,7 +47,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
{ "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B", }, { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B", },
{ "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0008 ppl @ LLaMA-v1-7B", }, { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0008 ppl @ LLaMA-v1-7B", },
{ "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ LLaMA-v1-7B", }, { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ LLaMA-v1-7B", },
{ "F16", LLAMA_FTYPE_MOSTLY_F16, "13.00G @ 7B", }, { "F16", LLAMA_FTYPE_MOSTLY_F16, "14.00G, -0.0020 ppl @ Mistral-7B", },
{ "BF16", LLAMA_FTYPE_MOSTLY_BF16, "14.00G, -0.0050 ppl @ Mistral-7B", },
{ "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", }, { "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", },
// Note: Ensure COPY comes after F32 to avoid ftype 0 from matching. // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching.
{ "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", }, { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", },

View file

@ -17,6 +17,83 @@
#define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b))
/**
* Converts brain16 to float32.
*
* The bfloat16 floating point format has the following structure:
*
* sign
*
* exponent
*
* mantissa
*
*
* 0b0000000000000000 brain16
*
* Since bf16 has the same number of exponent bits as a 32bit float,
* encoding and decoding numbers becomes relatively straightforward.
*
* sign
*
* exponent
*
* mantissa
*
*
* 0b00000000000000000000000000000000 IEEE binary32
*
* For comparison, the standard fp16 format has fewer exponent bits.
*
* sign
*
* exponent
*
* mantissa
*
*
* 0b0000000000000000 IEEE binary16
*
* @see IEEE 754-2008
*/
/**
 * Converts brain16 to float32.
 *
 * bf16 shares its sign/exponent layout with IEEE binary32, so widening
 * is just placing the 16 stored bits in the high half of a 32-bit word
 * (the low 16 mantissa bits become zero).
 */
static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
    union {
        uint32_t as_bits;
        float    as_value;
    } conv;
    conv.as_bits = ((uint32_t) h.bits) << 16;
    return conv.as_value;
}
/**
* Converts float32 to brain16.
*
* This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
* Subnormals shall be flushed to zero, and NANs will be quiet.
* This code should vectorize nicely if using modern compilers.
*/
/**
 * Converts float32 to brain16.
 *
 * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
 * Subnormals shall be flushed to zero, and NANs will be quiet.
 * This code should vectorize nicely if using modern compilers.
 *
 * @param s  the float32 value to narrow
 * @return   the nearest bf16 value (round-to-nearest-even), with
 *           subnormals flushed to +/-0 and NaNs quieted
 */
static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
    ggml_bf16_t h;
    union {
        float f;
        uint32_t i;
    } u; /* type-pun through a union to read the float's bit pattern */
    u.f = s;
    /* exponent all-ones with nonzero mantissa => NaN; compare ignores the sign bit */
    if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
        /* keep the top mantissa bits, then set a quiet bit so the result is a qNaN */
        h.bits = (u.i >> 16) | 64; /* force to quiet */
        return h;
    }
    /* exponent field is zero => zero or subnormal; bf16 flushes these to signed zero */
    if (!(u.i & 0x7f800000)) { /* subnormal */
        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
        return h;
    }
    /* round to nearest, ties to even: add 0x7fff plus the LSB of the
     * surviving mantissa, then drop the low 16 bits */
    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
    return h;
}
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

View file

@ -803,7 +803,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
case GGML_OP_DIAG_MASK_INF: case GGML_OP_DIAG_MASK_INF:
case GGML_OP_GET_ROWS: case GGML_OP_GET_ROWS:
{ {
return op->ne[3] == 1; return op->src[0]->type != GGML_TYPE_BF16 && op->ne[3] == 1;
} }
default: default:
return false; return false;

View file

@ -2175,7 +2175,7 @@ kernel void kernel_flash_attn_ext_f16(
const short D4 = D/4; const short D4 = D/4;
const short D8 = D/8; const short D8 = D/8;
const short Q8 = Q/8; //const short Q8 = Q/8;
const short NW = N_SIMDWIDTH; const short NW = N_SIMDWIDTH;
const short SH = (C + Q); // shared memory per simdgroup in (half) const short SH = (C + Q); // shared memory per simdgroup in (half)

View file

@ -12451,6 +12451,24 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
const size_t nb = nbytes/ggml_type_size(type); const size_t nb = nbytes/ggml_type_size(type);
switch (type) { switch (type) {
case GGML_TYPE_BF16:
{
int nans = 0;
int infs = 0;
const unsigned short * f = (const unsigned short *) data;
for (size_t i = 0; i < nb; ++i) {
nans += (f[i] & 0x7fff) > 0x7f80;
infs += (f[i] & 0x7fff) == 0x7f80;
}
if (nans) {
fprintf(stderr, "%s: found %d NaNs in row of %zu BF16 values\n", __func__, nans, nb);
return false;
}
if (infs) {
fprintf(stderr, "%s: found %d infinities in row of %zu BF16 values\n", __func__, infs, nb);
return false;
}
} break;
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
const ggml_fp16_t * f = (const ggml_fp16_t *) data; const ggml_fp16_t * f = (const ggml_fp16_t *) data;

1049
ggml.c

File diff suppressed because it is too large Load diff

20
ggml.h
View file

@ -333,14 +333,20 @@ extern "C" {
// get ggml_status name string // get ggml_status name string
GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status); GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
// ieee 754-2008 half-precision float16
// todo: make this not an integral type
typedef uint16_t ggml_fp16_t; typedef uint16_t ggml_fp16_t;
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t);
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float);
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t *, float *, int64_t);
GGML_API void ggml_fp32_to_fp16_row(const float *, ggml_fp16_t *, int64_t);
// convert FP16 <-> FP32 // google brain half-precision bfloat16
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x); typedef struct { uint16_t bits; } ggml_bf16_t;
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x); GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n); GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n); GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
struct ggml_object; struct ggml_object;
struct ggml_context; struct ggml_context;
@ -377,6 +383,7 @@ extern "C" {
GGML_TYPE_I64 = 27, GGML_TYPE_I64 = 27,
GGML_TYPE_F64 = 28, GGML_TYPE_F64 = 28,
GGML_TYPE_IQ1_M = 29, GGML_TYPE_IQ1_M = 29,
GGML_TYPE_BF16 = 30,
GGML_TYPE_COUNT, GGML_TYPE_COUNT,
}; };
@ -417,6 +424,7 @@ extern "C" {
GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
}; };
// available tensor operations: // available tensor operations:

View file

@ -817,6 +817,7 @@ class GGMLQuantizationType(IntEnum):
I64 = 27 I64 = 27
F64 = 28 F64 = 28
IQ1_M = 29 IQ1_M = 29
BF16 = 30
class GGUFEndian(IntEnum): class GGUFEndian(IntEnum):
@ -888,6 +889,7 @@ GGML_QUANT_SIZES = {
GGMLQuantizationType.I64: (1, 8), GGMLQuantizationType.I64: (1, 8),
GGMLQuantizationType.F64: (1, 8), GGMLQuantizationType.F64: (1, 8),
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32), GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
GGMLQuantizationType.BF16: (1, 2),
} }

View file

@ -3710,12 +3710,15 @@ Current version: 138
opmode: 4, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct opmode: 4, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct
adventure_is_action: false, //in adventure mode, determine story or action adventure_is_action: false, //in adventure mode, determine story or action
adventure_context_mod: true, //extra injection for adventure mode adventure_context_mod: true, //extra injection for adventure mode
chat_context_mod: true, //extra injection for chat mode
chatname: "You", //name to use in chat chatname: "You", //name to use in chat
chatopponent: defaultchatopponent, chatopponent: defaultchatopponent,
instruct_starttag: "\\n### Instruction:\\n", instruct_starttag: "\\n### Instruction:\\n",
instruct_endtag: "\\n### Response:\\n", instruct_endtag: "\\n### Response:\\n",
instruct_sysprompt: "",
instruct_has_markdown: true, instruct_has_markdown: true,
placeholder_tags: true, placeholder_tags: true,
render_special_tags: false,
persist_session: true, persist_session: true,
speech_synth: 0, //0 is disabled, 1000 is xtts speech_synth: 0, //0 is disabled, 1000 is xtts
xtts_voice: "female_calm", xtts_voice: "female_calm",
@ -3750,6 +3753,7 @@ Current version: 138
allow_continue_chat: false, allow_continue_chat: false,
inject_timestamps_chat: false, inject_timestamps_chat: false,
inject_timestamps_instruct: false, inject_timestamps_instruct: false,
inject_chatnames_instruct: false,
inject_jailbreak_instruct: false, inject_jailbreak_instruct: false,
idle_responses: 0, idle_responses: 0,
idle_duration: 60, idle_duration: 60,
@ -8381,6 +8385,7 @@ Current version: 138
document.getElementById("trimsentences").checked = localsettings.trimsentences; document.getElementById("trimsentences").checked = localsettings.trimsentences;
document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace; document.getElementById("trimwhitespace").checked = localsettings.trimwhitespace;
document.getElementById("compressnewlines").checked = localsettings.compressnewlines; document.getElementById("compressnewlines").checked = localsettings.compressnewlines;
document.getElementById("render_special_tags").checked = localsettings.render_special_tags;
document.getElementById("eos_ban_mode").value = localsettings.eos_ban_mode; document.getElementById("eos_ban_mode").value = localsettings.eos_ban_mode;
document.getElementById("persist_session").checked = localsettings.persist_session; document.getElementById("persist_session").checked = localsettings.persist_session;
document.getElementById("opmode").value = localsettings.opmode; document.getElementById("opmode").value = localsettings.opmode;
@ -8388,6 +8393,8 @@ Current version: 138
document.getElementById("chatopponent").value = replaceAll(localsettings.chatopponent,"||$||","\n"); document.getElementById("chatopponent").value = replaceAll(localsettings.chatopponent,"||$||","\n");
handle_bot_name_onchange(); handle_bot_name_onchange();
document.getElementById("instruct_starttag").value = localsettings.instruct_starttag; document.getElementById("instruct_starttag").value = localsettings.instruct_starttag;
let sp = replaceAll(localsettings.instruct_sysprompt, "\n", "\\n");
document.getElementById("instruct_sysprompt").value = sp;
document.getElementById("instruct_endtag").value = localsettings.instruct_endtag; document.getElementById("instruct_endtag").value = localsettings.instruct_endtag;
document.getElementById("min_p").value = localsettings.min_p; document.getElementById("min_p").value = localsettings.min_p;
document.getElementById("dynatemp_range").value = localsettings.dynatemp_range; document.getElementById("dynatemp_range").value = localsettings.dynatemp_range;
@ -8436,10 +8443,12 @@ Current version: 138
document.getElementById("allow_continue_chat").checked = localsettings.allow_continue_chat; document.getElementById("allow_continue_chat").checked = localsettings.allow_continue_chat;
document.getElementById("inject_timestamps_chat").checked = localsettings.inject_timestamps_chat; document.getElementById("inject_timestamps_chat").checked = localsettings.inject_timestamps_chat;
document.getElementById("inject_timestamps_instruct").checked = localsettings.inject_timestamps_instruct; document.getElementById("inject_timestamps_instruct").checked = localsettings.inject_timestamps_instruct;
document.getElementById("inject_chatnames_instruct").checked = localsettings.inject_chatnames_instruct;
document.getElementById("inject_jailbreak_instruct").checked = localsettings.inject_jailbreak_instruct; document.getElementById("inject_jailbreak_instruct").checked = localsettings.inject_jailbreak_instruct;
document.getElementById("idle_responses").value = localsettings.idle_responses; document.getElementById("idle_responses").value = localsettings.idle_responses;
document.getElementById("idle_duration").value = localsettings.idle_duration; document.getElementById("idle_duration").value = localsettings.idle_duration;
document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod; document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
document.getElementById("chat_context_mod").checked = localsettings.chat_context_mod;
document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown; document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags; document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
document.getElementById("run_in_background").checked = run_in_background; document.getElementById("run_in_background").checked = run_in_background;
@ -8653,6 +8662,8 @@ Current version: 138
} }
localsettings.chatopponent = newopps; localsettings.chatopponent = newopps;
localsettings.instruct_starttag = document.getElementById("instruct_starttag").value; localsettings.instruct_starttag = document.getElementById("instruct_starttag").value;
localsettings.instruct_sysprompt = document.getElementById("instruct_sysprompt").value;
localsettings.instruct_sysprompt = replaceAll(localsettings.instruct_sysprompt, "\\n", "\n");
if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") { if (localsettings.instruct_starttag == null || localsettings.instruct_starttag == "") {
localsettings.instruct_starttag = "\\n### Instruction:\\n"; localsettings.instruct_starttag = "\\n### Instruction:\\n";
} }
@ -8678,6 +8689,7 @@ Current version: 138
localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false); localsettings.trimsentences = (document.getElementById("trimsentences").checked ? true : false);
localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false); localsettings.trimwhitespace = (document.getElementById("trimwhitespace").checked ? true : false);
localsettings.compressnewlines = (document.getElementById("compressnewlines").checked ? true : false); localsettings.compressnewlines = (document.getElementById("compressnewlines").checked ? true : false);
localsettings.render_special_tags = (document.getElementById("render_special_tags").checked ? true : false);
localsettings.eos_ban_mode = document.getElementById("eos_ban_mode").value; localsettings.eos_ban_mode = document.getElementById("eos_ban_mode").value;
localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false); localsettings.persist_session = (document.getElementById("persist_session").checked ? true : false);
if(document.getElementById("opmode").value==1) if(document.getElementById("opmode").value==1)
@ -8701,10 +8713,12 @@ Current version: 138
localsettings.allow_continue_chat = (document.getElementById("allow_continue_chat").checked ? true : false); localsettings.allow_continue_chat = (document.getElementById("allow_continue_chat").checked ? true : false);
localsettings.inject_timestamps_chat = (document.getElementById("inject_timestamps_chat").checked ? true : false); localsettings.inject_timestamps_chat = (document.getElementById("inject_timestamps_chat").checked ? true : false);
localsettings.inject_timestamps_instruct = (document.getElementById("inject_timestamps_instruct").checked ? true : false); localsettings.inject_timestamps_instruct = (document.getElementById("inject_timestamps_instruct").checked ? true : false);
localsettings.inject_chatnames_instruct = (document.getElementById("inject_chatnames_instruct").checked ? true : false);
localsettings.inject_jailbreak_instruct = (document.getElementById("inject_jailbreak_instruct").checked ? true : false); localsettings.inject_jailbreak_instruct = (document.getElementById("inject_jailbreak_instruct").checked ? true : false);
localsettings.idle_responses = document.getElementById("idle_responses").value; localsettings.idle_responses = document.getElementById("idle_responses").value;
localsettings.idle_duration = document.getElementById("idle_duration").value; localsettings.idle_duration = document.getElementById("idle_duration").value;
localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false); localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
localsettings.chat_context_mod = (document.getElementById("chat_context_mod").checked ? true : false);
localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false); localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false); localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
run_in_background = (document.getElementById("run_in_background").checked ? true : false); run_in_background = (document.getElementById("run_in_background").checked ? true : false);
@ -8955,12 +8969,21 @@ Current version: 138
if (document.getElementById('gui_type').value==2) { document.getElementById('btn_aesthetics').classList.remove('hidden'); } if (document.getElementById('gui_type').value==2) { document.getElementById('btn_aesthetics').classList.remove('hidden'); }
else { document.getElementById('btn_aesthetics').classList.add('hidden'); } else { document.getElementById('btn_aesthetics').classList.add('hidden'); }
} }
// Shows the shared chat/instruct name section only while
// "inject chatnames" is enabled in instruct mode.
function toggle_include_chatnames()
{
    const inject_names = document.getElementById("inject_chatnames_instruct").checked;
    // toggle(cls, force): adds 'hidden' when force is true, removes it otherwise
    document.getElementById('chatinstructsharedsection2').classList.toggle('hidden', !inject_names);
}
function toggle_opmode() { function toggle_opmode() {
document.getElementById('chatnamesection1').classList.add('hidden'); document.getElementById('chatnamesection1').classList.add('hidden');
document.getElementById('adventuresection1').classList.add('hidden'); document.getElementById('adventuresection1').classList.add('hidden');
document.getElementById('instructsection1').classList.add('hidden'); document.getElementById('instructsection1').classList.add('hidden');
document.getElementById('chatnamesection2').classList.add('hidden'); document.getElementById('chatnamesection2').classList.add('hidden');
document.getElementById('chatinstructsharedsection2').classList.add('hidden');
document.getElementById('adventuresection2').classList.add('hidden'); document.getElementById('adventuresection2').classList.add('hidden');
document.getElementById('instructsection2').classList.add('hidden'); document.getElementById('instructsection2').classList.add('hidden');
@ -8982,6 +9005,7 @@ Current version: 138
document.getElementById('gui_type').value = localsettings.gui_type_chat; document.getElementById('gui_type').value = localsettings.gui_type_chat;
document.getElementById('chatnamesection1').classList.remove('hidden'); document.getElementById('chatnamesection1').classList.remove('hidden');
document.getElementById('chatnamesection2').classList.remove('hidden'); document.getElementById('chatnamesection2').classList.remove('hidden');
document.getElementById('chatinstructsharedsection2').classList.remove('hidden');
document.getElementById('uipicker_messenger').classList.remove('hidden'); document.getElementById('uipicker_messenger').classList.remove('hidden');
document.getElementById('uipicker_aesthetic').classList.remove('hidden'); document.getElementById('uipicker_aesthetic').classList.remove('hidden');
} }
@ -8990,6 +9014,7 @@ Current version: 138
document.getElementById('instructsection1').classList.remove('hidden'); document.getElementById('instructsection1').classList.remove('hidden');
document.getElementById('instructsection2').classList.remove('hidden'); document.getElementById('instructsection2').classList.remove('hidden');
document.getElementById('uipicker_aesthetic').classList.remove('hidden'); document.getElementById('uipicker_aesthetic').classList.remove('hidden');
toggle_include_chatnames();
} }
//deselect invalid //deselect invalid
@ -9973,10 +9998,15 @@ Current version: 138
if(newgen != "") if(newgen != "")
{ {
if(localsettings.inject_chatnames_instruct)
{
newgen = localsettings.chatname + ": " + newgen;
}
if(localsettings.inject_timestamps_instruct) if(localsettings.inject_timestamps_instruct)
{ {
newgen = "["+(new Date().toLocaleTimeString([], {year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit'}))+"] " + newgen; newgen = "["+(new Date().toLocaleTimeString([], {year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit'}))+"] " + newgen;
} }
//append instruction for instruct mode //append instruction for instruct mode
newgen = ist + newgen + iet; newgen = ist + newgen + iet;
@ -10175,7 +10205,7 @@ Current version: 138
if(trimmed!=""){ co = trimmed; } if(trimmed!=""){ co = trimmed; }
} }
if (current_anote.length == 0 && current_memory.length == 0) { if (localsettings.chat_context_mod && current_anote.length == 0 && current_memory.length == 0 && current_wi.length == 0) {
if (gametext_arr.length > 0 && gametext_arr[0].startsWith("\n" + me + ": ")) { if (gametext_arr.length > 0 && gametext_arr[0].startsWith("\n" + me + ": ")) {
let injected = "[The following is an interesting chat message log between " + me + " and " + co + ".]\n\n" + localsettings.chatname + ": Hi.\n" + co + ": Hello."; let injected = "[The following is an interesting chat message log between " + me + " and " + co + ".]\n\n" + localsettings.chatname + ": Hi.\n" + co + ": Hello.";
if(co=="") if(co=="")
@ -10227,16 +10257,23 @@ Current version: 138
} }
if (localsettings.opmode == 4) { if (localsettings.opmode == 4)
{
if(localsettings.inject_timestamps_instruct && pending_context_preinjection=="" && truncated_context!="") if (pending_context_preinjection == "" && truncated_context != "")
{ {
let endmatcher = (localsettings.placeholder_tags?instructendplaceholder:get_instruct_endtag(false)); let endmatcher = (localsettings.placeholder_tags ? instructendplaceholder : get_instruct_endtag(false));
if (truncated_context.toLowerCase().trim().endsWith(endmatcher.toLowerCase().trim())) {
if(truncated_context.toLowerCase().trim().endsWith(endmatcher.toLowerCase().trim())) if (localsettings.inject_timestamps_instruct) {
{ pending_context_preinjection += "[" + (new Date().toLocaleTimeString([], { year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit' })) + "]";
pending_context_preinjection += "["+(new Date().toLocaleTimeString([], {year: 'numeric', month: 'numeric', day: 'numeric', hour: '2-digit', minute: '2-digit'}))+"]"; }
if (localsettings.inject_chatnames_instruct && localsettings.chatopponent!="") {
if (localsettings.inject_timestamps_instruct) {
pending_context_preinjection += " ";
}
pending_context_preinjection += localsettings.chatopponent + ":";
}
} }
} }
truncated_context += pending_context_preinjection; truncated_context += pending_context_preinjection;
} }
@ -10258,7 +10295,13 @@ Current version: 138
//memory is allowed to be up to 0.8 times of ctx allowance, anote up to 0.6 times //memory is allowed to be up to 0.8 times of ctx allowance, anote up to 0.6 times
let max_mem_len = Math.floor(max_allowed_characters*0.8); let max_mem_len = Math.floor(max_allowed_characters*0.8);
let max_anote_len = Math.floor(max_allowed_characters*0.6); let max_anote_len = Math.floor(max_allowed_characters*0.6);
let truncated_memory = substring_to_boundary(current_memory, max_mem_len); let appendedsysprompt = "";
if(localsettings.opmode==4 && localsettings.instruct_sysprompt!="")
{
max_mem_len = Math.floor(max_allowed_characters*0.7);
appendedsysprompt = get_instruct_starttag(false)+" "+localsettings.instruct_sysprompt + "\n";
}
let truncated_memory = appendedsysprompt + substring_to_boundary(current_memory, max_mem_len);
if (truncated_memory != null && truncated_memory != "") { if (truncated_memory != null && truncated_memory != "") {
if(newlineaftermemory) if(newlineaftermemory)
{ {
@ -10603,6 +10646,7 @@ Current version: 138
submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent; submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
submit_payload.params.smoothing_factor = localsettings.smoothing_factor; submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
submit_payload.params.banned_tokens = get_token_bans(); submit_payload.params.banned_tokens = get_token_bans();
submit_payload.params.render_special = localsettings.render_special_tags;
} }
//presence pen and logit bias for OAI and newer kcpp //presence pen and logit bias for OAI and newer kcpp
if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="") if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="")
@ -12190,7 +12234,7 @@ Current version: 138
if (idle_timer > idle_timer_max) { if (idle_timer > idle_timer_max) {
idle_timer = 0; idle_timer = 0;
let nextcounter = ++idle_triggered_counter; let nextcounter = ++idle_triggered_counter;
if(localsettings.opmode == 4) if(localsettings.opmode == 4) //handle idle messages
{ {
if (!localsettings.placeholder_tags) { if (!localsettings.placeholder_tags) {
pending_context_preinjection = get_instruct_endtag(false); pending_context_preinjection = get_instruct_endtag(false);
@ -12873,6 +12917,18 @@ Current version: 138
fulltxt = replaceAll(fulltxt, `%SpcStg%`, `<hr class="hr_instruct"><span class="color_cyan"><img src="`+human_square+`" style="height:38px;width:auto;padding:3px 6px 3px 3px;border-radius: 8%;"/>`); fulltxt = replaceAll(fulltxt, `%SpcStg%`, `<hr class="hr_instruct"><span class="color_cyan"><img src="`+human_square+`" style="height:38px;width:auto;padding:3px 6px 3px 3px;border-radius: 8%;"/>`);
fulltxt = replaceAll(fulltxt, `%SpcEtg%`, `</span><hr class="hr_instruct"><img src="`+niko_square+`" style="height:38px;width:auto;padding:3px 6px 3px 3px;border-radius: 8%;"/>`); fulltxt = replaceAll(fulltxt, `%SpcEtg%`, `</span><hr class="hr_instruct"><img src="`+niko_square+`" style="height:38px;width:auto;padding:3px 6px 3px 3px;border-radius: 8%;"/>`);
//apply stylization to time tags
if(localsettings.inject_timestamps_instruct && localsettings.instruct_has_markdown)
{
fulltxt = fulltxt.replace(/(\[\d{1,2}\/\d{1,2}\/\d{4}, \d{1,2}:\d{2} [AP]M\])/g, "$1\n");
}
if(localsettings.inject_chatnames_instruct && localsettings.instruct_has_markdown)
{
let m_name = localsettings.chatname + ": ";
let m_opp = localsettings.chatopponent + ": ";
fulltxt = replaceAll(fulltxt, m_name, `<b>` + escapeHtml(m_name) + `</b>`);
fulltxt = replaceAll(fulltxt, m_opp, `<b>` + escapeHtml(m_opp) + `</b>`);
}
}else{ }else{
fulltxt = replaceAll(fulltxt, get_instruct_starttag(true), `%SclStg%`+escapeHtml(get_instruct_starttag(true))+`%SpnEtg%`); fulltxt = replaceAll(fulltxt, get_instruct_starttag(true), `%SclStg%`+escapeHtml(get_instruct_starttag(true))+`%SpnEtg%`);
fulltxt = replaceAll(fulltxt, get_instruct_endtag(true), `%SclStg%`+escapeHtml(get_instruct_endtag(true))+`%SpnEtg%`); fulltxt = replaceAll(fulltxt, get_instruct_endtag(true), `%SclStg%`+escapeHtml(get_instruct_endtag(true))+`%SpnEtg%`);
@ -15149,50 +15205,51 @@ Current version: 138
</div> </div>
<div id="chatnamesection1" class="settinglabel hidden" style="padding-top: 3px;"> <div id="chatnamesection1" class="settinglabel hidden" style="padding-top: 3px;">
<div class="settinglabel">
<div class="justifyleft settingsmall">Chat PrePrompt <span class="helpicon">?<span
class="helptext">Modifies the context, injecting tokens to improve chat quality for new chats.</span></span> </div>
<input type="checkbox" id="adventure_context_mod" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Inject Timestamps <span class="helpicon">?<span
class="helptext">Injects timestamps into context, allowing the AI to have a sense of time.</span></span></div>
<input type="checkbox" id="inject_timestamps_chat" style="margin:0px 0 0;">
</div>
</div> </div>
<div id="adventuresection1" class="settinglabel hidden" style="padding-top: 3px;"> <div id="adventuresection1" class="settinglabel hidden" style="padding-top: 3px;">
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Adventure Prompt <span class="helpicon">?<span <div class="justifyleft settingsmall">Adventure PrePrompt <span class="helpicon">?<span
class="helptext">Modifies the context, injecting tokens to improve adventure quality for adventure mode.</span></span> </div> class="helptext">Modifies the context, injecting tokens to improve adventure quality for new adventures.</span></span> </div>
<input type="checkbox" id="adventure_context_mod" style="margin:0px 0 0;"> <input type="checkbox" id="chat_context_mod" style="margin:0px 0 0;">
</div> </div>
</div> </div>
<div id="instructsection1" class="settinglabel hidden" style="padding-top: 3px;"> <div id="instructsection1" class="settinglabel hidden" style="padding-top: 3px;">
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span>
<input type="checkbox" id="instruct_has_markdown" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Inject Timestamps <span class="helpicon">?<span
class="helptext">Injects timestamps into context, allowing the AI to have a sense of time.</span></span></div>
<input type="checkbox" id="inject_timestamps_instruct" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Inject ChatNames <span class="helpicon">?<span
class="helptext">Appends chat names after every instruct tag, a hybrid chat mode.</span></span></div>
<input type="checkbox" id="inject_chatnames_instruct" style="margin:0px 0 0;" onchange="toggle_include_chatnames()">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Assistant Jailbreak <span class="helpicon">?<span
class="helptext">Automatically injects a jailbreak message after every query to make the AI more likely to obey you.</span></span></div>
<input type="checkbox" id="inject_jailbreak_instruct" style="margin:0px 0 0;">
</div>
</div> </div>
</div> </div>
</div> </div>
<div class="settingitem"> <div class="settingitem">
<div class="settinglabel"> <div class="settinglabel">
<div id="chatnamesection2" class="settinglabel hidden" style="padding-top: 3px;">
<table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;">
<tr>
<th>Your Name</th>
<th>AI Name <span class="helpicon">?<span class="helptext">The name of the person you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names using multiple lines.</span></span></th>
</tr>
<tr>
<td style="vertical-align: top;"><input class="settinglabel miniinput" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td>
<td style="vertical-align: top;"><textarea class="settinglabel miniinput" style="resize: none;overflow:hidden;" id="chatopponent" placeholder="(Auto)" rows="1" wrap="off" title="The name of the person you want to chat with" oninput="handle_bot_name_input()" onchange="handle_bot_name_onchange()"></textarea></td>
</tr>
</table>
<div class="settinglabel">
<div class="justifyleft settingsmall">Multiline Replies <span class="helpicon">?<span
class="helptext">Whether to allow multiple lines in AI responses. Disable this if the AI starts generating rubbish.</span></span> </div>
<input type="checkbox" id="multiline_replies" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Continue Bot Replies <span class="helpicon">?<span
class="helptext">Allow incomplete AI chat replies, which can be continued by pressing submit again. Not recommended for newbies.</span></span></div>
<input type="checkbox" id="allow_continue_chat" style="margin:0px 0 0;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Inject Timestamps <span class="helpicon">?<span
class="helptext">Injects timestamps into context, allowing the AI to have a sense of time.</span></span></div>
<input type="checkbox" id="inject_timestamps_chat" style="margin:0px 0 0;">
</div>
</div>
<div id="adventuresection2" class="settinglabel hidden" style="padding-top: 3px;"> <div id="adventuresection2" class="settinglabel hidden" style="padding-top: 3px;">
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Multiline Replies <span class="helpicon">?<span <div class="justifyleft settingsmall">Multiline Replies <span class="helpicon">?<span
@ -15214,6 +15271,10 @@ Current version: 138
<option value="8">CommandR</option> <option value="8">CommandR</option>
<option value="9">Llama 3 Chat</option> <option value="9">Llama 3 Chat</option>
</select> </select>
<div class="settingsmall miniinput" style="width:100%;padding:2px">
<div class="justifyleft settingsmall">Sys. Prompt <span class="helpicon">?<span class="helptext">A system pre-prompt sent at the very start to guide the AI behavior. Usually NOT needed.</span></span></div>
<input class="settinglabel miniinput" type="text" placeholder="(Optional)" value="" id="instruct_sysprompt">
</div>
<table class="settingsmall text-center" style="border-spacing: 3px 2px; border-collapse: separate;"> <table class="settingsmall text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
<tr> <tr>
<th>Start Seq.<span class="helpicon">?<span class="helptext">The sequence to start an instruction prompt</span></span></th> <th>Start Seq.<span class="helpicon">?<span class="helptext">The sequence to start an instruction prompt</span></span></th>
@ -15224,19 +15285,29 @@ Current version: 138
<td><input class="settinglabel miniinput" type="text" placeholder="\\n### Response:\\n" value="" id="instruct_endtag" onchange="edit_instruct_tag_format()" title="The sequence to end an instruction prompt"></td> <td><input class="settinglabel miniinput" type="text" placeholder="\\n### Response:\\n" value="" id="instruct_endtag" onchange="edit_instruct_tag_format()" title="The sequence to end an instruction prompt"></td>
</tr> </tr>
</table> </table>
</div>
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span <div id="chatinstructsharedsection2" class="settinglabel hidden" style="padding-top: 3px;">
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span> </div> <table class="settingsmall text-center" style="border-spacing: 4px 2px; border-collapse: separate;">
<input type="checkbox" id="instruct_has_markdown" style="margin:0px 0 0;"> <tr>
<th>Your Name</th>
<th>AI Name <span class="helpicon">?<span class="helptext">Name of the person(s) you want to chat with. Multiple opponents can be specified, creating a group chat, separate their names using multiple lines.</span></span></th>
</tr>
<tr>
<td style="vertical-align: top;"><input class="settinglabel miniinput" style="height:18px;" type="text" placeholder="(Enter Name)" value="" id="chatname" title="The name that you will be chatting as"></td>
<td style="vertical-align: top;"><textarea class="settinglabel miniinput" style="resize: none;overflow:hidden;" id="chatopponent" placeholder="(Auto)" rows="1" wrap="off" title="The name of the person you want to chat with" oninput="handle_bot_name_input()" onchange="handle_bot_name_onchange()"></textarea></td>
</tr>
</table>
</div>
<div id="chatnamesection2" class="settinglabel hidden" style="padding-top: 3px;">
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Inject Timestamps <span class="helpicon">?<span <div class="justifyleft settingsmall">Multiline Replies <span class="helpicon">?<span
class="helptext">Injects timestamps into context, allowing the AI to have a sense of time.</span></span></div> class="helptext">Whether to allow multiple lines in AI responses. Disable this if the AI starts generating rubbish.</span></span> </div>
<input type="checkbox" id="inject_timestamps_instruct" style="margin:0px 0 0;"> <input type="checkbox" id="multiline_replies" style="margin:0px 0 0;">
</div> </div>
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Assistant Jailbreak <span class="helpicon">?<span <div class="justifyleft settingsmall">Continue Bot Replies <span class="helpicon">?<span
class="helptext">Automatically injects a jailbreak message after every query to make the AI more likely to obey you.</span></span></div> class="helptext">Allow incomplete AI chat replies, which can be continued by pressing submit again. Not recommended for newbies.</span></span></div>
<input type="checkbox" id="inject_jailbreak_instruct" style="margin:0px 0 0;"> <input type="checkbox" id="allow_continue_chat" style="margin:0px 0 0;">
</div> </div>
</div> </div>
</div> </div>
@ -15513,6 +15584,11 @@ Current version: 138
class="helptext">If enabled, uses universal {{user}} and {{[INPUT]}} placeholders that get swapped on submit. If disabled, uses plaintext chat or instruct tags verbatim.</span></span></div> class="helptext">If enabled, uses universal {{user}} and {{[INPUT]}} placeholders that get swapped on submit. If disabled, uses plaintext chat or instruct tags verbatim.</span></span></div>
<input type="checkbox" id="placeholder_tags" style="margin:0px 0px 0px auto;"> <input type="checkbox" id="placeholder_tags" style="margin:0px 0px 0px auto;">
</div> </div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Render Sp.Tags <span class="helpicon">?<span
class="helptext">If enabled, renders special tags like EOS and padding tokens. Not recommended.</span></span></div>
<input type="checkbox" id="render_special_tags" style="margin:0px 0px 0px auto;">
</div>
<div class="settinglabel"> <div class="settinglabel">
<div class="justifyleft settingsmall">Run In Background <span class="helpicon">?<span <div class="justifyleft settingsmall">Run In Background <span class="helpicon">?<span
class="helptext">Prevents the browser from suspending Kobold Lite by playing a silent audio track. This setting cannot be saved.</span></span></div> class="helptext">Prevents the browser from suspending Kobold Lite by playing a silent audio track. This setting cannot be saved.</span></span></div>

View file

@ -3206,6 +3206,7 @@ struct llama_model_loader {
switch (type_max) { switch (type_max) {
case GGML_TYPE_F32: ftype = LLAMA_FTYPE_ALL_F32; break; case GGML_TYPE_F32: ftype = LLAMA_FTYPE_ALL_F32; break;
case GGML_TYPE_F16: ftype = LLAMA_FTYPE_MOSTLY_F16; break; case GGML_TYPE_F16: ftype = LLAMA_FTYPE_MOSTLY_F16; break;
case GGML_TYPE_BF16: ftype = LLAMA_FTYPE_MOSTLY_BF16; break;
case GGML_TYPE_Q4_0: ftype = LLAMA_FTYPE_MOSTLY_Q4_0; break; case GGML_TYPE_Q4_0: ftype = LLAMA_FTYPE_MOSTLY_Q4_0; break;
case GGML_TYPE_Q4_1: ftype = LLAMA_FTYPE_MOSTLY_Q4_1; break; case GGML_TYPE_Q4_1: ftype = LLAMA_FTYPE_MOSTLY_Q4_1; break;
case GGML_TYPE_Q5_0: ftype = LLAMA_FTYPE_MOSTLY_Q5_0; break; case GGML_TYPE_Q5_0: ftype = LLAMA_FTYPE_MOSTLY_Q5_0; break;
@ -3710,6 +3711,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
switch (ftype) { switch (ftype) {
case LLAMA_FTYPE_ALL_F32: return "all F32"; case LLAMA_FTYPE_ALL_F32: return "all F32";
case LLAMA_FTYPE_MOSTLY_F16: return "F16"; case LLAMA_FTYPE_MOSTLY_F16: return "F16";
case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0"; case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1"; case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
@ -6199,6 +6201,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
|| !( || !(
model.ftype == LLAMA_FTYPE_ALL_F32 || model.ftype == LLAMA_FTYPE_ALL_F32 ||
model.ftype == LLAMA_FTYPE_MOSTLY_F16 || model.ftype == LLAMA_FTYPE_MOSTLY_F16 ||
model.ftype == LLAMA_FTYPE_MOSTLY_BF16 ||
model.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 || model.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1
) )
@ -14473,13 +14476,16 @@ static void llama_tensor_dequantize_internal(
if (qtype.to_float == NULL) { if (qtype.to_float == NULL) {
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type))); throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor->type)));
} }
} else if (tensor->type != GGML_TYPE_F16) { } else if (tensor->type != GGML_TYPE_F16 &&
tensor->type != GGML_TYPE_BF16) {
throw std::runtime_error(format("cannot dequantize/convert tensor type %s", ggml_type_name(tensor->type))); throw std::runtime_error(format("cannot dequantize/convert tensor type %s", ggml_type_name(tensor->type)));
} }
if (nthread < 2) { if (nthread < 2) {
if (tensor->type == GGML_TYPE_F16) { if (tensor->type == GGML_TYPE_F16) {
ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor->data, f32_output, nelements); ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor->data, f32_output, nelements);
} else if (tensor->type == GGML_TYPE_BF16) {
ggml_bf16_to_fp32_row((ggml_bf16_t *)tensor->data, f32_output, nelements);
} else if (ggml_is_quantized(tensor->type)) { } else if (ggml_is_quantized(tensor->type)) {
qtype.to_float(tensor->data, f32_output, nelements); qtype.to_float(tensor->data, f32_output, nelements);
} else { } else {
@ -14488,7 +14494,14 @@ static void llama_tensor_dequantize_internal(
return; return;
} }
size_t block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type); size_t block_size;
if (tensor->type == GGML_TYPE_F16 ||
tensor->type == GGML_TYPE_BF16) {
block_size = 1;
} else {
block_size = (size_t)ggml_blck_size(tensor->type);
}
size_t block_size_bytes = ggml_type_size(tensor->type); size_t block_size_bytes = ggml_type_size(tensor->type);
GGML_ASSERT(nelements % block_size == 0); GGML_ASSERT(nelements % block_size == 0);
@ -14507,6 +14520,8 @@ static void llama_tensor_dequantize_internal(
auto compute = [qtype] (ggml_type typ, uint8_t * inbuf, float * outbuf, int nels) { auto compute = [qtype] (ggml_type typ, uint8_t * inbuf, float * outbuf, int nels) {
if (typ == GGML_TYPE_F16) { if (typ == GGML_TYPE_F16) {
ggml_fp16_to_fp32_row((ggml_fp16_t *)inbuf, outbuf, nels); ggml_fp16_to_fp32_row((ggml_fp16_t *)inbuf, outbuf, nels);
} else if (typ == GGML_TYPE_BF16) {
ggml_bf16_to_fp32_row((ggml_bf16_t *)inbuf, outbuf, nels);
} else { } else {
qtype.to_float(inbuf, outbuf, nels); qtype.to_float(inbuf, outbuf, nels);
} }
@ -14867,6 +14882,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
case LLAMA_FTYPE_MOSTLY_Q5_1: default_type = GGML_TYPE_Q5_1; break; case LLAMA_FTYPE_MOSTLY_Q5_1: default_type = GGML_TYPE_Q5_1; break;
case LLAMA_FTYPE_MOSTLY_Q8_0: default_type = GGML_TYPE_Q8_0; break; case LLAMA_FTYPE_MOSTLY_Q8_0: default_type = GGML_TYPE_Q8_0; break;
case LLAMA_FTYPE_MOSTLY_F16: default_type = GGML_TYPE_F16; break; case LLAMA_FTYPE_MOSTLY_F16: default_type = GGML_TYPE_F16; break;
case LLAMA_FTYPE_MOSTLY_BF16: default_type = GGML_TYPE_BF16; break;
case LLAMA_FTYPE_ALL_F32: default_type = GGML_TYPE_F32; break; case LLAMA_FTYPE_ALL_F32: default_type = GGML_TYPE_F32; break;
// K-quants // K-quants

View file

@ -137,6 +137,7 @@ extern "C" {
LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors
LLAMA_FTYPE_MOSTLY_BF16 = 32, // except 1d tensors
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
}; };