mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
Merge branch 'upstream' into concedo_experimental
This commit is contained in:
commit
720fc30832
12 changed files with 149 additions and 98 deletions
|
|
@ -5971,20 +5971,12 @@ class Mamba2Model(TextModel):
|
|||
class JambaModel(TextModel):
|
||||
model_arch = gguf.MODEL_ARCH.JAMBA
|
||||
|
||||
def get_vocab_base_pre(self, tokenizer) -> str:
|
||||
del tokenizer # unused
|
||||
|
||||
return "gpt-2"
|
||||
|
||||
def set_vocab(self):
|
||||
if (self.dir_model / "tokenizer.model").is_file():
|
||||
# Using Jamba's tokenizer.json causes errors on model load
|
||||
# (something about "byte not found in vocab"),
|
||||
# but there's a working tokenizer.model
|
||||
self._set_vocab_sentencepiece()
|
||||
else:
|
||||
# Some Jamba models only have a tokenizer.json, which works.
|
||||
self._set_vocab_gpt2()
|
||||
self._set_vocab_llama_hf()
|
||||
self.gguf_writer.add_add_space_prefix(False)
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
d_model = self.find_hparam(["hidden_size", "mamba_d_model"])
|
||||
|
|
|
|||
|
|
@ -3881,7 +3881,6 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
|||
dev_ctx->device = i;
|
||||
dev_ctx->name = GGML_CUDA_NAME + std::to_string(i);
|
||||
|
||||
ggml_cuda_set_device(i);
|
||||
cudaDeviceProp prop;
|
||||
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
|
||||
dev_ctx->description = prop.name;
|
||||
|
|
|
|||
|
|
@ -1546,9 +1546,8 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) {
|
|||
!ggml_is_transposed(op->src[1]) &&
|
||||
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
||||
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
||||
props_dev->has_simdgroup_mm && ne00 >= 64 &&
|
||||
(ne11 > ne11_mm_min || (ggml_is_quantized(op->src[0]->type) && ne12 > 1))) {
|
||||
//printf("matrix: ne00 = %6d, ne01 = %6d, ne02 = %6d, ne11 = %6d, ne12 = %6d\n", ne00, ne01, ne02, ne11, ne12);
|
||||
props_dev->has_simdgroup_mm && ne00 >= 64 && ne11 > ne11_mm_min) {
|
||||
//GGML_LOG_INFO("matrix: ne00 = %6d, ne01 = %6d, ne02 = %6d, ne11 = %6d, ne12 = %6d\n", ne00, ne01, ne02, ne11, ne12);
|
||||
|
||||
// some Metal matrix data types require aligned pointers
|
||||
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf (Table 2.5)
|
||||
|
|
|
|||
|
|
@ -7487,7 +7487,7 @@ kernel void kernel_mul_mv_iq1_m_f32(
|
|||
kernel_mul_mv_iq1_m_f32_impl<N_R0_IQ1_M, constant ggml_metal_kargs_mul_mv &>(args, src0, src1, dst, nullptr, tgpig, tiisg, sgitg);
|
||||
}
|
||||
|
||||
template<int nr0, typename args_t>
|
||||
template<int NR0, typename args_t>
|
||||
void kernel_mul_mv_iq4_nl_f32_impl(
|
||||
args_t args,
|
||||
device const char * src0,
|
||||
|
|
@ -7500,13 +7500,12 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
|||
const short NSG = FC_mul_mv_nsg;
|
||||
|
||||
threadgroup float * shmem_f32 = (threadgroup float *) shmem;
|
||||
const int nb = args.ne00/QK4_NL;
|
||||
|
||||
const int r0 = tgpig.x;
|
||||
const int r1 = tgpig.y;
|
||||
const int im = tgpig.z;
|
||||
|
||||
const int first_row = (r0 * NSG + sgitg) * nr0;
|
||||
const int first_row = (r0 * NSG + sgitg) * NR0;
|
||||
|
||||
const uint i12 = im%args.ne12;
|
||||
const uint i13 = im/args.ne12;
|
||||
|
|
@ -7517,6 +7516,9 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
|||
device const block_iq4_nl * x = (device const block_iq4_nl *) (src0 + offset0);
|
||||
device const float * y = (device const float *) (src1 + offset1);
|
||||
|
||||
const int nb = args.ne00/QK4_NL;
|
||||
const int ns01 = args.nb01/args.nb00;
|
||||
|
||||
const short ix = tiisg/2; // 0...15
|
||||
const short it = tiisg%2; // 0 or 1
|
||||
|
||||
|
|
@ -7524,24 +7526,25 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
float4 yl[4];
|
||||
float sumf[nr0]={0.f};
|
||||
float sumf[NR0]={0.f};
|
||||
|
||||
device const float * yb = y + ix * QK4_NL + it * 8;
|
||||
device const float * yb = y + ix*QK4_NL + it*8;
|
||||
|
||||
uint32_t aux32[2];
|
||||
thread const uint8_t * q8 = (thread const uint8_t *)aux32;
|
||||
|
||||
float4 qf1, qf2;
|
||||
|
||||
for (int ib = ix; ib < nb; ib += 16) {
|
||||
// [TAG_MUL_MV_WEIRD]
|
||||
for (int ib = ix; ib < nb && ib < ns01; ib += 16) {
|
||||
device const float4 * y4 = (device const float4 *)yb;
|
||||
yl[0] = y4[0];
|
||||
yl[1] = y4[4];
|
||||
yl[2] = y4[1];
|
||||
yl[3] = y4[5];
|
||||
|
||||
for (short row = 0; row < nr0; row++) {
|
||||
device const block_iq4_nl & xb = x[row*nb + ib];
|
||||
for (short row = 0; row < NR0; row++) {
|
||||
device const block_iq4_nl & xb = x[row*ns01 + ib];
|
||||
device const uint16_t * q4 = (device const uint16_t *)(xb.qs + 8*it);
|
||||
|
||||
float4 acc1 = {0.f}, acc2 = {0.f};
|
||||
|
|
@ -7572,7 +7575,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
|||
|
||||
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
|
||||
|
||||
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
|
||||
for (int row = 0; row < NR0 && first_row + row < args.ne0; ++row) {
|
||||
float sum_all = simd_sum(sumf[row]);
|
||||
if (tiisg == 0) {
|
||||
dst_f32[first_row + row] = sum_all;
|
||||
|
|
@ -7594,7 +7597,7 @@ kernel void kernel_mul_mv_iq4_nl_f32(
|
|||
kernel_mul_mv_iq4_nl_f32_impl<N_R0_IQ4_NL, constant ggml_metal_kargs_mul_mv &>(args, src0, src1, dst, shmem, tgpig, tiisg, sgitg);
|
||||
}
|
||||
|
||||
template<int nr0, typename args_t>
|
||||
template<int NR0, typename args_t>
|
||||
void kernel_mul_mv_iq4_xs_f32_impl(
|
||||
args_t args,
|
||||
device const char * src0,
|
||||
|
|
@ -7607,12 +7610,11 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
|||
const short NSG = FC_mul_mv_nsg;
|
||||
|
||||
threadgroup float * shmem_f32 = (threadgroup float *) shmem;
|
||||
const int nb = args.ne00/QK_K;
|
||||
|
||||
const int r0 = tgpig.x;
|
||||
const int r1 = tgpig.y;
|
||||
const int im = tgpig.z;
|
||||
const int first_row = (r0 * NSG + sgitg) * nr0;
|
||||
const int first_row = (r0 * NSG + sgitg) * NR0;
|
||||
|
||||
const uint i12 = im%args.ne12;
|
||||
const uint i13 = im/args.ne12;
|
||||
|
|
@ -7623,6 +7625,9 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
|||
device const block_iq4_xs * x = (device const block_iq4_xs *) (src0 + offset0);
|
||||
device const float * y = (device const float *) (src1 + offset1);
|
||||
|
||||
const int nb = args.ne00/QK_K;
|
||||
const int ns01 = args.nb01/args.nb00;
|
||||
|
||||
const short ix = tiisg/16; // 0 or 1
|
||||
const short it = tiisg%16; // 0...15
|
||||
const short ib = it/2;
|
||||
|
|
@ -7632,7 +7637,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
float4 yl[4];
|
||||
float sumf[nr0]={0.f};
|
||||
float sumf[NR0]={0.f};
|
||||
|
||||
device const float * yb = y + ix * QK_K + ib * 32 + il * 8;
|
||||
|
||||
|
|
@ -7641,15 +7646,16 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
|||
|
||||
float4 qf1, qf2;
|
||||
|
||||
for (int ibl = ix; ibl < nb; ibl += 2) {
|
||||
// [TAG_MUL_MV_WEIRD]
|
||||
for (int ibl = ix; ibl < nb && ibl < ns01; ibl += 2) {
|
||||
device const float4 * y4 = (device const float4 *)yb;
|
||||
yl[0] = y4[0];
|
||||
yl[1] = y4[4];
|
||||
yl[2] = y4[1];
|
||||
yl[3] = y4[5];
|
||||
|
||||
for (short row = 0; row < nr0; ++row) {
|
||||
device const block_iq4_xs & xb = x[row*nb + ibl];
|
||||
for (short row = 0; row < NR0; ++row) {
|
||||
device const block_iq4_xs & xb = x[row*ns01 + ibl];
|
||||
device const uint32_t * q4 = (device const uint32_t *)(xb.qs + 16*ib + 8*il);
|
||||
|
||||
float4 acc1 = {0.f}, acc2 = {0.f};
|
||||
|
|
@ -7679,7 +7685,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
|||
|
||||
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
|
||||
|
||||
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
|
||||
for (int row = 0; row < NR0 && first_row + row < args.ne0; ++row) {
|
||||
float sum_all = simd_sum(sumf[row]);
|
||||
if (tiisg == 0) {
|
||||
dst_f32[first_row + row] = sum_all;
|
||||
|
|
@ -7701,7 +7707,7 @@ kernel void kernel_mul_mv_iq4_xs_f32(
|
|||
kernel_mul_mv_iq4_xs_f32_impl<N_R0_IQ4_XS, constant ggml_metal_kargs_mul_mv &>(args, src0, src1, dst, shmem, tgpig, tiisg, sgitg);
|
||||
}
|
||||
|
||||
template<int nr0, typename args_t>
|
||||
template<int NR0, typename args_t>
|
||||
void kernel_mul_mv_mxfp4_f32_impl(
|
||||
args_t args,
|
||||
device const char * src0,
|
||||
|
|
@ -7714,13 +7720,12 @@ void kernel_mul_mv_mxfp4_f32_impl(
|
|||
const short NSG = FC_mul_mv_nsg;
|
||||
|
||||
threadgroup float * shmem_f32 = (threadgroup float *) shmem;
|
||||
const int nb = args.ne00/QK_MXFP4;
|
||||
|
||||
const int r0 = tgpig.x;
|
||||
const int r1 = tgpig.y;
|
||||
const int im = tgpig.z;
|
||||
|
||||
const int first_row = (r0 * NSG + sgitg) * nr0;
|
||||
const int first_row = (r0 * NSG + sgitg) * NR0;
|
||||
|
||||
const uint i12 = im%args.ne12;
|
||||
const uint i13 = im/args.ne12;
|
||||
|
|
@ -7731,6 +7736,9 @@ void kernel_mul_mv_mxfp4_f32_impl(
|
|||
device const block_mxfp4 * x = (device const block_mxfp4 *) (src0 + offset0);
|
||||
device const float * y = (device const float *) (src1 + offset1);
|
||||
|
||||
const int nb = args.ne00/QK_MXFP4;
|
||||
const int ns01 = args.nb01/args.nb00; // this can be larger than nb for permuted src0 tensors
|
||||
|
||||
const short ix = tiisg/2; // 0...15
|
||||
const short it = tiisg%2; // 0 or 1
|
||||
|
||||
|
|
@ -7738,20 +7746,22 @@ void kernel_mul_mv_mxfp4_f32_impl(
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
|
||||
float4 yl[4];
|
||||
float sumf[nr0]={0.f};
|
||||
float sumf[NR0]={0.f};
|
||||
|
||||
device const float * yb = y + ix * QK_MXFP4 + it * 8;
|
||||
device const float * yb = y + ix*QK_MXFP4 + it*8;
|
||||
|
||||
// note: just the check `ib < nb` is enough, but adding the redundant `&& ib < ns01` check makes the kernel a bit faster
|
||||
// no idea why that is - needs some deeper investigation [TAG_MUL_MV_WEIRD]
|
||||
for (int ib = ix; ib < nb && ib < ns01; ib += 16) {
|
||||
device const float4 * y4 = (device const float4 *) yb;
|
||||
|
||||
for (int ib = ix; ib < nb; ib += 16) {
|
||||
device const float4 * y4 = (device const float4 *)yb;
|
||||
yl[0] = y4[0];
|
||||
yl[1] = y4[4];
|
||||
yl[2] = y4[1];
|
||||
yl[3] = y4[5];
|
||||
|
||||
#pragma unroll(nr0)
|
||||
for (short row = 0; row < nr0; row++) {
|
||||
device const block_mxfp4 & xb = x[row*nb + ib];
|
||||
FOR_UNROLL (short row = 0; row < NR0; row++) {
|
||||
device const block_mxfp4 & xb = x[row*ns01 + ib];
|
||||
device const uint8_t * q2 = (device const uint8_t *)(xb.qs + 8*it);
|
||||
|
||||
float4 acc1 = yl[0]*float4(shmem_f32[q2[0] & 0x0F], shmem_f32[q2[1] & 0x0F], shmem_f32[q2[2] & 0x0F], shmem_f32[q2[3] & 0x0F]);
|
||||
|
|
@ -7769,7 +7779,7 @@ void kernel_mul_mv_mxfp4_f32_impl(
|
|||
|
||||
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
|
||||
|
||||
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
|
||||
for (int row = 0; row < NR0 && first_row + row < args.ne0; ++row) {
|
||||
float sum_all = simd_sum(sumf[row]);
|
||||
if (tiisg == 0) {
|
||||
dst_f32[first_row + row] = sum_all;
|
||||
|
|
|
|||
|
|
@ -16375,10 +16375,10 @@ struct llm_build_granite_hybrid : public llm_graph_context_mamba {
|
|||
}
|
||||
|
||||
ggml_tensor * build_layer_ffn(
|
||||
ggml_tensor * cur,
|
||||
ggml_tensor * inpSA,
|
||||
const llama_model & model,
|
||||
const int il) {
|
||||
ggml_tensor * cur,
|
||||
ggml_tensor * inpSA,
|
||||
const llama_model & model,
|
||||
const int il) {
|
||||
|
||||
// For Granite architectures - scale residual
|
||||
if (hparams.f_residual_scale) {
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -5401,15 +5401,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
const json body = json::parse(req.body);
|
||||
|
||||
// TODO: implement
|
||||
//int top_n = 1;
|
||||
//if (body.count("top_n") != 1) {
|
||||
// top_n = body.at("top_n");
|
||||
//} else {
|
||||
// res_error(res, format_error_response("\"top_n\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
// return;
|
||||
//}
|
||||
|
||||
// if true, use TEI API format, otherwise use Jina API format
|
||||
// Jina: https://jina.ai/reranker/
|
||||
// TEI: https://huggingface.github.io/text-embeddings-inference/#/Text%20Embeddings%20Inference/rerank
|
||||
|
|
@ -5434,6 +5425,8 @@ int main(int argc, char ** argv) {
|
|||
return;
|
||||
}
|
||||
|
||||
int top_n = json_value(body, "top_n", (int)documents.size());
|
||||
|
||||
// create and queue the task
|
||||
json responses = json::array();
|
||||
bool error = false;
|
||||
|
|
@ -5474,7 +5467,8 @@ int main(int argc, char ** argv) {
|
|||
body,
|
||||
responses,
|
||||
is_tei_format,
|
||||
documents);
|
||||
documents,
|
||||
top_n);
|
||||
|
||||
res_ok(res, root);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -102,3 +102,45 @@ def test_rerank_usage(query, doc1, doc2, n_tokens):
|
|||
assert res.status_code == 200
|
||||
assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens']
|
||||
assert res.body['usage']['prompt_tokens'] == n_tokens
|
||||
|
||||
|
||||
@pytest.mark.parametrize("top_n,expected_len", [
|
||||
(None, len(TEST_DOCUMENTS)), # no top_n parameter
|
||||
(2, 2),
|
||||
(4, 4),
|
||||
(99, len(TEST_DOCUMENTS)), # higher than available docs
|
||||
])
|
||||
def test_rerank_top_n(top_n, expected_len):
|
||||
global server
|
||||
server.start()
|
||||
data = {
|
||||
"query": "Machine learning is",
|
||||
"documents": TEST_DOCUMENTS,
|
||||
}
|
||||
if top_n is not None:
|
||||
data["top_n"] = top_n
|
||||
|
||||
res = server.make_request("POST", "/rerank", data=data)
|
||||
assert res.status_code == 200
|
||||
assert len(res.body["results"]) == expected_len
|
||||
|
||||
|
||||
@pytest.mark.parametrize("top_n,expected_len", [
|
||||
(None, len(TEST_DOCUMENTS)), # no top_n parameter
|
||||
(2, 2),
|
||||
(4, 4),
|
||||
(99, len(TEST_DOCUMENTS)), # higher than available docs
|
||||
])
|
||||
def test_rerank_tei_top_n(top_n, expected_len):
|
||||
global server
|
||||
server.start()
|
||||
data = {
|
||||
"query": "Machine learning is",
|
||||
"texts": TEST_DOCUMENTS,
|
||||
}
|
||||
if top_n is not None:
|
||||
data["top_n"] = top_n
|
||||
|
||||
res = server.make_request("POST", "/rerank", data=data)
|
||||
assert res.status_code == 200
|
||||
assert len(res.body) == expected_len
|
||||
|
|
|
|||
|
|
@ -849,47 +849,44 @@ static json format_response_rerank(
|
|||
const json & request,
|
||||
const json & ranks,
|
||||
bool is_tei_format,
|
||||
std::vector<std::string> & texts) {
|
||||
json res;
|
||||
if (is_tei_format) {
|
||||
// TEI response format
|
||||
res = json::array();
|
||||
bool return_text = json_value(request, "return_text", false);
|
||||
for (const auto & rank : ranks) {
|
||||
int index = json_value(rank, "index", 0);
|
||||
json elem = json{
|
||||
{"index", index},
|
||||
{"score", json_value(rank, "score", 0.0)},
|
||||
};
|
||||
if (return_text) {
|
||||
elem["text"] = std::move(texts[index]);
|
||||
}
|
||||
res.push_back(elem);
|
||||
}
|
||||
} else {
|
||||
// Jina response format
|
||||
json results = json::array();
|
||||
int32_t n_tokens = 0;
|
||||
for (const auto & rank : ranks) {
|
||||
results.push_back(json{
|
||||
{"index", json_value(rank, "index", 0)},
|
||||
{"relevance_score", json_value(rank, "score", 0.0)},
|
||||
});
|
||||
|
||||
n_tokens += json_value(rank, "tokens_evaluated", 0);
|
||||
}
|
||||
|
||||
res = json{
|
||||
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", "list"},
|
||||
{"usage", json{
|
||||
{"prompt_tokens", n_tokens},
|
||||
{"total_tokens", n_tokens}
|
||||
}},
|
||||
{"results", results}
|
||||
std::vector<std::string> & texts,
|
||||
int top_n) {
|
||||
int32_t n_tokens = 0;
|
||||
bool return_text = is_tei_format && json_value(request, "return_text", false);
|
||||
std::vector<json> elements; // Temporary vector to hold unsorted elements
|
||||
std::string score_label = is_tei_format ? "score" : "relevance_score";
|
||||
for (const auto & rank : ranks) {
|
||||
int index = json_value(rank, "index", 0);
|
||||
json elem = json{
|
||||
{"index", index},
|
||||
{score_label, json_value(rank, "score", 0.0)},
|
||||
};
|
||||
n_tokens += json_value(rank, "tokens_evaluated", 0);
|
||||
if (return_text) {
|
||||
elem["text"] = std::move(texts[index]);
|
||||
}
|
||||
elements.push_back(elem);
|
||||
}
|
||||
|
||||
std::sort(elements.begin(), elements.end(), [score_label](const json& a, const json& b) {
|
||||
return json_value(a, score_label, 0.0) > json_value(b, score_label, 0.0);
|
||||
});
|
||||
|
||||
elements.resize(std::min(top_n, (int)elements.size()));
|
||||
json results = elements;
|
||||
|
||||
if (is_tei_format) return results;
|
||||
|
||||
json res = json{
|
||||
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", "list"},
|
||||
{"usage", json{
|
||||
{"prompt_tokens", n_tokens},
|
||||
{"total_tokens", n_tokens}
|
||||
}},
|
||||
{"results", results}
|
||||
};
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@
|
|||
import { Check, X } from '@lucide/svelte';
|
||||
import { Card } from '$lib/components/ui/card';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { ChatAttachmentsList } from '$lib/components/app';
|
||||
import { ChatAttachmentsList, MarkdownContent } from '$lib/components/app';
|
||||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import ChatMessageActions from './ChatMessageActions.svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -55,6 +56,7 @@
|
|||
|
||||
let isMultiline = $state(false);
|
||||
let messageElement: HTMLElement | undefined = $state();
|
||||
const currentConfig = config();
|
||||
|
||||
$effect(() => {
|
||||
if (!messageElement || !message.content.trim()) return;
|
||||
|
|
@ -123,9 +125,18 @@
|
|||
class="max-w-[80%] rounded-[1.125rem] bg-primary px-3.75 py-1.5 text-primary-foreground data-[multiline]:py-2.5"
|
||||
data-multiline={isMultiline ? '' : undefined}
|
||||
>
|
||||
<span bind:this={messageElement} class="text-md whitespace-pre-wrap">
|
||||
{message.content}
|
||||
</span>
|
||||
{#if currentConfig.renderUserContentAsMarkdown}
|
||||
<div bind:this={messageElement} class="text-md">
|
||||
<MarkdownContent
|
||||
class="markdown-user-content text-primary-foreground"
|
||||
content={message.content}
|
||||
/>
|
||||
</div>
|
||||
{:else}
|
||||
<span bind:this={messageElement} class="text-md whitespace-pre-wrap">
|
||||
{message.content}
|
||||
</span>
|
||||
{/if}
|
||||
</Card>
|
||||
{/if}
|
||||
|
||||
|
|
|
|||
|
|
@ -80,6 +80,11 @@
|
|||
key: 'showModelInfo',
|
||||
label: 'Show model information',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'renderUserContentAsMarkdown',
|
||||
label: 'Render user content as Markdown',
|
||||
type: 'checkbox'
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
|
|||
pasteLongTextToFileLen: 2500,
|
||||
pdfAsImage: false,
|
||||
showModelInfo: false,
|
||||
renderUserContentAsMarkdown: false,
|
||||
// make sure these default values are in sync with `common.h`
|
||||
samplers: 'top_k;typ_p;top_p;min_p;temperature',
|
||||
temperature: 0.8,
|
||||
|
|
@ -84,6 +85,7 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
|
|||
'Ask for confirmation before automatically changing conversation title when editing the first message.',
|
||||
pdfAsImage: 'Parse PDF as image instead of text (requires vision-capable model).',
|
||||
showModelInfo: 'Display the model name used to generate each message below the message content.',
|
||||
renderUserContentAsMarkdown: 'Render user messages using markdown formatting in the chat.',
|
||||
pyInterpreterEnabled:
|
||||
'Enable Python interpreter using Pyodide. Allows running Python code in markdown code blocks.'
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue