mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-21 18:52:02 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/build-linux-cross.yml # .github/workflows/build.yml # CODEOWNERS # ggml/CMakeLists.txt # ggml/src/ggml-cuda/fattn.cu # ggml/src/ggml-webgpu/CMakeLists.txt # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl # tests/test-backend-ops.cpp # tests/test-chat-template.cpp # tools/llama-bench/llama-bench.cpp # tools/rpc/README.md # tools/server/README.md
This commit is contained in:
commit
b6f6338bba
32 changed files with 1556 additions and 636 deletions
Binary file not shown.
|
|
@ -1937,7 +1937,7 @@ private:
|
|||
void cleanup_pending_task(int id_target) {
|
||||
// no need lock because this is called exclusively by post()
|
||||
auto rm_func = [id_target](const server_task & task) {
|
||||
return task.id_target == id_target;
|
||||
return task.id == id_target;
|
||||
};
|
||||
queue_tasks.erase(
|
||||
std::remove_if(queue_tasks.begin(), queue_tasks.end(), rm_func),
|
||||
|
|
@ -3676,6 +3676,20 @@ struct server_context {
|
|||
alora_disabled_id = enabled_loras[0];
|
||||
}
|
||||
|
||||
bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
|
||||
|
||||
// make a checkpoint of the parts of the memory that cannot be rolled back.
|
||||
// checkpoints are created only if:
|
||||
// - the model uses SWA and we are not using `swa_full`
|
||||
// - the model architecture is marked as recurrent or hybrid
|
||||
//
|
||||
// TODO: try to make this conditional on the context or the memory module, instead of the model type
|
||||
do_checkpoint = do_checkpoint && (
|
||||
llama_model_is_recurrent(model) ||
|
||||
llama_model_is_hybrid(model) ||
|
||||
(llama_model_n_swa(model) > 0 && !params_base.swa_full)
|
||||
);
|
||||
|
||||
// add prompt tokens for processing in the current batch
|
||||
while (slot.n_past < slot.n_prompt_tokens && batch.n_tokens < n_batch) {
|
||||
// get next token to process
|
||||
|
|
@ -3700,6 +3714,11 @@ struct server_context {
|
|||
|
||||
slot.n_prompt_tokens_processed++;
|
||||
slot.n_past++;
|
||||
|
||||
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
|
||||
if (do_checkpoint && slot.n_prompt_tokens - slot.n_past == 64) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// SLT_INF(slot, "new cache_tokens: %s\n", slot.cache_tokens.str().c_str());
|
||||
|
|
@ -3730,6 +3749,39 @@ struct server_context {
|
|||
slot.i_batch = batch.n_tokens - 1;
|
||||
|
||||
SLT_INF(slot, "prompt done, n_past = %d, n_tokens = %d\n", slot.n_past, batch.n_tokens);
|
||||
|
||||
const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id);
|
||||
const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id);
|
||||
|
||||
// no need for empty or small checkpoints
|
||||
do_checkpoint = do_checkpoint && (pos_min >= 0 && pos_max >= 64);
|
||||
|
||||
// no need to create checkpoints that are too close together
|
||||
do_checkpoint = do_checkpoint && (slot.ctx_checkpoints.empty() || pos_max > slot.ctx_checkpoints.back().pos_max + 64);
|
||||
|
||||
if (do_checkpoint) {
|
||||
while (slot.ctx_checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) {
|
||||
// make room for the new checkpoint, if needed
|
||||
const auto & cur = slot.ctx_checkpoints.front();
|
||||
SLT_WRN(slot, "erasing old context checkpoint (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
|
||||
cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
|
||||
|
||||
slot.ctx_checkpoints.erase(slot.ctx_checkpoints.begin());
|
||||
}
|
||||
|
||||
const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
auto & cur = slot.ctx_checkpoints.emplace_back(ctx_checkpoint{
|
||||
/*.pos_min = */ pos_min,
|
||||
/*.pos_max = */ pos_max,
|
||||
/*.data = */ std::vector<uint8_t>(checkpoint_size),
|
||||
});
|
||||
|
||||
llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
SLT_WRN(slot, "saved context checkpoint %d of %d (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
|
||||
(int) slot.ctx_checkpoints.size(), params_base.n_ctx_checkpoints, cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3853,40 +3905,6 @@ struct server_context {
|
|||
|
||||
// prompt evaluated for next-token prediction
|
||||
slot.state = SLOT_STATE_GENERATING;
|
||||
|
||||
// make a checkpoint of the parts of the memory that cannot be rolled back.
|
||||
// checkpoints are created only if:
|
||||
// - the model uses SWA and we are not using `swa_full`
|
||||
// - the model architecture is marked as recurrent or hybrid
|
||||
//
|
||||
// TODO: try to make this conditional on the context or the memory module, instead of the model type
|
||||
const bool do_checkpoint =
|
||||
(llama_model_is_recurrent(model) || llama_model_is_hybrid(model)) ||
|
||||
(llama_model_n_swa(model) > 0 && !params_base.swa_full);
|
||||
|
||||
if (do_checkpoint && params_base.n_ctx_checkpoints > 0) {
|
||||
while (slot.ctx_checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) {
|
||||
// make room for the new checkpoint, if needed
|
||||
const auto & cur = slot.ctx_checkpoints.front();
|
||||
SLT_WRN(slot, "erasing old context checkpoint (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
|
||||
cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
|
||||
|
||||
slot.ctx_checkpoints.erase(slot.ctx_checkpoints.begin());
|
||||
}
|
||||
|
||||
const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
auto & cur = slot.ctx_checkpoints.emplace_back(ctx_checkpoint{
|
||||
/*.pos_min = */ llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id),
|
||||
/*.pos_max = */ llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id),
|
||||
/*.data = */ std::vector<uint8_t>(checkpoint_size),
|
||||
});
|
||||
|
||||
llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
|
||||
SLT_WRN(slot, "saved context checkpoint %d of %d (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
|
||||
(int) slot.ctx_checkpoints.size(), params_base.n_ctx_checkpoints, cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
|
||||
}
|
||||
} else if (slot.state != SLOT_STATE_GENERATING) {
|
||||
continue; // continue loop of slots
|
||||
}
|
||||
|
|
@ -4184,6 +4202,7 @@ int main(int argc, char ** argv) {
|
|||
auto middleware_validate_api_key = [¶ms, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
static const std::unordered_set<std::string> public_endpoints = {
|
||||
"/health",
|
||||
"/v1/health",
|
||||
"/models",
|
||||
"/v1/models",
|
||||
"/api/tags"
|
||||
|
|
@ -5232,6 +5251,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// register API routes
|
||||
svr->Get (params.api_prefix + "/health", handle_health); // public endpoint (no API key check)
|
||||
svr->Get (params.api_prefix + "/v1/health", handle_health); // public endpoint (no API key check)
|
||||
svr->Get (params.api_prefix + "/metrics", handle_metrics);
|
||||
svr->Get (params.api_prefix + "/props", handle_props);
|
||||
svr->Post(params.api_prefix + "/props", handle_props_change);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
<script lang="ts">
|
||||
import { Search, SquarePen, X } from '@lucide/svelte';
|
||||
import { Search, SquarePen, X, Download, Upload } from '@lucide/svelte';
|
||||
import { KeyboardShortcutInfo } from '$lib/components/app';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { Input } from '$lib/components/ui/input';
|
||||
import { exportAllConversations, importConversations } from '$lib/stores/chat.svelte';
|
||||
|
||||
interface Props {
|
||||
handleMobileSidebarItemClick: () => void;
|
||||
|
|
@ -77,5 +78,34 @@
|
|||
|
||||
<KeyboardShortcutInfo keys={['cmd', 'k']} />
|
||||
</Button>
|
||||
|
||||
<Button
|
||||
class="w-full justify-start text-sm"
|
||||
onclick={() => {
|
||||
importConversations().catch((err) => {
|
||||
console.error('Import failed:', err);
|
||||
// Optional: show toast or dialog
|
||||
});
|
||||
}}
|
||||
variant="ghost"
|
||||
>
|
||||
<div class="flex items-center gap-2">
|
||||
<Upload class="h-4 w-4" />
|
||||
Import conversations
|
||||
</div>
|
||||
</Button>
|
||||
|
||||
<Button
|
||||
class="w-full justify-start text-sm"
|
||||
onclick={() => {
|
||||
exportAllConversations();
|
||||
}}
|
||||
variant="ghost"
|
||||
>
|
||||
<div class="flex items-center gap-2">
|
||||
<Download class="h-4 w-4" />
|
||||
Export all conversations
|
||||
</div>
|
||||
</Button>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Trash2, Pencil, MoreHorizontal } from '@lucide/svelte';
|
||||
import { Trash2, Pencil, MoreHorizontal, Download } from '@lucide/svelte';
|
||||
import { ActionDropdown } from '$lib/components/app';
|
||||
import { downloadConversation } from '$lib/stores/chat.svelte';
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -101,6 +102,15 @@
|
|||
onclick: handleEdit,
|
||||
shortcut: ['shift', 'cmd', 'e']
|
||||
},
|
||||
{
|
||||
icon: Download,
|
||||
label: 'Export',
|
||||
onclick: (e) => {
|
||||
e.stopPropagation();
|
||||
downloadConversation(conversation.id);
|
||||
},
|
||||
shortcut: ['shift', 'cmd', 's']
|
||||
},
|
||||
{
|
||||
icon: Trash2,
|
||||
label: 'Delete',
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/u
|
|||
import { browser } from '$app/environment';
|
||||
import { goto } from '$app/navigation';
|
||||
import { extractPartialThinking } from '$lib/utils/thinking';
|
||||
import { toast } from 'svelte-sonner';
|
||||
import type { ExportedConversations } from '$lib/types/database';
|
||||
|
||||
/**
|
||||
* ChatStore - Central state management for chat conversations and AI interactions
|
||||
|
|
@ -951,6 +953,166 @@ class ChatStore {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Downloads a conversation as JSON file
|
||||
* @param convId - The conversation ID to download
|
||||
*/
|
||||
async downloadConversation(convId: string): Promise<void> {
|
||||
if (!this.activeConversation || this.activeConversation.id !== convId) {
|
||||
// Load the conversation if not currently active
|
||||
const conversation = await DatabaseStore.getConversation(convId);
|
||||
if (!conversation) return;
|
||||
|
||||
const messages = await DatabaseStore.getConversationMessages(convId);
|
||||
const conversationData = {
|
||||
conv: conversation,
|
||||
messages
|
||||
};
|
||||
|
||||
this.triggerDownload(conversationData);
|
||||
} else {
|
||||
// Use current active conversation data
|
||||
const conversationData: ExportedConversations = {
|
||||
conv: this.activeConversation!,
|
||||
messages: this.activeMessages
|
||||
};
|
||||
|
||||
this.triggerDownload(conversationData);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Triggers file download in browser
|
||||
* @param data - Data to download (expected: { conv: DatabaseConversation, messages: DatabaseMessage[] })
|
||||
* @param filename - Optional filename
|
||||
*/
|
||||
private triggerDownload(data: ExportedConversations, filename?: string): void {
|
||||
const conversation =
|
||||
'conv' in data ? data.conv : Array.isArray(data) ? data[0]?.conv : undefined;
|
||||
if (!conversation) {
|
||||
console.error('Invalid data: missing conversation');
|
||||
return;
|
||||
}
|
||||
const conversationName = conversation.name ? conversation.name.trim() : '';
|
||||
const convId = conversation.id || 'unknown';
|
||||
const truncatedSuffix = conversationName
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]/gi, '_')
|
||||
.replace(/_+/g, '_')
|
||||
.substring(0, 20);
|
||||
const downloadFilename = filename || `conversation_${convId}_${truncatedSuffix}.json`;
|
||||
|
||||
const conversationJson = JSON.stringify(data, null, 2);
|
||||
const blob = new Blob([conversationJson], {
|
||||
type: 'application/json'
|
||||
});
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = downloadFilename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
|
||||
/**
|
||||
* Exports all conversations with their messages as a JSON file
|
||||
*/
|
||||
async exportAllConversations(): Promise<void> {
|
||||
try {
|
||||
const allConversations = await DatabaseStore.getAllConversations();
|
||||
if (allConversations.length === 0) {
|
||||
throw new Error('No conversations to export');
|
||||
}
|
||||
|
||||
const allData: ExportedConversations = await Promise.all(
|
||||
allConversations.map(async (conv) => {
|
||||
const messages = await DatabaseStore.getConversationMessages(conv.id);
|
||||
return { conv, messages };
|
||||
})
|
||||
);
|
||||
|
||||
const blob = new Blob([JSON.stringify(allData, null, 2)], {
|
||||
type: 'application/json'
|
||||
});
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement('a');
|
||||
a.href = url;
|
||||
a.download = `all_conversations_${new Date().toISOString().split('T')[0]}.json`;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
|
||||
toast.success(`All conversations (${allConversations.length}) prepared for download`);
|
||||
} catch (err) {
|
||||
console.error('Failed to export conversations:', err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports conversations from a JSON file.
|
||||
* Supports both single conversation (object) and multiple conversations (array).
|
||||
* Uses DatabaseStore for safe, encapsulated data access
|
||||
*/
|
||||
async importConversations(): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const input = document.createElement('input');
|
||||
input.type = 'file';
|
||||
input.accept = '.json';
|
||||
|
||||
input.onchange = async (e) => {
|
||||
const file = (e.target as HTMLInputElement)?.files?.[0];
|
||||
if (!file) {
|
||||
reject(new Error('No file selected'));
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const text = await file.text();
|
||||
const parsedData = JSON.parse(text);
|
||||
let importedData: ExportedConversations;
|
||||
|
||||
if (Array.isArray(parsedData)) {
|
||||
importedData = parsedData;
|
||||
} else if (
|
||||
parsedData &&
|
||||
typeof parsedData === 'object' &&
|
||||
'conv' in parsedData &&
|
||||
'messages' in parsedData
|
||||
) {
|
||||
// Single conversation object
|
||||
importedData = [parsedData];
|
||||
} else {
|
||||
throw new Error(
|
||||
'Invalid file format: expected array of conversations or single conversation object'
|
||||
);
|
||||
}
|
||||
|
||||
const result = await DatabaseStore.importConversations(importedData);
|
||||
|
||||
// Refresh UI
|
||||
await this.loadConversations();
|
||||
|
||||
toast.success(`Imported ${result.imported} conversation(s), skipped ${result.skipped}`);
|
||||
|
||||
resolve(undefined);
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : 'Unknown error';
|
||||
console.error('Failed to import conversations:', err);
|
||||
toast.error('Import failed', {
|
||||
description: message
|
||||
});
|
||||
reject(new Error(`Import failed: ${message}`));
|
||||
}
|
||||
};
|
||||
|
||||
input.click();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes a conversation and all its messages
|
||||
* @param convId - The conversation ID to delete
|
||||
|
|
@ -1427,6 +1589,9 @@ export const isInitialized = () => chatStore.isInitialized;
|
|||
export const maxContextError = () => chatStore.maxContextError;
|
||||
|
||||
export const createConversation = chatStore.createConversation.bind(chatStore);
|
||||
export const downloadConversation = chatStore.downloadConversation.bind(chatStore);
|
||||
export const exportAllConversations = chatStore.exportAllConversations.bind(chatStore);
|
||||
export const importConversations = chatStore.importConversations.bind(chatStore);
|
||||
export const deleteConversation = chatStore.deleteConversation.bind(chatStore);
|
||||
export const sendMessage = chatStore.sendMessage.bind(chatStore);
|
||||
export const gracefulStop = chatStore.gracefulStop.bind(chatStore);
|
||||
|
|
|
|||
|
|
@ -346,4 +346,39 @@ export class DatabaseStore {
|
|||
): Promise<void> {
|
||||
await db.messages.update(id, updates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Imports multiple conversations and their messages.
|
||||
* Skips conversations that already exist.
|
||||
*
|
||||
* @param data - Array of { conv, messages } objects
|
||||
*/
|
||||
static async importConversations(
|
||||
data: { conv: DatabaseConversation; messages: DatabaseMessage[] }[]
|
||||
): Promise<{ imported: number; skipped: number }> {
|
||||
let importedCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
return await db.transaction('rw', [db.conversations, db.messages], async () => {
|
||||
for (const item of data) {
|
||||
const { conv, messages } = item;
|
||||
|
||||
const existing = await db.conversations.get(conv.id);
|
||||
if (existing) {
|
||||
console.warn(`Conversation "${conv.name}" already exists, skipping...`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
await db.conversations.add(conv);
|
||||
for (const msg of messages) {
|
||||
await db.messages.put(msg);
|
||||
}
|
||||
|
||||
importedCount++;
|
||||
}
|
||||
|
||||
return { imported: importedCount, skipped: skippedCount };
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
15
tools/server/webui/src/lib/types/database.d.ts
vendored
15
tools/server/webui/src/lib/types/database.d.ts
vendored
|
|
@ -54,3 +54,18 @@ export interface DatabaseMessage {
|
|||
timings?: ChatMessageTimings;
|
||||
model?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a single conversation with its associated messages,
|
||||
* typically used for import/export operations.
|
||||
*/
|
||||
export type ExportedConversation = {
|
||||
conv: DatabaseConversation;
|
||||
messages: DatabaseMessage[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Type representing one or more exported conversations.
|
||||
* Can be a single conversation object or an array of them.
|
||||
*/
|
||||
export type ExportedConversations = ExportedConversation | ExportedConversation[];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue