Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build-linux-cross.yml
#	.github/workflows/build.yml
#	CODEOWNERS
#	ggml/CMakeLists.txt
#	ggml/src/ggml-cuda/fattn.cu
#	ggml/src/ggml-webgpu/CMakeLists.txt
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.tmpl.wgsl
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tools/llama-bench/llama-bench.cpp
#	tools/rpc/README.md
#	tools/server/README.md
This commit is contained in:
Concedo 2025-10-09 01:33:27 +08:00
commit b6f6338bba
32 changed files with 1556 additions and 636 deletions

Binary file not shown.

View file

@ -1937,7 +1937,7 @@ private:
void cleanup_pending_task(int id_target) {
// no need lock because this is called exclusively by post()
auto rm_func = [id_target](const server_task & task) {
return task.id_target == id_target;
return task.id == id_target;
};
queue_tasks.erase(
std::remove_if(queue_tasks.begin(), queue_tasks.end(), rm_func),
@ -3676,6 +3676,20 @@ struct server_context {
alora_disabled_id = enabled_loras[0];
}
bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
// make a checkpoint of the parts of the memory that cannot be rolled back.
// checkpoints are created only if:
// - the model uses SWA and we are not using `swa_full`
// - the model architecture is marked as recurrent or hybrid
//
// TODO: try to make this conditional on the context or the memory module, instead of the model type
do_checkpoint = do_checkpoint && (
llama_model_is_recurrent(model) ||
llama_model_is_hybrid(model) ||
(llama_model_n_swa(model) > 0 && !params_base.swa_full)
);
// add prompt tokens for processing in the current batch
while (slot.n_past < slot.n_prompt_tokens && batch.n_tokens < n_batch) {
// get next token to process
@ -3700,6 +3714,11 @@ struct server_context {
slot.n_prompt_tokens_processed++;
slot.n_past++;
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
if (do_checkpoint && slot.n_prompt_tokens - slot.n_past == 64) {
break;
}
}
// SLT_INF(slot, "new cache_tokens: %s\n", slot.cache_tokens.str().c_str());
@ -3730,6 +3749,39 @@ struct server_context {
slot.i_batch = batch.n_tokens - 1;
SLT_INF(slot, "prompt done, n_past = %d, n_tokens = %d\n", slot.n_past, batch.n_tokens);
const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id);
const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id);
// no need for empty or small checkpoints
do_checkpoint = do_checkpoint && (pos_min >= 0 && pos_max >= 64);
// no need to create checkpoints that are too close together
do_checkpoint = do_checkpoint && (slot.ctx_checkpoints.empty() || pos_max > slot.ctx_checkpoints.back().pos_max + 64);
if (do_checkpoint) {
while (slot.ctx_checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) {
// make room for the new checkpoint, if needed
const auto & cur = slot.ctx_checkpoints.front();
SLT_WRN(slot, "erasing old context checkpoint (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
slot.ctx_checkpoints.erase(slot.ctx_checkpoints.begin());
}
const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
auto & cur = slot.ctx_checkpoints.emplace_back(ctx_checkpoint{
/*.pos_min = */ pos_min,
/*.pos_max = */ pos_max,
/*.data = */ std::vector<uint8_t>(checkpoint_size),
});
llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
SLT_WRN(slot, "saved context checkpoint %d of %d (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
(int) slot.ctx_checkpoints.size(), params_base.n_ctx_checkpoints, cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
}
}
}
@ -3853,40 +3905,6 @@ struct server_context {
// prompt evaluated for next-token prediction
slot.state = SLOT_STATE_GENERATING;
// make a checkpoint of the parts of the memory that cannot be rolled back.
// checkpoints are created only if:
// - the model uses SWA and we are not using `swa_full`
// - the model architecture is marked as recurrent or hybrid
//
// TODO: try to make this conditional on the context or the memory module, instead of the model type
const bool do_checkpoint =
(llama_model_is_recurrent(model) || llama_model_is_hybrid(model)) ||
(llama_model_n_swa(model) > 0 && !params_base.swa_full);
if (do_checkpoint && params_base.n_ctx_checkpoints > 0) {
while (slot.ctx_checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) {
// make room for the new checkpoint, if needed
const auto & cur = slot.ctx_checkpoints.front();
SLT_WRN(slot, "erasing old context checkpoint (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
slot.ctx_checkpoints.erase(slot.ctx_checkpoints.begin());
}
const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
auto & cur = slot.ctx_checkpoints.emplace_back(ctx_checkpoint{
/*.pos_min = */ llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id),
/*.pos_max = */ llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id),
/*.data = */ std::vector<uint8_t>(checkpoint_size),
});
llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
SLT_WRN(slot, "saved context checkpoint %d of %d (pos_min = %d, pos_max = %d, size = %.3f MiB)\n",
(int) slot.ctx_checkpoints.size(), params_base.n_ctx_checkpoints, cur.pos_min, cur.pos_max, (float) cur.data.size() / 1024 / 1024);
}
} else if (slot.state != SLOT_STATE_GENERATING) {
continue; // continue loop of slots
}
@ -4184,6 +4202,7 @@ int main(int argc, char ** argv) {
auto middleware_validate_api_key = [&params, &res_error](const httplib::Request & req, httplib::Response & res) {
static const std::unordered_set<std::string> public_endpoints = {
"/health",
"/v1/health",
"/models",
"/v1/models",
"/api/tags"
@ -5232,6 +5251,7 @@ int main(int argc, char ** argv) {
// register API routes
svr->Get (params.api_prefix + "/health", handle_health); // public endpoint (no API key check)
svr->Get (params.api_prefix + "/v1/health", handle_health); // public endpoint (no API key check)
svr->Get (params.api_prefix + "/metrics", handle_metrics);
svr->Get (params.api_prefix + "/props", handle_props);
svr->Post(params.api_prefix + "/props", handle_props_change);

View file

@ -1,8 +1,9 @@
<script lang="ts">
import { Search, SquarePen, X } from '@lucide/svelte';
import { Search, SquarePen, X, Download, Upload } from '@lucide/svelte';
import { KeyboardShortcutInfo } from '$lib/components/app';
import { Button } from '$lib/components/ui/button';
import { Input } from '$lib/components/ui/input';
import { exportAllConversations, importConversations } from '$lib/stores/chat.svelte';
interface Props {
handleMobileSidebarItemClick: () => void;
@ -77,5 +78,34 @@
<KeyboardShortcutInfo keys={['cmd', 'k']} />
</Button>
<Button
class="w-full justify-start text-sm"
onclick={() => {
importConversations().catch((err) => {
console.error('Import failed:', err);
// Optional: show toast or dialog
});
}}
variant="ghost"
>
<div class="flex items-center gap-2">
<Upload class="h-4 w-4" />
Import conversations
</div>
</Button>
<Button
class="w-full justify-start text-sm"
onclick={() => {
exportAllConversations();
}}
variant="ghost"
>
<div class="flex items-center gap-2">
<Download class="h-4 w-4" />
Export all conversations
</div>
</Button>
{/if}
</div>

View file

@ -1,6 +1,7 @@
<script lang="ts">
import { Trash2, Pencil, MoreHorizontal } from '@lucide/svelte';
import { Trash2, Pencil, MoreHorizontal, Download } from '@lucide/svelte';
import { ActionDropdown } from '$lib/components/app';
import { downloadConversation } from '$lib/stores/chat.svelte';
import { onMount } from 'svelte';
interface Props {
@ -101,6 +102,15 @@
onclick: handleEdit,
shortcut: ['shift', 'cmd', 'e']
},
{
icon: Download,
label: 'Export',
onclick: (e) => {
e.stopPropagation();
downloadConversation(conversation.id);
},
shortcut: ['shift', 'cmd', 's']
},
{
icon: Trash2,
label: 'Delete',

View file

@ -6,6 +6,8 @@ import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/u
import { browser } from '$app/environment';
import { goto } from '$app/navigation';
import { extractPartialThinking } from '$lib/utils/thinking';
import { toast } from 'svelte-sonner';
import type { ExportedConversations } from '$lib/types/database';
/**
* ChatStore - Central state management for chat conversations and AI interactions
@ -951,6 +953,166 @@ class ChatStore {
}
}
/**
* Downloads a conversation as JSON file
* @param convId - The conversation ID to download
*/
async downloadConversation(convId: string): Promise<void> {
if (!this.activeConversation || this.activeConversation.id !== convId) {
// Load the conversation if not currently active
const conversation = await DatabaseStore.getConversation(convId);
if (!conversation) return;
const messages = await DatabaseStore.getConversationMessages(convId);
const conversationData = {
conv: conversation,
messages
};
this.triggerDownload(conversationData);
} else {
// Use current active conversation data
const conversationData: ExportedConversations = {
conv: this.activeConversation!,
messages: this.activeMessages
};
this.triggerDownload(conversationData);
}
}
/**
* Triggers file download in browser
* @param data - Data to download (expected: { conv: DatabaseConversation, messages: DatabaseMessage[] })
* @param filename - Optional filename
*/
private triggerDownload(data: ExportedConversations, filename?: string): void {
const conversation =
'conv' in data ? data.conv : Array.isArray(data) ? data[0]?.conv : undefined;
if (!conversation) {
console.error('Invalid data: missing conversation');
return;
}
const conversationName = conversation.name ? conversation.name.trim() : '';
const convId = conversation.id || 'unknown';
const truncatedSuffix = conversationName
.toLowerCase()
.replace(/[^a-z0-9]/gi, '_')
.replace(/_+/g, '_')
.substring(0, 20);
const downloadFilename = filename || `conversation_${convId}_${truncatedSuffix}.json`;
const conversationJson = JSON.stringify(data, null, 2);
const blob = new Blob([conversationJson], {
type: 'application/json'
});
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = downloadFilename;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
}
/**
* Exports all conversations with their messages as a JSON file
*/
async exportAllConversations(): Promise<void> {
try {
const allConversations = await DatabaseStore.getAllConversations();
if (allConversations.length === 0) {
throw new Error('No conversations to export');
}
const allData: ExportedConversations = await Promise.all(
allConversations.map(async (conv) => {
const messages = await DatabaseStore.getConversationMessages(conv.id);
return { conv, messages };
})
);
const blob = new Blob([JSON.stringify(allData, null, 2)], {
type: 'application/json'
});
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `all_conversations_${new Date().toISOString().split('T')[0]}.json`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
toast.success(`All conversations (${allConversations.length}) prepared for download`);
} catch (err) {
console.error('Failed to export conversations:', err);
throw err;
}
}
/**
* Imports conversations from a JSON file.
* Supports both single conversation (object) and multiple conversations (array).
* Uses DatabaseStore for safe, encapsulated data access
*/
async importConversations(): Promise<void> {
return new Promise((resolve, reject) => {
const input = document.createElement('input');
input.type = 'file';
input.accept = '.json';
input.onchange = async (e) => {
const file = (e.target as HTMLInputElement)?.files?.[0];
if (!file) {
reject(new Error('No file selected'));
return;
}
try {
const text = await file.text();
const parsedData = JSON.parse(text);
let importedData: ExportedConversations;
if (Array.isArray(parsedData)) {
importedData = parsedData;
} else if (
parsedData &&
typeof parsedData === 'object' &&
'conv' in parsedData &&
'messages' in parsedData
) {
// Single conversation object
importedData = [parsedData];
} else {
throw new Error(
'Invalid file format: expected array of conversations or single conversation object'
);
}
const result = await DatabaseStore.importConversations(importedData);
// Refresh UI
await this.loadConversations();
toast.success(`Imported ${result.imported} conversation(s), skipped ${result.skipped}`);
resolve(undefined);
} catch (err: unknown) {
const message = err instanceof Error ? err.message : 'Unknown error';
console.error('Failed to import conversations:', err);
toast.error('Import failed', {
description: message
});
reject(new Error(`Import failed: ${message}`));
}
};
input.click();
});
}
/**
* Deletes a conversation and all its messages
* @param convId - The conversation ID to delete
@ -1427,6 +1589,9 @@ export const isInitialized = () => chatStore.isInitialized;
export const maxContextError = () => chatStore.maxContextError;
export const createConversation = chatStore.createConversation.bind(chatStore);
export const downloadConversation = chatStore.downloadConversation.bind(chatStore);
export const exportAllConversations = chatStore.exportAllConversations.bind(chatStore);
export const importConversations = chatStore.importConversations.bind(chatStore);
export const deleteConversation = chatStore.deleteConversation.bind(chatStore);
export const sendMessage = chatStore.sendMessage.bind(chatStore);
export const gracefulStop = chatStore.gracefulStop.bind(chatStore);

View file

@ -346,4 +346,39 @@ export class DatabaseStore {
): Promise<void> {
await db.messages.update(id, updates);
}
/**
* Imports multiple conversations and their messages.
* Skips conversations that already exist.
*
* @param data - Array of { conv, messages } objects
*/
static async importConversations(
data: { conv: DatabaseConversation; messages: DatabaseMessage[] }[]
): Promise<{ imported: number; skipped: number }> {
let importedCount = 0;
let skippedCount = 0;
return await db.transaction('rw', [db.conversations, db.messages], async () => {
for (const item of data) {
const { conv, messages } = item;
const existing = await db.conversations.get(conv.id);
if (existing) {
console.warn(`Conversation "${conv.name}" already exists, skipping...`);
skippedCount++;
continue;
}
await db.conversations.add(conv);
for (const msg of messages) {
await db.messages.put(msg);
}
importedCount++;
}
return { imported: importedCount, skipped: skippedCount };
});
}
}

View file

@ -54,3 +54,18 @@ export interface DatabaseMessage {
timings?: ChatMessageTimings;
model?: string;
}
/**
* Represents a single conversation with its associated messages,
* typically used for import/export operations.
*/
export type ExportedConversation = {
conv: DatabaseConversation;
messages: DatabaseMessage[];
};
/**
* Type representing one or more exported conversations.
* Can be a single conversation object or an array of them.
*/
export type ExportedConversations = ExportedConversation | ExportedConversation[];