diff --git a/common/arg.cpp b/common/arg.cpp index f9434bf70..0c5e9b651 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -774,6 +774,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map=,...", + {"-ot", "--override-tensor"}, "=,...", "override tensor buffer type", [](common_params & params, const std::string & value) { parse_tensor_buffer_overrides(value, params.tensor_buft_overrides); } )); add_opt(common_arg( - {"--override-tensor-draft", "-otd"}, "=,...", + {"-otd", "--override-tensor-draft"}, "=,...", "override tensor buffer type for draft model", [](common_params & params, const std::string & value) { parse_tensor_buffer_overrides(value, params.speculative.tensor_buft_overrides); } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI})); add_opt(common_arg( - {"--cpu-moe", "-cmoe"}, + {"-cmoe", "--cpu-moe"}, "keep all Mixture of Experts (MoE) weights in the CPU", [](common_params & params) { params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override()); } ).set_env("LLAMA_ARG_CPU_MOE")); add_opt(common_arg( - {"--n-cpu-moe", "-ncmoe"}, "N", + {"-ncmoe", "--n-cpu-moe"}, "N", "keep the Mixture of Experts (MoE) weights of the first N layers in the CPU", [](common_params & params, int value) { if (value < 0) { @@ -2109,14 +2114,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_env("LLAMA_ARG_N_CPU_MOE")); add_opt(common_arg( - {"--cpu-moe-draft", "-cmoed"}, + {"-cmoed", "--cpu-moe-draft"}, "keep all Mixture of Experts (MoE) weights in the CPU for the draft model", [](common_params & params) { params.speculative.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override()); } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_CPU_MOE_DRAFT")); add_opt(common_arg( - {"--n-cpu-moe-draft", "-ncmoed"}, "N", + {"-ncmoed", "--n-cpu-moe-draft"}, "N", "keep the Mixture of Experts (MoE) weights of the first N layers in the CPU for the draft model", [](common_params & params, int value) { if (value < 0) { @@ -2644,7 +2649,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS")); add_opt(common_arg( - {"--reranking", "--rerank"}, + {"--rerank", "--reranking"}, string_format("enable reranking endpoint on server (default: %s)", "disabled"), [](common_params & params) { params.embedding = true; @@ -3115,7 +3120,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); add_opt(common_arg( - {"--draft-max", "--draft", "--draft-n"}, "N", + {"--draft", "--draft-n", "--draft-max"}, "N", string_format("number of tokens to draft for speculative decoding (default: %d)", params.speculative.n_max), [](common_params & params, int value) { params.speculative.n_max = value; diff --git a/common/preset.cpp b/common/preset.cpp index 60746aad5..1aa9864d0 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -2,6 +2,7 @@ #include "preset.h" #include "peg-parser.h" #include "log.h" +#include "download.h" #include #include @@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) { return str.substr(pos); } -std::vector common_preset::to_args() const { +std::vector common_preset::to_args(const std::string & bin_path) const { std::vector args; + if (!bin_path.empty()) { + args.push_back(bin_path); + } + for (const auto & [opt, value] : options) { args.push_back(opt.args.back()); // use the last arg as the main arg if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) { @@ -63,6 +68,52 @@ std::string common_preset::to_ini() const { return ss.str(); } +void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) { + // try if option exists, update it + for (auto & [opt, val] : options) { + if (opt.env && env == opt.env) { + val = value; + return; + } + } + // if option does not exist, we need to add it + if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) { + throw std::runtime_error(string_format( + "%s: option with env '%s' not found in ctx_params", + __func__, env.c_str() + )); + } + options[ctx.key_to_opt.at(env)] = value; +} + +void common_preset::unset_option(const std::string & env) { + for (auto it = options.begin(); it != options.end(); ) { + const common_arg & opt = it->first; + if (opt.env && env == opt.env) { + it = options.erase(it); + return; + } else { + ++it; + } + } +} + +bool common_preset::get_option(const std::string & env, std::string & value) const { + for (const auto & [opt, val] : options) { + if (opt.env && env == opt.env) { + value = val; + return true; + } + } + return false; +} + +void common_preset::merge(const common_preset & other) { + for (const auto & [opt, val] : other.options) { + options[opt] = val; // overwrite existing options + } +} + static std::map> parse_ini_from_file(const std::string & path) { std::map> parsed; @@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) { +common_preset_context::common_preset_context(llama_example ex) + : ctx_params(common_params_parser_init(default_params, ex)), + key_to_opt(get_map_key_opt(ctx_params)) {} + +common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const { common_presets out; - auto key_to_opt = get_map_key_opt(ctx_params); auto ini_data = parse_ini_from_file(path); for (auto section : ini_data) { @@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte for (const auto & [key, value] : section.second) { LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str()); if (key_to_opt.find(key) != key_to_opt.end()) { - auto & opt = key_to_opt[key]; + const auto & opt = key_to_opt.at(key); if (is_bool_arg(opt)) { preset.options[opt] = parse_bool_arg(opt, key, value); } else { @@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte // TODO: maybe warn about unknown key? } } + + if (preset.name == "*") { + // handle global preset + global = preset; + } else { + out[preset.name] = preset; + } + } + + return out; +} + +common_presets common_preset_context::load_from_cache() const { + common_presets out; + + auto cached_models = common_list_cached_models(); + for (const auto & model : cached_models) { + common_preset preset; + preset.name = model.to_string(); + preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string()); out[preset.name] = preset; } return out; } + +struct local_model { + std::string name; + std::string path; + std::string path_mmproj; +}; + +common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const { + if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) { + throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str())); + } + + std::vector models; + auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) { + auto files = fs_list(subdir_path, false); + common_file_info model_file; + common_file_info first_shard_file; + common_file_info mmproj_file; + for (const auto & file : files) { + if (string_ends_with(file.name, ".gguf")) { + if (file.name.find("mmproj") != std::string::npos) { + mmproj_file = file; + } else if (file.name.find("-00001-of-") != std::string::npos) { + first_shard_file = file; + } else { + model_file = file; + } + } + } + // single file model + local_model model{ + /* name */ name, + /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, + /* path_mmproj */ mmproj_file.path // can be empty + }; + if (!model.path.empty()) { + models.push_back(model); + } + }; + + auto files = fs_list(models_dir, true); + for (const auto & file : files) { + if (file.is_dir) { + scan_subdir(file.path, file.name); + } else if (string_ends_with(file.name, ".gguf")) { + // single file model + std::string name = file.name; + string_replace_all(name, ".gguf", ""); + local_model model{ + /* name */ name, + /* path */ file.path, + /* path_mmproj */ "" + }; + models.push_back(model); + } + } + + // convert local models to presets + common_presets out; + for (const auto & model : models) { + common_preset preset; + preset.name = model.name; + preset.set_option(*this, "LLAMA_ARG_MODEL", model.path); + if (!model.path_mmproj.empty()) { + preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj); + } + out[preset.name] = preset; + } + + return out; +} + +common_preset common_preset_context::load_from_args(int argc, char ** argv) const { + common_preset preset; + preset.name = COMMON_PRESET_DEFAULT_NAME; + + bool ok = common_params_to_map(argc, argv, ctx_params.ex, preset.options); + if (!ok) { + throw std::runtime_error("failed to parse CLI arguments into preset"); + } + + return preset; +} + +common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const { + common_presets out = base; // copy + for (const auto & [name, preset_added] : added) { + if (out.find(name) != out.end()) { + // if exists, merge + common_preset & target = out[name]; + target.merge(preset_added); + } else { + // otherwise, add directly + out[name] = preset_added; + } + } + return out; +} + +common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const { + common_presets out; + for (const auto & [name, preset] : presets) { + common_preset tmp = base; // copy + tmp.name = name; + tmp.merge(preset); + out[name] = std::move(tmp); + } + return out; +} diff --git a/common/preset.h b/common/preset.h index dceb849eb..3a84d1be2 100644 --- a/common/preset.h +++ b/common/preset.h @@ -13,20 +13,62 @@ constexpr const char * COMMON_PRESET_DEFAULT_NAME = "default"; +struct common_preset_context; + struct common_preset { std::string name; - // TODO: support repeated args in the future + + // options are stored as common_arg to string mapping, representing CLI arg and its value std::map options; // convert preset to CLI argument list - std::vector to_args() const; + std::vector to_args(const std::string & bin_path = "") const; // convert preset to INI format string std::string to_ini() const; // TODO: maybe implement to_env() if needed + + // modify preset options where argument is identified by its env variable + void set_option(const common_preset_context & ctx, const std::string & env, const std::string & value); + + // unset option by its env variable + void unset_option(const std::string & env); + + // get option value by its env variable, return false if not found + bool get_option(const std::string & env, std::string & value) const; + + // merge another preset into this one, overwriting existing options + void merge(const common_preset & other); }; // interface for multiple presets in one file using common_presets = std::map; -common_presets common_presets_load(const std::string & path, common_params_context & ctx_params); + +// context for loading and editing presets +struct common_preset_context { + common_params default_params; // unused for now + common_params_context ctx_params; + std::map key_to_opt; + common_preset_context(llama_example ex); + + // load presets from INI file + common_presets load_from_ini(const std::string & path, common_preset & global) const; + + // generate presets from cached models + common_presets load_from_cache() const; + + // generate presets from local models directory + // for the directory structure, see "Using multiple models" in server/README.md + common_presets load_from_models_dir(const std::string & models_dir) const; + + // generate one preset from CLI arguments + common_preset load_from_args(int argc, char ** argv) const; + + // cascade multiple presets if exist on both: base < added + // if preset does not exist in base, it will be added without modification + common_presets cascade(const common_presets & base, const common_presets & added) const; + + // apply presets over a base preset (same idea as CSS cascading) + common_presets cascade(const common_preset & base, const common_presets & presets) const; +}; diff --git a/docs/android/imported-into-android-studio.jpg b/docs/android/imported-into-android-studio.jpg new file mode 100644 index 000000000..bbe6867c6 Binary files /dev/null and b/docs/android/imported-into-android-studio.jpg differ diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index a948570e2..2f54905bb 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -1544,6 +1544,8 @@ private: #endif // GGML_VULKAN_MEMORY_DEBUG static bool vk_perf_logger_enabled = false; +static bool vk_perf_logger_concurrent = false; +static bool vk_enable_sync_logger = false; // number of calls between perf logger prints static uint32_t vk_perf_logger_frequency = 1; @@ -1594,14 +1596,14 @@ class vk_perf_logger { flops.clear(); } - void log_timing(const ggml_tensor * node, const char *fusion_name, uint64_t time) { + std::string get_node_fusion_name(const ggml_tensor * node, const char *fusion_name, uint64_t *n_flops) { + *n_flops = 0; std::string fusion_str; if (fusion_name) { fusion_str = fusion_name + std::string(" "); } if (node->op == GGML_OP_UNARY) { - timings[fusion_str + ggml_unary_op_name(ggml_get_unary_op(node))].push_back(time); - return; + return fusion_str + ggml_unary_op_name(ggml_get_unary_op(node)); } if (node->op == GGML_OP_MUL_MAT || node->op == GGML_OP_MUL_MAT_ID) { const uint64_t m = node->ne[0]; @@ -1623,9 +1625,8 @@ class vk_perf_logger { name += " batch=" + std::to_string(batch); } name = fusion_str + name; - timings[name].push_back(time); - flops[name].push_back(m * n * (k + (k - 1)) * batch); - return; + *n_flops = m * n * (k + (k - 1)) * batch; + return name; } if (node->op == GGML_OP_CONV_2D || node->op == GGML_OP_CONV_TRANSPOSE_2D) { std::string name = ggml_op_name(node->op); @@ -1641,20 +1642,17 @@ class vk_perf_logger { uint64_t size_M = Cout; uint64_t size_K = Cin * KW * KH; uint64_t size_N = N * OW * OH; - uint64_t n_flops = size_M * size_N * (size_K + (size_K - 1)); + *n_flops = size_M * size_N * (size_K + (size_K - 1)); name += " M=Cout=" + std::to_string(size_M) + ", K=Cin*KW*KH=" + std::to_string(size_K) + ", N=N*OW*OH=" + std::to_string(size_N); name = fusion_str + name; - flops[name].push_back(n_flops); - timings[name].push_back(time); - return; + return name; } if (node->op == GGML_OP_RMS_NORM) { std::string name = ggml_op_name(node->op); name += "(" + std::to_string(node->ne[0]) + "," + std::to_string(node->ne[1]) + "," + std::to_string(node->ne[2]) + "," + std::to_string(node->ne[3]) + ")"; name = fusion_str + name; - timings[name].push_back(time); - return; + return name; } if (node->op == GGML_OP_FLASH_ATTN_EXT) { const ggml_tensor * dst = node; @@ -1670,8 +1668,7 @@ class vk_perf_logger { " k(" << k->ne[0] << "," << k->ne[1] << "," << k->ne[2] << "," << k->ne[3] << "), " << " v(" << v->ne[0] << "," << v->ne[1] << "," << v->ne[2] << "," << v->ne[3] << "), " << " m(" << (m?m->ne[0]:0) << "," << (m?m->ne[1]:0) << "," << (m?m->ne[2]:0) << "," << (m?m->ne[3]:0) << ")"; - timings[name.str()].push_back(time); - return; + return name.str(); } if (node->op == GGML_OP_TOP_K) { std::stringstream name; @@ -1679,11 +1676,38 @@ class vk_perf_logger { name << ggml_op_name(node->op) << " K=" << node->ne[0] << " (" << node->src[0]->ne[0] << "," << node->src[0]->ne[1] << "," << node->src[0]->ne[2] << "," << node->src[0]->ne[3] << ")"; - timings[name.str()].push_back(time); - return; + return name.str(); } - timings[fusion_str + ggml_op_name(node->op)].push_back(time); + return fusion_str + ggml_op_name(node->op); } + + void log_timing(const ggml_tensor * node, const char *fusion_name, uint64_t time) { + uint64_t n_flops; + std::string name = get_node_fusion_name(node, fusion_name, &n_flops); + if (n_flops) { + flops[name].push_back(n_flops); + } + timings[name].push_back(time); + } + + void log_timing(const std::vector &nodes, const std::vector &names, uint64_t time) { + uint64_t total_flops = 0; + std::string name; + for (size_t n = 0; n < nodes.size(); ++n) { + uint64_t n_flops = 0; + name += get_node_fusion_name(nodes[n], names[n], &n_flops); + total_flops += n_flops; + + if (n != nodes.size() - 1) { + name += ", "; + } + } + if (total_flops) { + flops[name].push_back(total_flops); + } + timings[name].push_back(time); + } + private: std::map> timings; std::map> flops; @@ -1746,7 +1770,9 @@ struct ggml_backend_vk_context { std::unique_ptr perf_logger; vk::QueryPool query_pool; std::vector query_fusion_names; + std::vector query_fusion_node_count; std::vector query_nodes; + std::vector query_node_idx; int32_t num_queries {}; int32_t query_idx {}; }; @@ -5225,6 +5251,8 @@ static void ggml_vk_instance_init() { } vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; + vk_perf_logger_concurrent = getenv("GGML_VK_PERF_LOGGER_CONCURRENT") != nullptr; + vk_enable_sync_logger = getenv("GGML_VK_SYNC_LOGGER") != nullptr; const char* GGML_VK_PERF_LOGGER_FREQUENCY = getenv("GGML_VK_PERF_LOGGER_FREQUENCY"); if (GGML_VK_PERF_LOGGER_FREQUENCY != nullptr) { @@ -11857,15 +11885,18 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr } } -#define ENABLE_SYNC_LOGGING 0 - if (need_sync) { -#if ENABLE_SYNC_LOGGING - std::cerr << "sync" << std::endl; -#endif + if (vk_enable_sync_logger) { + std::cerr << "sync" << std::endl; + } ctx->unsynced_nodes_written.clear(); ctx->unsynced_nodes_read.clear(); ggml_vk_sync_buffers(ctx, compute_ctx); + + if (vk_perf_logger_enabled && vk_perf_logger_concurrent) { + ctx->query_node_idx[ctx->query_idx] = node_idx; + compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->query_pool, ctx->query_idx++); + } } // Add all fused nodes to the unsynchronized lists. for (int32_t i = 0; i < ctx->num_additional_fused_ops + 1; ++i) { @@ -11882,20 +11913,20 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr } } } -#if ENABLE_SYNC_LOGGING - for (int i = 0; i < ctx->num_additional_fused_ops + 1; ++i) { - auto *n = cgraph->nodes[node_idx + i]; - std::cerr << node_idx + i << " " << ggml_op_name(n->op) << " " << n->name; - if (n->op == GGML_OP_GLU) { - std::cerr << " " << ggml_glu_op_name(ggml_get_glu_op(n)) << " " << (n->src[1] ? "split" : "single") << " "; + if (vk_enable_sync_logger) { + for (int i = 0; i < ctx->num_additional_fused_ops + 1; ++i) { + auto *n = cgraph->nodes[node_idx + i]; + std::cerr << node_idx + i << " " << ggml_op_name(n->op) << " " << n->name; + if (n->op == GGML_OP_GLU) { + std::cerr << " " << ggml_glu_op_name(ggml_get_glu_op(n)) << " " << (n->src[1] ? "split" : "single") << " "; + } + if (n->op == GGML_OP_ROPE) { + const int mode = ((const int32_t *) n->op_params)[2]; + std::cerr << " rope mode: " << mode; + } + std::cerr << std::endl; } - if (n->op == GGML_OP_ROPE) { - const int mode = ((const int32_t *) n->op_params)[2]; - std::cerr << " rope mode: " << mode; - } - std::cerr << std::endl; } -#endif switch (node->op) { case GGML_OP_REPEAT: @@ -13175,12 +13206,16 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg ctx->query_pool = ctx->device->device.createQueryPool(query_create_info); ctx->num_queries = query_create_info.queryCount; ctx->query_fusion_names.resize(ctx->num_queries); + ctx->query_fusion_node_count.resize(ctx->num_queries); ctx->query_nodes.resize(ctx->num_queries); + ctx->query_node_idx.resize(ctx->num_queries); } ctx->device->device.resetQueryPool(ctx->query_pool, 0, cgraph->n_nodes+1); std::fill(ctx->query_fusion_names.begin(), ctx->query_fusion_names.end(), nullptr); + std::fill(ctx->query_fusion_node_count.begin(), ctx->query_fusion_node_count.end(), 0); std::fill(ctx->query_nodes.begin(), ctx->query_nodes.end(), nullptr); + std::fill(ctx->query_node_idx.begin(), ctx->query_node_idx.end(), 0); GGML_ASSERT(ctx->compute_ctx.expired()); compute_ctx = ggml_vk_create_context(ctx, ctx->compute_cmd_pool); @@ -13309,9 +13344,16 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg } else { compute_ctx = ctx->compute_ctx.lock(); } - ctx->query_nodes[ctx->query_idx] = cgraph->nodes[i]; - ctx->query_fusion_names[ctx->query_idx] = fusion_string; - compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->query_pool, ctx->query_idx++); + if (!vk_perf_logger_concurrent) { + // track a single node/fusion for the current query + ctx->query_nodes[ctx->query_idx] = cgraph->nodes[i]; + ctx->query_fusion_names[ctx->query_idx] = fusion_string; + compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->query_pool, ctx->query_idx++); + } else { + // track a fusion string and number of fused ops for the current node_idx + ctx->query_fusion_names[i] = fusion_string; + ctx->query_fusion_node_count[i] = ctx->num_additional_fused_ops; + } } if (enqueued) { @@ -13353,12 +13395,32 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg // Get the results and pass them to the logger std::vector timestamps(cgraph->n_nodes + 1); VK_CHECK(ctx->device->device.getQueryPoolResults(ctx->query_pool, 0, ctx->query_idx, (cgraph->n_nodes + 1)*sizeof(uint64_t), timestamps.data(), sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait), "get timestamp results"); - for (int i = 1; i < ctx->query_idx; i++) { - auto node = ctx->query_nodes[i]; - auto name = ctx->query_fusion_names[i]; - ctx->perf_logger->log_timing(node, name, uint64_t((timestamps[i] - timestamps[i-1]) * ctx->device->properties.limits.timestampPeriod)); + if (!vk_perf_logger_concurrent) { + // Log each op separately + for (int i = 1; i < ctx->query_idx; i++) { + auto node = ctx->query_nodes[i]; + auto name = ctx->query_fusion_names[i]; + ctx->perf_logger->log_timing(node, name, uint64_t((timestamps[i] - timestamps[i-1]) * ctx->device->properties.limits.timestampPeriod)); + } + } else { + // Log each group of nodes + int prev_node_idx = 0; + for (int i = 1; i < ctx->query_idx; i++) { + auto cur_node_idx = ctx->query_node_idx[i]; + std::vector nodes; + std::vector names; + for (int node_idx = prev_node_idx; node_idx < cur_node_idx; ++node_idx) { + if (ggml_op_is_empty(cgraph->nodes[node_idx]->op)) { + continue; + } + nodes.push_back(cgraph->nodes[node_idx]); + names.push_back(ctx->query_fusion_names[node_idx]); + node_idx += ctx->query_fusion_node_count[node_idx]; + } + prev_node_idx = cur_node_idx; + ctx->perf_logger->log_timing(nodes, names, uint64_t((timestamps[i] - timestamps[i-1]) * ctx->device->properties.limits.timestampPeriod)); + } } - ctx->perf_logger->print_timings(); } diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index 966e82d00..b64549f56 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -1109,10 +1109,10 @@ bool llama_model_loader::load_all_data( } else { // If upload_backend is valid load the tensor in chunks to pinned memory and upload the buffers asynchronously to the GPU. if (upload_backend) { - auto offset = (off_t) weight->offs; + size_t offset = weight->offs; alignment = file->read_alignment(); - off_t aligned_offset = offset & ~(alignment - 1); - off_t offset_from_alignment = offset - aligned_offset; + size_t aligned_offset = offset & ~(alignment - 1); + size_t offset_from_alignment = offset - aligned_offset; file->seek(aligned_offset, SEEK_SET); // Calculate aligned read boundaries diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 9e44f0326..b5266edee 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index def57d025..9228fba9f 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1974,19 +1974,33 @@ struct server_context_impl { if (!slot.can_split()) { if (slot.task->n_tokens() > n_ubatch) { - send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER); + send_error(slot, + string_format( + "input (%d tokens) is too large to process. increase the physical batch " + "size (current batch size: %d)", + slot.task->n_tokens(), n_ubatch), + ERROR_TYPE_SERVER); slot.release(); continue; } if (slot.task->n_tokens() > slot.n_ctx) { - send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error( + slot, + string_format( + "input (%d tokens) is larger than the max context size (%d tokens). skipping", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; } } else { if (slot.task->n_tokens() >= slot.n_ctx) { - send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error(slot, + string_format("request (%d tokens) exceeds the available context size (%d " + "tokens), try increasing it", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; } diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index c1f86e549..db7ab667f 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -82,154 +82,30 @@ static std::filesystem::path get_server_exec_path() { #endif } -struct local_model { - std::string name; - std::string path; - std::string path_mmproj; -}; - -static std::vector list_local_models(const std::string & dir) { - if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) { - throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str())); - } - - std::vector models; - auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) { - auto files = fs_list(subdir_path, false); - common_file_info model_file; - common_file_info first_shard_file; - common_file_info mmproj_file; - for (const auto & file : files) { - if (string_ends_with(file.name, ".gguf")) { - if (file.name.find("mmproj") != std::string::npos) { - mmproj_file = file; - } else if (file.name.find("-00001-of-") != std::string::npos) { - first_shard_file = file; - } else { - model_file = file; - } - } - } - // single file model - local_model model{ - /* name */ name, - /* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path, - /* path_mmproj */ mmproj_file.path // can be empty - }; - if (!model.path.empty()) { - models.push_back(model); - } - }; - - auto files = fs_list(dir, true); - for (const auto & file : files) { - if (file.is_dir) { - scan_subdir(file.path, file.name); - } else if (string_ends_with(file.name, ".gguf")) { - // single file model - std::string name = file.name; - string_replace_all(name, ".gguf", ""); - local_model model{ - /* name */ name, - /* path */ file.path, - /* path_mmproj */ "" - }; - models.push_back(model); - } - } - return models; -} - -// -// server_presets -// - - -server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path) - : ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) { - if (!presets_path.empty()) { - presets = common_presets_load(presets_path, ctx_params); - SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str()); - } - - // populate reserved args (will be appended by the router) - for (auto & opt : ctx_params.options) { - if (opt.env == nullptr) { - continue; - } - std::string env = opt.env; - if (env == "LLAMA_ARG_PORT" || - env == "LLAMA_ARG_HOST" || - env == "LLAMA_ARG_ALIAS" || - env == "LLAMA_ARG_API_KEY" || - env == "LLAMA_ARG_MODELS_DIR" || - env == "LLAMA_ARG_MODELS_MAX" || - env == "LLAMA_ARG_MODELS_PRESET" || - env == "LLAMA_ARG_MODEL" || - env == "LLAMA_ARG_MMPROJ" || - env == "LLAMA_ARG_HF_REPO" || - env == "LLAMA_ARG_NO_MODELS_AUTOLOAD" || - env == "LLAMA_ARG_SSL_KEY_FILE" || - env == "LLAMA_ARG_SSL_CERT_FILE") { - control_args[env] = opt; - } - } - - // read base args from router's argv - common_params_to_map(argc, argv, LLAMA_EXAMPLE_SERVER, base_args); - - // remove any router-controlled args from base_args - for (const auto & cargs : control_args) { - auto it = base_args.find(cargs.second); - if (it != base_args.end()) { - base_args.erase(it); - } +static void unset_reserved_args(common_preset & preset, bool unset_model_args) { + preset.unset_option("LLAMA_ARG_SSL_KEY_FILE"); + preset.unset_option("LLAMA_ARG_SSL_CERT_FILE"); + preset.unset_option("LLAMA_API_KEY"); + preset.unset_option("LLAMA_ARG_MODELS_DIR"); + preset.unset_option("LLAMA_ARG_MODELS_MAX"); + preset.unset_option("LLAMA_ARG_MODELS_PRESET"); + preset.unset_option("LLAMA_ARG_MODELS_AUTOLOAD"); + if (unset_model_args) { + preset.unset_option("LLAMA_ARG_MODEL"); + preset.unset_option("LLAMA_ARG_MMPROJ"); + preset.unset_option("LLAMA_ARG_HF_REPO"); } } -common_preset server_presets::get_preset(const std::string & name) { - auto it = presets.find(name); - if (it != presets.end()) { - return it->second; - } - return common_preset(); -} - -void server_presets::render_args(server_model_meta & meta) { - common_preset preset = meta.preset; // copy - // merging 3 kinds of args: - // 1. model-specific args (from preset) - // force removing control args if any - for (auto & cargs : control_args) { - if (preset.options.find(cargs.second) != preset.options.end()) { - SRV_WRN("Preset '%s' contains reserved arg '%s', removing it\n", preset.name.c_str(), cargs.second.args[0]); - preset.options.erase(cargs.second); - } - } - // 2. base args (from router) - // inherit from base args - for (const auto & [arg, value] : base_args) { - preset.options[arg] = value; - } - // 3. control args (from router) - // set control values - preset.options[control_args["LLAMA_ARG_HOST"]] = CHILD_ADDR; - preset.options[control_args["LLAMA_ARG_PORT"]] = std::to_string(meta.port); - preset.options[control_args["LLAMA_ARG_ALIAS"]] = meta.name; - if (meta.in_cache) { - preset.options[control_args["LLAMA_ARG_HF_REPO"]] = meta.name; - } else { - preset.options[control_args["LLAMA_ARG_MODEL"]] = meta.path; - if (!meta.path_mmproj.empty()) { - preset.options[control_args["LLAMA_ARG_MMPROJ"]] = meta.path_mmproj; - } - } - // disable SSL for child processes (HTTPS already handled by router) - preset.options[control_args["LLAMA_ARG_SSL_KEY_FILE"]] = ""; - preset.options[control_args["LLAMA_ARG_SSL_CERT_FILE"]] = ""; - meta.args = preset.to_args(); - // add back the binary path at the front - meta.args.insert(meta.args.begin(), get_server_exec_path().string()); +void server_model_meta::update_args(common_preset_context & ctx_preset, std::string bin_path) { + // update params + unset_reserved_args(preset, false); + preset.set_option(ctx_preset, "LLAMA_ARG_HOST", CHILD_ADDR); + preset.set_option(ctx_preset, "LLAMA_ARG_PORT", std::to_string(port)); + preset.set_option(ctx_preset, "LLAMA_ARG_ALIAS", name); + // TODO: maybe validate preset before rendering ? + // render args + args = preset.to_args(bin_path); } // @@ -240,20 +116,22 @@ server_models::server_models( const common_params & params, int argc, char ** argv, - char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) { - for (int i = 0; i < argc; i++) { - base_args.push_back(std::string(argv[i])); - } + char ** envp) + : ctx_preset(LLAMA_EXAMPLE_SERVER), + base_params(params), + base_preset(ctx_preset.load_from_args(argc, argv)) { for (char ** env = envp; *env != nullptr; env++) { base_env.push_back(std::string(*env)); } - GGML_ASSERT(!base_args.empty()); + // clean up base preset + unset_reserved_args(base_preset, true); // set binary path try { - base_args[0] = get_server_exec_path().string(); + bin_path = get_server_exec_path().string(); } catch (const std::exception & e) { + bin_path = argv[0]; LOG_WRN("failed to get server executable path: %s\n", e.what()); - LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str()); + LOG_WRN("using original argv[0] as fallback: %s\n", argv[0]); } load_models(); } @@ -262,7 +140,7 @@ void server_models::add_model(server_model_meta && meta) { if (mapping.find(meta.name) != mapping.end()) { throw std::runtime_error(string_format("model '%s' appears multiple times", meta.name.c_str())); } - presets.render_args(meta); // populate meta.args + meta.update_args(ctx_preset, bin_path); // render args std::string name = meta.name; mapping[name] = instance_t{ /* subproc */ std::make_shared(), @@ -271,86 +149,62 @@ void server_models::add_model(server_model_meta && meta) { }; } -static std::vector list_custom_path_models(server_presets & presets) { - // detect any custom-path models in presets - std::vector custom_models; - for (auto & [model_name, preset] : presets.presets) { - local_model model; - model.name = model_name; - std::vector to_erase; - for (auto & [arg, value] : preset.options) { - std::string env(arg.env ? arg.env : ""); - if (env == "LLAMA_ARG_MODEL") { - model.path = value; - to_erase.push_back(arg); - } - if (env == "LLAMA_ARG_MMPROJ") { - model.path_mmproj = value; - to_erase.push_back(arg); - } - } - for (auto & arg : to_erase) { - preset.options.erase(arg); - } - if (!model.name.empty() && !model.path.empty()) { - custom_models.push_back(model); - } - } - return custom_models; -} - // TODO: allow refreshing cached model list void server_models::load_models() { // loading models from 3 sources: // 1. cached models - auto cached_models = common_list_cached_models(); - for (const auto & model : cached_models) { - server_model_meta meta{ - /* preset */ presets.get_preset(model.to_string()), - /* name */ model.to_string(), - /* path */ model.manifest_path, - /* path_mmproj */ "", // auto-detected when loading - /* in_cache */ true, - /* port */ 0, - /* status */ SERVER_MODEL_STATUS_UNLOADED, - /* last_used */ 0, - /* args */ std::vector(), - /* exit_code */ 0 - }; - add_model(std::move(meta)); - } - // 2. local models specificed via --models-dir + common_presets cached_models = ctx_preset.load_from_cache(); + SRV_INF("Loaded %zu cached model presets\n", cached_models.size()); + // 2. local models from --models-dir + common_presets local_models; if (!base_params.models_dir.empty()) { - auto local_models = list_local_models(base_params.models_dir); - for (const auto & model : local_models) { - if (mapping.find(model.name) != mapping.end()) { - // already exists in cached models, skip - continue; - } - server_model_meta meta{ - /* preset */ presets.get_preset(model.name), - /* name */ model.name, - /* path */ model.path, - /* path_mmproj */ model.path_mmproj, - /* in_cache */ false, - /* port */ 0, - /* status */ SERVER_MODEL_STATUS_UNLOADED, - /* last_used */ 0, - /* args */ std::vector(), - /* exit_code */ 0 - }; - add_model(std::move(meta)); + local_models = ctx_preset.load_from_models_dir(base_params.models_dir); + SRV_INF("Loaded %zu local model presets from %s\n", local_models.size(), base_params.models_dir.c_str()); + } + // 3. custom-path models from presets + common_preset global = {}; + common_presets custom_presets = {}; + if (!base_params.models_preset.empty()) { + custom_presets = ctx_preset.load_from_ini(base_params.models_preset, global); + SRV_INF("Loaded %zu custom model presets from %s\n", custom_presets.size(), base_params.models_preset.c_str()); + } + + // cascade, apply global preset first + cached_models = ctx_preset.cascade(global, cached_models); + local_models = ctx_preset.cascade(global, local_models); + custom_presets = ctx_preset.cascade(global, custom_presets); + + // note: if a model exists in both cached and local, local takes precedence + common_presets final_presets; + for (const auto & [name, preset] : cached_models) { + final_presets[name] = preset; + } + for (const auto & [name, preset] : local_models) { + final_presets[name] = preset; + } + + // process custom presets from INI + for (const auto & [name, custom] : custom_presets) { + if (final_presets.find(name) != final_presets.end()) { + // apply custom config if exists + common_preset & target = final_presets[name]; + target.merge(custom); + } else { + // otherwise add directly + final_presets[name] = custom; } } - // 3. custom-path models specified in presets - auto custom_models = list_custom_path_models(presets); - for (const auto & model : custom_models) { + + // server base preset from CLI args take highest precedence + for (auto & [name, preset] : final_presets) { + preset.merge(base_preset); + } + + // convert presets to server_model_meta and add to mapping + for (const auto & preset : final_presets) { server_model_meta meta{ - /* preset */ presets.get_preset(model.name), - /* name */ model.name, - /* path */ model.path, - /* path_mmproj */ model.path_mmproj, - /* in_cache */ false, + /* preset */ preset.second, + /* name */ preset.first, /* port */ 0, /* status */ SERVER_MODEL_STATUS_UNLOADED, /* last_used */ 0, @@ -359,10 +213,18 @@ void server_models::load_models() { }; add_model(std::move(meta)); } + // log available models - SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size()); - for (const auto & [name, inst] : mapping) { - SRV_INF(" %c %s\n", inst.meta.preset.name.empty() ? ' ' : '*', name.c_str()); + { + std::unordered_set custom_names; + for (const auto & [name, preset] : custom_presets) { + custom_names.insert(name); + } + SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size()); + for (const auto & [name, inst] : mapping) { + bool has_custom = custom_names.find(name) != custom_names.end(); + SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str()); + } } } @@ -526,7 +388,7 @@ void server_models::load(const std::string & name) { { SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port); - presets.render_args(inst.meta); // update meta.args + inst.meta.update_args(ctx_preset, bin_path); // render args std::vector child_args = inst.meta.args; // copy std::vector child_env = base_env; // copy @@ -877,7 +739,12 @@ void server_models_routes::init_routes() { {"args", meta.args}, }; if (!meta.preset.name.empty()) { - status["preset"] = meta.preset.to_ini(); + common_preset preset_copy = meta.preset; + unset_reserved_args(preset_copy, false); + preset_copy.unset_option("LLAMA_ARG_HOST"); + preset_copy.unset_option("LLAMA_ARG_PORT"); + preset_copy.unset_option("LLAMA_ARG_ALIAS"); + status["preset"] = preset_copy.to_ini(); } if (meta.is_failed()) { status["exit_code"] = meta.exit_code; @@ -888,8 +755,6 @@ void server_models_routes::init_routes() { {"object", "model"}, // for OAI-compat {"owned_by", "llamacpp"}, // for OAI-compat {"created", t}, // for OAI-compat - {"in_cache", meta.in_cache}, - {"path", meta.path}, {"status", status}, // TODO: add other fields, may require reading GGUF metadata }); diff --git a/tools/server/server-models.h b/tools/server/server-models.h index cbc4c4324..56fb398e3 100644 --- a/tools/server/server-models.h +++ b/tools/server/server-models.h @@ -51,9 +51,6 @@ static std::string server_model_status_to_string(server_model_status status) { struct server_model_meta { common_preset preset; std::string name; - std::string path; - std::string path_mmproj; // only available if in_cache=false - bool in_cache = false; // if true, use -hf; use -m otherwise int port = 0; server_model_status status = SERVER_MODEL_STATUS_UNLOADED; int64_t last_used = 0; // for LRU unloading @@ -67,19 +64,8 @@ struct server_model_meta { bool is_failed() const { return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0; } -}; -// the server_presets struct holds the presets read from presets.ini -// as well as base args from the router server -struct server_presets { - common_presets presets; - common_params_context ctx_params; - std::map base_args; - std::map control_args; // args reserved for server control - - server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir); - common_preset get_preset(const std::string & name); - void render_args(server_model_meta & meta); + void update_args(common_preset_context & ctx_presets, std::string bin_path); }; struct subprocess_s; @@ -97,11 +83,12 @@ private: std::condition_variable cv; std::map mapping; - common_params base_params; - std::vector base_args; - std::vector base_env; + common_preset_context ctx_preset; - server_presets presets; + common_params base_params; + std::string bin_path; + std::vector base_env; + common_preset base_preset; // base preset from llama-server CLI args void update_meta(const std::string & name, const server_model_meta & meta); diff --git a/tools/server/webui/docs/architecture/high-level-architecture-simplified.md b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md index 50f2e1df0..a6cb1e9c3 100644 --- a/tools/server/webui/docs/architecture/high-level-architecture-simplified.md +++ b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md @@ -11,6 +11,8 @@ flowchart TB C_Screen["ChatScreen"] C_Form["ChatForm"] C_Messages["ChatMessages"] + C_Message["ChatMessage"] + C_MessageEditForm["ChatMessageEditForm"] C_ModelsSelector["ModelsSelector"] C_Settings["ChatSettings"] end @@ -54,7 +56,9 @@ flowchart TB %% Component hierarchy C_Screen --> C_Form & C_Messages & C_Settings - C_Form & C_Messages --> C_ModelsSelector + C_Messages --> C_Message + C_Message --> C_MessageEditForm + C_Form & C_MessageEditForm --> C_ModelsSelector %% Components → Hooks → Stores C_Form & C_Messages --> H1 & H2 @@ -93,7 +97,7 @@ flowchart TB classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px class R1,R2,RL routeStyle - class C_Sidebar,C_Screen,C_Form,C_Messages,C_ModelsSelector,C_Settings componentStyle + class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message,C_MessageEditForm,C_ModelsSelector,C_Settings componentStyle class H1,H2 hookStyle class S1,S2,S3,S4,S5 storeStyle class SV1,SV2,SV3,SV4,SV5 serviceStyle diff --git a/tools/server/webui/docs/architecture/high-level-architecture.md b/tools/server/webui/docs/architecture/high-level-architecture.md index 730da10a5..c5ec4d690 100644 --- a/tools/server/webui/docs/architecture/high-level-architecture.md +++ b/tools/server/webui/docs/architecture/high-level-architecture.md @@ -16,6 +16,8 @@ end C_Form["ChatForm"] C_Messages["ChatMessages"] C_Message["ChatMessage"] + C_MessageUser["ChatMessageUser"] + C_MessageEditForm["ChatMessageEditForm"] C_Attach["ChatAttachments"] C_ModelsSelector["ModelsSelector"] C_Settings["ChatSettings"] @@ -38,7 +40,7 @@ end S1Error["Error Handling:
showErrorDialog()
dismissErrorDialog()
isAbortError()"] S1Msg["Message Operations:
addMessage()
sendMessage()
updateMessage()
deleteMessage()
getDeletionInfo()"] S1Regen["Regeneration:
regenerateMessage()
regenerateMessageWithBranching()
continueAssistantMessage()"] - S1Edit["Editing:
editAssistantMessage()
editUserMessagePreserveResponses()
editMessageWithBranching()"] + S1Edit["Editing:
editAssistantMessage()
editUserMessagePreserveResponses()
editMessageWithBranching()
clearEditMode()
isEditModeActive()
getAddFilesHandler()
setEditModeActive()"] S1Utils["Utilities:
getApiOptions()
parseTimingData()
getOrCreateAbortController()
getConversationModel()"] end subgraph S2["conversationsStore"] @@ -88,6 +90,10 @@ end RE7["getChatStreaming()"] RE8["getAllLoadingChats()"] RE9["getAllStreamingChats()"] + RE9a["isEditModeActive()"] + RE9b["getAddFilesHandler()"] + RE9c["setEditModeActive()"] + RE9d["clearEditMode()"] end subgraph ConvExports["conversationsStore"] RE10["conversations()"] @@ -182,7 +188,10 @@ end %% Component hierarchy C_Screen --> C_Form & C_Messages & C_Settings C_Messages --> C_Message - C_Message --> C_ModelsSelector + C_Message --> C_MessageUser + C_MessageUser --> C_MessageEditForm + C_MessageEditForm --> C_ModelsSelector + C_MessageEditForm --> C_Attach C_Form --> C_ModelsSelector C_Form --> C_Attach C_Message --> C_Attach @@ -190,6 +199,7 @@ end %% Components use Hooks C_Form --> H1 C_Message --> H1 & H2 + C_MessageEditForm --> H1 C_Screen --> H2 %% Hooks use Stores @@ -244,7 +254,7 @@ end classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px class R1,R2,RL routeStyle - class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message componentStyle + class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message,C_MessageUser,C_MessageEditForm componentStyle class C_ModelsSelector,C_Settings componentStyle class C_Attach componentStyle class H1,H2,H3 methodStyle diff --git a/tools/server/webui/package-lock.json b/tools/server/webui/package-lock.json index 0d1a03aca..6fa9d39c7 100644 --- a/tools/server/webui/package-lock.json +++ b/tools/server/webui/package-lock.json @@ -25,7 +25,7 @@ "@chromatic-com/storybook": "^4.1.2", "@eslint/compat": "^1.2.5", "@eslint/js": "^9.18.0", - "@internationalized/date": "^3.8.2", + "@internationalized/date": "^3.10.1", "@lucide/svelte": "^0.515.0", "@playwright/test": "^1.49.1", "@storybook/addon-a11y": "^10.0.7", @@ -862,9 +862,9 @@ } }, "node_modules/@internationalized/date": { - "version": "3.8.2", - "resolved": "https://registry.npmjs.org/@internationalized/date/-/date-3.8.2.tgz", - "integrity": "sha512-/wENk7CbvLbkUvX1tu0mwq49CVkkWpkXubGel6birjRPyo6uQ4nQpnq5xZu823zRCwwn82zgHrvgF1vZyvmVgA==", + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/@internationalized/date/-/date-3.10.1.tgz", + "integrity": "sha512-oJrXtQiAXLvT9clCf1K4kxp3eKsQhIaZqxEyowkBcsvZDdZkbWrVmnGknxs5flTD0VGsxrxKgBCZty1EzoiMzA==", "dev": true, "license": "Apache-2.0", "dependencies": { diff --git a/tools/server/webui/package.json b/tools/server/webui/package.json index 1c970ae7a..1a8c27374 100644 --- a/tools/server/webui/package.json +++ b/tools/server/webui/package.json @@ -26,7 +26,7 @@ "@chromatic-com/storybook": "^4.1.2", "@eslint/compat": "^1.2.5", "@eslint/js": "^9.18.0", - "@internationalized/date": "^3.8.2", + "@internationalized/date": "^3.10.1", "@lucide/svelte": "^0.515.0", "@playwright/test": "^1.49.1", "@storybook/addon-a11y": "^10.0.7", diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 3ad14ed3a..fd2f7f60e 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte @@ -8,6 +8,7 @@ ChatFormTextarea } from '$lib/components/app'; import { INPUT_CLASSES } from '$lib/constants/input-classes'; + import { SETTING_CONFIG_DEFAULT } from '$lib/constants/settings-config'; import { config } from '$lib/stores/settings.svelte'; import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte'; import { isRouterMode } from '$lib/stores/server.svelte'; @@ -66,7 +67,7 @@ let message = $state(''); let pasteLongTextToFileLength = $derived.by(() => { const n = Number(currentConfig.pasteLongTextToFileLen); - return Number.isNaN(n) ? 2500 : n; + return Number.isNaN(n) ? Number(SETTING_CONFIG_DEFAULT.pasteLongTextToFileLen) : n; }); let previousIsLoading = $state(isLoading); let recordingSupported = $state(false); diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte index 0969a937e..220276fc9 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte @@ -12,13 +12,21 @@ onCopy?: (message: DatabaseMessage) => void; onContinueAssistantMessage?: (message: DatabaseMessage) => void; onDelete?: (message: DatabaseMessage) => void; - onEditWithBranching?: (message: DatabaseMessage, newContent: string) => void; + onEditWithBranching?: ( + message: DatabaseMessage, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ) => void; onEditWithReplacement?: ( message: DatabaseMessage, newContent: string, shouldBranch: boolean ) => void; - onEditUserMessagePreserveResponses?: (message: DatabaseMessage, newContent: string) => void; + onEditUserMessagePreserveResponses?: ( + message: DatabaseMessage, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ) => void; onNavigateToSibling?: (siblingId: string) => void; onRegenerateWithBranching?: (message: DatabaseMessage, modelOverride?: string) => void; siblingInfo?: ChatMessageSiblingInfo | null; @@ -45,6 +53,8 @@ messageTypes: string[]; } | null>(null); let editedContent = $state(message.content); + let editedExtras = $state(message.extra ? [...message.extra] : []); + let editedUploadedFiles = $state([]); let isEditing = $state(false); let showDeleteDialog = $state(false); let shouldBranchAfterEdit = $state(false); @@ -85,6 +95,16 @@ function handleCancelEdit() { isEditing = false; editedContent = message.content; + editedExtras = message.extra ? [...message.extra] : []; + editedUploadedFiles = []; + } + + function handleEditedExtrasChange(extras: DatabaseMessageExtra[]) { + editedExtras = extras; + } + + function handleEditedUploadedFilesChange(files: ChatUploadedFile[]) { + editedUploadedFiles = files; } async function handleCopy() { @@ -107,6 +127,8 @@ function handleEdit() { isEditing = true; editedContent = message.content; + editedExtras = message.extra ? [...message.extra] : []; + editedUploadedFiles = []; setTimeout(() => { if (textareaElement) { @@ -143,9 +165,10 @@ onContinueAssistantMessage?.(message); } - function handleSaveEdit() { + async function handleSaveEdit() { if (message.role === 'user' || message.role === 'system') { - onEditWithBranching?.(message, editedContent.trim()); + const finalExtras = await getMergedExtras(); + onEditWithBranching?.(message, editedContent.trim(), finalExtras); } else { // For assistant messages, preserve exact content including trailing whitespace // This is important for the Continue feature to work properly @@ -154,15 +177,30 @@ isEditing = false; shouldBranchAfterEdit = false; + editedUploadedFiles = []; } - function handleSaveEditOnly() { + async function handleSaveEditOnly() { if (message.role === 'user') { // For user messages, trim to avoid accidental whitespace - onEditUserMessagePreserveResponses?.(message, editedContent.trim()); + const finalExtras = await getMergedExtras(); + onEditUserMessagePreserveResponses?.(message, editedContent.trim(), finalExtras); } isEditing = false; + editedUploadedFiles = []; + } + + async function getMergedExtras(): Promise { + if (editedUploadedFiles.length === 0) { + return editedExtras; + } + + const { parseFilesToMessageExtras } = await import('$lib/utils/browser-only'); + const result = await parseFilesToMessageExtras(editedUploadedFiles); + const newExtras = result?.extras || []; + + return [...editedExtras, ...newExtras]; } function handleShowDeleteDialogChange(show: boolean) { @@ -197,6 +235,8 @@ class={className} {deletionInfo} {editedContent} + {editedExtras} + {editedUploadedFiles} {isEditing} {message} onCancelEdit={handleCancelEdit} @@ -206,6 +246,8 @@ onEdit={handleEdit} onEditKeydown={handleEditKeydown} onEditedContentChange={handleEditedContentChange} + onEditedExtrasChange={handleEditedExtrasChange} + onEditedUploadedFilesChange={handleEditedUploadedFilesChange} {onNavigateToSibling} onSaveEdit={handleSaveEdit} onSaveEditOnly={handleSaveEditOnly} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte new file mode 100644 index 000000000..f812ea2fd --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageEditForm.svelte @@ -0,0 +1,391 @@ + + + + + + +
+ { + if (fileId.startsWith('attachment-')) { + const index = parseInt(fileId.replace('attachment-', ''), 10); + if (!isNaN(index) && index >= 0 && index < editedExtras.length) { + handleRemoveExistingAttachment(index); + } + } else { + handleRemoveUploadedFile(fileId); + } + }} + limitToSingleRow + class="py-5" + style="scroll-padding: 1rem;" + /> + +
+ + +
+ + +
+ + {#if isRouter} + + {/if} + + +
+
+
+ +
+ {#if showSaveOnlyOption && onSaveEditOnly} +
+ + + +
+ {:else} +
+ {/if} + + +
+ + (showDiscardDialog = false)} +/> diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte index 3d2b8dd35..041c6bd25 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte @@ -1,18 +1,17 @@ + + + + diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index e0431ee64..010889452 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -74,6 +74,8 @@ class ChatStore { private processingStates = new SvelteMap(); private activeConversationId = $state(null); private isStreamingActive = $state(false); + private isEditModeActive = $state(false); + private addFilesHandler: ((files: File[]) => void) | null = $state(null); // ───────────────────────────────────────────────────────────────────────────── // Loading State @@ -965,230 +967,9 @@ class ChatStore { // Editing // ───────────────────────────────────────────────────────────────────────────── - async editAssistantMessage( - messageId: string, - newContent: string, - shouldBranch: boolean - ): Promise { - const activeConv = conversationsStore.activeConversation; - if (!activeConv || this.isLoading) return; - - const result = this.getMessageByIdWithRole(messageId, 'assistant'); - if (!result) return; - const { message: msg, index: idx } = result; - - try { - if (shouldBranch) { - const newMessage = await DatabaseService.createMessageBranch( - { - convId: msg.convId, - type: msg.type, - timestamp: Date.now(), - role: msg.role, - content: newContent, - thinking: msg.thinking || '', - toolCalls: msg.toolCalls || '', - children: [], - model: msg.model - }, - msg.parent! - ); - await conversationsStore.updateCurrentNode(newMessage.id); - } else { - await DatabaseService.updateMessage(msg.id, { content: newContent, timestamp: Date.now() }); - await conversationsStore.updateCurrentNode(msg.id); - conversationsStore.updateMessageAtIndex(idx, { - content: newContent, - timestamp: Date.now() - }); - } - conversationsStore.updateConversationTimestamp(); - await conversationsStore.refreshActiveMessages(); - } catch (error) { - console.error('Failed to edit assistant message:', error); - } - } - - async editUserMessagePreserveResponses(messageId: string, newContent: string): Promise { - const activeConv = conversationsStore.activeConversation; - if (!activeConv) return; - - const result = this.getMessageByIdWithRole(messageId, 'user'); - if (!result) return; - const { message: msg, index: idx } = result; - - try { - await DatabaseService.updateMessage(messageId, { - content: newContent, - timestamp: Date.now() - }); - conversationsStore.updateMessageAtIndex(idx, { content: newContent, timestamp: Date.now() }); - - const allMessages = await conversationsStore.getConversationMessages(activeConv.id); - const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - - if (rootMessage && msg.parent === rootMessage.id && newContent.trim()) { - await conversationsStore.updateConversationTitleWithConfirmation( - activeConv.id, - newContent.trim(), - conversationsStore.titleUpdateConfirmationCallback - ); - } - conversationsStore.updateConversationTimestamp(); - } catch (error) { - console.error('Failed to edit user message:', error); - } - } - - async editMessageWithBranching(messageId: string, newContent: string): Promise { - const activeConv = conversationsStore.activeConversation; - if (!activeConv || this.isLoading) return; - - let result = this.getMessageByIdWithRole(messageId, 'user'); - - if (!result) { - result = this.getMessageByIdWithRole(messageId, 'system'); - } - - if (!result) return; - const { message: msg } = result; - - try { - const allMessages = await conversationsStore.getConversationMessages(activeConv.id); - const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - const isFirstUserMessage = - msg.role === 'user' && rootMessage && msg.parent === rootMessage.id; - - const parentId = msg.parent || rootMessage?.id; - if (!parentId) return; - - const newMessage = await DatabaseService.createMessageBranch( - { - convId: msg.convId, - type: msg.type, - timestamp: Date.now(), - role: msg.role, - content: newContent, - thinking: msg.thinking || '', - toolCalls: msg.toolCalls || '', - children: [], - extra: msg.extra ? JSON.parse(JSON.stringify(msg.extra)) : undefined, - model: msg.model - }, - parentId - ); - await conversationsStore.updateCurrentNode(newMessage.id); - conversationsStore.updateConversationTimestamp(); - - if (isFirstUserMessage && newContent.trim()) { - await conversationsStore.updateConversationTitleWithConfirmation( - activeConv.id, - newContent.trim(), - conversationsStore.titleUpdateConfirmationCallback - ); - } - await conversationsStore.refreshActiveMessages(); - - if (msg.role === 'user') { - await this.generateResponseForMessage(newMessage.id); - } - } catch (error) { - console.error('Failed to edit message with branching:', error); - } - } - - async regenerateMessageWithBranching(messageId: string, modelOverride?: string): Promise { - const activeConv = conversationsStore.activeConversation; - if (!activeConv || this.isLoading) return; - try { - const idx = conversationsStore.findMessageIndex(messageId); - if (idx === -1) return; - const msg = conversationsStore.activeMessages[idx]; - if (msg.role !== 'assistant') return; - - const allMessages = await conversationsStore.getConversationMessages(activeConv.id); - const parentMessage = allMessages.find((m) => m.id === msg.parent); - if (!parentMessage) return; - - this.setChatLoading(activeConv.id, true); - this.clearChatStreaming(activeConv.id); - - const newAssistantMessage = await DatabaseService.createMessageBranch( - { - convId: activeConv.id, - type: 'text', - timestamp: Date.now(), - role: 'assistant', - content: '', - thinking: '', - toolCalls: '', - children: [], - model: null - }, - parentMessage.id - ); - await conversationsStore.updateCurrentNode(newAssistantMessage.id); - conversationsStore.updateConversationTimestamp(); - await conversationsStore.refreshActiveMessages(); - - const conversationPath = filterByLeafNodeId( - allMessages, - parentMessage.id, - false - ) as DatabaseMessage[]; - // Use modelOverride if provided, otherwise use the original message's model - // If neither is available, don't pass model (will use global selection) - const modelToUse = modelOverride || msg.model || undefined; - await this.streamChatCompletion( - conversationPath, - newAssistantMessage, - undefined, - undefined, - modelToUse - ); - } catch (error) { - if (!this.isAbortError(error)) - console.error('Failed to regenerate message with branching:', error); - this.setChatLoading(activeConv?.id || '', false); - } - } - - private async generateResponseForMessage(userMessageId: string): Promise { - const activeConv = conversationsStore.activeConversation; - - if (!activeConv) return; - - this.errorDialogState = null; - this.setChatLoading(activeConv.id, true); - this.clearChatStreaming(activeConv.id); - - try { - const allMessages = await conversationsStore.getConversationMessages(activeConv.id); - const conversationPath = filterByLeafNodeId( - allMessages, - userMessageId, - false - ) as DatabaseMessage[]; - const assistantMessage = await DatabaseService.createMessageBranch( - { - convId: activeConv.id, - type: 'text', - timestamp: Date.now(), - role: 'assistant', - content: '', - thinking: '', - toolCalls: '', - children: [], - model: null - }, - userMessageId - ); - conversationsStore.addMessageToActive(assistantMessage); - await this.streamChatCompletion(conversationPath, assistantMessage); - } catch (error) { - console.error('Failed to generate response:', error); - this.setChatLoading(activeConv.id, false); - } + clearEditMode(): void { + this.isEditModeActive = false; + this.addFilesHandler = null; } async continueAssistantMessage(messageId: string): Promise { @@ -1340,19 +1121,284 @@ class ChatStore { } } - public isChatLoadingPublic(convId: string): boolean { - return this.isChatLoading(convId); + async editAssistantMessage( + messageId: string, + newContent: string, + shouldBranch: boolean + ): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; + + const result = this.getMessageByIdWithRole(messageId, 'assistant'); + if (!result) return; + const { message: msg, index: idx } = result; + + try { + if (shouldBranch) { + const newMessage = await DatabaseService.createMessageBranch( + { + convId: msg.convId, + type: msg.type, + timestamp: Date.now(), + role: msg.role, + content: newContent, + thinking: msg.thinking || '', + toolCalls: msg.toolCalls || '', + children: [], + model: msg.model + }, + msg.parent! + ); + await conversationsStore.updateCurrentNode(newMessage.id); + } else { + await DatabaseService.updateMessage(msg.id, { content: newContent }); + await conversationsStore.updateCurrentNode(msg.id); + conversationsStore.updateMessageAtIndex(idx, { + content: newContent + }); + } + conversationsStore.updateConversationTimestamp(); + await conversationsStore.refreshActiveMessages(); + } catch (error) { + console.error('Failed to edit assistant message:', error); + } } + + async editUserMessagePreserveResponses( + messageId: string, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; + + const result = this.getMessageByIdWithRole(messageId, 'user'); + if (!result) return; + const { message: msg, index: idx } = result; + + try { + const updateData: Partial = { + content: newContent + }; + + // Update extras if provided (including empty array to clear attachments) + // Deep clone to avoid Proxy objects from Svelte reactivity + if (newExtras !== undefined) { + updateData.extra = JSON.parse(JSON.stringify(newExtras)); + } + + await DatabaseService.updateMessage(messageId, updateData); + conversationsStore.updateMessageAtIndex(idx, updateData); + + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + + if (rootMessage && msg.parent === rootMessage.id && newContent.trim()) { + await conversationsStore.updateConversationTitleWithConfirmation( + activeConv.id, + newContent.trim(), + conversationsStore.titleUpdateConfirmationCallback + ); + } + conversationsStore.updateConversationTimestamp(); + } catch (error) { + console.error('Failed to edit user message:', error); + } + } + + async editMessageWithBranching( + messageId: string, + newContent: string, + newExtras?: DatabaseMessageExtra[] + ): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; + + let result = this.getMessageByIdWithRole(messageId, 'user'); + + if (!result) { + result = this.getMessageByIdWithRole(messageId, 'system'); + } + + if (!result) return; + const { message: msg } = result; + + try { + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + const isFirstUserMessage = + msg.role === 'user' && rootMessage && msg.parent === rootMessage.id; + + const parentId = msg.parent || rootMessage?.id; + if (!parentId) return; + + // Use newExtras if provided, otherwise copy existing extras + // Deep clone to avoid Proxy objects from Svelte reactivity + const extrasToUse = + newExtras !== undefined + ? JSON.parse(JSON.stringify(newExtras)) + : msg.extra + ? JSON.parse(JSON.stringify(msg.extra)) + : undefined; + + const newMessage = await DatabaseService.createMessageBranch( + { + convId: msg.convId, + type: msg.type, + timestamp: Date.now(), + role: msg.role, + content: newContent, + thinking: msg.thinking || '', + toolCalls: msg.toolCalls || '', + children: [], + extra: extrasToUse, + model: msg.model + }, + parentId + ); + await conversationsStore.updateCurrentNode(newMessage.id); + conversationsStore.updateConversationTimestamp(); + + if (isFirstUserMessage && newContent.trim()) { + await conversationsStore.updateConversationTitleWithConfirmation( + activeConv.id, + newContent.trim(), + conversationsStore.titleUpdateConfirmationCallback + ); + } + await conversationsStore.refreshActiveMessages(); + + if (msg.role === 'user') { + await this.generateResponseForMessage(newMessage.id); + } + } catch (error) { + console.error('Failed to edit message with branching:', error); + } + } + + async regenerateMessageWithBranching(messageId: string, modelOverride?: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; + try { + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'assistant') return; + + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const parentMessage = allMessages.find((m) => m.id === msg.parent); + if (!parentMessage) return; + + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); + + const newAssistantMessage = await DatabaseService.createMessageBranch( + { + convId: activeConv.id, + type: 'text', + timestamp: Date.now(), + role: 'assistant', + content: '', + thinking: '', + toolCalls: '', + children: [], + model: null + }, + parentMessage.id + ); + await conversationsStore.updateCurrentNode(newAssistantMessage.id); + conversationsStore.updateConversationTimestamp(); + await conversationsStore.refreshActiveMessages(); + + const conversationPath = filterByLeafNodeId( + allMessages, + parentMessage.id, + false + ) as DatabaseMessage[]; + // Use modelOverride if provided, otherwise use the original message's model + // If neither is available, don't pass model (will use global selection) + const modelToUse = modelOverride || msg.model || undefined; + await this.streamChatCompletion( + conversationPath, + newAssistantMessage, + undefined, + undefined, + modelToUse + ); + } catch (error) { + if (!this.isAbortError(error)) + console.error('Failed to regenerate message with branching:', error); + this.setChatLoading(activeConv?.id || '', false); + } + } + + private async generateResponseForMessage(userMessageId: string): Promise { + const activeConv = conversationsStore.activeConversation; + + if (!activeConv) return; + + this.errorDialogState = null; + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); + + try { + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const conversationPath = filterByLeafNodeId( + allMessages, + userMessageId, + false + ) as DatabaseMessage[]; + const assistantMessage = await DatabaseService.createMessageBranch( + { + convId: activeConv.id, + type: 'text', + timestamp: Date.now(), + role: 'assistant', + content: '', + thinking: '', + toolCalls: '', + children: [], + model: null + }, + userMessageId + ); + conversationsStore.addMessageToActive(assistantMessage); + await this.streamChatCompletion(conversationPath, assistantMessage); + } catch (error) { + console.error('Failed to generate response:', error); + this.setChatLoading(activeConv.id, false); + } + } + + getAddFilesHandler(): ((files: File[]) => void) | null { + return this.addFilesHandler; + } + + public getAllLoadingChats(): string[] { + return Array.from(this.chatLoadingStates.keys()); + } + + public getAllStreamingChats(): string[] { + return Array.from(this.chatStreamingStates.keys()); + } + public getChatStreamingPublic( convId: string ): { response: string; messageId: string } | undefined { return this.getChatStreaming(convId); } - public getAllLoadingChats(): string[] { - return Array.from(this.chatLoadingStates.keys()); + + public isChatLoadingPublic(convId: string): boolean { + return this.isChatLoading(convId); } - public getAllStreamingChats(): string[] { - return Array.from(this.chatStreamingStates.keys()); + + isEditing(): boolean { + return this.isEditModeActive; + } + + setEditModeActive(handler: (files: File[]) => void): void { + this.isEditModeActive = true; + this.addFilesHandler = handler; } // ───────────────────────────────────────────────────────────────────────────── @@ -1416,13 +1462,17 @@ class ChatStore { export const chatStore = new ChatStore(); -export const isLoading = () => chatStore.isLoading; +export const activeProcessingState = () => chatStore.activeProcessingState; +export const clearEditMode = () => chatStore.clearEditMode(); export const currentResponse = () => chatStore.currentResponse; export const errorDialog = () => chatStore.errorDialogState; -export const activeProcessingState = () => chatStore.activeProcessingState; -export const isChatStreaming = () => chatStore.isStreaming(); - -export const isChatLoading = (convId: string) => chatStore.isChatLoadingPublic(convId); -export const getChatStreaming = (convId: string) => chatStore.getChatStreamingPublic(convId); +export const getAddFilesHandler = () => chatStore.getAddFilesHandler(); export const getAllLoadingChats = () => chatStore.getAllLoadingChats(); export const getAllStreamingChats = () => chatStore.getAllStreamingChats(); +export const getChatStreaming = (convId: string) => chatStore.getChatStreamingPublic(convId); +export const isChatLoading = (convId: string) => chatStore.isChatLoadingPublic(convId); +export const isChatStreaming = () => chatStore.isStreaming(); +export const isEditing = () => chatStore.isEditing(); +export const isLoading = () => chatStore.isLoading; +export const setEditModeActive = (handler: (files: File[]) => void) => + chatStore.setEditModeActive(handler);