diff --git a/common/arg.cpp b/common/arg.cpp index 4d01e7c4a..d2578ff65 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2402,7 +2402,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.fit_params = false; } else { throw std::runtime_error( - string_format("error: unkown value for --fit: '%s'\n", value.c_str())); + string_format("error: unknown value for --fit: '%s'\n", value.c_str())); } } ).set_env("LLAMA_ARG_FIT")); diff --git a/common/common.h b/common/common.h index a136d9391..74aa2c326 100644 --- a/common/common.h +++ b/common/common.h @@ -866,7 +866,7 @@ std::string common_detokenize( // Embedding utils // -// TODO: repace embd_norm with an enum +// TODO: replace embd_norm with an enum void common_embd_normalize(const float * inp, float * out, int n, int embd_norm); float common_embd_similarity_cos(const float * embd1, const float * embd2, int n); diff --git a/common/console.cpp b/common/console.cpp index 2ea178f81..a770416ab 100644 --- a/common/console.cpp +++ b/common/console.cpp @@ -80,6 +80,8 @@ namespace console { static termios initial_state; #endif + static completion_callback completion_cb = nullptr; + // // Init and cleanup // @@ -493,7 +495,7 @@ namespace console { } static void set_line_contents(std::string new_line, std::string & line, std::vector & widths, size_t & char_pos, - size_t & byte_pos) { + size_t & byte_pos, int cursor_byte_pos = -1) { move_to_line_start(char_pos, byte_pos, widths); clear_current_line(widths); @@ -503,6 +505,7 @@ namespace console { char_pos = 0; size_t idx = 0; + int back_width = 0; while (idx < line.size()) { size_t advance = 0; char32_t cp = decode_utf8(line, idx, advance); @@ -511,8 +514,15 @@ namespace console { if (real_width < 0) real_width = 0; widths.push_back(real_width); idx += advance; - ++char_pos; - byte_pos = idx; + if (cursor_byte_pos >= 0 && static_cast(cursor_byte_pos) < idx) { + back_width += real_width; + } else { + ++char_pos; + byte_pos = idx; + } + } + if (cursor_byte_pos >= 0) { + move_cursor(-back_width); } } @@ -784,6 +794,20 @@ namespace console { break; } + if (completion_cb && input_char == '\t') { + auto candidates = completion_cb(line, byte_pos); + + if (!candidates.empty()) { + if (candidates.size() > 1 || candidates[0].first != line) { + // TODO?: Display all candidates + set_line_contents(candidates[0].first, line, widths, char_pos, byte_pos, candidates[0].second); + } else { + // TODO: Move cursor to new byte_pos + } + continue; + } + } + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D */) { end_of_stream = true; break; @@ -1062,6 +1086,10 @@ namespace console { return readline_advanced(line, multiline_input); } + void set_completion_callback(completion_callback cb) { + completion_cb = cb; + } + namespace spinner { static const char LOADING_CHARS[] = {'|', '/', '-', '\\'}; static std::condition_variable cv_stop; diff --git a/common/console.h b/common/console.h index fad6d3953..72781bea6 100644 --- a/common/console.h +++ b/common/console.h @@ -4,7 +4,9 @@ #include "common.h" +#include #include +#include enum display_type { DISPLAY_TYPE_RESET = 0, @@ -21,6 +23,9 @@ namespace console { void set_display(display_type display); bool readline(std::string & line, bool multiline_input); + using completion_callback = std::function>(std::string_view, size_t)>; + void set_completion_callback(completion_callback cb); + namespace spinner { void start(); void stop(); diff --git a/common/debug.h b/common/debug.h index 0c5596325..e563b40d6 100644 --- 
a/common/debug.h +++ b/common/debug.h @@ -18,7 +18,7 @@ template void common_debug_print_tensor(uint8_t * data, ggml // prints tensors that are processed in the computation graph // by default prints all tensors, but can be configured by creating a `base_callback_data` instance with // non-empty filter_patterns. See examples/debug.ccp for possible usage patterns -// The template parameter determins whether an error should be thrown whenever a NaN is encountered +// The template parameter determines whether an error should be thrown whenever a NaN is encountered // in a tensor (useful for stopping debug sessions on first erroneous tensor) // The callback data will be passed as the third parameter (user_data) template bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data); diff --git a/common/jinja/README.md b/common/jinja/README.md index 7059105ee..829124076 100644 --- a/common/jinja/README.md +++ b/common/jinja/README.md @@ -63,7 +63,7 @@ The llama.cpp Jinja engine introduces `jinja::string` (see `jinja/string.h`), wh - **One-to-many** (e.g., split): result is marked `is_input` **only if ALL** input parts are marked `is_input` - **Many-to-one** (e.g., join): same as one-to-many -For string concatenation, string parts will be appended to the new string as-is, while perserving the `is_input` flag. +For string concatenation, string parts will be appended to the new string as-is, while preserving the `is_input` flag. **Enabling Input Marking:** diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 095441739..083b5bca9 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -4031,7 +4031,7 @@ class Qwen2VLVisionModel(MmprojModel): # split Conv3D into Conv2Ds c1, c2, kt, kh, kw = data_torch.shape del c1, c2, kh, kw # unused - assert kt == 2, "Current implmentation only support temporal_patch_size of 2" + assert kt == 2, "Current implementation only support temporal_patch_size of 2" yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight" , data_torch[:, :, 0, ...]) yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...]) else: @@ -4842,12 +4842,12 @@ class _LinearAttentionVReorderBase(Qwen3NextModel): yield from super().modify_tensors(data_torch, name, bid) -@ModelBase.register("Qwen3_5ForConditionalGeneration") +@ModelBase.register("Qwen3_5ForConditionalGeneration", "Qwen3_5ForCausalLM") class Qwen3_5TextModel(_LinearAttentionVReorderBase): model_arch = gguf.MODEL_ARCH.QWEN35 -@ModelBase.register("Qwen3_5MoeForConditionalGeneration") +@ModelBase.register("Qwen3_5MoeForConditionalGeneration", "Qwen3_5MoeForCausalLM") class Qwen3_5MoeTextModel(_LinearAttentionVReorderBase): model_arch = gguf.MODEL_ARCH.QWEN35MOE @@ -5404,7 +5404,7 @@ class KimiLinearModel(TextModel): # Get ssm_d_conv from linear_attn_config.short_conv_kernel_size or ssm_d_conv linear_attn_config = self.hparams["linear_attn_config"] # n_head == 0 for KDA layers, n_head > 0 for MLA layers - # full_attention_layers list will be used to distingush layer type + # full_attention_layers list will be used to distinguish layer type _num_kv_heads = list() _full_attn_layers = linear_attn_config["full_attn_layers"] for il in range(self.hparams["num_hidden_layers"]): @@ -6505,7 +6505,7 @@ class Gemma3VisionModel(MmprojModel): super().set_gguf_parameters() hparams = self.hparams self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.GEMMA3) - # default values below are taken from HF tranformers code + # default values below are 
taken from HF transformers code self.gguf_writer.add_vision_attention_layernorm_eps(hparams.get("layer_norm_eps", 1e-6)) self.gguf_writer.add_vision_use_gelu(True) # calculate proj_scale_factor (used by tinygemma3 test model) @@ -7097,7 +7097,7 @@ class Rwkv7Model(TextModel): if bid == 0 and "time_mix_a" in new_name: # dummy v0/v1/v2 on first layer - # easist way to make llama happy + # easiest way to make llama happy yield (new_name.replace("time_mix_a", "time_mix_v"), data_torch) yield (new_name, data_torch) @@ -9596,7 +9596,7 @@ class GraniteHybridModel(Mamba2Model, GraniteMoeModel): # NOTE: Explicitly include hparam prefix prefix for d_model to # disambiguate with top-level head_dim # NOTE 2: If needed for future models, this can be isolated in a method - # to separate the prefix setting and teh keys used + # to separate the prefix setting and the keys used self.d_model = self.find_hparam([f"{self.hparam_prefixes[0]}_head_dim", "hidden_size", "d_model"]) self.n_group = self.find_hparam(["n_groups", "num_groups"]) self.d_inner = self.find_hparam(["expand", "num_heads"]) * self.d_model @@ -9743,7 +9743,7 @@ class NemotronHModel(GraniteHybridModel): self.gguf_writer.add_value_length(self.head_dim) # Set feed_forward_length - # NOTE: This will trigger an override warning. This is preferrable to + # NOTE: This will trigger an override warning. This is preferable to # duplicating all the parent logic if not self.is_moe: n_ff = self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"]) diff --git a/examples/diffusion/README.md b/examples/diffusion/README.md index f71d24131..b39420021 100644 --- a/examples/diffusion/README.md +++ b/examples/diffusion/README.md @@ -43,12 +43,12 @@ Choose one of the following scheduling methods: - `-b`: Batch size ### Examples -#### Dream architechture: +#### Dream architecture: ``` llama-diffusion-cli -m dream7b.gguf -p "write code to train MNIST in pytorch" -ub 512 --diffusion-eps 0.001 --diffusion-algorithm 3 --diffusion-steps 256 --diffusion-visual ``` -#### LLaDA architechture: +#### LLaDA architecture: ``` llama-diffusion-cli -m llada-8b.gguf -p "write code to train MNIST in pytorch" -ub 512 --diffusion-block-length 32 --diffusion-steps 256 --diffusion-visual ``` diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index a9d177864..9fd3f7f32 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -259,7 +259,7 @@ extern "C" { Example usage: // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned - // preferrably to run on the same backend as the buffer + // preferably to run on the same backend as the buffer ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false, true); diff --git a/ggml/include/ggml-opt.h b/ggml/include/ggml-opt.h index 4703a05af..1c2ed79b7 100644 --- a/ggml/include/ggml-opt.h +++ b/ggml/include/ggml-opt.h @@ -138,7 +138,7 @@ extern "C" { GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params); GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx); - // set gradients to zero, initilize loss, and optionally reset the optimizer + // set gradients to zero, initialize loss, and optionally reset the optimizer GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer); GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically diff 
--git a/ggml/include/ggml.h b/ggml/include/ggml.h index 0ad789ac8..e37944c4e 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -2593,7 +2593,7 @@ extern "C" { struct ggml_tensor * grad, struct ggml_tensor * sgd_params); // alpha, weight decay - // build forward mutiple tensors and select one of them for computing + // build forward multiple tensors and select one of them for computing // this is useful for creating graphs that have constant topology but compute different things based on the input // ref: https://github.com/ggml-org/llama.cpp/pull/18550 // diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index cc9196899..b3ab9ebb8 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -1462,6 +1462,10 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s int split_backend_id = split->backend_id; ggml_backend_t split_backend = sched->backends[split_backend_id]; + if (sched->events[split_backend_id][sched->cur_copy] == NULL) { + ggml_backend_synchronize(split_backend); + } + // copy the input tensors to the split backend for (int input_id = 0; input_id < split->n_inputs; input_id++) { ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]); @@ -1472,16 +1476,12 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]); - } else { - ggml_backend_synchronize(split_backend); } - ggml_backend_tensor_copy(input, input_cpy); + ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy); } else { // wait for the split backend to finish using the input before overwriting it if (sched->events[split_backend_id][sched->cur_copy] != NULL) { ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]); - } else { - ggml_backend_synchronize(split_backend); } // when offloading MoE weights, we can reduce the amount of data copied by copying only the experts that are used @@ -1585,6 +1585,10 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s } } + if (sched->events[split_backend_id][sched->cur_copy] == NULL) { + ggml_backend_synchronize(split_backend); + } + if (!sched->callback_eval) { enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph); if (ec != GGML_STATUS_SUCCESS) { diff --git a/ggml/src/ggml-cpu/arch/arm/quants.c b/ggml/src/ggml-cpu/arch/arm/quants.c index b390ab61c..a707d6398 100644 --- a/ggml/src/ggml-cpu/arch/arm/quants.c +++ b/ggml/src/ggml-cpu/arch/arm/quants.c @@ -968,7 +968,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const int vector_length = ggml_cpu_get_sve_cnt()*8; - //VLA Implemenation for SVE + //VLA Implementation for SVE switch (vector_length) { case 128: { diff --git a/ggml/src/ggml-cpu/arch/arm/repack.cpp b/ggml/src/ggml-cpu/arch/arm/repack.cpp index c2e4623f3..fea659ee5 100644 --- a/ggml/src/ggml-cpu/arch/arm/repack.cpp +++ b/ggml/src/ggml-cpu/arch/arm/repack.cpp @@ -706,7 +706,7 @@ void ggml_gemv_q4_K_8x8_q8_K(int n, const uint8_t * q4_base = q4_ptr[b].qs + sb * QK_K; - // Load the 64 quants from q8K duplicated to use vecdots with the interelaved columns + // Load the 64 quants from q8K duplicated to use vecdots with the 
interleaved columns // but still need the qs to use the low and hi bits from q4 const int8_t * q8_base = q8_ptr[b].qs + sb * 64; int8x16_t q8_qs[8]; @@ -3640,7 +3640,7 @@ void ggml_gemm_q4_K_8x8_q8_K(int n, for (int b = 0; b < nb; b++) { // bsums pairs belongs to the same q8_k subblock - // 64 elemnts loaded and made sum of 0-7 and 8-15 sum || 16-23 and 24 - 31 sum + // 64 elements loaded and made sum of 0-7 and 8-15 sum || 16-23 and 24 - 31 sum const int16x8_t bsums[4]{ vpaddq_s16(vld1q_s16(q8_ptr[b].bsums + 16 * 0), vld1q_s16(q8_ptr[b].bsums + 16 * 0 + 8)), vpaddq_s16(vld1q_s16(q8_ptr[b].bsums + 16 * 1), vld1q_s16(q8_ptr[b].bsums + 16 * 1 + 8)), diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index 6147ea36f..2b5a352da 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -3305,7 +3305,7 @@ static bool ggml_thread_apply_priority(int32_t prio) { #ifndef USE_FAILSAFE if (prio != GGML_SCHED_PRIO_LOW) { // Tell Windows that this thread should not be throttled (needs its own CPU core). - // Newer Windows 11 versions aggresively park (offline) CPU cores and often place + // Newer Windows 11 versions aggressively park (offline) CPU cores and often place // all our threads onto the first 4 cores which results in terrible performance with // n_threads > 4 #if _WIN32_WINNT >= 0x602 diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index da412fd00..5fd452a03 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -533,7 +533,7 @@ class tinyBLAS { if constexpr (RN > 1) { return mnpack(m, n, SIZE_N, BN); } else { - GGML_LOG_ERROR("mnpack<%d, %d> bloc size not supported\n", RM, (int)SIZE_N); + GGML_LOG_ERROR("mnpack<%d, %d> block size not supported\n", RM, (int)SIZE_N); GGML_ASSERT(false); // we have miss something. } } @@ -711,7 +711,7 @@ class tinyBLAS_RVV { if constexpr (RN > 1) { return mnpack(m, n, SIZE_N, BN); } else { - GGML_LOG_ERROR("mnpack<%d, %d> bloc size not supported\n", RM, (int)SIZE_N); + GGML_LOG_ERROR("mnpack<%d, %d> block size not supported\n", RM, (int)SIZE_N); GGML_ASSERT(false); // we have miss something. } } diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index b7a70e06f..ca1b3059b 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -375,7 +375,7 @@ static void ggml_compute_forward_dup_bytes( const size_t rs = ne00 * type_size; if (nb00 == type_size) { - // src0 is contigous on first dimension, copy by rows + // src0 is contiguous on first dimension, copy by rows for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { id += rs * ir0; @@ -1795,7 +1795,7 @@ void ggml_compute_forward_repeat( { ggml_compute_forward_repeat_f32(params, dst); } break; - // TODO: templateify the implemenation and support for I64 + // TODO: templateify the implementation and support for I64 // ref https://github.com/ggml-org/llama.cpp/pull/14274#discussion_r2169492225 //case GGML_TYPE_I64: // { diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 303ff8e8e..600df25e4 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -2605,7 +2605,7 @@ template src[1])); - size = GGML_PAD(size, sizeof(int64_t)); // + padding for next bloc. + size = GGML_PAD(size, sizeof(int64_t)); // + padding for next block. 
const int64_t ne02 = op->src[0]->ne[2]; // n_as, n_expert const int64_t ne12 = op->src[1]->ne[2]; // n_tokens @@ -2870,7 +2870,7 @@ template wdata; auto * wdata_src1_end = (char *)wdata + GGML_PAD(nbw3, sizeof(int64_t)); - // total of [n_as][ne12 + 1] elemets of type mmid_row_mapping (2*int32_t = int64_t) + // total of [n_as][ne12 + 1] elements of type mmid_row_mapping (2*int32_t = int64_t) auto * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as] struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *) (matrix_row_counts + n_as); // [n_as][ne12] diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh index beb7e32e4..fff70c8eb 100644 --- a/ggml/src/ggml-cuda/fattn-mma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -1215,7 +1215,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile( } // If attention sinks are used, potentially re-scale if KQ_max is small. - // Also add the sink as a value to KQ_rowsum, this is done after synchonization of KQ_rowsum + // Also add the sink as a value to KQ_rowsum, this is done after synchronization of KQ_rowsum // so it's being done unconditionally for every thread. if (!is_fixup && (np == 1 || threadIdx.y % np == 0) && sinks_f) { float KQ_max_scale[cols_per_thread]; diff --git a/ggml/src/ggml-cuda/fattn-vec.cuh b/ggml/src/ggml-cuda/fattn-vec.cuh index 3f4a78cc6..7cbe32633 100644 --- a/ggml/src/ggml-cuda/fattn-vec.cuh +++ b/ggml/src/ggml-cuda/fattn-vec.cuh @@ -10,7 +10,7 @@ static constexpr __device__ int ggml_cuda_fattn_vec_get_nthreads_device() { return 128; } -// Currenlty llvm with the amdgcn target does not support unrolling loops +// Currently llvm with the amdgcn target does not support unrolling loops // that contain a break that can not be resolved at compile time. #ifdef __clang__ #pragma clang diagnostic push diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh index cd3bfd405..aaf711a61 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh @@ -18,7 +18,7 @@ #if defined(RDNA4) && ROCWMMA_VERSION_MAJOR > 1 #define GGML_USE_WMMA_FATTN #elif defined(RDNA4) -#warning "rocwmma fattn is not suported on RDNA4 on rocwmma < v2.0.0, expect degraded performance" +#warning "rocwmma fattn is not supported on RDNA4 on rocwmma < v2.0.0, expect degraded performance" #endif // defined(RDNA4) && ROCWMMA_VERSION_MAJOR > 1 #endif // defined(GGML_HIP_ROCWMMA_FATTN) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 89d2c35e5..9575746ba 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2815,11 +2815,14 @@ static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_ ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer; ggml_backend_buffer_t buf_dst = dst->view_src ? 
dst->view_src->buffer : dst->buffer; - if (!ggml_backend_is_cuda(backend_src) || !ggml_backend_is_cuda(backend_dst)) { + //enables async copies from CPU to CUDA, instead of only CUDA-to-CUDA + bool copy_from_host = ggml_backend_buffer_is_host(buf_src) && ggml_backend_dev_type(backend_src->device) == GGML_BACKEND_DEVICE_TYPE_CPU; + + if (!(copy_from_host || ggml_backend_is_cuda(backend_src)) || !ggml_backend_is_cuda(backend_dst)) { return false; } - if (!ggml_backend_buffer_is_cuda(src->buffer) || !ggml_backend_buffer_is_cuda(dst->buffer)) { + if (!(copy_from_host || ggml_backend_buffer_is_cuda(buf_src)) || !ggml_backend_buffer_is_cuda(dst->buffer)) { return false; } @@ -2830,14 +2833,17 @@ static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_ ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context; ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context; - if (cuda_ctx_src->device != buf_ctx_src->device || cuda_ctx_dst->device != buf_ctx_dst->device) { + if ((copy_from_host && cuda_ctx_dst->device != buf_ctx_dst->device) || + !copy_from_host && (cuda_ctx_src->device != buf_ctx_src->device || cuda_ctx_dst->device != buf_ctx_dst->device)) { #ifndef NDEBUG GGML_LOG_DEBUG("%s: backend and buffer devices do not match\n", __func__); #endif return false; } - if (backend_src != backend_dst) { + if (copy_from_host) { + CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyHostToDevice, cuda_ctx_dst->stream())); + } else if (backend_src != backend_dst) { // copy on src stream if (cuda_ctx_src->device == cuda_ctx_dst->device) { CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, cuda_ctx_src->stream())); @@ -3346,7 +3352,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, return false; } - //rms_norm kernel assumes contigous rows + //rms_norm kernel assumes contiguous rows if (!ggml_is_contiguous_rows(mul->src[0]) || !ggml_is_contiguous_rows(mul->src[1])) { return false; } diff --git a/ggml/src/ggml-cuda/quantize.cu b/ggml/src/ggml-cuda/quantize.cu index a8c68e44b..4300ffc14 100644 --- a/ggml/src/ggml-cuda/quantize.cu +++ b/ggml/src/ggml-cuda/quantize.cu @@ -235,7 +235,7 @@ static __global__ void quantize_mmq_q8_1( q.z = roundf(xi.z*d_inv); q.w = roundf(xi.w*d_inv); - // Write back 4 int8 values as a single 32 bit value for better memroy bandwidth: + // Write back 4 int8 values as a single 32 bit value for better memory bandwidth: char4 * yqs4 = (char4 *) y[ib].qs; yqs4[iqs/4] = q; diff --git a/ggml/src/ggml-cuda/softmax.cu b/ggml/src/ggml-cuda/softmax.cu index dc06d0693..285c0e954 100644 --- a/ggml/src/ggml-cuda/softmax.cu +++ b/ggml/src/ggml-cuda/softmax.cu @@ -46,7 +46,7 @@ struct soft_max_params { }; // When ncols_template == 0 the bounds for the loops in this function are not known and can't be unrolled. -// As we want to keep pragma unroll for all other cases we supress the clang transformation warning here. +// As we want to keep pragma unroll for all other cases we suppress the clang transformation warning here. 
#ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" diff --git a/ggml/src/ggml-cuda/solve_tri.cu b/ggml/src/ggml-cuda/solve_tri.cu index 177ffc268..07ca33f51 100644 --- a/ggml/src/ggml-cuda/solve_tri.cu +++ b/ggml/src/ggml-cuda/solve_tri.cu @@ -83,7 +83,7 @@ static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx, // ====================== // When ncols_template == 0 the bounds for the loops in this function are not // known and can't be unrolled. As we want to keep pragma unroll for all other -// cases we supress the clang transformation warning here. +// cases we suppress the clang transformation warning here. #ifdef __clang__ # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wpass-failed" diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 802d3dea0..540479500 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -1287,7 +1287,7 @@ struct ggml_metal_buffer { bool use_residency_sets; // optional MTLResidencySet - // note: cannot use explicity "id" here because it is not available on certain OSes + // note: cannot use explicitly "id" here because it is not available on certain OSes id rset; // pointers to global device diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index 3d5db0b79..b3390352f 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -631,7 +631,7 @@ int ggml_metal_op_acc(ggml_metal_op_t ctx, int idx) { const bool inplace = (bool) ((const int32_t *) op->op_params)[4]; if (!inplace) { - // run a separete kernel to cpy src->dst + // run a separate kernel to cpy src->dst // not sure how to avoid this // TODO: make a simpler cpy_bytes kernel @@ -1644,7 +1644,7 @@ int ggml_metal_op_set(ggml_metal_op_t ctx, int idx) { const bool inplace = (bool) ((const int32_t *) op->op_params)[4]; if (!inplace) { - // run a separete kernel to cpy src->dst + // run a separate kernel to cpy src->dst // not sure how to avoid this // TODO: make a simpler cpy_bytes kernel @@ -2005,7 +2005,7 @@ int ggml_metal_op_mul_mat(ggml_metal_op_t ctx, int idx) { const int16_t r0ptg = nypsg*nsg; // num src0 rows per threadgroup int16_t r1ptg = 4; // num src1 rows per threadgroup - // note: not sure how optimal are those across all different hardware. there might be someting cleverer + // note: not sure how optimal are those across all different hardware. 
there might be something cleverer switch (ne11) { case 2: r1ptg = 2; break; diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp index 1c705362f..9382ce53b 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp @@ -14,7 +14,7 @@ #define GGML_METAL_MAX_DEVICES 16 // number of Metal devices -// note: can be overriden with GGML_METAL_DEVICES env to simulate virtual devices +// note: can be overridden with GGML_METAL_DEVICES env to simulate virtual devices static int g_devices = 1; //////////////////////////////////////////////////////////////////////////////// diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 6c349aa0c..a58e641ad 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4218,7 +4218,7 @@ kernel void kernel_im2col( template [[host_name("kernel_im2col_f32")]] kernel im2col_t kernel_im2col; template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col; -// TODO: obolete -- remove +// TODO: obsolete -- remove //typedef void (im2col_ext_t)( // constant ggml_metal_kargs_im2col & args, // device const float * x, diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl new file mode 100644 index 000000000..a22d245d2 --- /dev/null +++ b/ggml/src/ggml-webgpu/wgsl-shaders/concat.wgsl @@ -0,0 +1,75 @@ +struct Params { + ne: u32, + + offset_src0: u32, + offset_src1: u32, + offset_dst: u32, + + stride_src0_0: u32, + stride_src0_1: u32, + stride_src0_2: u32, + stride_src0_3: u32, + + stride_src1_0: u32, + stride_src1_1: u32, + stride_src1_2: u32, + stride_src1_3: u32, + + ne0: u32, + ne1: u32, + ne2: u32, + ne3: u32, + + dim: u32, + src0_nedim: u32 +}; + +#ifdef TYPE_F32 +#define DataType f32 +#endif +#ifdef TYPE_I32 +#define DataType i32 +#endif + +@group(0) @binding(0) +var src0: array; + +@group(0) @binding(1) +var src1 : array; + +@group(0) @binding(2) +var dst: array; + +@group(0) @binding(3) +var params: Params; + +@compute @workgroup_size(WG_SIZE) +fn main(@builtin(global_invocation_id) gid: vec3) { + + if (gid.x < params.ne) { + var i = gid.x; + let i3 = i / (params.ne2 * params.ne1 * params.ne0); + i = i % (params.ne2 * params.ne1 * params.ne0); + let i2 = i / (params.ne1 * params.ne0); + i = i % (params.ne1 * params.ne0); + let i1 = i / params.ne0; + let i0 = i % params.ne0; + + var ni = array(i0, i1, i2, i3); + + if (ni[params.dim] < params.src0_nedim) { + let src_i = ni[0] * params.stride_src0_0 + + ni[1] * params.stride_src0_1 + + ni[2] * params.stride_src0_2 + + ni[3] * params.stride_src0_3; + dst[params.offset_dst + gid.x] = src0[params.offset_src0 + src_i]; + } else { + ni[params.dim] -= params.src0_nedim; + let src_i = ni[0] * params.stride_src1_0 + + ni[1] * params.stride_src1_1 + + ni[2] * params.stride_src1_2 + + ni[3] * params.stride_src1_3; + dst[params.offset_dst + gid.x] = src1[params.offset_src1 + src_i]; + } + } +} diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index e0d478ce9..e954644e2 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -186,7 +186,7 @@ class Metadata: # Quick hack to fix the Norway problem # https://hitchdev.com/strictyaml/why/implicit-typing-removed/ yaml_content = yaml_content.replace("- no\n", "- \"no\"\n") - # yaml should use 2 spaces insted of tab + # yaml should use 2 spaces instead of tab # this issue has came up with the Qwen/Qwen3-235B-A22B-Instruct-2507 model card # (I've also sent a pr tp 
fix the modelcard too) yaml_content = yaml_content.replace("\t", " ") diff --git a/gguf-py/tests/test_metadata.py b/gguf-py/tests/test_metadata.py index 40d484f4e..b77c563ff 100755 --- a/gguf-py/tests/test_metadata.py +++ b/gguf-py/tests/test_metadata.py @@ -164,7 +164,7 @@ class TestMetadataMethod(unittest.TestCase): self.assertEqual(gguf.Metadata.get_model_id_components("Llama-3-Instruct-abliteration-LoRA-8B"), ('Llama-3-Instruct-abliteration-LoRA-8B', None, 'Llama-3', 'Instruct-abliteration-LoRA', None, '8B')) - # Negative size --> output is a LoRA adaper --> prune "LoRA" out of the name to avoid redundancy with the suffix + # Negative size --> output is a LoRA adapter --> prune "LoRA" out of the name to avoid redundancy with the suffix self.assertEqual(gguf.Metadata.get_model_id_components("Llama-3-Instruct-abliteration-LoRA-8B", -1234), ('Llama-3-Instruct-abliteration-LoRA-8B', None, 'Llama-3', 'Instruct-abliteration', None, '8B')) diff --git a/include/llama.h b/include/llama.h index f273a3b11..90bf25395 100644 --- a/include/llama.h +++ b/include/llama.h @@ -976,7 +976,7 @@ extern "C" { // Logits for the ith token. For positive indices, Equivalent to: // llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab - // Negative indicies can be used to access logits in reverse order, -1 is the last logit. + // Negative indices can be used to access logits in reverse order, -1 is the last logit. // returns NULL for invalid ids. LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i); @@ -991,7 +991,7 @@ extern "C" { // Get the embeddings for the ith token. For positive indices, Equivalent to: // llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd - // Negative indicies can be used to access embeddings in reverse order, -1 is the last embedding. + // Negative indices can be used to access embeddings in reverse order, -1 is the last embedding. // shape: [n_embd] (1-dimensional) // returns NULL for invalid ids. LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i); @@ -1011,9 +1011,9 @@ extern "C" { // Returns LLAMA_TOKEN_NULL if no token was sampled. LLAMA_API llama_token llama_get_sampled_token_ith(struct llama_context * ctx, int32_t i); - // Get the backend sampled probabilites for the ith token + // Get the backend sampled probabilities for the ith token // The index matches llama_get_sampled_token_ith(). - // Returns NULL if no probabilites were generated. + // Returns NULL if no probabilities were generated. LLAMA_API float * llama_get_sampled_probs_ith (struct llama_context * ctx, int32_t i); LLAMA_API uint32_t llama_get_sampled_probs_count_ith(struct llama_context * ctx, int32_t i); @@ -1340,7 +1340,7 @@ extern "C" { float tau, float eta); - /// @details Intializes a GBNF grammar, see grammars/README.md for details. + /// @details Initializes a GBNF grammar, see grammars/README.md for details. /// @param vocab The vocabulary that this grammar will be used with. /// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails. /// @param grammar_root The name of the start symbol for the grammar. 
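(Editor's note, not part of the patch: the negative-index convention documented in the `llama_get_logits_ith` / `llama_get_embeddings_ith` comments fixed above is easy to misread, so here is a minimal illustrative sketch. It assumes a `llama_context` that has just decoded a batch with logits requested for its final token; the helper name is hypothetical.)

```cpp
#include "llama.h"

// Illustrative sketch only: fetch the logits of the most recently produced
// output token using the negative-index convention described in llama.h.
// Assumes `ctx` has just decoded a batch with logits enabled for its last token.
static float * last_token_logits(struct llama_context * ctx) {
    // -1 addresses the last logit row, -2 the one before it, and so on;
    // the call returns NULL if the index does not map to a valid output.
    return llama_get_logits_ith(ctx, -1);
}
```
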
diff --git a/scripts/snapdragon/windows/run-completion.ps1 b/scripts/snapdragon/windows/run-completion.ps1 new file mode 100644 index 000000000..8a48d2d74 --- /dev/null +++ b/scripts/snapdragon/windows/run-completion.ps1 @@ -0,0 +1,53 @@ + +#!/usr/bin/env pwsh + +# Basedir on device +$basedir=".\pkg-snapdragon" + +$cli_opts=$args + +$model="Llama-3.2-3B-Instruct-Q4_0.gguf" +if ($null -ne $env:M) { + $model=$env:M +} + +$device="HTP0" +if ($null -ne $env:D) { + $device=$env:D +} + +if ($null -ne $env:V) { + $env:GGML_HEXAGON_VERBOSE=$env:V +} + +if ($null -ne $env:E) { + $env:GGML_HEXAGON_EXPERIMENTAL=$env:E +} + +if ($null -ne $env:SCHED) { + $env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v" +} + +if ($null -ne $env:PROF) { + $env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1 +} + +if ($null -ne $env:OPMASK) { + $env:GGML_HEXAGON_OPMASK=$env:OPMASK +} + +if ($null -ne $env:NHVX) { + $env:GGML_HEXAGON_NHVX=$env:NHVX +} + +if ($null -ne $env:NDEV) { + $env:GGML_HEXAGON_NDEV=$env:NDEV +} + +$env:ADSP_LIBRARY_PATH="$basedir\lib" + +& "$basedir\bin\llama-completion.exe" ` + --no-mmap -m $basedir\..\..\gguf\$model ` + --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 ` + --ctx-size 8192 --batch-size 128 -fa on ` + -ngl 99 -no-cnv --device $device $cli_opts diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 7609e56cc..ff78c8cfc 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -161,7 +161,7 @@ llama_context::llama_context( cparams.op_offload = params.op_offload; cparams.kv_unified = params.kv_unified; - // intialized later + // initialized later cparams.pipeline_parallel = false; { @@ -1991,7 +1991,7 @@ ggml_cgraph * llama_context::graph_reserve( ggml_backend_sched_reset(sched.get()); - // when the scheduler is reset, we cannnot reuse the old graph, so we reset the previous graph result to prevent that + // when the scheduler is reset, we cannot reuse the old graph, so we reset the previous graph result to prevent that gf_res_prev->reset(); // store the n_outputs as it is, and restore it afterwards diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 23a86ea29..b8126ce50 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -1616,7 +1616,7 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const { ggml_tensor * llm_graph_context::build_inp_out_ids() const { // note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls, // but this would make the graph topology depend on the number of output tokens, which can interere with - // features that require constant topology such as pipline parallelism + // features that require constant topology such as pipeline parallelism // ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471 //if (n_outputs < n_tokens) { // return nullptr; @@ -1779,7 +1779,7 @@ ggml_tensor * llm_graph_context::build_attn_mha( if (v_mla) { #if 0 // v_mla can be applied as a matrix-vector multiplication with broadcasting across dimension 3 == n_tokens. - // However, the code is optimized for dimensions 0 and 1 being large, so this is ineffient. + // However, the code is optimized for dimensions 0 and 1 being large, so this is inefficient. 
cur = ggml_reshape_4d(ctx0, cur, v_mla->ne[0], 1, n_head, n_tokens); cur = ggml_mul_mat(ctx0, v_mla, cur); #else diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp index 69b4cc72f..9c3084060 100644 --- a/src/llama-kv-cache.cpp +++ b/src/llama-kv-cache.cpp @@ -583,7 +583,7 @@ llama_kv_cache::slot_info_vec_t llama_kv_cache::prepare(const std::vector seq_srct; std::unordered_map> seq_idxs; diff --git a/src/llama-model.cpp b/src/llama-model.cpp index b78423a60..414e60b9a 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -175,6 +175,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_0_3B: return "0.3B"; case LLM_TYPE_0_5B: return "0.5B"; case LLM_TYPE_0_6B: return "0.6B"; + case LLM_TYPE_0_8B: return "0.8B"; case LLM_TYPE_1B: return "1B"; case LLM_TYPE_1_2B: return "1.2B"; case LLM_TYPE_1_3B: return "1.3B"; @@ -246,12 +247,14 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_100B_A6B: return "100B.A6B"; case LLM_TYPE_102B_A12B: return "102B.A12B"; case LLM_TYPE_106B_A12B: return "106B.A12B"; + case LLM_TYPE_122B_A10B: return "122B.A10B"; case LLM_TYPE_196B_A11B: return "196B.A11B"; case LLM_TYPE_230B_A10B: return "230B.A10B"; case LLM_TYPE_235B_A22B: return "235B.A22B"; case LLM_TYPE_300B_A47B: return "300B.A47B"; case LLM_TYPE_310B_A15B: return "310B.A15B"; case LLM_TYPE_355B_A32B: return "355B.A32B"; + case LLM_TYPE_397B_A17B: return "397B.A17B"; case LLM_TYPE_744B_A40B: return "744B.A40B"; case LLM_TYPE_E2B: return "E2B"; case LLM_TYPE_E4B: return "E4B"; @@ -1638,7 +1641,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { } switch (hparams.n_layer) { - // TODO: Jamba layers are a bit heterogenous, so naming this is hard. + // TODO: Jamba layers are a bit heterogeneous, so naming this is hard. case 12: // 900M 8x???M case 32: // 51B 16x?B default: type = LLM_TYPE_UNKNOWN; @@ -2642,7 +2645,9 @@ void llama_model::load_hparams(llama_model_loader & ml) { } switch (hparams.n_layer) { - case 24: type = LLM_TYPE_2B; break; + case 24: type = hparams.n_embd == 1024 ? LLM_TYPE_0_8B : LLM_TYPE_2B; break; + case 32: type = hparams.n_embd == 2560 ? 
LLM_TYPE_4B : LLM_TYPE_9B; break; + case 64: type = LLM_TYPE_27B; break; default: type = LLM_TYPE_UNKNOWN; } } break; @@ -2671,8 +2676,9 @@ void llama_model::load_hparams(llama_model_loader & ml) { } switch (hparams.n_layer) { - case 28: type = LLM_TYPE_35B_A3B; break; - case 48: type = LLM_TYPE_80B_A3B; break; + case 40: type = LLM_TYPE_35B_A3B; break; + case 48: type = LLM_TYPE_122B_A10B; break; + case 60: type = LLM_TYPE_397B_A17B; break; default: type = LLM_TYPE_UNKNOWN; } } break; diff --git a/src/llama-model.h b/src/llama-model.h index d7c3e7d1c..5ecb8344a 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -54,6 +54,7 @@ enum llm_type { LLM_TYPE_0_3B, LLM_TYPE_0_5B, LLM_TYPE_0_6B, + LLM_TYPE_0_8B, LLM_TYPE_1B, LLM_TYPE_1_2B, LLM_TYPE_1_3B, @@ -125,12 +126,14 @@ enum llm_type { LLM_TYPE_100B_A6B, LLM_TYPE_102B_A12B, // Solar-Open LLM_TYPE_106B_A12B, // GLM-4.5-Air + LLM_TYPE_122B_A10B, // Qwen3.5 LLM_TYPE_196B_A11B, // Step3.5-Flash LLM_TYPE_230B_A10B, // Minimax M2 LLM_TYPE_235B_A22B, LLM_TYPE_300B_A47B, // Ernie MoE big LLM_TYPE_310B_A15B, // /MiMo-V2-Flash LLM_TYPE_355B_A32B, // GLM-4.5 + LLM_TYPE_397B_A17B, // Qwen3.5 LLM_TYPE_744B_A40B, // GLM-5 LLM_TYPE_E2B, LLM_TYPE_E4B, diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 9cf9a1d17..477d2d32c 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2069,7 +2069,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx); precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap); #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - // correct endiannes of data in precompiled_charsmap binary blob + // correct endianness of data in precompiled_charsmap binary blob uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0]; *xcda_blob_size = __builtin_bswap32(*xcda_blob_size); assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap); diff --git a/src/models/deepseek2.cpp b/src/models/deepseek2.cpp index b608396e5..be81709c5 100644 --- a/src/models/deepseek2.cpp +++ b/src/models/deepseek2.cpp @@ -146,7 +146,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr cb(Qcur, "Qcur_attn_temp_scaled", il); } - // note: MLA with the absorption optimzation converts into MQA (ie: GQA with 1 group) + // note: MLA with the absorption optimization converts into MQA (ie: GQA with 1 group) cur = build_attn(inp_attn_k, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr, model.layers[il].wv_b, kq_scale, il); diff --git a/src/models/models.h b/src/models/models.h index 0712d03d8..cf9ba04e7 100644 --- a/src/models/models.h +++ b/src/models/models.h @@ -3,7 +3,7 @@ #include "llama-model.h" #include "llama-graph.h" -// note: almost all graphs require atleast sqrtf, so include cmath globally +// note: almost all graphs require at least sqrtf, so include cmath globally #include // diff --git a/src/unicode.cpp b/src/unicode.cpp index 1475b53b6..122c8ca04 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -773,7 +773,7 @@ static std::vector unicode_regex_split_custom(const std::string & text, // tiny_aya digit grouping pattern from tokenizer.json: // {"type": "Split", "pattern": {"Regex": "\\d{1,3}(?=(?:\\d{3})*\\b)"}, "behavior": "Isolated"} // Splits digits into groups of 3 from the right (e.g., 1234567 -> 1, 234, 567) - // TODO: Revisit this regex, incase there are any subtle tokenization differences with the 
original regex. + // TODO: Revisit this regex, in case there are any subtle tokenization differences with the original regex. bpe_offsets = unicode_regex_split_custom_afmoe(text, offsets); } diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index 21a027594..d9116e5da 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -68,7 +68,7 @@ #define TN_POS_EMBD "%s.position_embd.weight" #define TN_CLASS_EMBD "v.class_embd" -#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backwrad compat +#define TN_PATCH_EMBD "v.patch_embd.weight" // not rename tensor with ".0" postfix for backward compat #define TN_PATCH_EMBD_1 "v.patch_embd.weight.1" #define TN_PATCH_BIAS "v.patch_embd.bias" #define TN_NORM_EMBD "v.norm_embd.%s" diff --git a/tools/mtmd/clip-model.h b/tools/mtmd/clip-model.h index e0eb9b32c..eeb8da58e 100644 --- a/tools/mtmd/clip-model.h +++ b/tools/mtmd/clip-model.h @@ -46,7 +46,7 @@ struct clip_hparams { float image_std[3]; // for models using dynamic image size, we need to have a smaller image size to warmup - // otherwise, user will get OOM everytime they load the model + // otherwise, user will get OOM every time they load the model int32_t warmup_image_size = 0; int32_t warmup_audio_size = 3000; @@ -221,7 +221,7 @@ struct clip_model { // embeddings ggml_tensor * class_embedding = nullptr; ggml_tensor * patch_embeddings_0 = nullptr; - ggml_tensor * patch_embeddings_1 = nullptr; // second Conv2D kernel when we decouple Conv3D along temproal dimension (Qwen2VL) + ggml_tensor * patch_embeddings_1 = nullptr; // second Conv2D kernel when we decouple Conv3D along temporal dimension (Qwen2VL) ggml_tensor * patch_bias = nullptr; ggml_tensor * position_embeddings = nullptr; ggml_tensor * norm_embd_w = nullptr; diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index d7630aaf9..2a765bf75 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -2491,7 +2491,7 @@ static void normalize_image_u8_to_f32(const clip_image_u8 & src, clip_image_f32 } } -// set of tools to manupulate images +// set of tools to manipulate images // in the future, we can have HW acceleration by allowing this struct to access 3rd party lib like imagick or opencv struct img_tool { enum resize_algo { diff --git a/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py b/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py index 944037e70..1f563fbfc 100644 --- a/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py +++ b/tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py @@ -186,7 +186,7 @@ def trunc_normal_tf_( best when :math:`a \\leq \text{mean} \\leq b`. NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 - and the result is subsquently scaled and shifted by the mean and std args. + and the result is subsequently scaled and shifted by the mean and std args. 
Args: tensor: an n-dimensional `torch.Tensor` mean: the mean of the normal distribution diff --git a/tools/mtmd/mtmd-audio.cpp b/tools/mtmd/mtmd-audio.cpp index e8eef035f..447f61aaa 100644 --- a/tools/mtmd/mtmd-audio.cpp +++ b/tools/mtmd/mtmd-audio.cpp @@ -560,7 +560,7 @@ bool mtmd_audio_preprocessor_whisper::preprocess(const float * s for (size_t off = 0; off < (size_t) out_full.n_len; off += frames_per_chunk) { int n_len = std::min(frames_per_chunk, (size_t) out_full.n_len - off); if ((size_t) n_len < frames_per_chunk) { - break; // last uncomplete chunk will always be a padded chunk, safe to ignore + break; // last incomplete chunk will always be a padded chunk, safe to ignore } mtmd_audio_mel out_chunk; diff --git a/tools/quantize/README.md b/tools/quantize/README.md index 22f071028..b8c225124 100644 --- a/tools/quantize/README.md +++ b/tools/quantize/README.md @@ -100,7 +100,7 @@ Examples: ## Memory/Disk Requirements When running the larger models, make sure you have enough disk space to store all the intermediate files. -As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same. For exmaple (Llama 3.1): +As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same. For example (Llama 3.1): | Model | Original size | Quantized size (Q4_K_M) | | ----: | ------------: | ----------------------: | diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index a5465fcd1..77362ce66 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/public_legacy/index-new.html b/tools/server/public_legacy/index-new.html index e2f39d668..2cee7f3c3 100644 --- a/tools/server/public_legacy/index-new.html +++ b/tools/server/public_legacy/index-new.html @@ -36,7 +36,7 @@ const params = signal({ n_predict: 358, // 358 is a nice number - temperature: 0.8, // adapt all following parameters to optimized min-p requierements. If for non-english, set to 0.6 or lower + temperature: 0.8, // adapt all following parameters to optimized min-p requirements. If for non-english, set to 0.6 or lower repeat_last_n: 0, // 0 = disable penalty, -1 = context size repeat_penalty: 1.0, // 1.0 = disabled dry_multiplier: 0.0, // 0.0 = disabled, 0.8 works well @@ -108,7 +108,7 @@ let importedTemplates = local_storage_getDataAsObject('user_templates') if (importedTemplates) { - // saved templates were successfuly imported. + // saved templates were successfully imported. 
console.log('Processing saved templates and updating default template') params.value = { ...params.value, image_data: [] }; @@ -129,7 +129,7 @@ } function userTemplateResetToDefault() { - console.log('Reseting themplate to default') + console.log('Reseting template to default') selectedUserTemplate.value.name = 'default'; selectedUserTemplate.value.data = savedUserTemplates.value['default']; } diff --git a/tools/server/public_simplechat/datautils.mjs b/tools/server/public_simplechat/datautils.mjs index 75159d6b1..08ccc219b 100644 --- a/tools/server/public_simplechat/datautils.mjs +++ b/tools/server/public_simplechat/datautils.mjs @@ -63,7 +63,7 @@ export function trim_repeat_garbage_at_end(sIn, maxSubL=10, maxMatchLenThreshold /** - * Simple minded logic to help remove repeating garbage at end of the string, till it cant. + * Simple minded logic to help remove repeating garbage at end of the string, till it can't. * If its not able to trim, then it will try to skip a char at end and then trim, a few times. * This ensures that even if there are multiple runs of garbage with different patterns, the * logic still tries to munch through them. diff --git a/tools/server/public_simplechat/simplechat.js b/tools/server/public_simplechat/simplechat.js index 2fcd24a86..c67577d5a 100644 --- a/tools/server/public_simplechat/simplechat.js +++ b/tools/server/public_simplechat/simplechat.js @@ -318,7 +318,7 @@ class SimpleChat { } /** - * Allow setting of system prompt, but only at begining. + * Allow setting of system prompt, but only at beginning. * @param {string} sysPrompt * @param {string} msgTag */ @@ -333,7 +333,7 @@ class SimpleChat { console.error(`ERRR:SimpleChat:SC:${msgTag}:You need to specify system prompt before any user query, ignoring...`); } else { if (this.xchat[0].content !== sysPrompt) { - console.error(`ERRR:SimpleChat:SC:${msgTag}:You cant change system prompt, mid way through, ignoring...`); + console.error(`ERRR:SimpleChat:SC:${msgTag}:You can't change system prompt, mid way through, ignoring...`); } } } diff --git a/tools/server/public_simplechat/ui.mjs b/tools/server/public_simplechat/ui.mjs index b2d5b9aea..afa619a06 100644 --- a/tools/server/public_simplechat/ui.mjs +++ b/tools/server/public_simplechat/ui.mjs @@ -44,7 +44,7 @@ export function el_create_button(id, callback, name=undefined, innerText=undefin } /** - * Create a para and set it up. Optionaly append it to a passed parent. + * Create a para and set it up. Optionally append it to a passed parent. * @param {string} text * @param {HTMLElement | undefined} elParent * @param {string | undefined} id @@ -111,7 +111,7 @@ export function el_creatediv_boolbutton(id, label, texts, defaultValue, cb, clas /** * Create a select ui element, with a set of options to select from. * * options: an object which contains name-value pairs - * * defaultOption: the value whose name should be choosen, by default. + * * defaultOption: the value whose name should be chosen, by default. * * cb : the call back returns the name string of the option selected. 
* * @param {string} id diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte index 4494ea880..c676e224a 100644 --- a/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte +++ b/tools/server/webui/src/lib/components/app/actions/ActionIcon.svelte @@ -8,6 +8,7 @@ tooltip: string; variant?: 'default' | 'destructive' | 'outline' | 'secondary' | 'ghost' | 'link'; size?: 'default' | 'sm' | 'lg' | 'icon'; + iconSize?: string; class?: string; disabled?: boolean; onclick: () => void; @@ -21,6 +22,7 @@ size = 'sm', class: className = '', disabled = false, + iconSize = 'h-3 w-3', onclick, 'aria-label': ariaLabel }: Props = $props(); @@ -38,7 +40,7 @@ > {@const IconComponent = icon} - + diff --git a/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte b/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte index a0d5e863c..15936691a 100644 --- a/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte +++ b/tools/server/webui/src/lib/components/app/badges/BadgeModality.svelte @@ -1,6 +1,6 @@ {#snippet badgeContent()} @@ -31,7 +33,9 @@ {/snippet} - {model} + {#if model} + + {/if} {#if showCopyIcon} @@ -39,7 +43,7 @@ {/snippet} -{#if model && isModelMode} +{#if shouldShow} {#if showTooltip} diff --git a/tools/server/webui/src/lib/components/app/models/ModelId.svelte b/tools/server/webui/src/lib/components/app/models/ModelId.svelte new file mode 100644 index 000000000..817e88286 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/models/ModelId.svelte @@ -0,0 +1,64 @@ + + +{#if resolvedShowRaw} + {modelId} +{:else} + + + {#if showOrgName}{parsed.orgName}/{/if}{parsed.modelName ?? modelId} + + + {#if parsed.params} + + {parsed.params}{parsed.activatedParams ? `-${parsed.activatedParams}` : ''} + + {/if} + + {#if parsed.quantization} + + {parsed.quantization} + + {/if} + + {#if aliases && aliases.length > 0} + {#each aliases as alias (alias)} + {alias} + {/each} + {/if} + + {#if tags && tags.length > 0} + {#each tags as tag (tag)} + {tag} + {/each} + {/if} + +{/if} diff --git a/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte b/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte index ebffae121..a40501e2c 100644 --- a/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte +++ b/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte @@ -1,6 +1,7 @@ @@ -260,10 +315,25 @@ {#if loading && options.length === 0 && isRouter}
+ Loading models…
{:else if options.length === 0 && isRouter} -

No models available.

+ {#if currentModel} + + + + + + {:else} +

No models available.

+ {/if} {:else} {@const selectedOption = getDisplayOption()} @@ -280,7 +350,7 @@ type="button" class={cn( `inline-grid cursor-pointer grid-cols-[1fr_auto_1fr] items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`, - !isCurrentModelInCache() + !isCurrentModelInCache ? 'bg-red-400/10 !text-red-400 hover:bg-red-400/20 hover:text-red-400' : forceForegroundText ? 'text-foreground' @@ -294,12 +364,21 @@ > - + {#if selectedOption} + + + + - {#if updating} + +

{selectedOption.model}

+
+
+ {:else} + Select model + {/if} + + {#if updating || isLoadingModel} {:else} @@ -316,10 +395,10 @@ placeholder="Search models..." onSearchKeyDown={handleSearchKeyDown} emptyMessage="No models found." - isEmpty={filteredOptions.length === 0 && isCurrentModelInCache()} + isEmpty={filteredOptions.length === 0 && isCurrentModelInCache} >
- {#if !isCurrentModelInCache() && currentModel} + {#if !isCurrentModelInCache && currentModel} -
{/if} + {#if filteredOptions.length === 0}

No models found.

{/if} - {#each filteredOptions as option, index (option.id)} - {@const status = getModelStatus(option.model)} - {@const isLoaded = status === ServerModelStatus.LOADED} - {@const isLoading = status === ServerModelStatus.LOADING} - {@const isSelected = currentModel === option.model || activeId === option.id} - {@const isHighlighted = index === highlightedIndex} -
handleSelect(option.id)} - onmouseenter={() => (highlightedIndex = index)} - onkeydown={(e) => { - if (e.key === 'Enter' || e.key === ' ') { - e.preventDefault(); - handleSelect(option.id); - } - }} - > - + Loaded models +

+ {:else if group.isFavouritesGroup} +

+ Favourite models +

+ {:else if group.orgName} +

- {option.model} - + {group.orgName} +

+ {/if} -
- {#if isLoading} - - - - - -

Loading model...

-
-
- {:else if isLoaded} - - - - - -

Unload model

-
-
- {:else} - - {/if} -
-
+ {#each group.items as { option, flatIndex } (group.isLoadedGroup ? `loaded-${option.id}` : group.isFavouritesGroup ? `fav-${option.id}` : option.id)} + {@const isSelected = currentModel === option.model || activeId === option.id} + {@const isHighlighted = flatIndex === highlightedIndex} + {@const isFav = modelsStore.favouriteModelIds.has(option.model)} + + (highlightedIndex = flatIndex)} + onKeyDown={(e) => { + if (e.key === KeyboardKey.ENTER || e.key === KeyboardKey.SPACE) { + e.preventDefault(); + handleSelect(option.id); + } + }} + /> + {/each} {/each}
@@ -422,7 +465,7 @@