From bb4f7a9e4eec171fecf0f640b1337a1c24485560 Mon Sep 17 00:00:00 2001
From: compilade <git@compilade.net>
Date: Tue, 8 Jul 2025 11:37:47 -0400
Subject: [PATCH] memory : fix broken batch splits for recurrent cache (#14575)

Splits producing more than one ubatch per batch for recurrent models
were broken by #14512.

This fixes it by moving the completeness check after the ubatch split loop.
---
 src/llama-memory-recurrent.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp
index 4b90dac7a..a1b5b1a27 100644
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -377,14 +377,18 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 ubatch = balloc.split_equal(n_ubatch, false);
             }
 
-            if (balloc.get_n_used() < balloc.get_n_tokens()) {
-                // failed to find a suitable split
+            if (ubatch.n_tokens == 0) {
                 break;
             }
 
             ubatches.push_back(std::move(ubatch)); // NOLINT
         }
 
+        if (balloc.get_n_used() < balloc.get_n_tokens()) {
+            // failed to find a suitable split
+            break;
+        }
+
         if (!prepare(ubatches)) {
             break;
         }