llama : rename missed batch params/vars to ubatch (#10059)

This commit renames the `batch` parameter to `ubatch` in the
`llama_kv_cache_find_slot`, `llm_build_inp_embd`, and
`llm_build_mamba` functions.

The motivation for this is that this should have been done as part of
Commit 19d900a756 ("llama : rename batch
to ubatch (#9950)") but for some reason I missed these functions in
that commit and only noticed them now (sorry).
This commit is contained in:
Daniel Bevenius 2025-01-06 10:28:17 +01:00 committed by GitHub
parent 47182dd03f
commit 6369f867a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 25 additions and 25 deletions

View file

@ -2540,21 +2540,21 @@ static struct ggml_tensor * llm_build_inp_embd(
struct ggml_context * ctx,
struct llama_context & lctx,
const llama_hparams & hparams,
const llama_ubatch & batch,
const llama_ubatch & ubatch,
struct ggml_tensor * tok_embd,
const llm_build_cb & cb) {
const int64_t n_embd = hparams.n_embd;
struct ggml_tensor * inpL;
if (batch.token) {
lctx.inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens);
if (ubatch.token) {
lctx.inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, ubatch.n_tokens);
cb(lctx.inp_tokens, "inp_tokens", -1);
ggml_set_input(lctx.inp_tokens);
inpL = ggml_get_rows(ctx, tok_embd, lctx.inp_tokens);
} else {
lctx.inp_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, batch.n_tokens);
lctx.inp_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, ubatch.n_tokens);
inpL = lctx.inp_embd;
ggml_set_input(lctx.inp_embd);
}
@ -3149,7 +3149,7 @@ static struct ggml_tensor * llm_build_copy_mask_state(
static struct ggml_tensor * llm_build_mamba(
struct ggml_context * ctx,
struct llama_context & lctx,
const llama_ubatch & batch,
const llama_ubatch & ubatch,
struct ggml_cgraph * graph,
struct ggml_tensor * cur,
struct ggml_tensor * state_copy,
@ -3165,17 +3165,17 @@ static struct ggml_tensor * llm_build_mamba(
const int64_t d_inner = hparams.ssm_d_inner;
const int64_t d_state = hparams.ssm_d_state;
const int64_t dt_rank = hparams.ssm_dt_rank;
const int64_t n_seqs = batch.n_seqs;
const int64_t n_seqs = ubatch.n_seqs;
// Some variants of Mamba arch (e.g. FalconMamba do apply layer norm on B and Dt layers)
const bool ssm_dt_b_c_rms = hparams.ssm_dt_b_c_rms;
// Use the same RMS norm as the final layer norm
const float norm_rms_eps = hparams.f_norm_rms_eps;
const int64_t n_seq_tokens = batch.n_seq_tokens;
const int64_t n_seq_tokens = ubatch.n_seq_tokens;
GGML_ASSERT(n_seqs != 0);
GGML_ASSERT(batch.equal_seqs);
GGML_ASSERT(batch.n_tokens == n_seq_tokens * n_seqs);
GGML_ASSERT(ubatch.equal_seqs);
GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs);
struct ggml_tensor * conv_states_all = kv.k_l[il];
struct ggml_tensor * ssm_states_all = kv.v_l[il];