mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
llama : rename missed batch params/vars to ubatch (#10059)
This commit renames the `batch` parameter to `ubatch` in the
`llama_kv_cache_find_slot`, `llm_build_inp_embd`, and
`llm_build_mamba` functions.
The motivation for this is that it should have been done as part of
commit 19d900a756 ("llama : rename batch to ubatch (#9950)"), but for
some reason I missed these functions in that commit and only noticed
them now (sorry).
This commit is contained in:
parent
47182dd03f
commit
6369f867a4
2 changed files with 25 additions and 25 deletions
|
@ -2540,21 +2540,21 @@ static struct ggml_tensor * llm_build_inp_embd(
|
|||
struct ggml_context * ctx,
|
||||
struct llama_context & lctx,
|
||||
const llama_hparams & hparams,
|
||||
const llama_ubatch & batch,
|
||||
const llama_ubatch & ubatch,
|
||||
struct ggml_tensor * tok_embd,
|
||||
const llm_build_cb & cb) {
|
||||
const int64_t n_embd = hparams.n_embd;
|
||||
|
||||
struct ggml_tensor * inpL;
|
||||
|
||||
if (batch.token) {
|
||||
lctx.inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens);
|
||||
if (ubatch.token) {
|
||||
lctx.inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, ubatch.n_tokens);
|
||||
cb(lctx.inp_tokens, "inp_tokens", -1);
|
||||
ggml_set_input(lctx.inp_tokens);
|
||||
|
||||
inpL = ggml_get_rows(ctx, tok_embd, lctx.inp_tokens);
|
||||
} else {
|
||||
lctx.inp_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, batch.n_tokens);
|
||||
lctx.inp_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, ubatch.n_tokens);
|
||||
inpL = lctx.inp_embd;
|
||||
ggml_set_input(lctx.inp_embd);
|
||||
}
|
||||
|
@ -3149,7 +3149,7 @@ static struct ggml_tensor * llm_build_copy_mask_state(
|
|||
static struct ggml_tensor * llm_build_mamba(
|
||||
struct ggml_context * ctx,
|
||||
struct llama_context & lctx,
|
||||
const llama_ubatch & batch,
|
||||
const llama_ubatch & ubatch,
|
||||
struct ggml_cgraph * graph,
|
||||
struct ggml_tensor * cur,
|
||||
struct ggml_tensor * state_copy,
|
||||
|
@ -3165,17 +3165,17 @@ static struct ggml_tensor * llm_build_mamba(
|
|||
const int64_t d_inner = hparams.ssm_d_inner;
|
||||
const int64_t d_state = hparams.ssm_d_state;
|
||||
const int64_t dt_rank = hparams.ssm_dt_rank;
|
||||
const int64_t n_seqs = batch.n_seqs;
|
||||
const int64_t n_seqs = ubatch.n_seqs;
|
||||
// Some variants of Mamba arch (e.g. FalconMamba) do apply layer norm on B and Dt layers
|
||||
const bool ssm_dt_b_c_rms = hparams.ssm_dt_b_c_rms;
|
||||
// Use the same RMS norm as the final layer norm
|
||||
const float norm_rms_eps = hparams.f_norm_rms_eps;
|
||||
|
||||
const int64_t n_seq_tokens = batch.n_seq_tokens;
|
||||
const int64_t n_seq_tokens = ubatch.n_seq_tokens;
|
||||
|
||||
GGML_ASSERT(n_seqs != 0);
|
||||
GGML_ASSERT(batch.equal_seqs);
|
||||
GGML_ASSERT(batch.n_tokens == n_seq_tokens * n_seqs);
|
||||
GGML_ASSERT(ubatch.equal_seqs);
|
||||
GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs);
|
||||
|
||||
struct ggml_tensor * conv_states_all = kv.k_l[il];
|
||||
struct ggml_tensor * ssm_states_all = kv.v_l[il];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue