mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-16 19:59:16 +00:00
split utils.cpp into 2 files to support sd.cpp
This commit is contained in:
parent
276c651a12
commit
950676fdb7
9 changed files with 231 additions and 225 deletions
|
|
@ -463,6 +463,8 @@ add_library(common2
|
|||
src/unicode-data.cpp
|
||||
otherarch/utils.cpp
|
||||
otherarch/utils.h
|
||||
otherarch/llmutils.cpp
|
||||
otherarch/llmutils.h
|
||||
common/reasoning-budget.cpp
|
||||
common/reasoning-budget.h
|
||||
tools/mtmd/mtmd-audio.cpp
|
||||
|
|
|
|||
10
Makefile
10
Makefile
|
|
@ -110,10 +110,10 @@ endif
|
|||
CUBLASLD_FLAGS =
|
||||
CUBLAS_OBJS =
|
||||
|
||||
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o kcpp-repackmapper.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o llama-impl.o sampling.o budget.o kcpputils.o mtmdaudio.o
|
||||
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants.o kcpp-quantmapper_noavx2.o kcpp-repackmapper_noavx2.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o llama-impl.o sampling.o budget.o kcpputils.o mtmdaudio.o
|
||||
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants.o kcpp-quantmapper_noavx1.o kcpp-repackmapper_noavx1.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o llama-impl.o sampling.o budget.o kcpputils.o mtmdaudio.o
|
||||
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants.o kcpp-quantmapper_failsafe.o kcpp-repackmapper_failsafe.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o llama-impl.o sampling.o budget.o kcpputils.o mtmdaudio.o
|
||||
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o kcpp-repackmapper.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o llama-impl.o sampling.o budget.o kcpputils.o kcppllmutils.o mtmdaudio.o
|
||||
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants.o kcpp-quantmapper_noavx2.o kcpp-repackmapper_noavx2.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o llama-impl.o sampling.o budget.o kcpputils.o kcppllmutils.o mtmdaudio.o
|
||||
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants.o kcpp-quantmapper_noavx1.o kcpp-repackmapper_noavx1.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o llama-impl.o sampling.o budget.o kcpputils.o kcppllmutils.o mtmdaudio.o
|
||||
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants.o kcpp-quantmapper_failsafe.o kcpp-repackmapper_failsafe.o unicode.o unicode-common.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o llama-impl.o sampling.o budget.o kcpputils.o kcppllmutils.o mtmdaudio.o
|
||||
|
||||
# OS specific
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
|
|
@ -602,6 +602,8 @@ gguf.o: ggml/src/gguf.cpp ggml/include/gguf.h
|
|||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
kcpputils.o: otherarch/utils.cpp otherarch/utils.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
kcppllmutils.o: otherarch/llmutils.cpp otherarch/llmutils.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
mtmdaudio.o: tools/mtmd/mtmd-audio.cpp tools/mtmd/mtmd-audio.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
ggml-backend.o: ggml/src/ggml-backend.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
#include <chrono>
|
||||
|
||||
#include "utils.h"
|
||||
#include "llmutils.h"
|
||||
|
||||
//for easier compilation
|
||||
//concat source files into one file for compilation purposes
|
||||
|
|
|
|||
164
otherarch/llmutils.cpp
Normal file
164
otherarch/llmutils.cpp
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
|
||||
#include "llmutils.h"
|
||||
|
||||
void kcpp_embd_batch::init_kcpp_batch(int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
const int n_pos_per_embd = use_mrope ? 4 : 1;
|
||||
const llama_seq_id seq_id = 0;
|
||||
|
||||
if (use_mrope && mrope_is_image) {
|
||||
GGML_ASSERT(img_nx > 0 && img_ny > 0);
|
||||
GGML_ASSERT(img_nx * img_ny == n_tokens);
|
||||
}
|
||||
|
||||
pos.resize(n_tokens * n_pos_per_embd);
|
||||
std::fill(pos.begin(), pos.end(), 0);
|
||||
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids.resize(n_tokens + 1);
|
||||
logits.resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids[n_tokens] = nullptr;
|
||||
|
||||
batch.pos = pos.data();
|
||||
batch.n_seq_id = n_seq_id.data();
|
||||
batch.seq_id = seq_ids.data();
|
||||
batch.logits = logits.data();
|
||||
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
n_seq_id[i] = 1;
|
||||
seq_ids[i] = seq_id_0.data();
|
||||
logits[i] = return_all_logits;
|
||||
}
|
||||
|
||||
// ---- position encoding ----
|
||||
if (!use_mrope) {
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
pos[i] = npast + i;
|
||||
}
|
||||
} else if (!mrope_is_image) {
|
||||
// 1D M-RoPE (audio / embedding stream)
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
pos[i + 0 * n_tokens] = npast + i;
|
||||
pos[i + 1 * n_tokens] = npast + i;
|
||||
pos[i + 2 * n_tokens] = npast + i;
|
||||
pos[i + 3 * n_tokens] = 0;
|
||||
}
|
||||
} else {
|
||||
// 2D image M-RoPE
|
||||
int idx = 0;
|
||||
for (int y = 0; y < img_ny; ++y) {
|
||||
for (int x = 0; x < img_nx; ++x) {
|
||||
pos[idx + 0 * n_tokens] = npast;
|
||||
pos[idx + 1 * n_tokens] = npast + y;
|
||||
pos[idx + 2 * n_tokens] = npast + x;
|
||||
pos[idx + 3 * n_tokens] = 0;
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Always request logits for last token
|
||||
logits[n_tokens - 1] = true;
|
||||
}
|
||||
|
||||
//for embeddings
|
||||
kcpp_embd_batch::kcpp_embd_batch(float * embd,
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
batch = {
|
||||
/* n_tokens = */ n_tokens,
|
||||
/* tokens = */ nullptr,
|
||||
/* embd = */ embd,
|
||||
/* pos = */ nullptr,
|
||||
/* n_seq_id = */ nullptr,
|
||||
/* seq_id = */ nullptr,
|
||||
/* logits = */ nullptr,
|
||||
};
|
||||
|
||||
init_kcpp_batch(n_tokens, npast, use_mrope,
|
||||
/*return_all_logits=*/false, mrope_is_image, img_nx, img_ny);
|
||||
}
|
||||
|
||||
// for tokens
|
||||
kcpp_embd_batch::kcpp_embd_batch(std::vector<llama_token> & tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
batch = {
|
||||
/* n_tokens = */ (int32_t) tokens.size(),
|
||||
/* tokens = */ tokens.data(),
|
||||
/* embd = */ nullptr,
|
||||
/* pos = */ nullptr,
|
||||
/* n_seq_id = */ nullptr,
|
||||
/* seq_id = */ nullptr,
|
||||
/* logits = */ nullptr,
|
||||
};
|
||||
|
||||
init_kcpp_batch(batch.n_tokens, npast, use_mrope, return_all_logits, mrope_is_image, img_nx, img_ny);
|
||||
}
|
||||
|
||||
// Returns a llama_batch describing tokens [offset, offset + n_tokens) of this
// batch. All pointers in the result refer to storage owned by this object (or
// to the caller-provided token / embedding buffers), so the view must not
// outlive it.
//
//   offset         index of the first token in the view (asserted >= 0)
//   n_tokens       number of tokens in the view (asserted > 0 and in range)
//   n_embd_mmproj  number of floats per embedding row; used to step `embd`
//
// With M-RoPE positions (more than one position value per token) the selected
// slice of each dimension is copied into `pos_view`, which is overwritten by
// the next call, so only the most recent M-RoPE view has valid positions.
//
// Token batches are sliced as well; previously the view always carried a null
// token pointer, leaving it with neither tokens nor embeddings.
llama_batch kcpp_embd_batch::get_view(int offset, int n_tokens, int n_embd_mmproj) {
    GGML_ASSERT(offset >= 0);
    GGML_ASSERT(n_tokens > 0);
    GGML_ASSERT(offset + n_tokens <= batch.n_tokens);

    const int total_tokens = batch.n_tokens;

    // Detect M-RoPE vs normal RoPE
    const bool is_mrope = (pos.size() > (size_t) total_tokens);

    pos_view.clear();

    // Normal RoPE: contiguous slice of the position array
    llama_pos * pos_ptr = pos.data() + offset;

    if (is_mrope) {
        const int n_pos_per_embd = (int) (pos.size() / total_tokens);
        GGML_ASSERT(n_pos_per_embd == 4);

        // Layout:
        //   src: [dim0_all_tokens][dim1_all_tokens][dim2_all_tokens][dim3_all_tokens]
        //   dst: same layout, but only [offset : offset + n_tokens]
        pos_view.reserve(n_tokens * n_pos_per_embd);

        for (int dim = 0; dim < n_pos_per_embd; ++dim) {
            const llama_pos * src = pos.data() + dim * total_tokens + offset;
            pos_view.insert(pos_view.end(), src, src + n_tokens);
        }

        pos_ptr = pos_view.data();
    }

    return {
        /* n_tokens = */ n_tokens,
        /* tokens   = */ batch.token ? batch.token + offset : nullptr,
        /* embd     = */ batch.embd ? batch.embd + offset * n_embd_mmproj : nullptr,
        /* pos      = */ pos_ptr,
        /* n_seq_id = */ batch.n_seq_id + offset,
        /* seq_id   = */ batch.seq_id + offset,
        /* logits   = */ batch.logits + offset,
    };
}
|
||||
54
otherarch/llmutils.h
Normal file
54
otherarch/llmutils.h
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include "llama.h"
|
||||
|
||||
//duplicated and modified from llava_embd_batch
|
||||
struct kcpp_embd_batch {
|
||||
std::vector<llama_pos> pos;
|
||||
std::vector<llama_pos> pos_view;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<llama_seq_id> seq_id_0;
|
||||
std::vector<llama_seq_id*> seq_ids;
|
||||
std::vector<int8_t> logits;
|
||||
llama_batch batch;
|
||||
|
||||
llama_batch get_view(int offset, int n_tokens, int n_embd_mmproj);
|
||||
|
||||
// Embedding constructor
|
||||
kcpp_embd_batch(
|
||||
float * embd,
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool mrope_is_image = false,
|
||||
int img_nx = 0,
|
||||
int img_ny = 0
|
||||
);
|
||||
|
||||
// Token constructor
|
||||
kcpp_embd_batch(
|
||||
std::vector<llama_token> & tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image = false,
|
||||
int img_nx = 0,
|
||||
int img_ny = 0
|
||||
);
|
||||
|
||||
private:
|
||||
void init_kcpp_batch(
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny
|
||||
);
|
||||
};
|
||||
|
|
@ -14,6 +14,8 @@
|
|||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
|
||||
#include "otherarch/utils.h"
|
||||
|
||||
#include "model_adapter.h"
|
||||
#include "tokenizers/vocab/vocab.h"
|
||||
#include "flux.hpp"
|
||||
|
|
@ -54,10 +56,6 @@ using namespace torch_zip;
|
|||
#include "tokenizers/tokenizer.cpp"
|
||||
#include "tokenizers/tokenize_util.cpp"
|
||||
|
||||
// FIXME: llama.h errors out if included (through utils.h)
|
||||
std::vector<uint8_t> kcpp_base64_decode(const std::string & encoded_string);
|
||||
std::string kcpp_base64_encode(const unsigned char* data, unsigned int data_length);
|
||||
std::string get_timestamp_str();
|
||||
|
||||
// #include "preprocessing.hpp"
|
||||
#include "stable-diffusion.h"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "model_adapter.h"
|
||||
#include "otherarch/utils.h"
|
||||
#include "otherarch/llmutils.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "sampling.h"
|
||||
|
|
|
|||
|
|
@ -760,167 +760,6 @@ int32_t kcpp_quick_sample(float * logits, const int n_logits, const std::vector<
|
|||
return logits_id[idx].second;
|
||||
}
|
||||
|
||||
void kcpp_embd_batch::init_kcpp_batch(int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
const int n_pos_per_embd = use_mrope ? 4 : 1;
|
||||
const llama_seq_id seq_id = 0;
|
||||
|
||||
if (use_mrope && mrope_is_image) {
|
||||
GGML_ASSERT(img_nx > 0 && img_ny > 0);
|
||||
GGML_ASSERT(img_nx * img_ny == n_tokens);
|
||||
}
|
||||
|
||||
pos.resize(n_tokens * n_pos_per_embd);
|
||||
std::fill(pos.begin(), pos.end(), 0);
|
||||
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids.resize(n_tokens + 1);
|
||||
logits.resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids[n_tokens] = nullptr;
|
||||
|
||||
batch.pos = pos.data();
|
||||
batch.n_seq_id = n_seq_id.data();
|
||||
batch.seq_id = seq_ids.data();
|
||||
batch.logits = logits.data();
|
||||
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
n_seq_id[i] = 1;
|
||||
seq_ids[i] = seq_id_0.data();
|
||||
logits[i] = return_all_logits;
|
||||
}
|
||||
|
||||
// ---- position encoding ----
|
||||
if (!use_mrope) {
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
pos[i] = npast + i;
|
||||
}
|
||||
} else if (!mrope_is_image) {
|
||||
// 1D M-RoPE (audio / embedding stream)
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
pos[i + 0 * n_tokens] = npast + i;
|
||||
pos[i + 1 * n_tokens] = npast + i;
|
||||
pos[i + 2 * n_tokens] = npast + i;
|
||||
pos[i + 3 * n_tokens] = 0;
|
||||
}
|
||||
} else {
|
||||
// 2D image M-RoPE
|
||||
int idx = 0;
|
||||
for (int y = 0; y < img_ny; ++y) {
|
||||
for (int x = 0; x < img_nx; ++x) {
|
||||
pos[idx + 0 * n_tokens] = npast;
|
||||
pos[idx + 1 * n_tokens] = npast + y;
|
||||
pos[idx + 2 * n_tokens] = npast + x;
|
||||
pos[idx + 3 * n_tokens] = 0;
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Always request logits for last token
|
||||
logits[n_tokens - 1] = true;
|
||||
}
|
||||
|
||||
//for embeddings
|
||||
kcpp_embd_batch::kcpp_embd_batch(float * embd,
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
batch = {
|
||||
/* n_tokens = */ n_tokens,
|
||||
/* tokens = */ nullptr,
|
||||
/* embd = */ embd,
|
||||
/* pos = */ nullptr,
|
||||
/* n_seq_id = */ nullptr,
|
||||
/* seq_id = */ nullptr,
|
||||
/* logits = */ nullptr,
|
||||
};
|
||||
|
||||
init_kcpp_batch(n_tokens, npast, use_mrope,
|
||||
/*return_all_logits=*/false, mrope_is_image, img_nx, img_ny);
|
||||
}
|
||||
|
||||
// for tokens
|
||||
kcpp_embd_batch::kcpp_embd_batch(std::vector<llama_token> & tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny) {
|
||||
batch = {
|
||||
/* n_tokens = */ (int32_t) tokens.size(),
|
||||
/* tokens = */ tokens.data(),
|
||||
/* embd = */ nullptr,
|
||||
/* pos = */ nullptr,
|
||||
/* n_seq_id = */ nullptr,
|
||||
/* seq_id = */ nullptr,
|
||||
/* logits = */ nullptr,
|
||||
};
|
||||
|
||||
init_kcpp_batch(batch.n_tokens, npast, use_mrope, return_all_logits, mrope_is_image, img_nx, img_ny);
|
||||
}
|
||||
|
||||
// Returns a llama_batch describing tokens [offset, offset + n_tokens) of this
// batch. All pointers in the result refer to storage owned by this object (or
// to the caller-provided token / embedding buffers), so the view must not
// outlive it.
//
//   offset         index of the first token in the view (asserted >= 0)
//   n_tokens       number of tokens in the view (asserted > 0 and in range)
//   n_embd_mmproj  number of floats per embedding row; used to step `embd`
//
// With M-RoPE positions (more than one position value per token) the selected
// slice of each dimension is copied into `pos_view`, which is overwritten by
// the next call, so only the most recent M-RoPE view has valid positions.
//
// Token batches are sliced as well; previously the view always carried a null
// token pointer, leaving it with neither tokens nor embeddings.
llama_batch kcpp_embd_batch::get_view(int offset, int n_tokens, int n_embd_mmproj) {
    GGML_ASSERT(offset >= 0);
    GGML_ASSERT(n_tokens > 0);
    GGML_ASSERT(offset + n_tokens <= batch.n_tokens);

    const int total_tokens = batch.n_tokens;

    // Detect M-RoPE vs normal RoPE
    const bool is_mrope = (pos.size() > (size_t) total_tokens);

    pos_view.clear();

    // Normal RoPE: contiguous slice of the position array
    llama_pos * pos_ptr = pos.data() + offset;

    if (is_mrope) {
        const int n_pos_per_embd = (int) (pos.size() / total_tokens);
        GGML_ASSERT(n_pos_per_embd == 4);

        // Layout:
        //   src: [dim0_all_tokens][dim1_all_tokens][dim2_all_tokens][dim3_all_tokens]
        //   dst: same layout, but only [offset : offset + n_tokens]
        pos_view.reserve(n_tokens * n_pos_per_embd);

        for (int dim = 0; dim < n_pos_per_embd; ++dim) {
            const llama_pos * src = pos.data() + dim * total_tokens + offset;
            pos_view.insert(pos_view.end(), src, src + n_tokens);
        }

        pos_ptr = pos_view.data();
    }

    return {
        /* n_tokens = */ n_tokens,
        /* tokens   = */ batch.token ? batch.token + offset : nullptr,
        /* embd     = */ batch.embd ? batch.embd + offset * n_embd_mmproj : nullptr,
        /* pos      = */ pos_ptr,
        /* n_seq_id = */ batch.n_seq_id + offset,
        /* seq_id   = */ batch.seq_id + offset,
        /* logits   = */ batch.logits + offset,
    };
}
|
||||
|
||||
std::vector<std::string> split_string(const std::string& input, const std::string& separator) {
|
||||
std::vector<std::string> result;
|
||||
|
|
|
|||
|
|
@ -8,16 +8,6 @@
|
|||
#include <random>
|
||||
#include <thread>
|
||||
#include "ggml_v3.h"
|
||||
#include "llama.h"
|
||||
|
||||
//
|
||||
// CLI argument parsing
|
||||
//
|
||||
|
||||
|
||||
//
|
||||
// Vocab utils
|
||||
//
|
||||
|
||||
struct gpt_vocab {
|
||||
using id = int32_t;
|
||||
|
|
@ -73,6 +63,7 @@ std::vector<std::string> split_string(const std::string& input, const std::strin
|
|||
bool kcpp_decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono);
|
||||
bool kcpp_decode_audio_to_f32_stereo_48k(const uint8_t * data, size_t data_size, std::vector<float> & pcm, int & T_audio);
|
||||
|
||||
typedef struct ggml_backend_device * ggml_backend_dev_t;
|
||||
std::vector<ggml_backend_dev_t> kcpp_parse_device_list(const std::string & value);
|
||||
|
||||
bool kcpp_string_ends_with(const std::string& str, const std::string& suffix);
|
||||
|
|
@ -81,52 +72,6 @@ int ComputeSharedPrefixLength(const std::vector<int> &tokens_a,const std::vector
|
|||
float ComputePrefixMatchPercent(const std::vector<int> &tokens_a,const std::vector<int> &tokens_b);
|
||||
bool FullyContainedPrefix(std::vector<int> &sequence1, std::vector<int> &sequence2);
|
||||
|
||||
//duplicated and modified from llava_embd_batch
|
||||
struct kcpp_embd_batch {
|
||||
std::vector<llama_pos> pos;
|
||||
std::vector<llama_pos> pos_view;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<llama_seq_id> seq_id_0;
|
||||
std::vector<llama_seq_id*> seq_ids;
|
||||
std::vector<int8_t> logits;
|
||||
llama_batch batch;
|
||||
|
||||
llama_batch get_view(int offset, int n_tokens, int n_embd_mmproj);
|
||||
|
||||
// Embedding constructor
|
||||
kcpp_embd_batch(
|
||||
float * embd,
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool mrope_is_image = false,
|
||||
int img_nx = 0,
|
||||
int img_ny = 0
|
||||
);
|
||||
|
||||
// Token constructor
|
||||
kcpp_embd_batch(
|
||||
std::vector<llama_token> & tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image = false,
|
||||
int img_nx = 0,
|
||||
int img_ny = 0
|
||||
);
|
||||
|
||||
private:
|
||||
void init_kcpp_batch(
|
||||
int32_t n_tokens,
|
||||
int32_t npast,
|
||||
bool use_mrope,
|
||||
bool return_all_logits,
|
||||
bool mrope_is_image,
|
||||
int img_nx,
|
||||
int img_ny
|
||||
);
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct wav16_header {
|
||||
char riff[4] = {'R', 'I', 'F', 'F'};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue