mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
wip integration of llava
This commit is contained in:
parent
ca19199bc8
commit
c08d7e5042
9 changed files with 137 additions and 8003 deletions
|
@ -423,7 +423,11 @@ add_library(common2
|
||||||
common/common.cpp
|
common/common.cpp
|
||||||
common/common.h
|
common/common.h
|
||||||
common/grammar-parser.h
|
common/grammar-parser.h
|
||||||
common/grammar-parser.cpp)
|
common/grammar-parser.cpp
|
||||||
|
examples/llava/llava.cpp
|
||||||
|
examples/llava/llava.h
|
||||||
|
examples/llava/clip.cpp
|
||||||
|
examples/llava/clip.h)
|
||||||
target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
|
target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
|
||||||
target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
|
target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
|
||||||
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
|
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
|
||||||
|
|
24
Makefile
24
Makefile
|
@ -414,6 +414,10 @@ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
llava.o: examples/llava/llava.cpp examples/llava/llava.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
llavaclip.o: examples/llava/clip.cpp examples/llava/clip.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
#version 3 libs
|
#version 3 libs
|
||||||
ggml_v3.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
ggml_v3.o: otherarch/ggml_v3.c otherarch/ggml_v3.h
|
||||||
|
@ -518,11 +522,11 @@ gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
|
||||||
|
|
||||||
|
|
||||||
#generated libraries
|
#generated libraries
|
||||||
koboldcpp_default: ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_default: ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(DEFAULT_BUILD)
|
$(DEFAULT_BUILD)
|
||||||
|
|
||||||
ifdef OPENBLAS_BUILD
|
ifdef OPENBLAS_BUILD
|
||||||
koboldcpp_openblas: ggml_v4_openblas.o ggml_v3_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_openblas: ggml_v4_openblas.o ggml_v3_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(OPENBLAS_BUILD)
|
$(OPENBLAS_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_openblas:
|
koboldcpp_openblas:
|
||||||
|
@ -530,7 +534,7 @@ koboldcpp_openblas:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef FAILSAFE_BUILD
|
ifdef FAILSAFE_BUILD
|
||||||
koboldcpp_failsafe: ggml_v4_failsafe.o ggml_v3_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o ggml-quants_failsafe.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_failsafe: ggml_v4_failsafe.o ggml_v3_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o ggml-quants_failsafe.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(FAILSAFE_BUILD)
|
$(FAILSAFE_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_failsafe:
|
koboldcpp_failsafe:
|
||||||
|
@ -538,7 +542,7 @@ koboldcpp_failsafe:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_noavx2: ggml_v4_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_noavx2: ggml_v4_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(NOAVX2_BUILD)
|
$(NOAVX2_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_noavx2:
|
koboldcpp_noavx2:
|
||||||
|
@ -546,10 +550,10 @@ koboldcpp_noavx2:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef CLBLAST_BUILD
|
ifdef CLBLAST_BUILD
|
||||||
koboldcpp_clblast: ggml_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_clblast: ggml_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_clblast_noavx2:
|
koboldcpp_clblast_noavx2:
|
||||||
|
@ -563,7 +567,7 @@ koboldcpp_clblast_noavx2:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef CUBLAS_BUILD
|
ifdef CUBLAS_BUILD
|
||||||
koboldcpp_cublas: ggml_v4_cublas.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_cublas.o $(CUBLAS_OBJS) $(OBJS)
|
koboldcpp_cublas: ggml_v4_cublas.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_cublas.o $(CUBLAS_OBJS) $(OBJS)
|
||||||
$(CUBLAS_BUILD)
|
$(CUBLAS_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_cublas:
|
koboldcpp_cublas:
|
||||||
|
@ -571,7 +575,7 @@ koboldcpp_cublas:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef HIPBLAS_BUILD
|
ifdef HIPBLAS_BUILD
|
||||||
koboldcpp_hipblas: ggml_v4_cublas.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_cublas.o $(HIP_OBJS) $(OBJS)
|
koboldcpp_hipblas: ggml_v4_cublas.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_cublas.o $(HIP_OBJS) $(OBJS)
|
||||||
$(HIPBLAS_BUILD)
|
$(HIPBLAS_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_hipblas:
|
koboldcpp_hipblas:
|
||||||
|
@ -579,10 +583,10 @@ koboldcpp_hipblas:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef VULKAN_BUILD
|
ifdef VULKAN_BUILD
|
||||||
koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter_vulkan.o ggml-vulkan.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter_vulkan.o ggml-vulkan.o ggml-quants.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(VULKAN_BUILD)
|
$(VULKAN_BUILD)
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o grammar-parser.o sdcpp_default.o $(OBJS)
|
koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o ggml-quants_noavx2.o ggml-alloc.o ggml-backend.o llava.o llavaclip.o grammar-parser.o sdcpp_default.o $(OBJS)
|
||||||
$(VULKAN_BUILD)
|
$(VULKAN_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_vulkan_noavx2:
|
koboldcpp_vulkan_noavx2:
|
||||||
|
|
3
expose.h
3
expose.h
|
@ -4,6 +4,8 @@ const int stop_token_max = 16;
|
||||||
const int ban_token_max = 16;
|
const int ban_token_max = 16;
|
||||||
const int tensor_split_max = 16;
|
const int tensor_split_max = 16;
|
||||||
const int logit_bias_max = 16;
|
const int logit_bias_max = 16;
|
||||||
|
const int images_max = 4;
|
||||||
|
|
||||||
// match kobold's sampler list and order
|
// match kobold's sampler list and order
|
||||||
enum samplers
|
enum samplers
|
||||||
{
|
{
|
||||||
|
@ -60,6 +62,7 @@ struct generation_inputs
|
||||||
const int seed;
|
const int seed;
|
||||||
const char * prompt;
|
const char * prompt;
|
||||||
const char * memory;
|
const char * memory;
|
||||||
|
const char * images[images_max];
|
||||||
const int max_context_length;
|
const int max_context_length;
|
||||||
const int max_length;
|
const int max_length;
|
||||||
const float temperature;
|
const float temperature;
|
||||||
|
|
|
@ -30,11 +30,14 @@
|
||||||
#include "neox_v2.cpp"
|
#include "neox_v2.cpp"
|
||||||
#include "neox_v3.cpp"
|
#include "neox_v3.cpp"
|
||||||
#include "mpt_v3.cpp"
|
#include "mpt_v3.cpp"
|
||||||
|
#include "examples/llava/clip.h"
|
||||||
|
#include "examples/llava/llava.h"
|
||||||
|
|
||||||
//shared
|
//shared
|
||||||
std::string executable_path = "";
|
std::string executable_path = "";
|
||||||
std::string lora_filename = "";
|
std::string lora_filename = "";
|
||||||
std::string lora_base = "";
|
std::string lora_base = "";
|
||||||
|
std::string mmproj_filename = "";
|
||||||
bool generation_finished;
|
bool generation_finished;
|
||||||
float last_process_time = 0;
|
float last_process_time = 0;
|
||||||
float last_eval_time = 0;
|
float last_eval_time = 0;
|
||||||
|
@ -74,6 +77,10 @@ static llama_v2_context * llama_ctx_v2;
|
||||||
static llama_v3_context * llama_ctx_v3;
|
static llama_v3_context * llama_ctx_v3;
|
||||||
static llama_context * llama_ctx_v4;
|
static llama_context * llama_ctx_v4;
|
||||||
|
|
||||||
|
static clip_ctx * clp_ctx = nullptr; //for llava
|
||||||
|
static clip_image_u8 * clp_img_data = nullptr; //most recent image
|
||||||
|
static std::vector<llava_image> llava_images;
|
||||||
|
|
||||||
static gpt_params * kcpp_params = nullptr;
|
static gpt_params * kcpp_params = nullptr;
|
||||||
static int max_context_limit_at_load = 0;
|
static int max_context_limit_at_load = 0;
|
||||||
static int n_past = 0;
|
static int n_past = 0;
|
||||||
|
@ -1055,6 +1062,22 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(mmproj_filename != "")
|
||||||
|
{
|
||||||
|
clp_ctx = clip_model_load(mmproj_filename.c_str(), /*verbosity=*/ 1);
|
||||||
|
if(clp_ctx == nullptr) {
|
||||||
|
fprintf(stderr, "%s: error: failed to load mmproj model!\n", __func__);
|
||||||
|
return ModelLoadResult::FAIL;
|
||||||
|
}
|
||||||
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
|
const int n_embd_llm = llama_n_embd(llamamodel);
|
||||||
|
if (n_embd_clip != n_embd_llm) {
|
||||||
|
fprintf(stderr, "%s: mmproj embedding mismatch (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip, n_embd_llm);
|
||||||
|
return ModelLoadResult::FAIL;
|
||||||
|
}
|
||||||
|
clp_img_data = clip_image_u8_init();
|
||||||
|
}
|
||||||
|
|
||||||
n_vocab = llama_n_vocab(llamamodel);
|
n_vocab = llama_n_vocab(llamamodel);
|
||||||
|
|
||||||
//determine mem per token
|
//determine mem per token
|
||||||
|
@ -1541,6 +1564,27 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
|
|
||||||
std::string addedmemory = inputs.memory;
|
std::string addedmemory = inputs.memory;
|
||||||
|
|
||||||
|
//clear previous run llava embd memory, just-in-time free
|
||||||
|
for(int i=0;i<llava_images.size();++i)
|
||||||
|
{
|
||||||
|
if(llava_images[i].b64data!="" && llava_images[i].clp_img_embd!=nullptr)
|
||||||
|
{
|
||||||
|
free(llava_images[i].clp_img_embd);
|
||||||
|
llava_images[i].clp_img_embd = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llava_images.clear();
|
||||||
|
for(int x=0;x<images_max;++x)
|
||||||
|
{
|
||||||
|
std::string item = inputs.images[x];
|
||||||
|
if(item!="")
|
||||||
|
{
|
||||||
|
llava_image lv;
|
||||||
|
lv.b64data = item;
|
||||||
|
llava_images.push_back(lv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
kcpp_params->prompt = inputs.prompt;
|
kcpp_params->prompt = inputs.prompt;
|
||||||
kcpp_params->seed = inputs.seed;
|
kcpp_params->seed = inputs.seed;
|
||||||
kcpp_params->n_predict = inputs.max_length;
|
kcpp_params->n_predict = inputs.max_length;
|
||||||
|
@ -1605,6 +1649,57 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
std::vector<int> embd_inp;
|
std::vector<int> embd_inp;
|
||||||
std::vector<int> embd_inp_mem; //for storing added memory
|
std::vector<int> embd_inp_mem; //for storing added memory
|
||||||
TokenizeString(kcpp_params->prompt, embd_inp, file_format);
|
TokenizeString(kcpp_params->prompt, embd_inp, file_format);
|
||||||
|
|
||||||
|
if(clp_ctx!=nullptr && clp_img_data!=nullptr)
|
||||||
|
{
|
||||||
|
for(int i=0;i<llava_images.size();++i)
|
||||||
|
{
|
||||||
|
std::string llava_image = llava_images[i].b64data;
|
||||||
|
const std::vector<uint8_t> image_buffer = kcpp_base64_decode(llava_image);
|
||||||
|
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), clp_img_data))
|
||||||
|
{
|
||||||
|
//failed to load image
|
||||||
|
printf("\nError: Clip image %d failed to load!",i);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
llava_images[i].clp_image_tokens = 0;
|
||||||
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_params->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
|
||||||
|
printf("\nError: Clip image %d failed to create embd!",i);
|
||||||
|
}
|
||||||
|
printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for (int i = 0; i < img.image_tokens; i += n_batch)
|
||||||
|
// {
|
||||||
|
// int n_eval = img.image_tokens - i;
|
||||||
|
// if (n_eval > n_batch)
|
||||||
|
// {
|
||||||
|
// n_eval = n_batch;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// const int n_embd = llama_n_embd(model);
|
||||||
|
// llama_batch batch_img = {
|
||||||
|
// n_eval,
|
||||||
|
// nullptr,
|
||||||
|
// (img.image_embedding + i * n_embd),
|
||||||
|
// nullptr,
|
||||||
|
// nullptr,
|
||||||
|
// nullptr,
|
||||||
|
// nullptr,
|
||||||
|
// slot.n_past,
|
||||||
|
// 1, 0
|
||||||
|
// };
|
||||||
|
// if (llama_decode(ctx, batch_img))
|
||||||
|
// {
|
||||||
|
// LOG_TEE("%s : failed to eval image\n", __func__);
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
|
// slot.n_past += n_eval;
|
||||||
|
// }
|
||||||
|
|
||||||
if(addedmemory!="")
|
if(addedmemory!="")
|
||||||
{
|
{
|
||||||
TokenizeString(addedmemory, embd_inp_mem, file_format);
|
TokenizeString(addedmemory, embd_inp_mem, file_format);
|
||||||
|
|
14
koboldcpp.py
14
koboldcpp.py
|
@ -19,6 +19,7 @@ stop_token_max = 16
|
||||||
ban_token_max = 16
|
ban_token_max = 16
|
||||||
tensor_split_max = 16
|
tensor_split_max = 16
|
||||||
logit_bias_max = 16
|
logit_bias_max = 16
|
||||||
|
images_max = 4
|
||||||
bias_min_value = -100.0
|
bias_min_value = -100.0
|
||||||
bias_max_value = 100.0
|
bias_max_value = 100.0
|
||||||
|
|
||||||
|
@ -61,6 +62,7 @@ class generation_inputs(ctypes.Structure):
|
||||||
_fields_ = [("seed", ctypes.c_int),
|
_fields_ = [("seed", ctypes.c_int),
|
||||||
("prompt", ctypes.c_char_p),
|
("prompt", ctypes.c_char_p),
|
||||||
("memory", ctypes.c_char_p),
|
("memory", ctypes.c_char_p),
|
||||||
|
("images", ctypes.c_char_p * images_max),
|
||||||
("max_context_length", ctypes.c_int),
|
("max_context_length", ctypes.c_int),
|
||||||
("max_length", ctypes.c_int),
|
("max_length", ctypes.c_int),
|
||||||
("temperature", ctypes.c_float),
|
("temperature", ctypes.c_float),
|
||||||
|
@ -380,11 +382,16 @@ def load_model(model_filename):
|
||||||
ret = handle.load_model(inputs)
|
ret = handle.load_model(inputs)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
|
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}):
|
||||||
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
||||||
inputs = generation_inputs()
|
inputs = generation_inputs()
|
||||||
inputs.prompt = prompt.encode("UTF-8")
|
inputs.prompt = prompt.encode("UTF-8")
|
||||||
inputs.memory = memory.encode("UTF-8")
|
inputs.memory = memory.encode("UTF-8")
|
||||||
|
for n in range(images_max):
|
||||||
|
if not images or n >= len(images):
|
||||||
|
inputs.images[n] = "".encode("UTF-8")
|
||||||
|
else:
|
||||||
|
inputs.images[n] = images[n].encode("UTF-8")
|
||||||
if max_length >= (max_context_length-1):
|
if max_length >= (max_context_length-1):
|
||||||
max_length = max_context_length-1
|
max_length = max_context_length-1
|
||||||
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
|
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
|
||||||
|
@ -695,6 +702,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
return generate(
|
return generate(
|
||||||
prompt=genparams.get('prompt', ""),
|
prompt=genparams.get('prompt', ""),
|
||||||
memory=genparams.get('memory', ""),
|
memory=genparams.get('memory', ""),
|
||||||
|
images=genparams.get('images', []),
|
||||||
max_context_length=genparams.get('max_context_length', maxctx),
|
max_context_length=genparams.get('max_context_length', maxctx),
|
||||||
max_length=genparams.get('max_length', 100),
|
max_length=genparams.get('max_length', 100),
|
||||||
temperature=genparams.get('temperature', 0.7),
|
temperature=genparams.get('temperature', 0.7),
|
||||||
|
@ -1407,7 +1415,7 @@ def show_new_gui():
|
||||||
nocertifymode = ctk.IntVar(value=0)
|
nocertifymode = ctk.IntVar(value=0)
|
||||||
|
|
||||||
lowvram_var = ctk.IntVar()
|
lowvram_var = ctk.IntVar()
|
||||||
mmq_var = ctk.IntVar(value=1)
|
mmq_var = ctk.IntVar(value=0)
|
||||||
blas_threads_var = ctk.StringVar()
|
blas_threads_var = ctk.StringVar()
|
||||||
blas_size_var = ctk.IntVar()
|
blas_size_var = ctk.IntVar()
|
||||||
version_var = ctk.StringVar(value="0")
|
version_var = ctk.StringVar(value="0")
|
||||||
|
@ -2808,7 +2816,7 @@ def main(launch_args,start_server=True):
|
||||||
benchprompt = "11111111"
|
benchprompt = "11111111"
|
||||||
for i in range(0,10): #generate massive prompt
|
for i in range(0,10): #generate massive prompt
|
||||||
benchprompt += benchprompt
|
benchprompt += benchprompt
|
||||||
result = generate(benchprompt,memory="",max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,use_default_badwordsids=True)
|
result = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,use_default_badwordsids=True)
|
||||||
result = (result[:5] if len(result)>5 else "")
|
result = (result[:5] if len(result)>5 else "")
|
||||||
resultok = (result=="11111")
|
resultok = (result=="11111")
|
||||||
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
|
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
|
||||||
|
|
|
@ -458,4 +458,11 @@ struct mpt_model {
|
||||||
std::map<std::string, struct ggml_v3_tensor *> tensors;
|
std::map<std::string, struct ggml_v3_tensor *> tensors;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct llava_image
|
||||||
|
{
|
||||||
|
std::string b64data = "";
|
||||||
|
int32_t clp_image_tokens = 0; //holds number of tokens llava used
|
||||||
|
float * clp_img_embd = nullptr; //this holds dynamic memory and must be freed each use!
|
||||||
|
};
|
||||||
|
|
||||||
const float default_norm_eps = 1e-5f;
|
const float default_norm_eps = 1e-5f;
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
// #include "preprocessing.hpp"
|
// #include "preprocessing.hpp"
|
||||||
#include "stable-diffusion.h"
|
#include "stable-diffusion.h"
|
||||||
|
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
//#define STB_IMAGE_IMPLEMENTATION //already defined in llava
|
||||||
#include "stb_image.h"
|
#include "stb_image.h"
|
||||||
|
|
||||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||||
|
|
7987
otherarch/sdcpp/thirdparty/stb_image.h
vendored
7987
otherarch/sdcpp/thirdparty/stb_image.h
vendored
File diff suppressed because it is too large
Load diff
|
@ -207,6 +207,7 @@ std::string path_join(const std::string& p1, const std::string& p2) {
|
||||||
return p1 + "/" + p2;
|
return p1 + "/" + p2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool do_log = true;
|
||||||
void pretty_progress(int step, int steps, float time) {
|
void pretty_progress(int step, int steps, float time) {
|
||||||
if (step == 0) {
|
if (step == 0) {
|
||||||
return;
|
return;
|
||||||
|
@ -260,7 +261,6 @@ void* sd_log_cb_data = NULL;
|
||||||
|
|
||||||
#define LOG_BUFFER_SIZE 1024
|
#define LOG_BUFFER_SIZE 1024
|
||||||
|
|
||||||
static bool do_log = true;
|
|
||||||
void log_message(const char* format, ...) {
|
void log_message(const char* format, ...) {
|
||||||
if (do_log) {
|
if (do_log) {
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue