mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-17 04:09:19 +00:00
refactor: handle GGML_VK_VISIBLE_DEVICES at the Python level (#2179)
All of the C++ handling code currently does the same thing: it builds a comma-separated list from the `vulkan_info` string (one character per device index) and, if GGML_VK_VISIBLE_DEVICES isn't already set, sets GGML_VK_VISIBLE_DEVICES to that list. Once set, GGML_VK_VISIBLE_DEVICES affects the whole process, so this can be done in the same way at the Python level, before any of the loading functions run. Caveat: load_model had the default `inputs.vulkan_info = "0"`, so the default GPU would be "0" only when loading a text model.
This commit is contained in:
parent
42ce63fd3b
commit
25fab4113e
8 changed files with 3 additions and 120 deletions
17
expose.cpp
17
expose.cpp
|
|
@ -23,8 +23,6 @@
|
|||
extern "C"
|
||||
{
|
||||
|
||||
std::string vulkandeviceenv;
|
||||
|
||||
//return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
|
||||
static FileFormat file_format = FileFormat::BADFORMAT;
|
||||
static FileFormatExtraMeta file_format_meta;
|
||||
|
|
@ -38,21 +36,6 @@ extern "C"
|
|||
|
||||
file_format = check_file_format(model.c_str(),&file_format_meta);
|
||||
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
vulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)vulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
executable_path = inputs.executable_path;
|
||||
|
||||
if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4 || file_format==FileFormat::GPTJ_5)
|
||||
|
|
|
|||
6
expose.h
6
expose.h
|
|
@ -55,7 +55,6 @@ struct load_model_inputs
|
|||
const bool use_contextshift = false;
|
||||
const bool use_fastforward = false;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int batchsize = 512;
|
||||
const bool autofit = false;
|
||||
const int autofit_tax_mb = 0;
|
||||
|
|
@ -178,7 +177,6 @@ struct sd_load_model_inputs
|
|||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int threads = 0;
|
||||
const int quant = 0;
|
||||
const bool flash_attention = false;
|
||||
|
|
@ -263,7 +261,6 @@ struct whisper_load_model_inputs
|
|||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const char * devices_override = nullptr;
|
||||
const bool quiet = false;
|
||||
const int debugmode = 0;
|
||||
|
|
@ -288,7 +285,6 @@ struct tts_load_model_inputs
|
|||
const char * cts_model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int gpulayers = 0;
|
||||
const bool flash_attention = false;
|
||||
const int ttsmaxlen = 4096;
|
||||
|
|
@ -319,7 +315,6 @@ struct embeddings_load_model_inputs
|
|||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int gpulayers = 0;
|
||||
const bool flash_attention = false;
|
||||
const bool use_mmap = false;
|
||||
|
|
@ -349,7 +344,6 @@ struct music_load_model_inputs
|
|||
const bool lowvram = false;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const char * devices_override = nullptr;
|
||||
const bool quiet = false;
|
||||
const int debugmode = 0;
|
||||
|
|
|
|||
17
koboldcpp.py
17
koboldcpp.py
|
|
@ -256,7 +256,6 @@ class load_model_inputs(ctypes.Structure):
|
|||
("use_contextshift", ctypes.c_bool),
|
||||
("use_fastforward", ctypes.c_bool),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("batchsize", ctypes.c_int),
|
||||
("autofit", ctypes.c_bool),
|
||||
("autofit_tax_mb", ctypes.c_int),
|
||||
|
|
@ -356,7 +355,6 @@ class sd_load_model_inputs(ctypes.Structure):
|
|||
_fields_ = [("model_filename", ctypes.c_char_p),
|
||||
("executable_path", ctypes.c_char_p),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("threads", ctypes.c_int),
|
||||
("quant", ctypes.c_int),
|
||||
("flash_attention", ctypes.c_bool),
|
||||
|
|
@ -435,7 +433,6 @@ class whisper_load_model_inputs(ctypes.Structure):
|
|||
_fields_ = [("model_filename", ctypes.c_char_p),
|
||||
("executable_path", ctypes.c_char_p),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("devices_override", ctypes.c_char_p),
|
||||
("quiet", ctypes.c_bool),
|
||||
("debugmode", ctypes.c_int)]
|
||||
|
|
@ -456,7 +453,6 @@ class tts_load_model_inputs(ctypes.Structure):
|
|||
("cts_model_filename", ctypes.c_char_p),
|
||||
("executable_path", ctypes.c_char_p),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("gpulayers", ctypes.c_int),
|
||||
("flash_attention", ctypes.c_bool),
|
||||
("ttsmaxlen", ctypes.c_int),
|
||||
|
|
@ -483,7 +479,6 @@ class embeddings_load_model_inputs(ctypes.Structure):
|
|||
("model_filename", ctypes.c_char_p),
|
||||
("executable_path", ctypes.c_char_p),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("gpulayers", ctypes.c_int),
|
||||
("flash_attention", ctypes.c_bool),
|
||||
("use_mmap", ctypes.c_bool),
|
||||
|
|
@ -509,7 +504,6 @@ class music_load_model_inputs(ctypes.Structure):
|
|||
("lowvram", ctypes.c_bool),
|
||||
("executable_path", ctypes.c_char_p),
|
||||
("kcpp_main_gpu", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("devices_override", ctypes.c_char_p),
|
||||
("quiet", ctypes.c_bool),
|
||||
("debugmode", ctypes.c_int)]
|
||||
|
|
@ -993,13 +987,9 @@ def set_backend_props(inputs):
|
|||
elif (args.usecuda and "3" in args.usecuda):
|
||||
inputs.kcpp_main_gpu = 3
|
||||
|
||||
if args.usevulkan: #is an empty array if using vulkan without defined gpu
|
||||
s = ""
|
||||
for it in range(0,len(args.usevulkan)):
|
||||
s += str(args.usevulkan[it])
|
||||
inputs.vulkan_info = s.encode("UTF-8")
|
||||
else:
|
||||
inputs.vulkan_info = "".encode("UTF-8")
|
||||
if "GGML_VK_VISIBLE_DEVICES" not in os.environ:
|
||||
if args.usevulkan: # is an empty array if using vulkan without defined gpu
|
||||
os.environ["GGML_VK_VISIBLE_DEVICES"] = ','.join([str(g) for g in args.usevulkan])
|
||||
|
||||
# set universal flags
|
||||
inputs.devices_override = (args.device if args.device else "").encode("UTF-8")
|
||||
|
|
@ -1890,7 +1880,6 @@ def load_model(model_filename):
|
|||
inputs.low_vram = True if args.lowvram else False
|
||||
inputs.use_mmq = False if args.nommq else True
|
||||
inputs.splitmode = splitmode_choices_to_int(args.splitmode) #layer=1, row=2, tensor=3
|
||||
inputs.vulkan_info = "0".encode("UTF-8")
|
||||
inputs.blasthreads = args.blasthreads
|
||||
inputs.use_mmap = args.usemmap
|
||||
inputs.use_mlock = args.usemlock
|
||||
|
|
|
|||
|
|
@ -33,22 +33,6 @@ bool musictype_load_model(const music_load_model_inputs inputs)
|
|||
{
|
||||
music_is_quiet = inputs.quiet;
|
||||
|
||||
//duplicated from expose.cpp
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
musicvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)musicvulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
std::string musicllm_filename = inputs.musicllm_filename;
|
||||
std::string musicembedding_filename = inputs.musicembedding_filename;
|
||||
std::string musicdiffusion_filename = inputs.musicdiffusion_filename;
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@
|
|||
#endif
|
||||
|
||||
static llama_context * embeddings_ctx = nullptr; //text to codes ctx
|
||||
static std::string ttsvulkandeviceenv;
|
||||
bool embeddings_debug = false;
|
||||
static int max_batchsize = 512;
|
||||
static std::string last_output = "";
|
||||
|
|
@ -82,27 +81,12 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
|
|||
|
||||
bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
|
||||
{
|
||||
//duplicated from expose.cpp
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
std::vector<ggml_backend_dev_t> devices_override;
|
||||
std::string dev_override_str = inputs.devices_override;
|
||||
if(dev_override_str!="")
|
||||
{
|
||||
devices_override = kcpp_parse_device_list(dev_override_str);
|
||||
}
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
ttsvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)ttsvulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
llama_backend_init();
|
||||
|
||||
|
|
|
|||
|
|
@ -187,7 +187,6 @@ static uint8_t * upscale_src_buffer = NULL;
|
|||
static std::vector<uint8_t *> input_extraimage_buffers;
|
||||
const int max_extra_images = 4;
|
||||
|
||||
static std::string sdvulkandeviceenv;
|
||||
static std::string sdmaingpuenv;
|
||||
static int cfg_tiled_vae_threshold = 0;
|
||||
static int cfg_square_limit = 0;
|
||||
|
|
@ -430,22 +429,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");
|
||||
}
|
||||
|
||||
//duplicated from expose.cpp
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)sdvulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
sd_params = new SDParams();
|
||||
sd_params->model_path = inputs.model_filename;
|
||||
sd_params->wtype = SD_TYPE_COUNT;
|
||||
|
|
|
|||
|
|
@ -478,7 +478,6 @@ static llama_context * cts_ctx = nullptr; //codes to speech
|
|||
static TTS_VER ttsver = TTS_VER_2;
|
||||
static int ttsdebugmode = 0;
|
||||
static bool tts_is_quiet = false;
|
||||
static std::string ttsvulkandeviceenv;
|
||||
static std::string last_generated_audio = "";
|
||||
static std::string last_generation_settings_prompt = ""; //for caching purposes to fix ST bug
|
||||
static int last_generation_settings_speaker_seed;
|
||||
|
|
@ -511,27 +510,12 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
|
|||
tts_is_quiet = inputs.quiet;
|
||||
tts_executable_path = inputs.executable_path;
|
||||
|
||||
//duplicated from expose.cpp
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
std::vector<ggml_backend_dev_t> devices_override;
|
||||
std::string dev_override_str = inputs.devices_override;
|
||||
if(dev_override_str!="")
|
||||
{
|
||||
devices_override = kcpp_parse_device_list(dev_override_str);
|
||||
}
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
ttsvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)ttsvulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
llama_backend_init();
|
||||
|
||||
|
|
|
|||
|
|
@ -59,28 +59,10 @@ static std::string output_txt(struct whisper_context * ctx) {
|
|||
|
||||
void cb_log_disable(enum ggml_log_level , const char * , void * ) { }
|
||||
|
||||
static std::string whispervulkandeviceenv;
|
||||
bool whispertype_load_model(const whisper_load_model_inputs inputs)
|
||||
{
|
||||
whisper_is_quiet = inputs.quiet;
|
||||
|
||||
//duplicated from expose.cpp
|
||||
std::string vulkan_info_raw = inputs.vulkan_info;
|
||||
std::string vulkan_info_str = "";
|
||||
for (size_t i = 0; i < vulkan_info_raw.length(); ++i) {
|
||||
vulkan_info_str += vulkan_info_raw[i];
|
||||
if (i < vulkan_info_raw.length() - 1) {
|
||||
vulkan_info_str += ",";
|
||||
}
|
||||
}
|
||||
const char* existingenv = getenv("GGML_VK_VISIBLE_DEVICES");
|
||||
if(!existingenv && vulkan_info_str!="")
|
||||
{
|
||||
whispervulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str;
|
||||
putenv((char*)whispervulkandeviceenv.c_str());
|
||||
}
|
||||
|
||||
|
||||
std::string modelfile = inputs.model_filename;
|
||||
printf("\nLoading Whisper Model: %s",modelfile.c_str());
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue