fix wrong order, fix llava debug mode failure

This commit is contained in:
Concedo 2024-07-17 15:30:19 +08:00
parent e99fa531a2
commit 5988243aee
3 changed files with 33 additions and 26 deletions

View file

@ -146,6 +146,10 @@ inline bool LogitsDuplicated(std::vector<float> & arr1, std::vector<float> & arr
static std::string FileFormatTokenizeID(int id, FileFormat file_format, bool return_special = false) static std::string FileFormatTokenizeID(int id, FileFormat file_format, bool return_special = false)
{ {
if(id<0)
{
return ""; //placeholder IDs cannot be tokenized!
}
if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2) if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2)
{ {
return std::string(llama_v2_token_to_str(llama_ctx_v2, id)); return std::string(llama_v2_token_to_str(llama_ctx_v2, id));

View file

@ -526,6 +526,7 @@ def string_contains_sequence_substring(inputstr,sequences):
import struct import struct
def read_gguf_layer_count(file_path): def read_gguf_layer_count(file_path):
try:
fsize = os.path.getsize(file_path) fsize = os.path.getsize(file_path)
if fsize < 10000: #ignore files under 10kb if fsize < 10000: #ignore files under 10kb
return 0 return 0
@ -550,6 +551,8 @@ def read_gguf_layer_count(file_path):
return 0 return 0
else: else:
return 0 #not found return 0 #not found
except Exception as ex:
return 0
def load_model(model_filename): def load_model(model_filename):
global args global args
@ -2008,7 +2011,7 @@ def show_gui():
(lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"), (lib_clblast_noavx2, "CLBlast NoAVX2 (Old CPU)"),
(lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"), (lib_vulkan_noavx2, "Vulkan NoAVX2 (Old CPU)"),
(lib_failsafe, "Failsafe Mode (Old CPU)")] (lib_failsafe, "Failsafe Mode (Old CPU)")]
openblas_option, clblast_option, cublas_option, hipblas_option, vulkan_option, default_option, clblast_noavx2_option, vulkan_noavx2_option, noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) openblas_option, default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
# slider data # slider data
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"] blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024", "2048"]
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"] blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024","2048"]
@ -3952,7 +3955,7 @@ if __name__ == '__main__':
advparser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+') advparser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true') advparser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
advparser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') advparser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
advparser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true') advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true')
advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true') advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true')
advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0) advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0)
advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true') advparser.add_argument("--skiplauncher", help="Doesn't display or use the GUI launcher.", action='store_true')

View file

@ -18322,7 +18322,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
case GGML_TYPE_IQ1_M: case GGML_TYPE_IQ1_M:
case GGML_TYPE_Q2_K: case GGML_TYPE_Q2_K:
case GGML_TYPE_Q3_K: case GGML_TYPE_Q3_K:
case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_IQ4_NL; break; case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_Q4_0; break;
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break; case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break; case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break; case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;