diff --git a/common/arg.cpp b/common/arg.cpp index 573ec06a6..b28b55da0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1890,7 +1890,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, const std::string & value) { params.out_file = value; } - ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS})); add_opt(common_arg( {"-ofreq", "--output-frequency"}, "N", string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp index 1253cc28b..d953cadd6 100644 --- a/examples/tts/tts.cpp +++ b/examples/tts/tts.cpp @@ -87,11 +87,11 @@ struct wav_header { uint32_t data_size; }; -static void save_wav16(const std::string & fname, const std::vector & data, int sample_rate) { +static bool save_wav16(const std::string & fname, const std::vector & data, int sample_rate) { std::ofstream file(fname, std::ios::binary); if (!file) { - LOG_ERR("%s: Failed to open file '%s' for writing", __func__, fname.c_str()); - return; + LOG_ERR("%s: Failed to open file '%s' for writing.\n", __func__, fname.c_str()); + return false; } wav_header header; @@ -108,7 +108,7 @@ static void save_wav16(const std::string & fname, const std::vector & dat file.write(reinterpret_cast(&pcm_sample), sizeof(pcm_sample)); } - file.close(); + return file.good(); } static void fill_hann_window(int length, bool periodic, float * output) { @@ -536,6 +536,7 @@ static std::string audio_data_from_speaker(json speaker, const outetts_version t int main(int argc, char ** argv) { common_params params; + params.out_file = "output.wav"; params.prompt = ""; params.n_predict = 4096; @@ -1060,8 +1061,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 } #endif - const std::string fname = "output.wav"; - const int n_sr = 24000; // sampling rate // zero out first 0.25 seconds @@ -1072,11 +1071,15 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f); LOG_INF("%s: total time: %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f); - save_wav16(fname, audio, n_sr); + int retval = 0; - LOG_INF("%s: audio written to file '%s'\n", __func__, fname.c_str()); + if (save_wav16(params.out_file, audio, n_sr)) { + LOG_INF("%s: audio written to file '%s'\n", __func__, params.out_file.c_str()); + } else { + retval = ENOENT; + } llama_backend_free(); - return 0; -} \ No newline at end of file + return retval; +} diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index a92988b7d..fdd07d9ca 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -474,6 +474,7 @@ static void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst, int ne0, int ne1, int ne2, int ne3, int ne10, int ne11, int ne12, int ne13, /*int s0, */ int s1, int s2, int s3, + /*int s00,*/ int s01, int s02, int s03, /*int s10,*/ int s11, int s12, int s13, const sycl::nd_item<3> &item_ct1) { const int i0s = item_ct1.get_local_range(2) * item_ct1.get_group(2) + @@ -495,9 +496,9 @@ static void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst, const int i12 = i2 % ne12; const int i13 = i3 % ne13; - const size_t i_src0 = i3*s3 + i2*s2 + i1*s1; + const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; - const size_t i_dst = i_src0; + const size_t i_dst = i3*s3 + i2*s2 + i1*s1; const src0_t * src0_row = src0 + i_src0; const src1_t * src1_row = src1 + i_src1; @@ -515,6 +516,7 @@ static void k_bin_bcast_unravel(const src0_t * src0, const src1_t * src1, dst_t int ne0, int ne1, int ne2, int ne3, int ne10, int ne11, int ne12, int ne13, /*int s0, */ int s1, int s2, int s3, + /*int s00,*/ int s01, int s02, int s03, /*int s10,*/ int s11, int s12, int s13, const sycl::nd_item<3> &item_ct1) { @@ -534,9 +536,9 @@ static void k_bin_bcast_unravel(const src0_t * src0, const src1_t * src1, dst_t const int i12 = i2 % ne12; const int i13 = i3 % ne13; - const size_t i_src0 = i3*s3 + i2*s2 + i1*s1; + const size_t i_src0 = i3*s03 + i2*s02 + i1*s01; const size_t i_src1 = i13*s13 + i12*s12 + i11*s11; - const size_t i_dst = i_src0; + const size_t i_dst = i3*s3 + i2*s2 + i1*s1; const src0_t * src0_row = src0 + i_src0; const src1_t * src1_row = src1 + i_src1; @@ -566,9 +568,11 @@ struct bin_bcast_sycl { int nr[4] = { nr0, nr1, nr2, nr3 }; // collapse dimensions until first broadcast dimension - int64_t cne0[] = {ne0, ne1, ne2, ne3}; + int64_t cne[] = {ne0, ne1, ne2, ne3}; + int64_t cne0[] = {ne00, ne01, ne02, ne03}; int64_t cne1[] = {ne10, ne11, ne12, ne13}; - size_t cnb0[] = {nb0, nb1, nb2, nb3}; + size_t cnb[] = {nb0, nb1, nb2, nb3}; + size_t cnb0[] = {nb00, nb01, nb02, nb03}; size_t cnb1[] = {nb10, nb11, nb12, nb13}; auto collapse = [](int64_t cne[]) { cne[0] *= cne[1]; @@ -583,32 +587,41 @@ struct bin_bcast_sycl { cnb[3] *= cne[3]; }; - for (int i = 0; i < 4; i++) { - if (nr[i] != 1) { - break; - } - if (i > 0) { - collapse_nb(cnb0, cne0); - collapse_nb(cnb1, cne1); - collapse(cne0); - collapse(cne1); + if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) { + for (int i = 0; i < 4; i++) { + if (nr[i] != 1) { + break; + } + if (i > 0) { + collapse_nb(cnb, cne); + collapse_nb(cnb0, cne0); + collapse_nb(cnb1, cne1); + collapse(cne); + collapse(cne0); + collapse(cne1); + } } } { - int64_t ne0 = cne0[0]; - int64_t ne1 = cne0[1]; - int64_t ne2 = cne0[2]; - int64_t ne3 = cne0[3]; + int64_t ne0 = cne[0]; + int64_t ne1 = cne[1]; + int64_t ne2 = cne[2]; + int64_t ne3 = cne[3]; int64_t ne10 = cne1[0]; int64_t ne11 = cne1[1]; int64_t ne12 = cne1[2]; int64_t ne13 = cne1[3]; - size_t nb0 = cnb0[0]; - size_t nb1 = cnb0[1]; - size_t nb2 = cnb0[2]; - size_t nb3 = cnb0[3]; + size_t nb0 = cnb[0]; + size_t nb1 = cnb[1]; + size_t nb2 = cnb[2]; + size_t nb3 = cnb[3]; + + size_t nb00 = cnb0[0]; + size_t nb01 = cnb0[1]; + size_t nb02 = cnb0[2]; + size_t nb03 = cnb0[3]; size_t nb10 = cnb1[0]; size_t nb11 = cnb1[1]; @@ -625,6 +638,28 @@ struct bin_bcast_sycl { size_t s12 = nb12 / sizeof(src1_t); size_t s13 = nb13 / sizeof(src1_t); + size_t s00 = nb00 / sizeof(src0_t); + size_t s01 = nb01 / sizeof(src0_t); + size_t s02 = nb02 / sizeof(src0_t); + size_t s03 = nb03 / sizeof(src0_t); + + GGML_UNUSED(s00); + + GGML_ASSERT(nb0 % sizeof(dst_t) == 0); + GGML_ASSERT(nb1 % sizeof(dst_t) == 0); + GGML_ASSERT(nb2 % sizeof(dst_t) == 0); + GGML_ASSERT(nb3 % sizeof(dst_t) == 0); + + GGML_ASSERT(nb00 % sizeof(src0_t) == 0); + GGML_ASSERT(nb01 % sizeof(src0_t) == 0); + GGML_ASSERT(nb02 % sizeof(src0_t) == 0); + GGML_ASSERT(nb03 % sizeof(src0_t) == 0); + + GGML_ASSERT(nb10 % sizeof(src1_t) == 0); + GGML_ASSERT(nb11 % sizeof(src1_t) == 0); + GGML_ASSERT(nb12 % sizeof(src1_t) == 0); + GGML_ASSERT(nb13 % sizeof(src1_t) == 0); + GGML_ASSERT(s0 == 1); GGML_ASSERT(s10 == 1); @@ -661,8 +696,8 @@ struct bin_bcast_sycl { [=](sycl::nd_item<3> item_ct1) { k_bin_bcast_unravel( src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3, - ne10, ne11, ne12, ne13, s1, s2, s3, s11, s12, - s13, item_ct1); + ne10, ne11, ne12, ne13, s1, s2, s3, s01, s02, + s03, s11, s12, s13, item_ct1); }); } } else { @@ -680,7 +715,7 @@ struct bin_bcast_sycl { [=](sycl::nd_item<3> item_ct1) { k_bin_bcast(src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3, ne10, ne11, ne12, ne13, - s1, s2, s3, s11, s12, s13, + s1, s2, s3, s01, s02, s03, s11, s12, s13, item_ct1); }); } diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 6977b705e..ef7d5fa01 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3113,8 +3113,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const int64_t i2 = i12; src0_row.data = src0_original + i02*nb02; - src1_row.data = src1_original + + i11*nb11 + i12*nb12; - dst_row.data = dst_original + i1*nb1 + i2*nb2; + src1_row.data = src1_original + i11*nb11 + i12*nb12; + dst_row.data = dst_original + i1*nb1 + i2*nb2; ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row); } diff --git a/klite.embd b/klite.embd index 0b13adea2..659fe3865 100644 --- a/klite.embd +++ b/klite.embd @@ -9719,7 +9719,7 @@ Current version indicated by LITEVER below. },()=>{ }); } - else if(localflag && no_txt_model && !has_txt2img && !koboldcpp_has_vision && !koboldcpp_has_whisper && !koboldcpp_has_tts) + else if(localflag && no_txt_model && !has_txt2img && !koboldcpp_has_vision && !koboldcpp_has_whisper && !koboldcpp_has_tts && !is_using_kcpp_with_admin()) { msgboxYesNo("This KoboldCpp instance has no models loaded. You can still use the WebUI to edit or view existing stories.

Would you like to connect to an external API service?","No Models Loaded", ()=>{ diff --git a/koboldcpp.py b/koboldcpp.py index 599e0384e..7b25e101b 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -49,7 +49,7 @@ logit_bias_max = 512 dry_seq_break_max = 128 # global vars -KcppVersion = "1.86.1" +KcppVersion = "1.86.2" showdebug = True kcpp_instance = None #global running instance global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}