mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
sd: sync to master-391-5865b5e (#1872)
This commit is contained in:
parent
03cec02a3d
commit
510508e7da
5 changed files with 1544 additions and 1395 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -106,8 +106,12 @@ const char* unused_tensors[] = {
|
|||
"model_ema.diffusion_model",
|
||||
"embedding_manager",
|
||||
"denoiser.sigmas",
|
||||
"edm_vpred.sigma_max",
|
||||
"text_encoders.t5xxl.transformer.encoder.embed_tokens.weight", // only used during training
|
||||
"ztsnr", // Found in some SDXL vpred models
|
||||
"edm_vpred.sigma_min", // Found in CosXL
|
||||
// TODO: find another way to avoid the "unknown tensor" for these two
|
||||
// "edm_vpred.sigma_max", // Used to detect CosXL
|
||||
// "v_pred", // Used to detect SDXL vpred models
|
||||
"text_encoders.llm.output.weight",
|
||||
"text_encoders.llm.lm_head.",
|
||||
"first_stage_model.bn.",
|
||||
|
|
|
|||
|
|
@ -2267,12 +2267,12 @@ public:
|
|||
}
|
||||
|
||||
ggml_tensor* vae_encode(ggml_context* work_ctx, ggml_tensor* x, bool encode_video = false) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
ggml_tensor* result = nullptr;
|
||||
int64_t t0 = ggml_time_ms();
|
||||
ggml_tensor* result = nullptr;
|
||||
const int vae_scale_factor = get_vae_scale_factor();
|
||||
int W = x->ne[0] / vae_scale_factor;
|
||||
int H = x->ne[1] / vae_scale_factor;
|
||||
int C = get_latent_channel();
|
||||
int C = get_latent_channel();
|
||||
if (vae_tiling_params.enabled && !encode_video) {
|
||||
// TODO wan2.2 vae support?
|
||||
int ne2;
|
||||
|
|
@ -2397,8 +2397,8 @@ public:
|
|||
const int vae_scale_factor = get_vae_scale_factor();
|
||||
int64_t W = x->ne[0] * vae_scale_factor;
|
||||
int64_t H = x->ne[1] * vae_scale_factor;
|
||||
int64_t C = 3;
|
||||
ggml_tensor* result = nullptr;
|
||||
int64_t C = 3;
|
||||
ggml_tensor* result = nullptr;
|
||||
if (decode_video) {
|
||||
int T = x->ne[2];
|
||||
if (sd_version_is_wan(version)) {
|
||||
|
|
|
|||
|
|
@ -411,19 +411,19 @@ const char* sd_get_system_info() {
|
|||
static char buffer[1024];
|
||||
std::stringstream ss;
|
||||
ss << "System Info: \n";
|
||||
ss << " SSE3 = " << ggml_cpu_has_sse3() << std::endl;
|
||||
ss << " AVX = " << ggml_cpu_has_avx() << std::endl;
|
||||
ss << " AVX2 = " << ggml_cpu_has_avx2() << std::endl;
|
||||
ss << " AVX512 = " << ggml_cpu_has_avx512() << std::endl;
|
||||
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << std::endl;
|
||||
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << std::endl;
|
||||
ss << " FMA = " << ggml_cpu_has_fma() << std::endl;
|
||||
ss << " NEON = " << ggml_cpu_has_neon() << std::endl;
|
||||
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << std::endl;
|
||||
ss << " F16C = " << ggml_cpu_has_f16c() << std::endl;
|
||||
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << std::endl;
|
||||
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << std::endl;
|
||||
ss << " VSX = " << ggml_cpu_has_vsx() << std::endl;
|
||||
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
|
||||
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
|
||||
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
|
||||
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
|
||||
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
|
||||
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
|
||||
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
|
||||
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
|
||||
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
|
||||
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
|
||||
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
|
||||
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
|
||||
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
|
||||
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
|
||||
return buffer;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,12 @@ namespace ZImage {
|
|||
JointAttention(int64_t hidden_size, int64_t head_dim, int64_t num_heads, int64_t num_kv_heads, bool qk_norm)
|
||||
: head_dim(head_dim), num_heads(num_heads), num_kv_heads(num_kv_heads), qk_norm(qk_norm) {
|
||||
blocks["qkv"] = std::make_shared<Linear>(hidden_size, (num_heads + num_kv_heads * 2) * head_dim, false);
|
||||
blocks["out"] = std::make_shared<Linear>(num_heads * head_dim, hidden_size, false);
|
||||
float scale = 1.f;
|
||||
#if GGML_USE_HIP
|
||||
// Prevent NaN issues with certain ROCm setups
|
||||
scale = 1.f / 16.f;
|
||||
#endif
|
||||
blocks["out"] = std::make_shared<Linear>(num_heads * head_dim, hidden_size, false, false, false, scale);
|
||||
if (qk_norm) {
|
||||
blocks["q_norm"] = std::make_shared<RMSNorm>(head_dim);
|
||||
blocks["k_norm"] = std::make_shared<RMSNorm>(head_dim);
|
||||
|
|
@ -93,7 +98,7 @@ namespace ZImage {
|
|||
#endif
|
||||
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
||||
// For example, when using CUDA but the weights are k-quants.
|
||||
blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, 1.f / 128.f);
|
||||
blocks["w2"] = std::make_shared<Linear>(hidden_dim, dim, false, false, force_prec_f32, scale);
|
||||
blocks["w3"] = std::make_shared<Linear>(dim, hidden_dim, false);
|
||||
}
|
||||
|
||||
|
|
@ -667,4 +672,4 @@ namespace ZImage {
|
|||
|
||||
} // namespace ZImage
|
||||
|
||||
#endif // __Z_IMAGE_HPP__
|
||||
#endif // __Z_IMAGE_HPP__
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue