mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-15 03:19:41 +00:00
* Update stable-diffusion.cpp to 5900ef6605c6 (new API)
* Clean up pending LoRA code and simplify LoRA changes to upstream
* Move VAE tiling disabling for TAESD to sdtype_adapter.cpp
* Move auxiliary ctx functions to sdtype_adapter.cpp
* Use ref_images parameter for Kontext images
* Drop clip skip workaround (fixed upstream)
* Workaround for flash attention with img2img (leejet/stable-diffusion.cpp#756)
* Workaround for Chroma with flash attention, debug prints
* Disable forcing CLIP weights to F32 for reduced memory usage
187 lines
7 KiB
C++
187 lines
7 KiB
C++
#ifndef __DIFFUSION_MODEL_H__
|
|
#define __DIFFUSION_MODEL_H__
|
|
|
|
#include "flux.hpp"
|
|
#include "mmdit.hpp"
|
|
#include "unet.hpp"
|
|
|
|
/**
 * Abstract interface shared by the diffusion models declared in this header.
 *
 * Every operation is pure virtual. The concrete models in this file
 * (UNetModel, MMDiTModel, FluxModel) implement them by delegating to a
 * runner object they hold as a member.
 */
struct DiffusionModel {
    // A polymorphic base needs a virtual destructor: without it, destroying a
    // concrete model through a DiffusionModel pointer is undefined behaviour.
    virtual ~DiffusionModel() = default;

    /**
     * Run the model once.
     *
     * Not every implementation consumes every argument; each concrete model
     * forwards only the subset its runner accepts.
     *
     * NOTE: default arguments of a virtual function are selected by the static
     * type of the call expression, so overriders should repeat the same
     * defaults (the ones in this file do).
     *
     * @param n_threads         Thread count handed to the runner.
     * @param x                 Input tensor (presumably the latent being processed -- confirm against the runners).
     * @param timesteps         Timestep tensor.
     * @param context           Conditioning context tensor.
     * @param c_concat          Extra conditioning tensor (name suggests channel concatenation -- confirm).
     * @param y                 Additional conditioning tensor.
     * @param guidance          Guidance tensor.
     * @param ref_latents       Reference latents; empty by default.
     * @param num_video_frames  Number of video frames; defaults to -1.
     * @param controls          Control tensors; empty by default.
     * @param control_strength  Scalar passed alongside `controls`; defaults to 0.
     * @param output            Optional out-parameter forwarded to the runner; defaults to NULL.
     * @param output_ctx        Optional ggml context forwarded to the runner with `output`; defaults to NULL.
     * @param skip_layers       Layer indices forwarded to runners that accept them; empty by default.
     */
    virtual void compute(int n_threads,
                         struct ggml_tensor* x,
                         struct ggml_tensor* timesteps,
                         struct ggml_tensor* context,
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         std::vector<ggml_tensor*> ref_latents    = {},
                         int num_video_frames                     = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength                   = 0.f,
                         struct ggml_tensor** output              = NULL,
                         struct ggml_context* output_ctx          = NULL,
                         std::vector<int> skip_layers             = std::vector<int>()) = 0;

    /// Allocate the buffer that holds the model parameters.
    virtual void alloc_params_buffer() = 0;

    /// Release the buffer that holds the model parameters.
    virtual void free_params_buffer() = 0;

    /// Release the buffer used during compute().
    virtual void free_compute_buffer() = 0;

    /// Register the model's parameter tensors into `tensors`, keyed by name.
    virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;

    /// Size of the parameter buffer, as reported by the implementation.
    virtual size_t get_params_buffer_size() = 0;

    /// Value reported by the implementation as its "adm_in_channels".
    virtual int64_t get_adm_in_channels() = 0;
};
|
|
|
|
struct UNetModel : public DiffusionModel {
|
|
UNetModelRunner unet;
|
|
|
|
UNetModel(ggml_backend_t backend,
|
|
const String2GGMLType& tensor_types = {},
|
|
SDVersion version = VERSION_SD1,
|
|
bool flash_attn = false)
|
|
: unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
|
|
}
|
|
|
|
void alloc_params_buffer() {
|
|
unet.alloc_params_buffer();
|
|
}
|
|
|
|
void free_params_buffer() {
|
|
unet.free_params_buffer();
|
|
}
|
|
|
|
void free_compute_buffer() {
|
|
unet.free_compute_buffer();
|
|
}
|
|
|
|
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
|
unet.get_param_tensors(tensors, "model.diffusion_model");
|
|
}
|
|
|
|
size_t get_params_buffer_size() {
|
|
return unet.get_params_buffer_size();
|
|
}
|
|
|
|
int64_t get_adm_in_channels() {
|
|
return unet.unet.adm_in_channels;
|
|
}
|
|
|
|
void compute(int n_threads,
|
|
struct ggml_tensor* x,
|
|
struct ggml_tensor* timesteps,
|
|
struct ggml_tensor* context,
|
|
struct ggml_tensor* c_concat,
|
|
struct ggml_tensor* y,
|
|
struct ggml_tensor* guidance,
|
|
std::vector<ggml_tensor*> ref_latents = {},
|
|
int num_video_frames = -1,
|
|
std::vector<struct ggml_tensor*> controls = {},
|
|
float control_strength = 0.f,
|
|
struct ggml_tensor** output = NULL,
|
|
struct ggml_context* output_ctx = NULL,
|
|
std::vector<int> skip_layers = std::vector<int>()) {
|
|
(void)skip_layers; // SLG doesn't work with UNet models
|
|
return unet.compute(n_threads, x, timesteps, context, c_concat, y, num_video_frames, controls, control_strength, output, output_ctx);
|
|
}
|
|
};
|
|
|
|
struct MMDiTModel : public DiffusionModel {
|
|
MMDiTRunner mmdit;
|
|
|
|
MMDiTModel(ggml_backend_t backend,
|
|
const String2GGMLType& tensor_types = {})
|
|
: mmdit(backend, tensor_types, "model.diffusion_model") {
|
|
}
|
|
|
|
void alloc_params_buffer() {
|
|
mmdit.alloc_params_buffer();
|
|
}
|
|
|
|
void free_params_buffer() {
|
|
mmdit.free_params_buffer();
|
|
}
|
|
|
|
void free_compute_buffer() {
|
|
mmdit.free_compute_buffer();
|
|
}
|
|
|
|
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
|
mmdit.get_param_tensors(tensors, "model.diffusion_model");
|
|
}
|
|
|
|
size_t get_params_buffer_size() {
|
|
return mmdit.get_params_buffer_size();
|
|
}
|
|
|
|
int64_t get_adm_in_channels() {
|
|
return 768 + 1280;
|
|
}
|
|
|
|
void compute(int n_threads,
|
|
struct ggml_tensor* x,
|
|
struct ggml_tensor* timesteps,
|
|
struct ggml_tensor* context,
|
|
struct ggml_tensor* c_concat,
|
|
struct ggml_tensor* y,
|
|
struct ggml_tensor* guidance,
|
|
std::vector<ggml_tensor*> ref_latents = {},
|
|
int num_video_frames = -1,
|
|
std::vector<struct ggml_tensor*> controls = {},
|
|
float control_strength = 0.f,
|
|
struct ggml_tensor** output = NULL,
|
|
struct ggml_context* output_ctx = NULL,
|
|
std::vector<int> skip_layers = std::vector<int>()) {
|
|
return mmdit.compute(n_threads, x, timesteps, context, y, output, output_ctx, skip_layers);
|
|
}
|
|
};
|
|
|
|
struct FluxModel : public DiffusionModel {
|
|
Flux::FluxRunner flux;
|
|
|
|
FluxModel(ggml_backend_t backend,
|
|
const String2GGMLType& tensor_types = {},
|
|
SDVersion version = VERSION_FLUX,
|
|
bool flash_attn = false,
|
|
bool use_mask = false)
|
|
: flux(backend, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
|
|
}
|
|
|
|
void alloc_params_buffer() {
|
|
flux.alloc_params_buffer();
|
|
}
|
|
|
|
void free_params_buffer() {
|
|
flux.free_params_buffer();
|
|
}
|
|
|
|
void free_compute_buffer() {
|
|
flux.free_compute_buffer();
|
|
}
|
|
|
|
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) {
|
|
flux.get_param_tensors(tensors, "model.diffusion_model");
|
|
}
|
|
|
|
size_t get_params_buffer_size() {
|
|
return flux.get_params_buffer_size();
|
|
}
|
|
|
|
int64_t get_adm_in_channels() {
|
|
return 768;
|
|
}
|
|
|
|
void compute(int n_threads,
|
|
struct ggml_tensor* x,
|
|
struct ggml_tensor* timesteps,
|
|
struct ggml_tensor* context,
|
|
struct ggml_tensor* c_concat,
|
|
struct ggml_tensor* y,
|
|
struct ggml_tensor* guidance,
|
|
std::vector<ggml_tensor*> ref_latents = {},
|
|
int num_video_frames = -1,
|
|
std::vector<struct ggml_tensor*> controls = {},
|
|
float control_strength = 0.f,
|
|
struct ggml_tensor** output = NULL,
|
|
struct ggml_context* output_ctx = NULL,
|
|
std::vector<int> skip_layers = std::vector<int>()) {
|
|
return flux.compute(n_threads, x, timesteps, context, c_concat, y, guidance, ref_latents, output, output_ctx, skip_layers);
|
|
}
|
|
};
|
|
|
|
#endif
|