mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
sd: sync to master-453-4ff2c8c (#1907)
This commit is contained in:
parent
77082dddfb
commit
0ef55844d3
9 changed files with 378 additions and 223 deletions
|
|
@ -100,17 +100,28 @@ static void print_utf8(FILE* stream, const char* utf8) {
|
|||
? GetStdHandle(STD_ERROR_HANDLE)
|
||||
: GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
|
||||
if (wlen <= 0)
|
||||
return;
|
||||
DWORD mode;
|
||||
BOOL is_console = GetConsoleMode(h, &mode);
|
||||
|
||||
wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
|
||||
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen);
|
||||
if (is_console) {
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
|
||||
if (wlen <= 0)
|
||||
return;
|
||||
|
||||
DWORD written;
|
||||
WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);
|
||||
wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
|
||||
if (!wbuf)
|
||||
return;
|
||||
|
||||
free(wbuf);
|
||||
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen);
|
||||
|
||||
DWORD written;
|
||||
WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);
|
||||
|
||||
free(wbuf);
|
||||
} else {
|
||||
DWORD written;
|
||||
WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL);
|
||||
}
|
||||
#else
|
||||
fputs(utf8, stream);
|
||||
#endif
|
||||
|
|
@ -447,6 +458,7 @@ struct SDContextParams {
|
|||
rng_type_t rng_type = CUDA_RNG;
|
||||
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
|
||||
bool offload_params_to_cpu = false;
|
||||
bool enable_mmap = false;
|
||||
bool control_net_cpu = false;
|
||||
bool clip_on_cpu = false;
|
||||
bool vae_on_cpu = false;
|
||||
|
|
@ -592,6 +604,10 @@ struct SDContextParams {
|
|||
"--offload-to-cpu",
|
||||
"place the weights in RAM to save VRAM, and automatically load them into VRAM when needed",
|
||||
true, &offload_params_to_cpu},
|
||||
{"",
|
||||
"--mmap",
|
||||
"whether to memory-map model",
|
||||
true, &enable_mmap},
|
||||
{"",
|
||||
"--control-net-cpu",
|
||||
"keep controlnet in cpu (for low vram)",
|
||||
|
|
@ -889,6 +905,7 @@ struct SDContextParams {
|
|||
<< " sampler_rng_type: " << sd_rng_type_name(sampler_rng_type) << ",\n"
|
||||
<< " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n"
|
||||
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
|
||||
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
|
||||
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
|
||||
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
|
||||
<< " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
|
||||
|
|
@ -952,6 +969,7 @@ struct SDContextParams {
|
|||
prediction,
|
||||
lora_apply_mode,
|
||||
offload_params_to_cpu,
|
||||
enable_mmap,
|
||||
clip_on_cpu,
|
||||
control_net_cpu,
|
||||
vae_on_cpu,
|
||||
|
|
@ -1499,7 +1517,7 @@ struct SDGenerationParams {
|
|||
on_cache_mode_arg},
|
||||
{"",
|
||||
"--cache-option",
|
||||
"named cache params (key=value format, comma-separated):\n - easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=\n - dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=\n Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
|
||||
"named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"",
|
||||
on_cache_option_arg},
|
||||
{"",
|
||||
"--cache-preset",
|
||||
|
|
|
|||
|
|
@ -869,7 +869,7 @@ static bool sample_k_diffusion(sample_method_t method,
|
|||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
// denoise
|
||||
ggml_tensor* denoised = model(x, sigmas[i], i + 1);
|
||||
ggml_tensor* denoised = model(x, sigmas[i], -(i + 1));
|
||||
if (denoised == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -927,7 +927,7 @@ static bool sample_k_diffusion(sample_method_t method,
|
|||
|
||||
for (int i = 0; i < steps; i++) {
|
||||
// denoise
|
||||
ggml_tensor* denoised = model(x, sigmas[i], i + 1);
|
||||
ggml_tensor* denoised = model(x, sigmas[i], -(i + 1));
|
||||
if (denoised == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -370,6 +370,95 @@ std::string format_frame_idx(std::string pattern, int frame_idx) {
|
|||
return result;
|
||||
}
|
||||
|
||||
bool save_results(const SDCliParams& cli_params,
|
||||
const SDContextParams& ctx_params,
|
||||
const SDGenerationParams& gen_params,
|
||||
sd_image_t* results,
|
||||
int num_results) {
|
||||
if (results == nullptr || num_results <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
fs::path out_path = cli_params.output_path;
|
||||
|
||||
if (!out_path.parent_path().empty()) {
|
||||
std::error_code ec;
|
||||
fs::create_directories(out_path.parent_path(), ec);
|
||||
if (ec) {
|
||||
LOG_ERROR("failed to create directory '%s': %s",
|
||||
out_path.parent_path().string().c_str(), ec.message().c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
fs::path base_path = out_path;
|
||||
fs::path ext = out_path.has_extension() ? out_path.extension() : fs::path{};
|
||||
if (!ext.empty())
|
||||
base_path.replace_extension();
|
||||
|
||||
std::string ext_lower = ext.string();
|
||||
std::transform(ext_lower.begin(), ext_lower.end(), ext_lower.begin(), ::tolower);
|
||||
bool is_jpg = (ext_lower == ".jpg" || ext_lower == ".jpeg" || ext_lower == ".jpe");
|
||||
|
||||
int output_begin_idx = cli_params.output_begin_idx;
|
||||
if (output_begin_idx < 0) {
|
||||
output_begin_idx = 0;
|
||||
}
|
||||
|
||||
auto write_image = [&](const fs::path& path, int idx) {
|
||||
const sd_image_t& img = results[idx];
|
||||
if (!img.data)
|
||||
return;
|
||||
|
||||
std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx);
|
||||
int ok = 0;
|
||||
if (is_jpg) {
|
||||
ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.c_str());
|
||||
} else {
|
||||
ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str());
|
||||
}
|
||||
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
|
||||
};
|
||||
|
||||
if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
|
||||
if (!is_jpg && ext_lower != ".png")
|
||||
ext = ".png";
|
||||
fs::path pattern = base_path;
|
||||
pattern += ext;
|
||||
|
||||
for (int i = 0; i < num_results; ++i) {
|
||||
fs::path img_path = format_frame_idx(pattern.string(), output_begin_idx + i);
|
||||
write_image(img_path, i);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cli_params.mode == VID_GEN && num_results > 1) {
|
||||
if (ext_lower != ".avi")
|
||||
ext = ".avi";
|
||||
fs::path video_path = base_path;
|
||||
video_path += ext;
|
||||
create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps);
|
||||
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!is_jpg && ext_lower != ".png")
|
||||
ext = ".png";
|
||||
|
||||
for (int i = 0; i < num_results; ++i) {
|
||||
fs::path img_path = base_path;
|
||||
if (num_results > 1) {
|
||||
img_path += "_" + std::to_string(output_begin_idx + i);
|
||||
}
|
||||
img_path += ext;
|
||||
write_image(img_path, i);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
if (argc > 1 && std::string(argv[1]) == "--version") {
|
||||
std::cout << version_string() << "\n";
|
||||
|
|
@ -713,101 +802,8 @@ int main(int argc, const char* argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
// create directory if not exists
|
||||
{
|
||||
const fs::path out_path = cli_params.output_path;
|
||||
if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) {
|
||||
std::error_code ec;
|
||||
fs::create_directories(out_dir, ec); // OK if already exists
|
||||
if (ec) {
|
||||
LOG_ERROR("failed to create directory '%s': %s",
|
||||
out_dir.string().c_str(), ec.message().c_str());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string base_path;
|
||||
std::string file_ext;
|
||||
std::string file_ext_lower;
|
||||
bool is_jpg;
|
||||
size_t last_dot_pos = cli_params.output_path.find_last_of(".");
|
||||
size_t last_slash_pos = std::min(cli_params.output_path.find_last_of("/"),
|
||||
cli_params.output_path.find_last_of("\\"));
|
||||
if (last_dot_pos != std::string::npos && (last_slash_pos == std::string::npos || last_dot_pos > last_slash_pos)) { // filename has extension
|
||||
base_path = cli_params.output_path.substr(0, last_dot_pos);
|
||||
file_ext = file_ext_lower = cli_params.output_path.substr(last_dot_pos);
|
||||
std::transform(file_ext.begin(), file_ext.end(), file_ext_lower.begin(), ::tolower);
|
||||
is_jpg = (file_ext_lower == ".jpg" || file_ext_lower == ".jpeg" || file_ext_lower == ".jpe");
|
||||
} else {
|
||||
base_path = cli_params.output_path;
|
||||
file_ext = file_ext_lower = "";
|
||||
is_jpg = false;
|
||||
}
|
||||
|
||||
if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
|
||||
std::string final_output_path = cli_params.output_path;
|
||||
if (cli_params.output_begin_idx == -1) {
|
||||
cli_params.output_begin_idx = 0;
|
||||
}
|
||||
// writing image sequence, default to PNG
|
||||
if (!is_jpg && file_ext_lower != ".png") {
|
||||
base_path += file_ext;
|
||||
file_ext = ".png";
|
||||
}
|
||||
final_output_path = base_path + file_ext;
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
if (results[i].data == nullptr) {
|
||||
continue;
|
||||
}
|
||||
std::string final_image_path = format_frame_idx(final_output_path, cli_params.output_begin_idx + i);
|
||||
if (is_jpg) {
|
||||
int write_ok = stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
|
||||
results[i].data, 90, get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + i).c_str());
|
||||
LOG_INFO("save result JPEG image %d to '%s' (%s)", i, final_image_path.c_str(), write_ok == 0 ? "failure" : "success");
|
||||
} else {
|
||||
int write_ok = stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
|
||||
results[i].data, 0, get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + i).c_str());
|
||||
LOG_INFO("save result PNG image %d to '%s' (%s)", i, final_image_path.c_str(), write_ok == 0 ? "failure" : "success");
|
||||
}
|
||||
}
|
||||
} else if (cli_params.mode == VID_GEN && num_results > 1) {
|
||||
std::string final_output_path = cli_params.output_path;
|
||||
if (file_ext_lower != ".avi") {
|
||||
if (!is_jpg && file_ext_lower != ".png") {
|
||||
base_path += file_ext;
|
||||
}
|
||||
file_ext = ".avi";
|
||||
final_output_path = base_path + file_ext;
|
||||
}
|
||||
create_mjpg_avi_from_sd_images(final_output_path.c_str(), results, num_results, gen_params.fps);
|
||||
LOG_INFO("save result MJPG AVI video to '%s'\n", final_output_path.c_str());
|
||||
} else {
|
||||
// appending ".png" to absent or unknown extension
|
||||
if (!is_jpg && file_ext_lower != ".png") {
|
||||
base_path += file_ext;
|
||||
file_ext = ".png";
|
||||
}
|
||||
if (cli_params.output_begin_idx == -1) {
|
||||
cli_params.output_begin_idx = 1;
|
||||
}
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
if (results[i].data == nullptr) {
|
||||
continue;
|
||||
}
|
||||
int write_ok;
|
||||
std::string final_image_path;
|
||||
final_image_path = i > 0 ? base_path + "_" + std::to_string(cli_params.output_begin_idx + i) + file_ext : base_path + file_ext;
|
||||
if (is_jpg) {
|
||||
write_ok = stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
|
||||
results[i].data, 90, get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + i).c_str());
|
||||
LOG_INFO("save result JPEG image to '%s' (%s)", final_image_path.c_str(), write_ok == 0 ? "failure" : "success");
|
||||
} else {
|
||||
write_ok = stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
|
||||
results[i].data, 0, get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + i).c_str());
|
||||
LOG_INFO("save result PNG image to '%s' (%s)", final_image_path.c_str(), write_ok == 0 ? "failure" : "success");
|
||||
}
|
||||
}
|
||||
if (!save_results(cli_params, ctx_params, gen_params, results, num_results)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_results; i++) {
|
||||
|
|
|
|||
|
|
@ -1393,7 +1393,7 @@ std::string ModelLoader::load_umt5_tokenizer_json() {
|
|||
#endif
|
||||
}
|
||||
|
||||
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p) {
|
||||
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
|
||||
int64_t process_time_ms = 0;
|
||||
std::atomic<int64_t> read_time_ms(0);
|
||||
std::atomic<int64_t> memcpy_time_ms(0);
|
||||
|
|
@ -1443,6 +1443,15 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<MmapWrapper> mmapped;
|
||||
if (enable_mmap && !is_zip) {
|
||||
LOG_DEBUG("using mmap for I/O");
|
||||
mmapped = MmapWrapper::create(file_path);
|
||||
if (!mmapped) {
|
||||
LOG_WARN("failed to memory-map '%s'", file_path.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
|
||||
if (n_threads < 1) {
|
||||
n_threads = 1;
|
||||
|
|
@ -1464,8 +1473,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
failed = true;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// kcpp
|
||||
} else if (!mmapped) {
|
||||
file.open(sd_get_u8path(file_path), std::ios::binary);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||
|
|
@ -1518,6 +1526,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
zip_entry_noallocread(zip, (void*)buf, n);
|
||||
}
|
||||
zip_entry_close(zip);
|
||||
} else if (mmapped) {
|
||||
if (!mmapped->copy_data(buf, n, tensor_storage.offset)) {
|
||||
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
|
||||
failed = true;
|
||||
}
|
||||
} else {
|
||||
file.seekg(tensor_storage.offset);
|
||||
file.read(buf, n);
|
||||
|
|
@ -1642,7 +1655,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||
|
||||
bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
||||
std::set<std::string> ignore_tensors,
|
||||
int n_threads) {
|
||||
int n_threads,
|
||||
bool enable_mmap) {
|
||||
std::set<std::string> tensor_names_in_file;
|
||||
std::mutex tensor_names_mutex;
|
||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||
|
|
@ -1685,7 +1699,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||
return true;
|
||||
};
|
||||
|
||||
bool success = load_tensors(on_new_tensor_cb, n_threads);
|
||||
bool success = load_tensors(on_new_tensor_cb, n_threads, enable_mmap);
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from file failed");
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -311,10 +311,11 @@ public:
|
|||
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
|
||||
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
|
||||
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
|
||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
|
||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
|
||||
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
||||
std::set<std::string> ignore_tensors = {},
|
||||
int n_threads = 0);
|
||||
int n_threads = 0,
|
||||
bool use_mmap = false);
|
||||
|
||||
std::vector<std::string> get_tensor_names() const {
|
||||
std::vector<std::string> names;
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ public:
|
|||
bool use_tiny_autoencoder = false;
|
||||
sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0, 0};
|
||||
bool offload_params_to_cpu = false;
|
||||
bool stacked_id = false;
|
||||
bool use_pmid = false;
|
||||
|
||||
bool is_using_v_parameterization = false;
|
||||
bool is_using_edm_v_parameterization = false;
|
||||
|
|
@ -839,11 +839,11 @@ public:
|
|||
if (!model_loader.init_from_file_and_convert_name(sd_ctx_params->photo_maker_path, "pmid.")) {
|
||||
LOG_WARN("loading stacked ID embedding from '%s' failed", sd_ctx_params->photo_maker_path);
|
||||
} else {
|
||||
stacked_id = true;
|
||||
use_pmid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (stacked_id) {
|
||||
if (use_pmid) {
|
||||
if (!pmid_model->alloc_params_buffer()) {
|
||||
LOG_ERROR(" pmid model params buffer allocation failed");
|
||||
return false;
|
||||
|
|
@ -884,7 +884,7 @@ public:
|
|||
if (use_tiny_autoencoder) {
|
||||
ignore_tensors.insert("first_stage_model.");
|
||||
}
|
||||
if (stacked_id) {
|
||||
if (use_pmid) {
|
||||
ignore_tensors.insert("pmid.unet.");
|
||||
}
|
||||
ignore_tensors.insert("model.diffusion_model.__x0__");
|
||||
|
|
@ -905,7 +905,7 @@ public:
|
|||
if (version == VERSION_SVD) {
|
||||
ignore_tensors.insert("conditioner.embedders.3");
|
||||
}
|
||||
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads);
|
||||
bool success = model_loader.load_tensors(tensors, ignore_tensors, n_threads, sd_ctx_params->enable_mmap);
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
ggml_free(ctx);
|
||||
|
|
@ -938,7 +938,7 @@ public:
|
|||
control_net_params_mem_size = control_net->get_params_buffer_size();
|
||||
}
|
||||
size_t pmid_params_mem_size = 0;
|
||||
if (stacked_id) {
|
||||
if (use_pmid) {
|
||||
pmid_params_mem_size = pmid_model->get_params_buffer_size();
|
||||
}
|
||||
|
||||
|
|
@ -1350,14 +1350,89 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
ggml_tensor* id_encoder(ggml_context* work_ctx,
|
||||
ggml_tensor* init_img,
|
||||
ggml_tensor* prompts_embeds,
|
||||
ggml_tensor* id_embeds,
|
||||
std::vector<bool>& class_tokens_mask) {
|
||||
ggml_tensor* res = nullptr;
|
||||
pmid_model->compute(n_threads, init_img, prompts_embeds, id_embeds, class_tokens_mask, &res, work_ctx);
|
||||
return res;
|
||||
// Builds the PhotoMaker ("pmid") identity condition for the current prompt.
//
// When use_pmid is set this:
//   * applies the PhotoMaker LoRA once (tracked through pmid_lora->applied),
//   * preprocesses every ID image in pm_params to 224x224 and packs them into
//     a single F32 tensor allocated from work_ctx,
//   * encodes the prompt together with its trigger word, and
//   * runs the ID encoder so that id_cond.c_crossattn holds the stacked
//     identity embedding.
// On success condition_params.text is rewritten to the prompt without the
// trigger word, and condition_params.num_input_imgs is set to the number of
// ID images.
//
// PhotoMaker is switched off for this object (use_pmid = false) when no ID
// images are supplied, when a v2 model has no loadable ID-embeds file, or when
// the embeds row count does not match the number of ID images.
//
// Returns the resulting condition; it is default-constructed when use_pmid was
// already false or no ID images were given.
SDCondition get_pmid_conditon(ggml_context* work_ctx,
                              sd_pm_params_t pm_params,
                              ConditionerParams& condition_params) {
    SDCondition id_cond;
    if (use_pmid) {
        if (!pmid_lora->applied) {
            int64_t t0 = ggml_time_ms();
            pmid_lora->apply(tensors, version, n_threads);
            int64_t t1         = ggml_time_ms();
            pmid_lora->applied = true;
            LOG_INFO("pmid_lora apply completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
            if (free_params_immediately) {
                pmid_lora->free_params_buffer();
            }
        }
        // preprocess input id images
        bool pmv2 = pmid_model->get_version() == PM_VERSION_2;
        if (pm_params.id_images_count > 0) {
            int clip_image_size        = 224;
            pmid_model->style_strength = pm_params.style_strength;

            auto id_image_tensor = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, clip_image_size, clip_image_size, 3, pm_params.id_images_count);

            std::vector<sd_image_f32_t> processed_id_images;
            for (int i = 0; i < pm_params.id_images_count; i++) {
                sd_image_f32_t id_image           = sd_image_t_to_sd_image_f32_t(pm_params.id_images[i]);
                sd_image_f32_t processed_id_image = clip_preprocess(id_image, clip_image_size, clip_image_size);
                // The intermediate float image is no longer needed once it has been resized.
                free(id_image.data);
                id_image.data = nullptr;
                processed_id_images.push_back(processed_id_image);
            }

            // Copy every preprocessed image into its slot (i3) of the batch tensor.
            ggml_ext_tensor_iter(id_image_tensor, [&](ggml_tensor* id_image_tensor, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
                float value = sd_image_get_f32(processed_id_images[i3], i0, i1, i2, false);
                ggml_ext_tensor_set_f32(id_image_tensor, value, i0, i1, i2, i3);
            });

            for (auto& image : processed_id_images) {
                free(image.data);
                image.data = nullptr;
            }
            processed_id_images.clear();

            int64_t t0                      = ggml_time_ms();
            condition_params.num_input_imgs = pm_params.id_images_count;
            auto cond_tup                   = cond_stage_model->get_learned_condition_with_trigger(work_ctx,
                                                                                                   n_threads,
                                                                                                   condition_params);
            id_cond                         = std::get<0>(cond_tup);
            auto class_tokens_mask          = std::get<1>(cond_tup);
            struct ggml_tensor* id_embeds   = nullptr;
            if (pmv2 && pm_params.id_embed_path != nullptr) {
                id_embeds = load_tensor_from_file(work_ctx, pm_params.id_embed_path);
            }
            if (pmv2 && id_embeds == nullptr) {
                LOG_WARN("Provided PhotoMaker images, but NO valid ID embeds file for PM v2");
                LOG_WARN("Turn off PhotoMaker");
                use_pmid = false;
            } else {
                if (pmv2 && pm_params.id_images_count != id_embeds->ne[1]) {
                    // ne[1] is an int64_t, so it must be printed with PRId64 rather than %d.
                    LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%" PRId64 "). You should run face_detect.py again.", pm_params.id_images_count, id_embeds->ne[1]);
                    LOG_WARN("Turn off PhotoMaker");
                    use_pmid = false;
                } else {
                    ggml_tensor* res = nullptr;
                    pmid_model->compute(n_threads, id_image_tensor, id_cond.c_crossattn, id_embeds, class_tokens_mask, &res, work_ctx);
                    id_cond.c_crossattn = res;
                    int64_t t1          = ggml_time_ms();
                    LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
                    if (free_params_immediately) {
                        pmid_model->free_params_buffer();
                    }
                    // Encode input prompt without the trigger word for delayed conditioning
                    condition_params.text = cond_stage_model->remove_trigger_from_prompt(work_ctx, condition_params.text);
                }
            }
        } else {
            LOG_WARN("Provided PhotoMaker model file, but NO input ID images");
            LOG_WARN("Turn off PhotoMaker");
            use_pmid = false;
        }
    }
    return id_cond;
}
|
||||
|
||||
ggml_tensor* get_clip_vision_output(ggml_context* work_ctx,
|
||||
|
|
@ -2959,6 +3034,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
|||
sd_ctx_params->prediction = PREDICTION_COUNT;
|
||||
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
|
||||
sd_ctx_params->offload_params_to_cpu = false;
|
||||
sd_ctx_params->enable_mmap = false;
|
||||
sd_ctx_params->keep_clip_on_cpu = false;
|
||||
sd_ctx_params->keep_control_net_on_cpu = false;
|
||||
sd_ctx_params->keep_vae_on_cpu = false;
|
||||
|
|
@ -3276,114 +3352,22 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
guidance.img_cfg = guidance.txt_cfg;
|
||||
}
|
||||
|
||||
// for (auto v : sigmas) {
|
||||
// std::cout << v << " ";
|
||||
// }
|
||||
// std::cout << std::endl;
|
||||
|
||||
int sample_steps = sigmas.size() - 1;
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
|
||||
// Photo Maker
|
||||
std::string prompt_text_only;
|
||||
ggml_tensor* init_img = nullptr;
|
||||
SDCondition id_cond;
|
||||
std::vector<bool> class_tokens_mask;
|
||||
|
||||
ConditionerParams condition_params;
|
||||
condition_params.text = prompt;
|
||||
condition_params.clip_skip = clip_skip;
|
||||
condition_params.width = width;
|
||||
condition_params.height = height;
|
||||
condition_params.ref_images = ref_images;
|
||||
condition_params.adm_in_channels = sd_ctx->sd->diffusion_model->get_adm_in_channels();
|
||||
|
||||
if (sd_ctx->sd->stacked_id) {
|
||||
if (!sd_ctx->sd->pmid_lora->applied) {
|
||||
int64_t t0 = ggml_time_ms();
|
||||
sd_ctx->sd->pmid_lora->apply(sd_ctx->sd->tensors, sd_ctx->sd->version, sd_ctx->sd->n_threads);
|
||||
int64_t t1 = ggml_time_ms();
|
||||
sd_ctx->sd->pmid_lora->applied = true;
|
||||
LOG_INFO("pmid_lora apply completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->pmid_lora->free_params_buffer();
|
||||
}
|
||||
}
|
||||
// preprocess input id images
|
||||
bool pmv2 = sd_ctx->sd->pmid_model->get_version() == PM_VERSION_2;
|
||||
if (pm_params.id_images_count > 0) {
|
||||
int clip_image_size = 224;
|
||||
sd_ctx->sd->pmid_model->style_strength = pm_params.style_strength;
|
||||
|
||||
init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, clip_image_size, clip_image_size, 3, pm_params.id_images_count);
|
||||
|
||||
std::vector<sd_image_f32_t> processed_id_images;
|
||||
for (int i = 0; i < pm_params.id_images_count; i++) {
|
||||
sd_image_f32_t id_image = sd_image_t_to_sd_image_f32_t(pm_params.id_images[i]);
|
||||
sd_image_f32_t processed_id_image = clip_preprocess(id_image, clip_image_size, clip_image_size);
|
||||
free(id_image.data);
|
||||
id_image.data = nullptr;
|
||||
processed_id_images.push_back(processed_id_image);
|
||||
}
|
||||
|
||||
ggml_ext_tensor_iter(init_img, [&](ggml_tensor* init_img, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||
float value = sd_image_get_f32(processed_id_images[i3], i0, i1, i2, false);
|
||||
ggml_ext_tensor_set_f32(init_img, value, i0, i1, i2, i3);
|
||||
});
|
||||
|
||||
for (auto& image : processed_id_images) {
|
||||
free(image.data);
|
||||
image.data = nullptr;
|
||||
}
|
||||
processed_id_images.clear();
|
||||
|
||||
int64_t t0 = ggml_time_ms();
|
||||
condition_params.text = prompt;
|
||||
condition_params.num_input_imgs = pm_params.id_images_count;
|
||||
auto cond_tup = sd_ctx->sd->cond_stage_model->get_learned_condition_with_trigger(work_ctx,
|
||||
sd_ctx->sd->n_threads,
|
||||
condition_params);
|
||||
id_cond = std::get<0>(cond_tup);
|
||||
class_tokens_mask = std::get<1>(cond_tup); //
|
||||
struct ggml_tensor* id_embeds = nullptr;
|
||||
if (pmv2 && pm_params.id_embed_path != nullptr) {
|
||||
id_embeds = load_tensor_from_file(work_ctx, pm_params.id_embed_path);
|
||||
// print_ggml_tensor(id_embeds, true, "id_embeds:");
|
||||
}
|
||||
if (pmv2 && id_embeds == nullptr) {
|
||||
LOG_WARN("Provided PhotoMaker images, but NO valid ID embeds file for PM v2");
|
||||
LOG_WARN("Turn off PhotoMaker");
|
||||
sd_ctx->sd->stacked_id = false;
|
||||
} else {
|
||||
if (pmv2 && pm_params.id_images_count != id_embeds->ne[1]) {
|
||||
LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.", pm_params.id_images_count, id_embeds->ne[1]);
|
||||
LOG_WARN("Turn off PhotoMaker");
|
||||
sd_ctx->sd->stacked_id = false;
|
||||
} else {
|
||||
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
|
||||
int64_t t1 = ggml_time_ms();
|
||||
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
|
||||
if (sd_ctx->sd->free_params_immediately) {
|
||||
sd_ctx->sd->pmid_model->free_params_buffer();
|
||||
}
|
||||
// Encode input prompt without the trigger word for delayed conditioning
|
||||
prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt);
|
||||
// printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str());
|
||||
prompt = prompt_text_only; //
|
||||
if (sample_steps < 50) {
|
||||
LOG_WARN("It's recommended to use >= 50 steps for photo maker!");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LOG_WARN("Provided PhotoMaker model file, but NO input ID images");
|
||||
LOG_WARN("Turn off PhotoMaker");
|
||||
sd_ctx->sd->stacked_id = false;
|
||||
}
|
||||
}
|
||||
// Photo Maker
|
||||
SDCondition id_cond = sd_ctx->sd->get_pmid_conditon(work_ctx, pm_params, condition_params);
|
||||
|
||||
// Get learned condition
|
||||
condition_params.text = prompt;
|
||||
condition_params.zero_out_masked = false;
|
||||
SDCondition cond = sd_ctx->sd->cond_stage_model->get_learned_condition(work_ctx,
|
||||
sd_ctx->sd->n_threads,
|
||||
|
|
@ -3523,7 +3507,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||
ggml_ext_im_set_randn_f32(noise, sd_ctx->sd->rng);
|
||||
|
||||
int start_merge_step = -1;
|
||||
if (sd_ctx->sd->stacked_id) {
|
||||
if (sd_ctx->sd->use_pmid) {
|
||||
start_merge_step = int(sd_ctx->sd->pmid_model->style_strength / 100.f * sample_steps);
|
||||
// if (start_merge_step > 30)
|
||||
// start_merge_step = 30;
|
||||
|
|
|
|||
|
|
@ -182,6 +182,7 @@ typedef struct {
|
|||
enum prediction_t prediction;
|
||||
enum lora_apply_mode_t lora_apply_mode;
|
||||
bool offload_params_to_cpu;
|
||||
bool enable_mmap;
|
||||
bool keep_clip_on_cpu;
|
||||
bool keep_control_net_on_cpu;
|
||||
bool keep_vae_on_cpu;
|
||||
|
|
|
|||
|
|
@ -104,9 +104,71 @@ std::string sd_get_u8path(const std::string& file_path)
|
|||
return std::filesystem::u8path(file_path).string();
|
||||
}
|
||||
|
||||
class MmapWrapperImpl : public MmapWrapper {
|
||||
public:
|
||||
MmapWrapperImpl(void* data, size_t size, HANDLE hfile, HANDLE hmapping)
|
||||
: MmapWrapper(data, size), hfile_(hfile), hmapping_(hmapping) {}
|
||||
|
||||
~MmapWrapperImpl() override {
|
||||
UnmapViewOfFile(data_);
|
||||
CloseHandle(hmapping_);
|
||||
CloseHandle(hfile_);
|
||||
}
|
||||
|
||||
private:
|
||||
HANDLE hfile_;
|
||||
HANDLE hmapping_;
|
||||
};
|
||||
|
||||
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
|
||||
void* mapped_data = nullptr;
|
||||
size_t file_size = 0;
|
||||
|
||||
HANDLE file_handle = CreateFileA(
|
||||
filename.c_str(),
|
||||
GENERIC_READ,
|
||||
FILE_SHARE_READ,
|
||||
NULL,
|
||||
OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_NORMAL,
|
||||
NULL);
|
||||
|
||||
if (file_handle == INVALID_HANDLE_VALUE) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LARGE_INTEGER size;
|
||||
if (!GetFileSizeEx(file_handle, &size)) {
|
||||
CloseHandle(file_handle);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
file_size = static_cast<size_t>(size.QuadPart);
|
||||
|
||||
HANDLE mapping_handle = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
|
||||
if (mapping_handle == NULL) {
|
||||
CloseHandle(file_handle);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
mapped_data = MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, file_size);
|
||||
|
||||
if (mapped_data == NULL) {
|
||||
CloseHandle(mapping_handle);
|
||||
CloseHandle(file_handle);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return std::make_unique<MmapWrapperImpl>(mapped_data, file_size, file_handle, mapping_handle);
|
||||
}
|
||||
|
||||
#else // Unix
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
bool file_exists(const std::string& filename) {
|
||||
struct stat buffer;
|
||||
|
|
@ -123,8 +185,64 @@ std::string sd_get_u8path(const std::string& file_path)
|
|||
return std::filesystem::path(file_path).string();
|
||||
}
|
||||
|
||||
class MmapWrapperImpl : public MmapWrapper {
|
||||
public:
|
||||
MmapWrapperImpl(void* data, size_t size)
|
||||
: MmapWrapper(data, size) {}
|
||||
|
||||
~MmapWrapperImpl() override {
|
||||
munmap(data_, size_);
|
||||
}
|
||||
};
|
||||
|
||||
std::unique_ptr<MmapWrapper> MmapWrapper::create(const std::string& filename) {
|
||||
int file_descriptor = open(filename.c_str(), O_RDONLY);
|
||||
if (file_descriptor == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int mmap_flags = MAP_PRIVATE;
|
||||
|
||||
#ifdef __linux__
|
||||
// performance flags used by llama.cpp
|
||||
// posix_fadvise(file_descriptor, 0, 0, POSIX_FADV_SEQUENTIAL);
|
||||
// mmap_flags |= MAP_POPULATE;
|
||||
#endif
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(file_descriptor, &sb) == -1) {
|
||||
close(file_descriptor);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t file_size = sb.st_size;
|
||||
|
||||
void* mapped_data = mmap(NULL, file_size, PROT_READ, mmap_flags, file_descriptor, 0);
|
||||
|
||||
close(file_descriptor);
|
||||
|
||||
if (mapped_data == MAP_FAILED) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
// performance flags used by llama.cpp
|
||||
// posix_madvise(mapped_data, file_size, POSIX_MADV_WILLNEED);
|
||||
#endif
|
||||
|
||||
return std::make_unique<MmapWrapperImpl>(mapped_data, file_size);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
bool MmapWrapper::copy_data(void* buf, size_t n, size_t offset) const {
|
||||
if (offset >= size_ || n > (size_ - offset)) {
|
||||
return false;
|
||||
}
|
||||
std::memcpy(buf, data() + offset, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
// get_num_physical_cores is copy from
|
||||
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
|
||||
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#define __UTIL_H__
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
|
@ -45,6 +46,28 @@ sd_image_f32_t resize_sd_image_f32_t(sd_image_f32_t image, int target_width, int
|
|||
|
||||
sd_image_f32_t clip_preprocess(sd_image_f32_t image, int target_width, int target_height);
|
||||
|
||||
// Read-only view of a memory-mapped file.
//
// Instances are obtained through create(); the platform-specific subclass is
// responsible for releasing the mapping. The object is neither copyable nor
// movable.
class MmapWrapper {
protected:
    void* data_  = nullptr;  // start of the mapped region
    size_t size_ = 0;        // length of the mapped region in bytes

    // Only subclasses may construct a wrapper around an existing mapping.
    MmapWrapper(void* data, size_t size)
        : data_(data), size_(size) {}

public:
    // Maps `filename`; the implementations return nullptr on failure.
    static std::unique_ptr<MmapWrapper> create(const std::string& filename);

    virtual ~MmapWrapper() = default;

    MmapWrapper(const MmapWrapper&)            = delete;
    MmapWrapper& operator=(const MmapWrapper&) = delete;
    MmapWrapper(MmapWrapper&&)                 = delete;
    MmapWrapper& operator=(MmapWrapper&&)      = delete;

    // Start of the mapped bytes.
    const uint8_t* data() const {
        return static_cast<const uint8_t*>(data_);
    }

    // Number of mapped bytes.
    size_t size() const {
        return size_;
    }

    // Copies n bytes starting at `offset` into buf; returns whether the copy happened.
    bool copy_data(void* buf, size_t n, size_t offset) const;
};
|
||||
|
||||
std::string path_join(const std::string& p1, const std::string& p2);
|
||||
std::vector<std::string> split_string(const std::string& str, char delimiter);
|
||||
void pretty_progress(int step, int steps, float time);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue