diff --git a/Makefile b/Makefile index 9a731e36c..26bbdd242 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,8 @@ endif # # keep standard at C11 and C++11 -CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE +CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE +CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE LDFLAGS = FASTCFLAGS = $(subst -O3,-Ofast,$(CFLAGS)) @@ -496,12 +496,14 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@ clean: - rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so + rm -vf *.o main sdmain quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so # useful tools main: examples/main/main.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @echo '==== Run ./main -h for help. 
====' +sdmain: otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) imatrix: examples/imatrix/imatrix.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS) diff --git a/ggml.h b/ggml.h index 15868f40e..b7f5a89ee 100644 --- a/ggml.h +++ b/ggml.h @@ -229,7 +229,7 @@ #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_SRC 10 #ifndef GGML_MAX_NAME -#define GGML_MAX_NAME 64 +#define GGML_MAX_NAME 128 #endif #define GGML_MAX_OP_PARAMS 64 #define GGML_DEFAULT_N_THREADS 4 diff --git a/otherarch/sdcpp/CMakeLists.txt b/otherarch/sdcpp/CMakeLists.txt new file mode 100644 index 000000000..a9bedebef --- /dev/null +++ b/otherarch/sdcpp/CMakeLists.txt @@ -0,0 +1,95 @@ +cmake_minimum_required(VERSION 3.12) +project("stable-diffusion") + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(SD_STANDALONE ON) +else() + set(SD_STANDALONE OFF) +endif() + +# +# Option list +# + +# general +option(SD_CUBLAS "sd: cuda backend" OFF) +option(SD_HIPBLAS "sd: rocm backend" OFF) +option(SD_METAL "sd: metal backend" OFF) +option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF) +option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) +option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF) + +if(SD_CUBLAS) + message("Use CUBLAS as backend stable-diffusion") + set(GGML_CUBLAS ON) + add_definitions(-DSD_USE_CUBLAS) +endif() + +if(SD_METAL) + message("Use Metal as backend stable-diffusion") + set(GGML_METAL ON) + add_definitions(-DSD_USE_METAL) +endif() + +if (SD_HIPBLAS) + message("Use HIPBLAS as backend stable-diffusion") + set(GGML_HIPBLAS ON) + add_definitions(-DSD_USE_CUBLAS) + if(SD_FAST_SOFTMAX) + set(GGML_CUDA_FAST_SOFTMAX ON) + endif() +endif () + +if(SD_FLASH_ATTN) + message("Use Flash Attention for memory optimization") + add_definitions(-DSD_USE_FLASH_ATTENTION) +endif() + +set(SD_LIB stable-diffusion) + +file(GLOB SD_LIB_SOURCES + "*.h" + "*.cpp" + "*.hpp" +) + +# we can get only one share lib +if(SD_BUILD_SHARED_LIBS) + message("Build shared library") + set(BUILD_SHARED_LIBS OFF) + message(${SD_LIB_SOURCES}) + add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES}) + add_definitions(-DSD_BUILD_SHARED_LIB) + target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +else() + message("Build static library") + add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES}) +endif() + + +set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) + + +# deps +add_subdirectory(ggml) + +add_subdirectory(thirdparty) + +target_link_libraries(${SD_LIB} PUBLIC ggml zip) +target_include_directories(${SD_LIB} PUBLIC . 
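+# PUBLIC include dirs: consumers of the stable-diffusion target also see the sdcpp root and
+# the bundled thirdparty headers. Example standalone configure (assuming a CUDA toolchain is
+# available for the SD_CUBLAS option above): cmake -B build -DSD_CUBLAS=ON && cmake --build build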
thirdparty) +target_compile_features(${SD_LIB} PUBLIC cxx_std_11) + + +add_subdirectory(examples) + diff --git a/otherarch/sdcpp/SDCPP_LICENSE b/otherarch/sdcpp/SDCPP_LICENSE new file mode 100644 index 000000000..56e1e5a63 --- /dev/null +++ b/otherarch/sdcpp/SDCPP_LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 leejet + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/otherarch/sdcpp/clip.hpp b/otherarch/sdcpp/clip.hpp new file mode 100644 index 000000000..31efa5fe8 --- /dev/null +++ b/otherarch/sdcpp/clip.hpp @@ -0,0 +1,1139 @@ +#ifndef __CLIP_HPP__ +#define __CLIP_HPP__ + +#include "ggml_extend.hpp" +#include "model.h" + +/*================================================== CLIPTokenizer ===================================================*/ + +std::pair, std::string> extract_and_remove_lora(std::string text) { + std::regex re("]+)>"); + std::smatch matches; + std::unordered_map filename2multiplier; + + while (std::regex_search(text, matches, re)) { + std::string filename = matches[1].str(); + float multiplier = std::stof(matches[2].str()); + + text = std::regex_replace(text, re, "", std::regex_constants::format_first_only); + + if (multiplier == 0.f) { + continue; + } + + if (filename2multiplier.find(filename) == filename2multiplier.end()) { + filename2multiplier[filename] = multiplier; + } else { + filename2multiplier[filename] += multiplier; + } + } + + return std::make_pair(filename2multiplier, text); +} + +const std::string UNK_TOKEN = "<|endoftext|>"; +const std::string BOS_TOKEN = "<|startoftext|>"; +const std::string EOS_TOKEN = "<|endoftext|>"; +const std::string PAD_TOEKN = "<|endoftext|>"; + +const int UNK_TOKEN_ID = 49407; +const int BOS_TOKEN_ID = 49406; +const int EOS_TOKEN_ID = 49407; +const int PAD_TOKEN_ID = 49407; + +std::vector> bytes_to_unicode() { + std::vector> byte_unicode_pairs; + std::set byte_set; + for (int b = static_cast('!'); b <= static_cast('~'); ++b) { + byte_set.insert(b); + byte_unicode_pairs.push_back(std::pair(b, unicode_value_to_utf32(b))); + } + for (int b = 161; b <= 172; ++b) { + byte_set.insert(b); + byte_unicode_pairs.push_back(std::pair(b, unicode_value_to_utf32(b))); + } + for (int b = 174; b <= 255; ++b) { + byte_set.insert(b); + byte_unicode_pairs.push_back(std::pair(b, unicode_value_to_utf32(b))); + } + int n = 0; + for (int b = 0; b < 256; ++b) { + if (byte_set.find(b) == byte_set.end()) { + byte_unicode_pairs.push_back(std::pair(b, unicode_value_to_utf32(n + 256))); + ++n; + } + } + // LOG_DEBUG("byte_unicode_pairs 
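+    // bytes_to_unicode builds the GPT-2 style reversible byte <-> unicode table: printable
+    // ASCII and most of latin-1 map to themselves, the remaining byte values are shifted to
+    // 256 + n so that every byte has a printable stand-in for BPE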
%d", byte_unicode_pairs.size()); + return byte_unicode_pairs; +} + +// Ref: https://github.com/openai/CLIP/blob/main/clip/simple_tokenizer.py + +typedef std::function&)> on_new_token_cb_t; + +class CLIPTokenizer { +private: + SDVersion version = VERSION_1_x; + std::map byte_encoder; + std::map encoder; + std::map, int> bpe_ranks; + std::regex pat; + + static std::string strip(const std::string& str) { + std::string::size_type start = str.find_first_not_of(" \t\n\r\v\f"); + std::string::size_type end = str.find_last_not_of(" \t\n\r\v\f"); + + if (start == std::string::npos) { + // String contains only whitespace characters + return ""; + } + + return str.substr(start, end - start + 1); + } + + static std::string whitespace_clean(std::string text) { + text = std::regex_replace(text, std::regex(R"(\s+)"), " "); + text = strip(text); + return text; + } + + static std::set> get_pairs(const std::vector& subwords) { + std::set> pairs; + if (subwords.size() == 0) { + return pairs; + } + std::u32string prev_subword = subwords[0]; + for (int i = 1; i < subwords.size(); i++) { + std::u32string subword = subwords[i]; + std::pair pair(prev_subword, subword); + pairs.insert(pair); + prev_subword = subword; + } + return pairs; + } + +public: + CLIPTokenizer(SDVersion version = VERSION_1_x) + : version(version) {} + + void load_from_merges(const std::string& merges_utf8_str) { + auto byte_unicode_pairs = bytes_to_unicode(); + byte_encoder = std::map(byte_unicode_pairs.begin(), byte_unicode_pairs.end()); + // for (auto & pair: byte_unicode_pairs) { + // std::cout << pair.first << ": " << pair.second << std::endl; + // } + std::vector merges; + size_t start = 0; + size_t pos; + std::u32string merges_utf32_str = utf8_to_utf32(merges_utf8_str); + while ((pos = merges_utf32_str.find('\n', start)) != std::string::npos) { + merges.push_back(merges_utf32_str.substr(start, pos - start)); + start = pos + 1; + } + // LOG_DEBUG("merges size %llu", merges.size()); + GGML_ASSERT(merges.size() == 48895); + merges = std::vector(merges.begin() + 1, merges.end()); + std::vector> merge_pairs; + for (const auto& merge : merges) { + size_t space_pos = merge.find(' '); + merge_pairs.emplace_back(merge.substr(0, space_pos), merge.substr(space_pos + 1)); + // LOG_DEBUG("%s", utf32_to_utf8(merge.substr(space_pos + 1)).c_str()); + } + std::vector vocab; + for (const auto& pair : byte_unicode_pairs) { + vocab.push_back(pair.second); + } + for (const auto& pair : byte_unicode_pairs) { + vocab.push_back(pair.second + utf8_to_utf32("")); + } + for (const auto& merge : merge_pairs) { + vocab.push_back(merge.first + merge.second); + } + vocab.push_back(utf8_to_utf32("<|startoftext|>")); + vocab.push_back(utf8_to_utf32("<|endoftext|>")); + LOG_DEBUG("vocab size: %llu", vocab.size()); + int i = 0; + for (const auto& token : vocab) { + encoder[token] = i++; + } + + int rank = 0; + for (const auto& merge : merge_pairs) { + bpe_ranks[merge] = rank++; + } + }; + + std::u32string bpe(const std::u32string& token) { + std::vector word; + + for (int i = 0; i < token.size() - 1; i++) { + word.emplace_back(1, token[i]); + } + word.push_back(token.substr(token.size() - 1) + utf8_to_utf32("")); + + std::set> pairs = get_pairs(word); + + if (pairs.empty()) { + return token + utf8_to_utf32(""); + } + + while (true) { + auto min_pair_iter = std::min_element(pairs.begin(), + pairs.end(), + [&](const std::pair& a, + const std::pair& b) { + if (bpe_ranks.find(a) == bpe_ranks.end()) { + return false; + } else if (bpe_ranks.find(b) == bpe_ranks.end()) { + 
return true; + } + return bpe_ranks.at(a) < bpe_ranks.at(b); + }); + + const std::pair& bigram = *min_pair_iter; + + if (bpe_ranks.find(bigram) == bpe_ranks.end()) { + break; + } + + std::u32string first = bigram.first; + std::u32string second = bigram.second; + std::vector new_word; + int32_t i = 0; + + while (i < word.size()) { + auto it = std::find(word.begin() + i, word.end(), first); + if (it == word.end()) { + new_word.insert(new_word.end(), word.begin() + i, word.end()); + break; + } + new_word.insert(new_word.end(), word.begin() + i, it); + i = static_cast(std::distance(word.begin(), it)); + + if (word[i] == first && i < static_cast(word.size()) - 1 && word[i + 1] == second) { + new_word.push_back(first + second); + i += 2; + } else { + new_word.push_back(word[i]); + i += 1; + } + } + + word = new_word; + + if (word.size() == 1) { + break; + } + pairs = get_pairs(word); + } + + std::u32string result; + for (int i = 0; i < word.size(); i++) { + result += word[i]; + if (i != word.size() - 1) { + result += utf8_to_utf32(" "); + } + } + + return result; + } + + std::vector tokenize(std::string text, + on_new_token_cb_t on_new_token_cb, + size_t max_length = 0, + bool padding = false) { + std::vector tokens = encode(text, on_new_token_cb); + tokens.insert(tokens.begin(), BOS_TOKEN_ID); + if (max_length > 0) { + if (tokens.size() > max_length - 1) { + tokens.resize(max_length - 1); + tokens.push_back(EOS_TOKEN_ID); + } else { + tokens.push_back(EOS_TOKEN_ID); + if (padding) { + int pad_token_id = PAD_TOKEN_ID; + if (version == VERSION_2_x) { + pad_token_id = 0; + } + tokens.insert(tokens.end(), max_length - tokens.size(), pad_token_id); + } + } + } + return tokens; + } + + std::vector encode(std::string text, on_new_token_cb_t on_new_token_cb) { + std::string original_text = text; + std::vector bpe_tokens; + text = whitespace_clean(text); + std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return std::tolower(c); }); + + std::regex pat(R"(<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[[:alpha:]]+|[[:digit:]]|[^[:space:][:alpha:][:digit:]]+)", + std::regex::icase); + + std::smatch matches; + std::string str = text; + std::vector token_strs; + while (std::regex_search(str, matches, pat)) { + bool skip = on_new_token_cb(str, bpe_tokens); + if (skip) { + continue; + } + for (auto& token : matches) { + std::string token_str = token.str(); + std::u32string utf32_token; + for (int i = 0; i < token_str.length(); i++) { + char b = token_str[i]; + utf32_token += byte_encoder[b]; + } + auto bpe_strs = bpe(utf32_token); + size_t start = 0; + size_t pos; + while ((pos = bpe_strs.find(' ', start)) != std::u32string::npos) { + auto bpe_str = bpe_strs.substr(start, pos - start); + bpe_tokens.push_back(encoder[bpe_str]); + token_strs.push_back(utf32_to_utf8(bpe_str)); + + start = pos + 1; + } + auto bpe_str = bpe_strs.substr(start, bpe_strs.size() - start); + bpe_tokens.push_back(encoder[bpe_str]); + token_strs.push_back(utf32_to_utf8(bpe_str)); + } + str = matches.suffix(); + } + std::stringstream ss; + ss << "["; + for (auto token : token_strs) { + ss << "\"" << token << "\", "; + } + ss << "]"; + LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str()); + return bpe_tokens; + } +}; + +// Ref: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/cad87bf4e3e0b0a759afa94e933527c3123d59bc/modules/prompt_parser.py#L345 +// +// Parses a string with attention tokens and returns a list of pairs: text and its associated weight. 
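+// Weights from nested brackets multiply, and adjacent ranges that end up with the same
+// weight are merged into a single entry.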
+// Accepted tokens are: +// (abc) - increases attention to abc by a multiplier of 1.1 +// (abc:3.12) - increases attention to abc by a multiplier of 3.12 +// [abc] - decreases attention to abc by a multiplier of 1.1 +// \( - literal character '(' +// \[ - literal character '[' +// \) - literal character ')' +// \] - literal character ']' +// \\ - literal character '\' +// anything else - just text +// +// >>> parse_prompt_attention('normal text') +// [['normal text', 1.0]] +// >>> parse_prompt_attention('an (important) word') +// [['an ', 1.0], ['important', 1.1], [' word', 1.0]] +// >>> parse_prompt_attention('(unbalanced') +// [['unbalanced', 1.1]] +// >>> parse_prompt_attention('\(literal\]') +// [['(literal]', 1.0]] +// >>> parse_prompt_attention('(unnecessary)(parens)') +// [['unnecessaryparens', 1.1]] +// >>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).') +// [['a ', 1.0], +// ['house', 1.5730000000000004], +// [' ', 1.1], +// ['on', 1.0], +// [' a ', 1.1], +// ['hill', 0.55], +// [', sun, ', 1.1], +// ['sky', 1.4641000000000006], +// ['.', 1.1]] +std::vector> parse_prompt_attention(const std::string& text) { + std::vector> res; + std::vector round_brackets; + std::vector square_brackets; + + float round_bracket_multiplier = 1.1f; + float square_bracket_multiplier = 1 / 1.1f; + + std::regex re_attention(R"(\\\(|\\\)|\\\[|\\\]|\\\\|\\|\(|\[|:([+-]?[.\d]+)\)|\)|\]|[^\\()\[\]:]+|:)"); + std::regex re_break(R"(\s*\bBREAK\b\s*)"); + + auto multiply_range = [&](int start_position, float multiplier) { + for (int p = start_position; p < res.size(); ++p) { + res[p].second *= multiplier; + } + }; + + std::smatch m; + std::string remaining_text = text; + + while (std::regex_search(remaining_text, m, re_attention)) { + std::string text = m[0]; + std::string weight = m[1]; + + if (text == "(") { + round_brackets.push_back((int)res.size()); + } else if (text == "[") { + square_brackets.push_back((int)res.size()); + } else if (!weight.empty()) { + if (!round_brackets.empty()) { + multiply_range(round_brackets.back(), std::stof(weight)); + round_brackets.pop_back(); + } + } else if (text == ")" && !round_brackets.empty()) { + multiply_range(round_brackets.back(), round_bracket_multiplier); + round_brackets.pop_back(); + } else if (text == "]" && !square_brackets.empty()) { + multiply_range(square_brackets.back(), square_bracket_multiplier); + square_brackets.pop_back(); + } else if (text == "\\(") { + res.push_back({text.substr(1), 1.0f}); + } else { + res.push_back({text, 1.0f}); + } + + remaining_text = m.suffix(); + } + + for (int pos : round_brackets) { + multiply_range(pos, round_bracket_multiplier); + } + + for (int pos : square_brackets) { + multiply_range(pos, square_bracket_multiplier); + } + + if (res.empty()) { + res.push_back({"", 1.0f}); + } + + int i = 0; + while (i + 1 < res.size()) { + if (res[i].second == res[i + 1].second) { + res[i].first += res[i + 1].first; + res.erase(res.begin() + i + 1); + } else { + ++i; + } + } + + return res; +} + +/*================================================ FrozenCLIPEmbedder ================================================*/ + +// Ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/clip/modeling_clip.py + +struct CLIPMLP : public GGMLBlock { +protected: + bool use_gelu; + +public: + CLIPMLP(int64_t d_model, int64_t intermediate_size) { + blocks["fc1"] = std::shared_ptr(new Linear(d_model, intermediate_size)); + blocks["fc2"] = std::shared_ptr(new Linear(intermediate_size, d_model)); + 
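+        // d_model 1024 (OPEN_CLIP ViT-H, SD 2.x) and 1280 (ViT-bigG, SDXL) use standard GELU,
+        // while the 768-wide OpenAI ViT-L encoder (SD 1.x, and the first SDXL text encoder)
+        // uses the quick GELU approximation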
+ if (d_model == 1024 || d_model == 1280) { // SD 2.x + use_gelu = true; + } else { // SD 1.x + use_gelu = false; + } + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [N, n_token, d_model] + auto fc1 = std::dynamic_pointer_cast(blocks["fc1"]); + auto fc2 = std::dynamic_pointer_cast(blocks["fc2"]); + + x = fc1->forward(ctx, x); + if (use_gelu) { + x = ggml_gelu_inplace(ctx, x); + } else { + x = ggml_gelu_quick_inplace(ctx, x); + } + x = fc2->forward(ctx, x); + return x; + } +}; + +struct CLIPLayer : public GGMLBlock { +protected: + int64_t d_model; // hidden_size/embed_dim + int64_t n_head; + int64_t intermediate_size; + +public: + CLIPLayer(int64_t d_model, + int64_t n_head, + int64_t intermediate_size) + : d_model(d_model), + n_head(n_head), + intermediate_size(intermediate_size) { + blocks["self_attn"] = std::shared_ptr(new MultiheadAttention(d_model, n_head)); + blocks["layer_norm1"] = std::shared_ptr(new LayerNorm(d_model)); + blocks["layer_norm2"] = std::shared_ptr(new LayerNorm(d_model)); + + blocks["mlp"] = std::shared_ptr(new CLIPMLP(d_model, intermediate_size)); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, bool mask = true) { + // x: [N, n_token, d_model] + auto self_attn = std::dynamic_pointer_cast(blocks["self_attn"]); + auto layer_norm1 = std::dynamic_pointer_cast(blocks["layer_norm1"]); + auto layer_norm2 = std::dynamic_pointer_cast(blocks["layer_norm2"]); + auto mlp = std::dynamic_pointer_cast(blocks["mlp"]); + + x = ggml_add(ctx, x, self_attn->forward(ctx, layer_norm1->forward(ctx, x), mask)); + x = ggml_add(ctx, x, mlp->forward(ctx, layer_norm2->forward(ctx, x))); + return x; + } +}; + +struct CLIPEncoder : public GGMLBlock { +protected: + int64_t n_layer; + +public: + CLIPEncoder(int64_t n_layer, + int64_t d_model, + int64_t n_head, + int64_t intermediate_size) + : n_layer(n_layer) { + for (int i = 0; i < n_layer; i++) { + std::string name = "layers." + std::to_string(i); + blocks[name] = std::shared_ptr(new CLIPLayer(d_model, n_head, intermediate_size)); + } + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, int clip_skip = -1, bool mask = true) { + // x: [N, n_token, d_model] + int layer_idx = n_layer - 1; + LOG_DEBUG("clip_skip %d", clip_skip); + if (clip_skip > 0) { + layer_idx = n_layer - clip_skip; + } + + for (int i = 0; i < n_layer; i++) { + // LOG_DEBUG("layer %d", i); + if (i == layer_idx + 1) { + break; + } + std::string name = "layers." 
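+            // with clip_skip > 1 the last (clip_skip - 1) layers are skipped, e.g. clip_skip == 2
+            // returns the penultimate layer's hidden states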
+ std::to_string(i); + auto layer = std::dynamic_pointer_cast(blocks[name]); + x = layer->forward(ctx, x); // [N, n_token, d_model] + // LOG_DEBUG("layer %d", i); + } + return x; + } +}; + +class CLIPEmbeddings : public GGMLBlock { +protected: + int64_t embed_dim; + int64_t vocab_size; + int64_t num_positions; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, wtype, embed_dim, vocab_size); + params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, num_positions); + } + +public: + CLIPEmbeddings(int64_t embed_dim, + int64_t vocab_size = 49408, + int64_t num_positions = 77) + : embed_dim(embed_dim), + vocab_size(vocab_size), + num_positions(num_positions) { + } + + struct ggml_tensor* get_token_embed_weight() { + return params["token_embedding.weight"]; + } + + struct ggml_tensor* forward(struct ggml_context* ctx, + struct ggml_tensor* input_ids, + struct ggml_tensor* custom_embed_weight) { + // input_ids: [N, n_token] + auto token_embed_weight = params["token_embedding.weight"]; + auto position_embed_weight = params["position_embedding.weight"]; + + GGML_ASSERT(input_ids->ne[0] <= position_embed_weight->ne[0]); + + // token_embedding + position_embedding + auto x = ggml_add(ctx, + ggml_get_rows(ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids), + position_embed_weight); // [N, n_token, embed_dim] + return x; + } +}; + +class CLIPVisionEmbeddings : public GGMLBlock { +protected: + int64_t embed_dim; + int64_t num_channels; + int64_t patch_size; + int64_t image_size; + int64_t num_patches; + int64_t num_positions; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["patch_embedding.weight"] = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, patch_size, patch_size, num_channels, embed_dim); + params["class_embedding"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, embed_dim); + params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, num_positions); + } + +public: + CLIPVisionEmbeddings(int64_t embed_dim, + int64_t num_channels = 3, + int64_t patch_size = 14, + int64_t image_size = 224) + : embed_dim(embed_dim), + num_channels(num_channels), + patch_size(patch_size), + image_size(image_size) { + num_patches = (image_size / patch_size) * (image_size / patch_size); + num_positions = num_patches + 1; + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values) { + // pixel_values: [N, num_channels, image_size, image_size] + // return: [N, num_positions, embed_dim] + GGML_ASSERT(pixel_values->ne[0] == image_size && pixel_values->ne[1] == image_size && pixel_values->ne[2] == num_channels); + + auto patch_embed_weight = params["patch_embedding.weight"]; + auto class_embed_weight = params["class_embedding"]; + auto position_embed_weight = params["position_embedding.weight"]; + + // concat(patch_embedding, class_embedding) + position_embedding + struct ggml_tensor* patch_embedding; + int64_t N = pixel_values->ne[3]; + patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, NULL, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size] + patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches] + patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim] + patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, 
num_patches, N); // [N, num_patches, embed_dim, 1] + + struct ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, N); + class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding); // [N, embed_dim] + class_embedding = ggml_reshape_4d(ctx, class_embedding, 1, embed_dim, 1, N); // [N, 1, embed_dim, 1] + + struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding); // [N, num_positions, embed_dim, 1] + x = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N); // [N, num_positions, embed_dim] + x = ggml_add(ctx, x, position_embed_weight); + return x; // [N, num_positions, embed_dim] + } +}; + +// OPENAI_CLIP_VIT_L_14: https://huggingface.co/openai/clip-vit-large-patch14/blob/main/config.json +// OPEN_CLIP_VIT_H_14: https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/blob/main/config.json +// OPEN_CLIP_VIT_BIGG_14: https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k/blob/main/config.json (CLIPTextModelWithProjection) + +enum CLIPVersion { + OPENAI_CLIP_VIT_L_14, // SD 1.x and SDXL + OPEN_CLIP_VIT_H_14, // SD 2.x + OPEN_CLIP_VIT_BIGG_14, // SDXL +}; + +class CLIPTextModel : public GGMLBlock { +protected: + void init_params(struct ggml_context* ctx, ggml_type wtype) { + if (version == OPEN_CLIP_VIT_BIGG_14) { + params["text_projection"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, projection_dim, hidden_size); + } + } + +public: + CLIPVersion version = OPENAI_CLIP_VIT_L_14; + // network hparams + int32_t vocab_size = 49408; + int32_t n_token = 77; // max_position_embeddings + int32_t hidden_size = 768; + int32_t intermediate_size = 3072; + int32_t n_head = 12; + int32_t n_layer = 12; // num_hidden_layers + int32_t projection_dim = 1280; // only for OPEN_CLIP_VIT_BIGG_14 + int32_t clip_skip = -1; + bool with_final_ln = true; + + CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14, + int clip_skip_value = -1, + bool with_final_ln = true) + : version(version), with_final_ln(with_final_ln) { + if (version == OPEN_CLIP_VIT_H_14) { + hidden_size = 1024; + intermediate_size = 4096; + n_head = 16; + n_layer = 24; + } else if (version == OPEN_CLIP_VIT_BIGG_14) { // CLIPTextModelWithProjection + hidden_size = 1280; + intermediate_size = 5120; + n_head = 20; + n_layer = 32; + } + set_clip_skip(clip_skip_value); + + blocks["embeddings"] = std::shared_ptr(new CLIPEmbeddings(hidden_size, vocab_size, n_token)); + blocks["encoder"] = std::shared_ptr(new CLIPEncoder(n_layer, hidden_size, n_head, intermediate_size)); + blocks["final_layer_norm"] = std::shared_ptr(new LayerNorm(hidden_size)); + } + + void set_clip_skip(int skip) { + if (skip <= 0) { + return; + } + clip_skip = skip; + } + + struct ggml_tensor* get_token_embed_weight() { + auto embeddings = std::dynamic_pointer_cast(blocks["embeddings"]); + return embeddings->get_token_embed_weight(); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, + struct ggml_tensor* input_ids, + struct ggml_tensor* tkn_embeddings, + size_t max_token_idx = 0, + bool return_pooled = false) { + // input_ids: [N, n_token] + auto embeddings = std::dynamic_pointer_cast(blocks["embeddings"]); + auto encoder = std::dynamic_pointer_cast(blocks["encoder"]); + auto final_layer_norm = std::dynamic_pointer_cast(blocks["final_layer_norm"]); + + auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size] + x = encoder->forward(ctx, x, return_pooled ? 
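+        // the pooled path runs the full encoder stack (clip_skip is ignored) and later projects
+        // the hidden state at max_token_idx (typically the EOS position) with text_projection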
-1 : clip_skip, true); + if (return_pooled || with_final_ln) { + x = final_layer_norm->forward(ctx, x); + } + + if (return_pooled) { + auto text_projection = params["text_projection"]; + ggml_tensor* pooled = ggml_view_1d(ctx, x, hidden_size, x->nb[1] * max_token_idx); + pooled = ggml_mul_mat(ctx, ggml_cont(ctx, ggml_transpose(ctx, text_projection)), pooled); + return pooled; + } + + return x; // [N, n_token, hidden_size] + } +}; + +class CLIPVisionModel : public GGMLBlock { +protected: + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["visual_projection"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, projection_dim, hidden_size); + } + +public: + // network hparams + int32_t num_channels = 3; + int32_t patch_size = 14; + int32_t image_size = 224; + int32_t num_positions = 257; // (image_size / patch_size)^2 + 1 + int32_t hidden_size = 1024; + int32_t intermediate_size = 4096; + int32_t n_head = 16; + int32_t n_layer = 24; + int32_t projection_dim = 768; + +public: + CLIPVisionModel(CLIPVersion version = OPEN_CLIP_VIT_H_14) { + if (version == OPEN_CLIP_VIT_H_14) { + hidden_size = 1280; + intermediate_size = 5120; + n_head = 16; + n_layer = 32; + projection_dim = 1024; + } else if (version == OPEN_CLIP_VIT_BIGG_14) { + hidden_size = 1664; + intermediate_size = 8192; + n_head = 16; + n_layer = 48; + } + + blocks["embeddings"] = std::shared_ptr(new CLIPVisionEmbeddings(hidden_size, num_channels, patch_size, image_size)); + blocks["pre_layernorm"] = std::shared_ptr(new LayerNorm(hidden_size)); + blocks["encoder"] = std::shared_ptr(new CLIPEncoder(n_layer, hidden_size, n_head, intermediate_size)); + blocks["post_layernorm"] = std::shared_ptr(new LayerNorm(hidden_size)); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values) { + // pixel_values: [N, num_channels, image_size, image_size] + // return: // [N, projection_dim] + auto embeddings = std::dynamic_pointer_cast(blocks["embeddings"]); + auto pre_layernorm = std::dynamic_pointer_cast(blocks["pre_layernorm"]); + auto encoder = std::dynamic_pointer_cast(blocks["encoder"]); + auto post_layernorm = std::dynamic_pointer_cast(blocks["post_layernorm"]); + + auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim] + x = pre_layernorm->forward(ctx, x); + x = encoder->forward(ctx, x, -1, true); + x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size] + + GGML_ASSERT(x->ne[2] == 1); + int64_t max_token_idx = 0; + ggml_tensor* pooled = ggml_view_1d(ctx, x, x->ne[0], x->nb[1] * max_token_idx); // assert N == 1 + auto visual_projection = params["visual_projection"]; + pooled = ggml_mul_mat(ctx, ggml_cont(ctx, ggml_transpose(ctx, visual_projection)), pooled); + return pooled; // [N, projection_dim] + } +}; + +class CLIPVisionModelProjection : public GGMLBlock { +public: + int32_t hidden_size = 1024; + int32_t projection_dim = 1024; + int32_t image_size = 224; + +public: + CLIPVisionModelProjection(CLIPVersion version = OPEN_CLIP_VIT_H_14) { + if (version == OPEN_CLIP_VIT_H_14) { + hidden_size = 1280; + projection_dim = 1024; + } else if (version == OPEN_CLIP_VIT_BIGG_14) { + hidden_size = 1664; + } + + blocks["visual_model"] = std::shared_ptr(new CLIPVisionModel(version)); + blocks["visual_projection"] = std::shared_ptr(new Linear(hidden_size, projection_dim, false)); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* pixel_values) { + // pixel_values: [N, num_channels, image_size, image_size] + // return: [N, num_positions, 
projection_dim] + auto visual_model = std::dynamic_pointer_cast(blocks["visual_model"]); + auto visual_projection = std::dynamic_pointer_cast(blocks["visual_projection"]); + + auto x = visual_model->forward(ctx, pixel_values); // [N, embed_dim] + x = visual_projection->forward(ctx, x); // [N, projection_dim] + + return x; // [N, projection_dim] + } +}; + +// ldm.modules.encoders.modules.FrozenCLIPEmbedder +// Ref: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/cad87bf4e3e0b0a759afa94e933527c3123d59bc/modules/sd_hijack_clip.py#L283 +struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule { + SDVersion version = VERSION_1_x; + CLIPTokenizer tokenizer; + CLIPTextModel text_model; + CLIPTextModel text_model2; + + std::string embd_dir; + int32_t num_custom_embeddings = 0; + std::vector token_embed_custom; + std::vector readed_embeddings; + + FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend, + ggml_type wtype, + SDVersion version = VERSION_1_x, + int clip_skip = -1) + : GGMLModule(backend, wtype), version(version), tokenizer(version) { + if (clip_skip <= 0) { + clip_skip = 1; + if (version == VERSION_2_x || version == VERSION_XL) { + clip_skip = 2; + } + } + if (version == VERSION_1_x) { + text_model = CLIPTextModel(OPENAI_CLIP_VIT_L_14, clip_skip); + text_model.init(params_ctx, wtype); + } else if (version == VERSION_2_x) { + text_model = CLIPTextModel(OPEN_CLIP_VIT_H_14, clip_skip); + text_model.init(params_ctx, wtype); + } else if (version == VERSION_XL) { + text_model = CLIPTextModel(OPENAI_CLIP_VIT_L_14, clip_skip, false); + text_model2 = CLIPTextModel(OPEN_CLIP_VIT_BIGG_14, clip_skip, false); + text_model.init(params_ctx, wtype); + text_model2.init(params_ctx, wtype); + } + } + + std::string get_desc() { + return "clip"; + } + + size_t get_params_mem_size() { + size_t params_mem_size = text_model.get_params_mem_size(); + if (version == VERSION_XL) { + params_mem_size += text_model2.get_params_mem_size(); + } + return params_mem_size; + } + + size_t get_params_num() { + size_t params_num = text_model.get_params_num(); + if (version == VERSION_XL) { + params_num += text_model2.get_params_num(); + } + return params_num; + } + + void set_clip_skip(int clip_skip) { + text_model.set_clip_skip(clip_skip); + if (version == VERSION_XL) { + text_model2.set_clip_skip(clip_skip); + } + } + + void get_param_tensors(std::map& tensors, const std::string prefix) { + text_model.get_param_tensors(tensors, prefix + "transformer.text_model"); + if (version == VERSION_XL) { + text_model2.get_param_tensors(tensors, prefix + "1.transformer.text_model"); + } + } + + bool load_embedding(std::string embd_name, std::string embd_path, std::vector& bpe_tokens) { + // the order matters + ModelLoader model_loader; + if (!model_loader.init_from_file(embd_path)) { + LOG_ERROR("embedding '%s' failed", embd_name.c_str()); + return false; + } + struct ggml_init_params params; + params.mem_size = 32 * 1024; // max for custom embeddings 32 KB + params.mem_buffer = NULL; + params.no_alloc = false; + struct ggml_context* embd_ctx = ggml_init(params); + struct ggml_tensor* embd = NULL; + auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) { + if (tensor_storage.ne[0] != text_model.hidden_size) { + LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], text_model.hidden_size); + return false; + } + embd = ggml_new_tensor_2d(embd_ctx, wtype, text_model.hidden_size, tensor_storage.n_dims > 1 ? 
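+            // an embedding file may carry several vectors (n_dims > 1); each row is appended to
+            // token_embed_custom and exposed as a new token id (vocab_size + running index)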
tensor_storage.ne[1] : 1); + *dst_tensor = embd; + return true; + }; + model_loader.load_tensors(on_load, NULL); + readed_embeddings.push_back(embd_name); + token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd)); + memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * text_model.hidden_size * ggml_type_size(wtype)), + embd->data, + ggml_nbytes(embd)); + for (int i = 0; i < embd->ne[1]; i++) { + bpe_tokens.push_back(text_model.vocab_size + num_custom_embeddings); + // LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings); + num_custom_embeddings++; + } + LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings); + return true; + } + + struct ggml_tensor* forward(struct ggml_context* ctx, + struct ggml_tensor* input_ids, + struct ggml_tensor* input_ids2, + struct ggml_tensor* embeddings, + size_t max_token_idx = 0, + bool return_pooled = false) { + if (return_pooled) { + return text_model2.forward(ctx, input_ids2, NULL, max_token_idx, return_pooled); + } + auto hidden_states = text_model.forward(ctx, input_ids, embeddings); // [N, n_token, hidden_size] + // LOG_DEBUG("hidden_states: %d %d %d %d", hidden_states->ne[0], hidden_states->ne[1], hidden_states->ne[2], hidden_states->ne[3]); + if (version == VERSION_XL) { + hidden_states = ggml_reshape_4d(ctx, + hidden_states, + hidden_states->ne[0], + hidden_states->ne[1], + hidden_states->ne[2], + hidden_states->ne[3]); + hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 2, 0, 1, 3)); + + auto hidden_states2 = text_model2.forward(ctx, input_ids2, NULL); // [N, n_token, hidden_size2] + // LOG_DEBUG("hidden_states: %d %d %d %d", hidden_states->ne[0], hidden_states->ne[1], hidden_states->ne[2], hidden_states->ne[3]); + hidden_states2 = ggml_reshape_4d(ctx, + hidden_states2, + hidden_states2->ne[0], + hidden_states2->ne[1], + hidden_states2->ne[2], + hidden_states2->ne[3]); + hidden_states2 = ggml_cont(ctx, ggml_permute(ctx, hidden_states2, 2, 0, 1, 3)); + + hidden_states = ggml_concat(ctx, hidden_states, hidden_states2); // [N, n_token, hidden_size + hidden_size2] + + hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3)); + } + // LOG_DEBUG("hidden_states: %d %d %d %d", hidden_states->ne[0], hidden_states->ne[1], hidden_states->ne[2], hidden_states->ne[3]); + return hidden_states; + } + + struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids, + struct ggml_tensor* input_ids2 = NULL, + size_t max_token_idx = 0, + bool return_pooled = false) { + struct ggml_cgraph* gf = ggml_new_graph(compute_ctx); + + input_ids2 = to_backend(input_ids2); + if (!return_pooled) { + input_ids = to_backend(input_ids); + } + + struct ggml_tensor* embeddings = NULL; + + if (num_custom_embeddings > 0 && version != VERSION_XL) { + auto custom_embeddings = ggml_new_tensor_3d(compute_ctx, + wtype, + text_model.hidden_size, + 1, + num_custom_embeddings); + set_backend_tensor_data(custom_embeddings, token_embed_custom.data()); + + auto token_embed_weight = text_model.get_token_embed_weight(); + token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]); + // concatenate custom embeddings + embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings); + embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]); + } + + struct ggml_tensor* hidden_states = forward(compute_ctx, input_ids, input_ids2, embeddings, 
max_token_idx, return_pooled); + + ggml_build_forward_expand(gf, hidden_states); + + return gf; + } + + void compute(const int n_threads, + struct ggml_tensor* input_ids, + struct ggml_tensor* input_ids2, + size_t max_token_idx, + bool return_pooled, + ggml_tensor** output, + ggml_context* output_ctx = NULL) { + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(input_ids, input_ids2, max_token_idx, return_pooled); + }; + GGMLModule::compute(get_graph, n_threads, true, output, output_ctx); + } + + std::pair, std::vector> tokenize(std::string text, + bool padding = false) { + return tokenize(text, text_model.n_token, padding); + } + + std::pair, std::vector> tokenize(std::string text, + size_t max_length = 0, + bool padding = false) { + auto parsed_attention = parse_prompt_attention(text); + + { + std::stringstream ss; + ss << "["; + for (const auto& item : parsed_attention) { + ss << "['" << item.first << "', " << item.second << "], "; + } + ss << "]"; + LOG_DEBUG("parse '%s' to %s", text.c_str(), ss.str().c_str()); + } + + auto on_new_token_cb = [&](std::string& str, std::vector& bpe_tokens) -> bool { + size_t word_end = str.find(","); + std::string embd_name = word_end == std::string::npos ? str : str.substr(0, word_end); + embd_name = trim(embd_name); + std::string embd_path = get_full_path(embd_dir, embd_name + ".pt"); + if (embd_path.size() == 0) { + embd_path = get_full_path(embd_dir, embd_name + ".ckpt"); + } + if (embd_path.size() == 0) { + embd_path = get_full_path(embd_dir, embd_name + ".safetensors"); + } + if (embd_path.size() > 0) { + if (load_embedding(embd_name, embd_path, bpe_tokens)) { + if (word_end != std::string::npos) { + str = str.substr(word_end); + } else { + str = ""; + } + return true; + } + } + return false; + }; + + std::vector tokens; + std::vector weights; + for (const auto& item : parsed_attention) { + const std::string& curr_text = item.first; + float curr_weight = item.second; + std::vector curr_tokens = tokenizer.encode(curr_text, on_new_token_cb); + tokens.insert(tokens.end(), curr_tokens.begin(), curr_tokens.end()); + weights.insert(weights.end(), curr_tokens.size(), curr_weight); + } + tokens.insert(tokens.begin(), BOS_TOKEN_ID); + weights.insert(weights.begin(), 1.0); + + if (max_length > 0) { + if (tokens.size() > max_length - 1) { + tokens.resize(max_length - 1); + weights.resize(max_length - 1); + tokens.push_back(EOS_TOKEN_ID); + weights.push_back(1.0); + } else { + tokens.push_back(EOS_TOKEN_ID); + weights.push_back(1.0); + if (padding) { + int pad_token_id = PAD_TOKEN_ID; + if (version == VERSION_2_x) { + pad_token_id = 0; + } + tokens.insert(tokens.end(), max_length - tokens.size(), pad_token_id); + weights.insert(weights.end(), max_length - weights.size(), 1.0); + } + } + } + + // for (int i = 0; i < tokens.size(); i++) { + // std::cout << tokens[i] << ":" << weights[i] << ", "; + // } + // std::cout << std::endl; + + return {tokens, weights}; + } +}; + +struct FrozenCLIPVisionEmbedder : public GGMLModule { + CLIPVisionModel vision_model; + + FrozenCLIPVisionEmbedder(ggml_backend_t backend, ggml_type wtype) + : GGMLModule(backend, wtype) { + vision_model.init(params_ctx, wtype); + } + + std::string get_desc() { + return "clip_vision"; + } + + size_t get_params_mem_size() { + return vision_model.get_params_mem_size(); + } + + size_t get_params_num() { + return vision_model.get_params_num(); + } + + void get_param_tensors(std::map& tensors, const std::string prefix) { + vision_model.get_param_tensors(tensors, prefix + 
"transformer.visual_model"); + } + + struct ggml_cgraph* build_graph(struct ggml_tensor* pixel_values) { + struct ggml_cgraph* gf = ggml_new_graph(compute_ctx); + + pixel_values = to_backend(pixel_values); + + struct ggml_tensor* hidden_states = vision_model.forward(compute_ctx, pixel_values); + + ggml_build_forward_expand(gf, hidden_states); + + return gf; + } + + void compute(const int n_threads, + ggml_tensor* pixel_values, + ggml_tensor** output, + ggml_context* output_ctx) { + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(pixel_values); + }; + GGMLModule::compute(get_graph, n_threads, true, output, output_ctx); + } +}; + +#endif // __CLIP_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/common.hpp b/otherarch/sdcpp/common.hpp new file mode 100644 index 000000000..30b213ee5 --- /dev/null +++ b/otherarch/sdcpp/common.hpp @@ -0,0 +1,529 @@ +#ifndef __COMMON_HPP__ +#define __COMMON_HPP__ + +#include "ggml_extend.hpp" + +class DownSampleBlock : public GGMLBlock { +protected: + int channels; + int out_channels; + bool vae_downsample; + +public: + DownSampleBlock(int channels, + int out_channels, + bool vae_downsample = false) + : channels(channels), + out_channels(out_channels), + vae_downsample(vae_downsample) { + if (vae_downsample) { + blocks["conv"] = std::shared_ptr(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {0, 0})); + } else { + blocks["op"] = std::shared_ptr(new Conv2d(channels, out_channels, {3, 3}, {2, 2}, {1, 1})); + } + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [N, channels, h, w] + if (vae_downsample) { + auto conv = std::dynamic_pointer_cast(blocks["conv"]); + + x = ggml_pad(ctx, x, 1, 1, 0, 0); + x = conv->forward(ctx, x); + } else { + auto conv = std::dynamic_pointer_cast(blocks["op"]); + + x = conv->forward(ctx, x); + } + return x; // [N, out_channels, h/2, w/2] + } +}; + +class UpSampleBlock : public GGMLBlock { +protected: + int channels; + int out_channels; + +public: + UpSampleBlock(int channels, + int out_channels) + : channels(channels), + out_channels(out_channels) { + blocks["conv"] = std::shared_ptr(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1})); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [N, channels, h, w] + auto conv = std::dynamic_pointer_cast(blocks["conv"]); + + x = ggml_upscale(ctx, x, 2); // [N, channels, h*2, w*2] + x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2] + return x; + } +}; + +class ResBlock : public GGMLBlock { +protected: + // network hparams + int64_t channels; // model_channels * (1, 1, 1, 2, 2, 4, 4, 4) + int64_t emb_channels; // time_embed_dim + int64_t out_channels; // mult * model_channels + std::pair kernel_size; + int dims; + bool skip_t_emb; + bool exchange_temb_dims; + + std::shared_ptr conv_nd(int dims, + int64_t in_channels, + int64_t out_channels, + std::pair kernel_size, + std::pair padding) { + GGML_ASSERT(dims == 2 || dims == 3); + if (dims == 3) { + return std::shared_ptr(new Conv3dnx1x1(in_channels, out_channels, kernel_size.first, 1, padding.first)); + } else { + return std::shared_ptr(new Conv2d(in_channels, out_channels, kernel_size, {1, 1}, padding)); + } + } + +public: + ResBlock(int64_t channels, + int64_t emb_channels, + int64_t out_channels, + std::pair kernel_size = {3, 3}, + int dims = 2, + bool exchange_temb_dims = false, + bool skip_t_emb = false) + : channels(channels), + emb_channels(emb_channels), + out_channels(out_channels), + 
kernel_size(kernel_size), + dims(dims), + skip_t_emb(skip_t_emb), + exchange_temb_dims(exchange_temb_dims) { + std::pair padding = {kernel_size.first / 2, kernel_size.second / 2}; + blocks["in_layers.0"] = std::shared_ptr(new GroupNorm32(channels)); + // in_layer_1 is nn.SILU() + blocks["in_layers.2"] = conv_nd(dims, channels, out_channels, kernel_size, padding); + + if (!skip_t_emb) { + // emb_layer_0 is nn.SILU() + blocks["emb_layers.1"] = std::shared_ptr(new Linear(emb_channels, out_channels)); + } + + blocks["out_layers.0"] = std::shared_ptr(new GroupNorm32(out_channels)); + // out_layer_1 is nn.SILU() + // out_layer_2 is nn.Dropout(), skip for inference + blocks["out_layers.3"] = conv_nd(dims, out_channels, out_channels, kernel_size, padding); + + if (out_channels != channels) { + blocks["skip_connection"] = conv_nd(dims, channels, out_channels, {1, 1}, {0, 0}); + } + } + + virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = NULL) { + // For dims==3, we reduce dimension from 5d to 4d by merging h and w, in order not to change ggml + // [N, c, t, h, w] => [N, c, t, h * w] + // x: [N, channels, h, w] if dims == 2 else [N, channels, t, h, w] + // emb: [N, emb_channels] if dims == 2 else [N, t, emb_channels] + auto in_layers_0 = std::dynamic_pointer_cast(blocks["in_layers.0"]); + auto in_layers_2 = std::dynamic_pointer_cast(blocks["in_layers.2"]); + auto out_layers_0 = std::dynamic_pointer_cast(blocks["out_layers.0"]); + auto out_layers_3 = std::dynamic_pointer_cast(blocks["out_layers.3"]); + + if (emb == NULL) { + GGML_ASSERT(skip_t_emb); + } + + // in_layers + auto h = in_layers_0->forward(ctx, x); + h = ggml_silu_inplace(ctx, h); + h = in_layers_2->forward(ctx, h); // [N, out_channels, h, w] if dims == 2 else [N, out_channels, t, h, w] + + // emb_layers + if (!skip_t_emb) { + auto emb_layer_1 = std::dynamic_pointer_cast(blocks["emb_layers.1"]); + + auto emb_out = ggml_silu(ctx, emb); + emb_out = emb_layer_1->forward(ctx, emb_out); // [N, out_channels] if dims == 2 else [N, t, out_channels] + + if (dims == 2) { + emb_out = ggml_reshape_4d(ctx, emb_out, 1, 1, emb_out->ne[0], emb_out->ne[1]); // [N, out_channels, 1, 1] + } else { + emb_out = ggml_reshape_4d(ctx, emb_out, 1, emb_out->ne[0], emb_out->ne[1], emb_out->ne[2]); // [N, t, out_channels, 1] + if (exchange_temb_dims) { + // emb_out = rearrange(emb_out, "b t c ... 
-> b c t ...") + emb_out = ggml_cont(ctx, ggml_permute(ctx, emb_out, 0, 2, 1, 3)); // [N, out_channels, t, 1] + } + } + + h = ggml_add(ctx, h, emb_out); // [N, out_channels, h, w] if dims == 2 else [N, out_channels, t, h, w] + } + + // out_layers + h = out_layers_0->forward(ctx, h); + h = ggml_silu_inplace(ctx, h); + // dropout, skip for inference + h = out_layers_3->forward(ctx, h); + + // skip connection + if (out_channels != channels) { + auto skip_connection = std::dynamic_pointer_cast(blocks["skip_connection"]); + x = skip_connection->forward(ctx, x); // [N, out_channels, h, w] if dims == 2 else [N, out_channels, t, h, w] + } + + h = ggml_add(ctx, h, x); + return h; // [N, out_channels, h, w] if dims == 2 else [N, out_channels, t, h, w] + } +}; + +class GEGLU : public GGMLBlock { +protected: + int64_t dim_in; + int64_t dim_out; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2); + params["proj.bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim_out * 2); + } + +public: + GEGLU(int64_t dim_in, int64_t dim_out) + : dim_in(dim_in), dim_out(dim_out) {} + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [ne3, ne2, ne1, dim_in] + // return: [ne3, ne2, ne1, dim_out] + struct ggml_tensor* w = params["proj.weight"]; + struct ggml_tensor* b = params["proj.bias"]; + + auto x_w = ggml_view_2d(ctx, w, w->ne[0], w->ne[1] / 2, w->nb[1], 0); // [dim_out, dim_in] + auto x_b = ggml_view_1d(ctx, b, b->ne[0] / 2, 0); // [dim_out, dim_in] + auto gate_w = ggml_view_2d(ctx, w, w->ne[0], w->ne[1] / 2, w->nb[1], w->nb[1] * w->ne[1] / 2); // [dim_out, ] + auto gate_b = ggml_view_1d(ctx, b, b->ne[0] / 2, b->nb[0] * b->ne[0] / 2); // [dim_out, ] + + auto x_in = x; + x = ggml_nn_linear(ctx, x_in, x_w, x_b); // [ne3, ne2, ne1, dim_out] + auto gate = ggml_nn_linear(ctx, x_in, gate_w, gate_b); // [ne3, ne2, ne1, dim_out] + + gate = ggml_gelu_inplace(ctx, gate); + + x = ggml_mul(ctx, x, gate); // [ne3, ne2, ne1, dim_out] + + return x; + } +}; + +class FeedForward : public GGMLBlock { +public: + FeedForward(int64_t dim, + int64_t dim_out, + int64_t mult = 4) { + int64_t inner_dim = dim * mult; + + blocks["net.0"] = std::shared_ptr(new GEGLU(dim, inner_dim)); + // net_1 is nn.Dropout(), skip for inference + blocks["net.2"] = std::shared_ptr(new Linear(inner_dim, dim_out)); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [ne3, ne2, ne1, dim] + // return: [ne3, ne2, ne1, dim_out] + + auto net_0 = std::dynamic_pointer_cast(blocks["net.0"]); + auto net_2 = std::dynamic_pointer_cast(blocks["net.2"]); + + x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim] + x = net_2->forward(ctx, x); // [ne3, ne2, ne1, dim_out] + return x; + } +}; + +class CrossAttention : public GGMLBlock { +protected: + int64_t query_dim; + int64_t context_dim; + int64_t n_head; + int64_t d_head; + +public: + CrossAttention(int64_t query_dim, + int64_t context_dim, + int64_t n_head, + int64_t d_head) + : n_head(n_head), + d_head(d_head), + query_dim(query_dim), + context_dim(context_dim) { + int64_t inner_dim = d_head * n_head; + + blocks["to_q"] = std::shared_ptr(new Linear(query_dim, inner_dim, false)); + blocks["to_k"] = std::shared_ptr(new Linear(context_dim, inner_dim, false)); + blocks["to_v"] = std::shared_ptr(new Linear(context_dim, inner_dim, false)); + + blocks["to_out.0"] = std::shared_ptr(new Linear(inner_dim, query_dim)); + // to_out_1 is nn.Dropout(), skip 
for inference + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) { + // x: [N, n_token, query_dim] + // context: [N, n_context, context_dim] + // return: [N, n_token, query_dim] + auto to_q = std::dynamic_pointer_cast(blocks["to_q"]); + auto to_k = std::dynamic_pointer_cast(blocks["to_k"]); + auto to_v = std::dynamic_pointer_cast(blocks["to_v"]); + auto to_out_0 = std::dynamic_pointer_cast(blocks["to_out.0"]); + + int64_t n = x->ne[2]; + int64_t n_token = x->ne[1]; + int64_t n_context = context->ne[1]; + int64_t inner_dim = d_head * n_head; + + auto q = to_q->forward(ctx, x); // [N, n_token, inner_dim] + q = ggml_reshape_4d(ctx, q, d_head, n_head, n_token, n); // [N, n_token, n_head, d_head] + q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, n_token, d_head] + q = ggml_reshape_3d(ctx, q, d_head, n_token, n_head * n); // [N * n_head, n_token, d_head] + + auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim] + k = ggml_reshape_4d(ctx, k, d_head, n_head, n_context, n); // [N, n_context, n_head, d_head] + k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_head, n_context, d_head] + k = ggml_reshape_3d(ctx, k, d_head, n_context, n_head * n); // [N * n_head, n_context, d_head] + + auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim] + v = ggml_reshape_4d(ctx, v, d_head, n_head, n_context, n); // [N, n_context, n_head, d_head] + v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, n_context] + v = ggml_reshape_3d(ctx, v, n_context, d_head, n_head * n); // [N * n_head, d_head, n_context] + + auto kqv = ggml_nn_attention(ctx, q, k, v, false); // [N * n_head, n_token, d_head] + kqv = ggml_reshape_4d(ctx, kqv, d_head, n_token, n_head, n); + kqv = ggml_cont(ctx, ggml_permute(ctx, kqv, 0, 2, 1, 3)); // [N, n_token, n_head, d_head] + + x = ggml_reshape_3d(ctx, kqv, d_head * n_head, n_token, n); // [N, n_token, inner_dim] + + x = to_out_0->forward(ctx, x); // [N, n_token, query_dim] + return x; + } +}; + +class BasicTransformerBlock : public GGMLBlock { +protected: + int64_t n_head; + int64_t d_head; + bool ff_in; + +public: + BasicTransformerBlock(int64_t dim, + int64_t n_head, + int64_t d_head, + int64_t context_dim, + bool ff_in = false) + : n_head(n_head), d_head(d_head), ff_in(ff_in) { + // disable_self_attn is always False + // disable_temporal_crossattention is always False + // switch_temporal_ca_to_sa is always False + // inner_dim is always None or equal to dim + // gated_ff is always True + blocks["attn1"] = std::shared_ptr(new CrossAttention(dim, dim, n_head, d_head)); + blocks["attn2"] = std::shared_ptr(new CrossAttention(dim, context_dim, n_head, d_head)); + blocks["ff"] = std::shared_ptr(new FeedForward(dim, dim)); + blocks["norm1"] = std::shared_ptr(new LayerNorm(dim)); + blocks["norm2"] = std::shared_ptr(new LayerNorm(dim)); + blocks["norm3"] = std::shared_ptr(new LayerNorm(dim)); + + if (ff_in) { + blocks["norm_in"] = std::shared_ptr(new LayerNorm(dim)); + blocks["ff_in"] = std::shared_ptr(new FeedForward(dim, dim)); + } + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) { + // x: [N, n_token, query_dim] + // context: [N, n_context, context_dim] + // return: [N, n_token, query_dim] + + auto attn1 = std::dynamic_pointer_cast(blocks["attn1"]); + auto attn2 = std::dynamic_pointer_cast(blocks["attn2"]); + auto ff = std::dynamic_pointer_cast(blocks["ff"]); + auto norm1 = 
std::dynamic_pointer_cast(blocks["norm1"]); + auto norm2 = std::dynamic_pointer_cast(blocks["norm2"]); + auto norm3 = std::dynamic_pointer_cast(blocks["norm3"]); + + if (ff_in) { + auto norm_in = std::dynamic_pointer_cast(blocks["norm_in"]); + auto ff_in = std::dynamic_pointer_cast(blocks["ff_in"]); + + auto x_skip = x; + x = norm_in->forward(ctx, x); + x = ff_in->forward(ctx, x); + // self.is_res is always True + x = ggml_add(ctx, x, x_skip); + } + + auto r = x; + x = norm1->forward(ctx, x); + x = attn1->forward(ctx, x, x); // self-attention + x = ggml_add(ctx, x, r); + r = x; + x = norm2->forward(ctx, x); + x = attn2->forward(ctx, x, context); // cross-attention + x = ggml_add(ctx, x, r); + r = x; + x = norm3->forward(ctx, x); + x = ff->forward(ctx, x); + x = ggml_add(ctx, x, r); + + return x; + } +}; + +class SpatialTransformer : public GGMLBlock { +protected: + int64_t in_channels; // mult * model_channels + int64_t n_head; + int64_t d_head; + int64_t depth = 1; // 1 + int64_t context_dim = 768; // hidden_size, 1024 for VERSION_2_x + +public: + SpatialTransformer(int64_t in_channels, + int64_t n_head, + int64_t d_head, + int64_t depth, + int64_t context_dim) + : in_channels(in_channels), + n_head(n_head), + d_head(d_head), + depth(depth), + context_dim(context_dim) { + // We will convert unet transformer linear to conv2d 1x1 when loading the weights, so use_linear is always False + // disable_self_attn is always False + int64_t inner_dim = n_head * d_head; // in_channels + blocks["norm"] = std::shared_ptr(new GroupNorm32(in_channels)); + blocks["proj_in"] = std::shared_ptr(new Conv2d(in_channels, inner_dim, {1, 1})); + + for (int i = 0; i < depth; i++) { + std::string name = "transformer_blocks." + std::to_string(i); + blocks[name] = std::shared_ptr(new BasicTransformerBlock(inner_dim, n_head, d_head, context_dim)); + } + + blocks["proj_out"] = std::shared_ptr(new Conv2d(inner_dim, in_channels, {1, 1})); + } + + virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* context) { + // x: [N, in_channels, h, w] + // context: [N, max_position(aka n_token), hidden_size(aka context_dim)] + auto norm = std::dynamic_pointer_cast(blocks["norm"]); + auto proj_in = std::dynamic_pointer_cast(blocks["proj_in"]); + auto proj_out = std::dynamic_pointer_cast(blocks["proj_out"]); + + auto x_in = x; + int64_t n = x->ne[3]; + int64_t h = x->ne[1]; + int64_t w = x->ne[0]; + int64_t inner_dim = n_head * d_head; + + x = norm->forward(ctx, x); + x = proj_in->forward(ctx, x); // [N, inner_dim, h, w] + + x = ggml_cont(ctx, ggml_permute(ctx, x, 1, 2, 0, 3)); // [N, h, w, inner_dim] + x = ggml_reshape_3d(ctx, x, inner_dim, w * h, n); // [N, h * w, inner_dim] + + for (int i = 0; i < depth; i++) { + std::string name = "transformer_blocks." 
+ std::to_string(i); + auto transformer_block = std::dynamic_pointer_cast(blocks[name]); + + x = transformer_block->forward(ctx, x, context); + } + + x = ggml_cont(ctx, ggml_permute(ctx, x, 1, 0, 2, 3)); // [N, inner_dim, h * w] + x = ggml_reshape_4d(ctx, x, w, h, inner_dim, n); // [N, inner_dim, h, w] + + // proj_out + x = proj_out->forward(ctx, x); // [N, in_channels, h, w] + + x = ggml_add(ctx, x, x_in); + return x; + } +}; + +class AlphaBlender : public GGMLBlock { +protected: + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["mix_factor"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); + } + + float get_alpha() { + // image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,] + // so learned_with_images is same as learned + float alpha = ggml_backend_tensor_get_f32(params["mix_factor"]); + return sigmoid(alpha); + } + +public: + AlphaBlender() { + // merge_strategy is always learned_with_images + // for inference, we don't need to set alpha + // since mix_factor.shape is [1,], we don't need rearrange using rearrange_pattern + } + + struct ggml_tensor* forward(struct ggml_context* ctx, + struct ggml_tensor* x_spatial, + struct ggml_tensor* x_temporal) { + // image_only_indicator is always tensor([0.]) + float alpha = get_alpha(); + auto x = ggml_add(ctx, + ggml_scale(ctx, x_spatial, alpha), + ggml_scale(ctx, x_temporal, 1.0f - alpha)); + return x; + } +}; + +class VideoResBlock : public ResBlock { +public: + VideoResBlock(int channels, + int emb_channels, + int out_channels, + std::pair kernel_size = {3, 3}, + int64_t video_kernel_size = 3, + int dims = 2) // always 2 + : ResBlock(channels, emb_channels, out_channels, kernel_size, dims) { + blocks["time_stack"] = std::shared_ptr(new ResBlock(out_channels, emb_channels, out_channels, kernel_size, 3, true)); + blocks["time_mixer"] = std::shared_ptr(new AlphaBlender()); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* emb, + int num_video_frames) { + // x: [N, channels, h, w] aka [b*t, channels, h, w] + // emb: [N, emb_channels] aka [b*t, emb_channels] + // image_only_indicator is always tensor([0.]) + auto time_stack = std::dynamic_pointer_cast(blocks["time_stack"]); + auto time_mixer = std::dynamic_pointer_cast(blocks["time_mixer"]); + + x = ResBlock::forward(ctx, x, emb); + + int64_t T = num_video_frames; + int64_t B = x->ne[3] / T; + int64_t C = x->ne[2]; + int64_t H = x->ne[1]; + int64_t W = x->ne[0]; + + x = ggml_reshape_4d(ctx, x, W * H, C, T, B); // (b t) c h w -> b t c (h w) + x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // b t c (h w) -> b c t (h w) + auto x_mix = x; + + emb = ggml_reshape_4d(ctx, emb, emb->ne[0], T, B, emb->ne[3]); // (b t) ... -> b t ... 
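+ // temporal pass: time_stack (a ResBlock applied along the frame axis t) updates x, then time_mixer (AlphaBlender)
+ // blends the purely spatial branch x_mix with the temporal branch using alpha = sigmoid(mix_factor),
+ // i.e. out = alpha * x_spatial + (1 - alpha) * x_temporal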
+ + x = time_stack->forward(ctx, x, emb); // b t c (h w) + + x = time_mixer->forward(ctx, x_mix, x); // b t c (h w) + + x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w) + x = ggml_reshape_4d(ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w + + return x; + } +}; + +#endif // __COMMON_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/control.hpp b/otherarch/sdcpp/control.hpp new file mode 100644 index 000000000..1dcd8430c --- /dev/null +++ b/otherarch/sdcpp/control.hpp @@ -0,0 +1,466 @@ +#ifndef __CONTROL_HPP__ +#define __CONTROL_HPP__ + +#include "common.hpp" +#include "ggml_extend.hpp" +#include "model.h" + +#define CONTROL_NET_GRAPH_SIZE 1536 + +/* + =================================== ControlNet =================================== + Reference: https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/cldm/cldm.py + +*/ +class ControlNetBlock : public GGMLBlock { +protected: + SDVersion version = VERSION_1_x; + // network hparams + int in_channels = 4; + int out_channels = 4; + int hint_channels = 3; + int num_res_blocks = 2; + std::vector attention_resolutions = {4, 2, 1}; + std::vector channel_mult = {1, 2, 4, 4}; + std::vector transformer_depth = {1, 1, 1, 1}; + int time_embed_dim = 1280; // model_channels*4 + int num_heads = 8; + int num_head_channels = -1; // channels // num_heads + int context_dim = 768; // 1024 for VERSION_2_x, 2048 for VERSION_XL + +public: + int model_channels = 320; + int adm_in_channels = 2816; // only for VERSION_XL + + ControlNetBlock(SDVersion version = VERSION_1_x) + : version(version) { + if (version == VERSION_2_x) { + context_dim = 1024; + num_head_channels = 64; + num_heads = -1; + } else if (version == VERSION_XL) { + context_dim = 2048; + attention_resolutions = {4, 2}; + channel_mult = {1, 2, 4}; + transformer_depth = {1, 2, 10}; + num_head_channels = 64; + num_heads = -1; + } else if (version == VERSION_SVD) { + in_channels = 8; + out_channels = 4; + context_dim = 1024; + adm_in_channels = 768; + num_head_channels = 64; + num_heads = -1; + } + + blocks["time_embed.0"] = std::shared_ptr(new Linear(model_channels, time_embed_dim)); + // time_embed_1 is nn.SiLU() + blocks["time_embed.2"] = std::shared_ptr(new Linear(time_embed_dim, time_embed_dim)); + + if (version == VERSION_XL || version == VERSION_SVD) { + blocks["label_emb.0.0"] = std::shared_ptr(new Linear(adm_in_channels, time_embed_dim)); + // label_emb_1 is nn.SiLU() + blocks["label_emb.0.2"] = std::shared_ptr(new Linear(time_embed_dim, time_embed_dim)); + } + + // input_blocks + blocks["input_blocks.0.0"] = std::shared_ptr(new Conv2d(in_channels, model_channels, {3, 3}, {1, 1}, {1, 1})); + + std::vector input_block_chans; + input_block_chans.push_back(model_channels); + int ch = model_channels; + int input_block_idx = 0; + int ds = 1; + + auto get_resblock = [&](int64_t channels, int64_t emb_channels, int64_t out_channels) -> ResBlock* { + return new ResBlock(channels, emb_channels, out_channels); + }; + + auto get_attention_layer = [&](int64_t in_channels, + int64_t n_head, + int64_t d_head, + int64_t depth, + int64_t context_dim) -> SpatialTransformer* { + return new SpatialTransformer(in_channels, n_head, d_head, depth, context_dim); + }; + + auto make_zero_conv = [&](int64_t channels) { + return new Conv2d(channels, channels, {1, 1}); + }; + + blocks["zero_convs.0.0"] = std::shared_ptr(make_zero_conv(model_channels)); + + blocks["input_hint_block.0"] = std::shared_ptr(new Conv2d(hint_channels, 16, {3, 3}, {1, 1}, {1, 1})); + // 
nn.SiLU() + blocks["input_hint_block.2"] = std::shared_ptr(new Conv2d(16, 16, {3, 3}, {1, 1}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.4"] = std::shared_ptr(new Conv2d(16, 32, {3, 3}, {2, 2}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.6"] = std::shared_ptr(new Conv2d(32, 32, {3, 3}, {1, 1}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.8"] = std::shared_ptr(new Conv2d(32, 96, {3, 3}, {2, 2}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.10"] = std::shared_ptr(new Conv2d(96, 96, {3, 3}, {1, 1}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.12"] = std::shared_ptr(new Conv2d(96, 256, {3, 3}, {2, 2}, {1, 1})); + // nn.SiLU() + blocks["input_hint_block.14"] = std::shared_ptr(new Conv2d(256, model_channels, {3, 3}, {1, 1}, {1, 1})); + + size_t len_mults = channel_mult.size(); + for (int i = 0; i < len_mults; i++) { + int mult = channel_mult[i]; + for (int j = 0; j < num_res_blocks; j++) { + input_block_idx += 1; + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0"; + blocks[name] = std::shared_ptr(get_resblock(ch, time_embed_dim, mult * model_channels)); + + ch = mult * model_channels; + if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) { + int n_head = num_heads; + int d_head = ch / num_heads; + if (num_head_channels != -1) { + d_head = num_head_channels; + n_head = ch / d_head; + } + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1"; + blocks[name] = std::shared_ptr(get_attention_layer(ch, + n_head, + d_head, + transformer_depth[i], + context_dim)); + } + blocks["zero_convs." + std::to_string(input_block_idx) + ".0"] = std::shared_ptr(make_zero_conv(ch)); + input_block_chans.push_back(ch); + } + if (i != len_mults - 1) { + input_block_idx += 1; + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0"; + blocks[name] = std::shared_ptr(new DownSampleBlock(ch, ch)); + + blocks["zero_convs." + std::to_string(input_block_idx) + ".0"] = std::shared_ptr(make_zero_conv(ch)); + + input_block_chans.push_back(ch); + ds *= 2; + } + } + + // middle blocks + int n_head = num_heads; + int d_head = ch / num_heads; + if (num_head_channels != -1) { + d_head = num_head_channels; + n_head = ch / d_head; + } + blocks["middle_block.0"] = std::shared_ptr(get_resblock(ch, time_embed_dim, ch)); + blocks["middle_block.1"] = std::shared_ptr(get_attention_layer(ch, + n_head, + d_head, + transformer_depth[transformer_depth.size() - 1], + context_dim)); + blocks["middle_block.2"] = std::shared_ptr(get_resblock(ch, time_embed_dim, ch)); + + // middle_block_out + blocks["middle_block_out.0"] = std::shared_ptr(make_zero_conv(ch)); + } + + struct ggml_tensor* resblock_forward(std::string name, + struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* emb) { + auto block = std::dynamic_pointer_cast(blocks[name]); + return block->forward(ctx, x, emb); + } + + struct ggml_tensor* attention_layer_forward(std::string name, + struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* context) { + auto block = std::dynamic_pointer_cast(blocks[name]); + return block->forward(ctx, x, context); + } + + struct ggml_tensor* input_hint_block_forward(struct ggml_context* ctx, + struct ggml_tensor* hint, + struct ggml_tensor* emb, + struct ggml_tensor* context) { + int num_input_blocks = 15; + auto h = hint; + for (int i = 0; i < num_input_blocks; i++) { + if (i % 2 == 0) { + auto block = std::dynamic_pointer_cast(blocks["input_hint_block." 
+ std::to_string(i)]); + + h = block->forward(ctx, h); + } else { + h = ggml_silu_inplace(ctx, h); + } + } + return h; + } + + std::vector forward(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* hint, + struct ggml_tensor* guided_hint, + struct ggml_tensor* timesteps, + struct ggml_tensor* context, + struct ggml_tensor* y = NULL) { + // x: [N, in_channels, h, w] or [N, in_channels/2, h, w] + // timesteps: [N,] + // context: [N, max_position, hidden_size] or [1, max_position, hidden_size]. for example, [N, 77, 768] + // y: [N, adm_in_channels] or [1, adm_in_channels] + if (context != NULL) { + if (context->ne[2] != x->ne[3]) { + context = ggml_repeat(ctx, context, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, context->ne[0], context->ne[1], x->ne[3])); + } + } + + if (y != NULL) { + if (y->ne[1] != x->ne[3]) { + y = ggml_repeat(ctx, y, ggml_new_tensor_2d(ctx, GGML_TYPE_F32, y->ne[0], x->ne[3])); + } + } + + auto time_embed_0 = std::dynamic_pointer_cast(blocks["time_embed.0"]); + auto time_embed_2 = std::dynamic_pointer_cast(blocks["time_embed.2"]); + auto input_blocks_0_0 = std::dynamic_pointer_cast(blocks["input_blocks.0.0"]); + auto zero_convs_0 = std::dynamic_pointer_cast(blocks["zero_convs.0.0"]); + + auto middle_block_out = std::dynamic_pointer_cast(blocks["middle_block_out.0"]); + + auto t_emb = ggml_nn_timestep_embedding(ctx, timesteps, model_channels); // [N, model_channels] + + auto emb = time_embed_0->forward(ctx, t_emb); + emb = ggml_silu_inplace(ctx, emb); + emb = time_embed_2->forward(ctx, emb); // [N, time_embed_dim] + + // SDXL/SVD + if (y != NULL) { + auto label_embed_0 = std::dynamic_pointer_cast(blocks["label_emb.0.0"]); + auto label_embed_2 = std::dynamic_pointer_cast(blocks["label_emb.0.2"]); + + auto label_emb = label_embed_0->forward(ctx, y); + label_emb = ggml_silu_inplace(ctx, label_emb); + label_emb = label_embed_2->forward(ctx, label_emb); // [N, time_embed_dim] + + emb = ggml_add(ctx, emb, label_emb); // [N, time_embed_dim] + } + + std::vector outs; + + if (guided_hint == NULL) { + guided_hint = input_hint_block_forward(ctx, hint, emb, context); + } + outs.push_back(guided_hint); + + // input_blocks + + // input block 0 + auto h = input_blocks_0_0->forward(ctx, x); + h = ggml_add(ctx, h, guided_hint); + outs.push_back(zero_convs_0->forward(ctx, h)); + + // input block 1-11 + size_t len_mults = channel_mult.size(); + int input_block_idx = 0; + int ds = 1; + for (int i = 0; i < len_mults; i++) { + int mult = channel_mult[i]; + for (int j = 0; j < num_res_blocks; j++) { + input_block_idx += 1; + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0"; + h = resblock_forward(name, ctx, h, emb); // [N, mult*model_channels, h, w] + if (std::find(attention_resolutions.begin(), attention_resolutions.end(), ds) != attention_resolutions.end()) { + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".1"; + h = attention_layer_forward(name, ctx, h, context); // [N, mult*model_channels, h, w] + } + + auto zero_conv = std::dynamic_pointer_cast(blocks["zero_convs." + std::to_string(input_block_idx) + ".0"]); + + outs.push_back(zero_conv->forward(ctx, h)); + } + if (i != len_mults - 1) { + ds *= 2; + input_block_idx += 1; + + std::string name = "input_blocks." + std::to_string(input_block_idx) + ".0"; + auto block = std::dynamic_pointer_cast(blocks[name]); + + h = block->forward(ctx, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))] + + auto zero_conv = std::dynamic_pointer_cast(blocks["zero_convs." 
+ std::to_string(input_block_idx) + ".0"]); + + outs.push_back(zero_conv->forward(ctx, h)); + } + } + // [N, 4*model_channels, h/8, w/8] + + // middle_block + h = resblock_forward("middle_block.0", ctx, h, emb); // [N, 4*model_channels, h/8, w/8] + h = attention_layer_forward("middle_block.1", ctx, h, context); // [N, 4*model_channels, h/8, w/8] + h = resblock_forward("middle_block.2", ctx, h, emb); // [N, 4*model_channels, h/8, w/8] + + // out + outs.push_back(middle_block_out->forward(ctx, h)); + return outs; + } +}; + +struct ControlNet : public GGMLModule { + SDVersion version = VERSION_1_x; + ControlNetBlock control_net; + + ggml_backend_buffer_t control_buffer = NULL; // keep control output tensors in backend memory + ggml_context* control_ctx = NULL; + std::vector controls; // (12 input block outputs, 1 middle block output) SD 1.5 + struct ggml_tensor* guided_hint = NULL; // guided_hint cache, for faster inference + bool guided_hint_cached = false; + + ControlNet(ggml_backend_t backend, + ggml_type wtype, + SDVersion version = VERSION_1_x) + : GGMLModule(backend, wtype), control_net(version) { + control_net.init(params_ctx, wtype); + } + + ~ControlNet() { + free_control_ctx(); + } + + void alloc_control_ctx(std::vector outs) { + struct ggml_init_params params; + params.mem_size = static_cast(outs.size() * ggml_tensor_overhead()) + 1024 * 1024; + params.mem_buffer = NULL; + params.no_alloc = true; + control_ctx = ggml_init(params); + + controls.resize(outs.size() - 1); + + size_t control_buffer_size = 0; + + guided_hint = ggml_dup_tensor(control_ctx, outs[0]); + control_buffer_size += ggml_nbytes(guided_hint); + + for (int i = 0; i < outs.size() - 1; i++) { + controls[i] = ggml_dup_tensor(control_ctx, outs[i + 1]); + control_buffer_size += ggml_nbytes(controls[i]); + } + + control_buffer = ggml_backend_alloc_ctx_tensors(control_ctx, backend); + + LOG_DEBUG("control buffer size %.2fMB", control_buffer_size * 1.f / 1024.f / 1024.f); + } + + void free_control_ctx() { + if (control_buffer != NULL) { + ggml_backend_buffer_free(control_buffer); + control_buffer = NULL; + } + if (control_ctx != NULL) { + ggml_free(control_ctx); + control_ctx = NULL; + } + guided_hint = NULL; + guided_hint_cached = false; + controls.clear(); + } + + std::string get_desc() { + return "control_net"; + } + + size_t get_params_mem_size() { + return control_net.get_params_mem_size(); + } + + size_t get_params_num() { + return control_net.get_params_num(); + } + + void get_param_tensors(std::map& tensors, const std::string prefix) { + control_net.get_param_tensors(tensors, prefix); + } + + struct ggml_cgraph* build_graph(struct ggml_tensor* x, + struct ggml_tensor* hint, + struct ggml_tensor* timesteps, + struct ggml_tensor* context, + struct ggml_tensor* y = NULL) { + struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, CONTROL_NET_GRAPH_SIZE, false); + + x = to_backend(x); + if (guided_hint_cached) { + hint = NULL; + } else { + hint = to_backend(hint); + } + context = to_backend(context); + y = to_backend(y); + timesteps = to_backend(timesteps); + + auto outs = control_net.forward(compute_ctx, + x, + hint, + guided_hint_cached ? 
guided_hint : NULL, + timesteps, + context, + y); + + if (control_ctx == NULL) { + alloc_control_ctx(outs); + } + + ggml_build_forward_expand(gf, ggml_cpy(compute_ctx, outs[0], guided_hint)); + for (int i = 0; i < outs.size() - 1; i++) { + ggml_build_forward_expand(gf, ggml_cpy(compute_ctx, outs[i + 1], controls[i])); + } + + return gf; + } + + void compute(int n_threads, + struct ggml_tensor* x, + struct ggml_tensor* hint, + struct ggml_tensor* timesteps, + struct ggml_tensor* context, + struct ggml_tensor* y, + struct ggml_tensor** output = NULL, + struct ggml_context* output_ctx = NULL) { + // x: [N, in_channels, h, w] + // timesteps: [N, ] + // context: [N, max_position, hidden_size]([N, 77, 768]) or [1, max_position, hidden_size] + // y: [N, adm_in_channels] or [1, adm_in_channels] + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(x, hint, timesteps, context, y); + }; + + GGMLModule::compute(get_graph, n_threads, false, output, output_ctx); + guided_hint_cached = true; + } + + bool load_from_file(const std::string& file_path) { + LOG_INFO("loading control net from '%s'", file_path.c_str()); + alloc_params_buffer(); + std::map tensors; + control_net.get_param_tensors(tensors); + std::set ignore_tensors; + + ModelLoader model_loader; + if (!model_loader.init_from_file(file_path)) { + LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str()); + return false; + } + + bool success = model_loader.load_tensors(tensors, backend, ignore_tensors); + + if (!success) { + LOG_ERROR("load control net tensors from model loader failed"); + return false; + } + + LOG_INFO("control net model loaded"); + return success; + } +}; + +#endif // __CONTROL_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/denoiser.hpp b/otherarch/sdcpp/denoiser.hpp new file mode 100644 index 000000000..fd934540c --- /dev/null +++ b/otherarch/sdcpp/denoiser.hpp @@ -0,0 +1,125 @@ +#ifndef __DENOISER_HPP__ +#define __DENOISER_HPP__ + +#include "ggml_extend.hpp" + +/*================================================= CompVisDenoiser ==================================================*/ + +// Ref: https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py + +#define TIMESTEPS 1000 + +struct SigmaSchedule { + float alphas_cumprod[TIMESTEPS]; + float sigmas[TIMESTEPS]; + float log_sigmas[TIMESTEPS]; + + virtual std::vector get_sigmas(uint32_t n) = 0; + + float sigma_to_t(float sigma) { + float log_sigma = std::log(sigma); + std::vector dists; + dists.reserve(TIMESTEPS); + for (float log_sigma_val : log_sigmas) { + dists.push_back(log_sigma - log_sigma_val); + } + + int low_idx = 0; + for (size_t i = 0; i < TIMESTEPS; i++) { + if (dists[i] >= 0) { + low_idx++; + } + } + low_idx = std::min(std::max(low_idx - 1, 0), TIMESTEPS - 2); + int high_idx = low_idx + 1; + + float low = log_sigmas[low_idx]; + float high = log_sigmas[high_idx]; + float w = (low - log_sigma) / (low - high); + w = std::max(0.f, std::min(1.f, w)); + float t = (1.0f - w) * low_idx + w * high_idx; + + return t; + } + + float t_to_sigma(float t) { + int low_idx = static_cast(std::floor(t)); + int high_idx = static_cast(std::ceil(t)); + float w = t - static_cast(low_idx); + float log_sigma = (1.0f - w) * log_sigmas[low_idx] + w * log_sigmas[high_idx]; + return std::exp(log_sigma); + } +}; + +struct DiscreteSchedule : SigmaSchedule { + std::vector get_sigmas(uint32_t n) { + std::vector result; + + int t_max = TIMESTEPS - 1; + + if (n == 0) { + return result; + } else if (n == 1) { + 
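+ // n == 1: a single denoising step uses the sigma of the last training timestep (t_max),
+ // followed by the terminal sigma 0 that every schedule appends at the end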
result.push_back(t_to_sigma((float)t_max)); + result.push_back(0); + return result; + } + + float step = static_cast(t_max) / static_cast(n - 1); + for (uint32_t i = 0; i < n; ++i) { + float t = t_max - step * i; + result.push_back(t_to_sigma(t)); + } + result.push_back(0); + return result; + } +}; + +struct KarrasSchedule : SigmaSchedule { + std::vector get_sigmas(uint32_t n) { + // These *COULD* be function arguments here, + // but does anybody ever bother to touch them? + float sigma_min = 0.1f; + float sigma_max = 10.f; + float rho = 7.f; + + std::vector result(n + 1); + + float min_inv_rho = pow(sigma_min, (1.f / rho)); + float max_inv_rho = pow(sigma_max, (1.f / rho)); + for (uint32_t i = 0; i < n; i++) { + // Eq. (5) from Karras et al 2022 + result[i] = pow(max_inv_rho + (float)i / ((float)n - 1.f) * (min_inv_rho - max_inv_rho), rho); + } + result[n] = 0.; + return result; + } +}; + +struct Denoiser { + std::shared_ptr schedule = std::make_shared(); + virtual std::vector get_scalings(float sigma) = 0; +}; + +struct CompVisDenoiser : public Denoiser { + float sigma_data = 1.0f; + + std::vector get_scalings(float sigma) { + float c_out = -sigma; + float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data); + return {c_out, c_in}; + } +}; + +struct CompVisVDenoiser : public Denoiser { + float sigma_data = 1.0f; + + std::vector get_scalings(float sigma) { + float c_skip = sigma_data * sigma_data / (sigma * sigma + sigma_data * sigma_data); + float c_out = -sigma * sigma_data / std::sqrt(sigma * sigma + sigma_data * sigma_data); + float c_in = 1.0f / std::sqrt(sigma * sigma + sigma_data * sigma_data); + return {c_skip, c_out, c_in}; + } +}; + +#endif // __DENOISER_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/esrgan.hpp b/otherarch/sdcpp/esrgan.hpp new file mode 100644 index 000000000..5b6796064 --- /dev/null +++ b/otherarch/sdcpp/esrgan.hpp @@ -0,0 +1,206 @@ +#ifndef __ESRGAN_HPP__ +#define __ESRGAN_HPP__ + +#include "ggml_extend.hpp" +#include "model.h" + +/* + =================================== ESRGAN =================================== + References: + https://github.com/xinntao/Real-ESRGAN/blob/master/inference_realesrgan.py + https://github.com/XPixelGroup/BasicSR/blob/v1.4.2/basicsr/archs/rrdbnet_arch.py + +*/ + +class ResidualDenseBlock : public GGMLBlock { +protected: + int num_feat; + int num_grow_ch; + +public: + ResidualDenseBlock(int num_feat = 64, int num_grow_ch = 32) + : num_feat(num_feat), num_grow_ch(num_grow_ch) { + blocks["conv1"] = std::shared_ptr(new Conv2d(num_feat, num_grow_ch, {3, 3}, {1, 1}, {1, 1})); + blocks["conv2"] = std::shared_ptr(new Conv2d(num_feat + num_grow_ch, num_grow_ch, {3, 3}, {1, 1}, {1, 1})); + blocks["conv3"] = std::shared_ptr(new Conv2d(num_feat + 2 * num_grow_ch, num_grow_ch, {3, 3}, {1, 1}, {1, 1})); + blocks["conv4"] = std::shared_ptr(new Conv2d(num_feat + 3 * num_grow_ch, num_grow_ch, {3, 3}, {1, 1}, {1, 1})); + blocks["conv5"] = std::shared_ptr(new Conv2d(num_feat + 4 * num_grow_ch, num_feat, {3, 3}, {1, 1}, {1, 1})); + } + + struct ggml_tensor* lrelu(struct ggml_context* ctx, struct ggml_tensor* x) { + return ggml_leaky_relu(ctx, x, 0.2f, true); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [n, num_feat, h, w] + // return: [n, num_feat, h, w] + + auto conv1 = std::dynamic_pointer_cast(blocks["conv1"]); + auto conv2 = std::dynamic_pointer_cast(blocks["conv2"]); + auto conv3 = std::dynamic_pointer_cast(blocks["conv3"]); + auto conv4 = 
std::dynamic_pointer_cast(blocks["conv4"]); + auto conv5 = std::dynamic_pointer_cast(blocks["conv5"]); + + auto x1 = lrelu(ctx, conv1->forward(ctx, x)); + auto x_cat = ggml_concat(ctx, x, x1); + auto x2 = lrelu(ctx, conv2->forward(ctx, x_cat)); + x_cat = ggml_concat(ctx, x_cat, x2); + auto x3 = lrelu(ctx, conv3->forward(ctx, x_cat)); + x_cat = ggml_concat(ctx, x_cat, x3); + auto x4 = lrelu(ctx, conv4->forward(ctx, x_cat)); + x_cat = ggml_concat(ctx, x_cat, x4); + auto x5 = conv5->forward(ctx, x_cat); + + x5 = ggml_add(ctx, ggml_scale(ctx, x5, 0.2f), x); + return x5; + } +}; + +class RRDB : public GGMLBlock { +public: + RRDB(int num_feat, int num_grow_ch = 32) { + blocks["rdb1"] = std::shared_ptr(new ResidualDenseBlock(num_feat, num_grow_ch)); + blocks["rdb2"] = std::shared_ptr(new ResidualDenseBlock(num_feat, num_grow_ch)); + blocks["rdb3"] = std::shared_ptr(new ResidualDenseBlock(num_feat, num_grow_ch)); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [n, num_feat, h, w] + // return: [n, num_feat, h, w] + + auto rdb1 = std::dynamic_pointer_cast(blocks["rdb1"]); + auto rdb2 = std::dynamic_pointer_cast(blocks["rdb2"]); + auto rdb3 = std::dynamic_pointer_cast(blocks["rdb3"]); + + auto out = rdb1->forward(ctx, x); + out = rdb2->forward(ctx, out); + out = rdb3->forward(ctx, out); + + out = ggml_add(ctx, ggml_scale(ctx, out, 0.2f), x); + return out; + } +}; + +class RRDBNet : public GGMLBlock { +protected: + int scale = 4; // default RealESRGAN_x4plus_anime_6B + int num_block = 6; // default RealESRGAN_x4plus_anime_6B + int num_in_ch = 3; + int num_out_ch = 3; + int num_feat = 64; // default RealESRGAN_x4plus_anime_6B + int num_grow_ch = 32; // default RealESRGAN_x4plus_anime_6B + +public: + RRDBNet() { + blocks["conv_first"] = std::shared_ptr(new Conv2d(num_in_ch, num_feat, {3, 3}, {1, 1}, {1, 1})); + for (int i = 0; i < num_block; i++) { + std::string name = "body." + std::to_string(i); + blocks[name] = std::shared_ptr(new RRDB(num_feat, num_grow_ch)); + } + blocks["conv_body"] = std::shared_ptr(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); + // upsample + blocks["conv_up1"] = std::shared_ptr(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); + blocks["conv_up2"] = std::shared_ptr(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); + blocks["conv_hr"] = std::shared_ptr(new Conv2d(num_feat, num_feat, {3, 3}, {1, 1}, {1, 1})); + blocks["conv_last"] = std::shared_ptr(new Conv2d(num_feat, num_out_ch, {3, 3}, {1, 1}, {1, 1})); + } + + struct ggml_tensor* lrelu(struct ggml_context* ctx, struct ggml_tensor* x) { + return ggml_leaky_relu(ctx, x, 0.2f, true); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [n, num_in_ch, h, w] + // return: [n, num_out_ch, h*4, w*4] + auto conv_first = std::dynamic_pointer_cast(blocks["conv_first"]); + auto conv_body = std::dynamic_pointer_cast(blocks["conv_body"]); + auto conv_up1 = std::dynamic_pointer_cast(blocks["conv_up1"]); + auto conv_up2 = std::dynamic_pointer_cast(blocks["conv_up2"]); + auto conv_hr = std::dynamic_pointer_cast(blocks["conv_hr"]); + auto conv_last = std::dynamic_pointer_cast(blocks["conv_last"]); + + auto feat = conv_first->forward(ctx, x); + auto body_feat = feat; + for (int i = 0; i < num_block; i++) { + std::string name = "body." 
+ std::to_string(i); + auto block = std::dynamic_pointer_cast(blocks[name]); + + body_feat = block->forward(ctx, body_feat); + } + body_feat = conv_body->forward(ctx, body_feat); + feat = ggml_add(ctx, feat, body_feat); + // upsample + feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2))); + feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2))); + auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat))); + return out; + } +}; + +struct ESRGAN : public GGMLModule { + RRDBNet rrdb_net; + int scale = 4; + int tile_size = 128; // avoid cuda OOM for 4gb VRAM + + ESRGAN(ggml_backend_t backend, + ggml_type wtype) + : GGMLModule(backend, wtype) { + rrdb_net.init(params_ctx, wtype); + } + + std::string get_desc() { + return "esrgan"; + } + + size_t get_params_mem_size() { + return rrdb_net.get_params_mem_size(); + } + + size_t get_params_num() { + return rrdb_net.get_params_num(); + } + + bool load_from_file(const std::string& file_path) { + LOG_INFO("loading esrgan from '%s'", file_path.c_str()); + + alloc_params_buffer(); + std::map esrgan_tensors; + rrdb_net.get_param_tensors(esrgan_tensors); + + ModelLoader model_loader; + if (!model_loader.init_from_file(file_path)) { + LOG_ERROR("init esrgan model loader from file failed: '%s'", file_path.c_str()); + return false; + } + + bool success = model_loader.load_tensors(esrgan_tensors, backend); + + if (!success) { + LOG_ERROR("load esrgan tensors from model loader failed"); + return false; + } + + LOG_INFO("esrgan model loaded"); + return success; + } + + struct ggml_cgraph* build_graph(struct ggml_tensor* x) { + struct ggml_cgraph* gf = ggml_new_graph(compute_ctx); + x = to_backend(x); + struct ggml_tensor* out = rrdb_net.forward(compute_ctx, x); + ggml_build_forward_expand(gf, out); + return gf; + } + + void compute(const int n_threads, + struct ggml_tensor* x, + ggml_tensor** output, + ggml_context* output_ctx = NULL) { + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(x); + }; + GGMLModule::compute(get_graph, n_threads, false, output, output_ctx); + } +}; + +#endif // __ESRGAN_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/ggml_extend.hpp b/otherarch/sdcpp/ggml_extend.hpp new file mode 100644 index 000000000..580ec3e92 --- /dev/null +++ b/otherarch/sdcpp/ggml_extend.hpp @@ -0,0 +1,1255 @@ +#ifndef __GGML_EXTEND_HPP__ +#define __GGML_EXTEND_HPP__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ggml-alloc.h" +#include "ggml-backend.h" +#include "ggml.h" + +#ifdef SD_USE_CUBLAS +#include "ggml-cuda.h" +#endif + +#ifdef SD_USE_METAL +#include "ggml-metal.h" +#endif + +#include "rng.hpp" +#include "util.h" + +#define EPS 1e-05f + +#ifndef __STATIC_INLINE__ +#define __STATIC_INLINE__ static inline +#endif + +__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) { + (void)level; + (void)user_data; + fputs(text, stderr); + fflush(stderr); +} + +__STATIC_INLINE__ void ggml_tensor_set_f32_randn(struct ggml_tensor* tensor, std::shared_ptr rng) { + uint32_t n = (uint32_t)ggml_nelements(tensor); + std::vector random_numbers = rng->randn(n); + for (uint32_t i = 0; i < n; i++) { + ggml_set_f32_1d(tensor, i, random_numbers[i]); + } +} + +// set tensor[i, j, k, l] +// set tensor[l] +// set tensor[k, l] +// set tensor[j, k, l] +__STATIC_INLINE__ void ggml_tensor_set_f32(struct 
ggml_tensor* tensor, float value, int l, int k = 0, int j = 0, int i = 0) { + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + *(float*)((char*)(tensor->data) + i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0]) = value; +} + +__STATIC_INLINE__ float ggml_tensor_get_f32(const ggml_tensor* tensor, int l, int k = 0, int j = 0, int i = 0) { + if (tensor->buffer != NULL) { + float value; + ggml_backend_tensor_get(tensor, &value, i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0], sizeof(float)); + return value; + } + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + return *(float*)((char*)(tensor->data) + i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0]); +} + +__STATIC_INLINE__ ggml_fp16_t ggml_tensor_get_f16(const ggml_tensor* tensor, int l, int k = 0, int j = 0, int i = 0) { + GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); + return *(ggml_fp16_t*)((char*)(tensor->data) + i * tensor->nb[3] + j * tensor->nb[2] + k * tensor->nb[1] + l * tensor->nb[0]); +} + +__STATIC_INLINE__ void print_ggml_tensor(struct ggml_tensor* tensor, bool shape_only = false) { + printf("shape(%zu, %zu, %zu, %zu)\n", tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + fflush(stdout); + if (shape_only) { + return; + } + int range = 3; + for (int i = 0; i < tensor->ne[3]; i++) { + if (i >= range && i + range < tensor->ne[3]) { + continue; + } + for (int j = 0; j < tensor->ne[2]; j++) { + if (j >= range && j + range < tensor->ne[2]) { + continue; + } + for (int k = 0; k < tensor->ne[1]; k++) { + if (k >= range && k + range < tensor->ne[1]) { + continue; + } + for (int l = 0; l < tensor->ne[0]; l++) { + if (l >= range && l + range < tensor->ne[0]) { + continue; + } + if (tensor->type == GGML_TYPE_F32) { + printf(" [%d, %d, %d, %d] = %f\n", i, j, k, l, ggml_tensor_get_f32(tensor, l, k, j, i)); + } else if (tensor->type == GGML_TYPE_F16) { + printf(" [%d, %d, %d, %d] = %i\n", i, j, k, l, ggml_tensor_get_f16(tensor, l, k, j, i)); + } + fflush(stdout); + } + } + } + } +} + +__STATIC_INLINE__ ggml_tensor* load_tensor_from_file(ggml_context* ctx, const std::string& file_path) { + std::ifstream file(file_path, std::ios::binary); + if (!file.is_open()) { + LOG_ERROR("failed to open '%s'", file_path.c_str()); + return NULL; + } + int32_t n_dims; + int32_t length; + int32_t ttype; + + file.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + file.read(reinterpret_cast(&length), sizeof(length)); + file.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (file.eof()) { + LOG_ERROR("incomplete file '%s'", file_path.c_str()); + return NULL; + } + + int32_t nelements = 1; + int32_t ne[4] = {1, 1, 1, 1}; + for (int i = 0; i < n_dims; ++i) { + file.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + std::string name(length, 0); + file.read(&name[0], length); + ggml_tensor* tensor = ggml_new_tensor_4d(ctx, (ggml_type)ttype, ne[0], ne[1], ne[2], ne[3]); + const size_t bpe = ggml_type_size(ggml_type(ttype)); + file.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + return tensor; +} + +// __STATIC_INLINE__ void save_tensor_to_file(const std::string& file_name, ggml_tensor* tensor, const std::string & name) { +// std::string file_name_ = file_name + ".tensor"; +// std::string name_ = name; +// std::ofstream file("./" + file_name_, std::ios::binary); +// file.write(reinterpret_cast(&tensor->n_dims), sizeof(tensor->n_dims)); +// int len = (int)name_.size(); +// file.write(reinterpret_cast(&len), sizeof(len)); +// int 
ttype = (int)tensor->type; +// file.write(reinterpret_cast(&ttype), sizeof(ttype)); +// for (int i = 0; i < tensor->n_dims; ++i) { +// int ne_ = (int) tensor->ne[i]; +// file.write(reinterpret_cast(&ne_), sizeof(ne_)); +// } +// file.write(&name_[0], len); +// char* data = nullptr; +// file.write((char*)tensor->data, ggml_nbytes(tensor)); +// file.close(); +// } + +__STATIC_INLINE__ void copy_ggml_tensor(struct ggml_tensor* dst, struct ggml_tensor* src) { + if (dst->type == src->type) { + dst->nb[0] = src->nb[0]; + dst->nb[1] = src->nb[1]; + dst->nb[2] = src->nb[2]; + dst->nb[3] = src->nb[3]; + + memcpy(((char*)dst->data), ((char*)src->data), ggml_nbytes(dst)); + return; + } + struct ggml_init_params params; + params.mem_size = 10 * 1024 * 1024; // for padding + params.mem_buffer = NULL; + params.no_alloc = false; + struct ggml_context* ctx = ggml_init(params); + if (!ctx) { + LOG_ERROR("ggml_init() failed"); + return; + } + ggml_tensor* final = ggml_cpy(ctx, src, dst); + + struct ggml_cgraph* graph = ggml_new_graph(ctx); + ggml_build_forward_expand(graph, final); + ggml_graph_compute_with_ctx(ctx, graph, 1); + ggml_free(ctx); +} + +__STATIC_INLINE__ float sigmoid(float x) { + return 1 / (1.0f + expf(-x)); +} + +// SPECIAL OPERATIONS WITH TENSORS + +__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) { + int64_t width = input->ne[0]; + int64_t height = input->ne[1]; + int64_t channels = input->ne[2]; + GGML_ASSERT(channels == 3 && input->type == GGML_TYPE_F32); + uint8_t* image_data = (uint8_t*)malloc(width * height * channels); + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + for (int k = 0; k < channels; k++) { + float value = ggml_tensor_get_f32(input, ix, iy, k); + *(image_data + iy * width * channels + ix * channels + k) = (uint8_t)(value * 255.0f); + } + } + } + return image_data; +} + +__STATIC_INLINE__ void sd_image_to_tensor(const uint8_t* image_data, + struct ggml_tensor* output, + bool scale = true) { + int64_t width = output->ne[0]; + int64_t height = output->ne[1]; + int64_t channels = output->ne[2]; + GGML_ASSERT(channels == 3 && output->type == GGML_TYPE_F32); + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + for (int k = 0; k < channels; k++) { + float value = *(image_data + iy * width * channels + ix * channels + k); + if (scale) { + value /= 255.f; + } + ggml_tensor_set_f32(output, value, ix, iy, k); + } + } + } +} + +__STATIC_INLINE__ void sd_image_f32_to_tensor(const float* image_data, + struct ggml_tensor* output, + bool scale = true) { + int64_t width = output->ne[0]; + int64_t height = output->ne[1]; + int64_t channels = output->ne[2]; + GGML_ASSERT(channels == 3 && output->type == GGML_TYPE_F32); + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + for (int k = 0; k < channels; k++) { + float value = *(image_data + iy * width * channels + ix * channels + k); + if (scale) { + value /= 255.f; + } + ggml_tensor_set_f32(output, value, ix, iy, k); + } + } + } +} + +__STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input, + struct ggml_tensor* output, + int x, + int y) { + int64_t width = output->ne[0]; + int64_t height = output->ne[1]; + int64_t channels = output->ne[2]; + GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32); + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + for (int k = 0; k < channels; k++) { + float value = ggml_tensor_get_f32(input, ix + x, iy + y, k); + 
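+ // copy one element of the (x, y)-offset window of the full input tensor into position (ix, iy, k) of the tile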
ggml_tensor_set_f32(output, value, ix, iy, k); + } + } + } +} + +__STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input, + struct ggml_tensor* output, + int x, + int y, + int overlap) { + int64_t width = input->ne[0]; + int64_t height = input->ne[1]; + int64_t channels = input->ne[2]; + GGML_ASSERT(input->type == GGML_TYPE_F32 && output->type == GGML_TYPE_F32); + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + for (int k = 0; k < channels; k++) { + float new_value = ggml_tensor_get_f32(input, ix, iy, k); + if (overlap > 0) { // blend colors in overlapped area + float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k); + if (x > 0 && ix < overlap) { // in overlapped horizontal + ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (ix / (1.0f * overlap)), x + ix, y + iy, k); + continue; + } + if (y > 0 && iy < overlap) { // in overlapped vertical + ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (iy / (1.0f * overlap)), x + ix, y + iy, k); + continue; + } + } + ggml_tensor_set_f32(output, new_value, x + ix, y + iy, k); + } + } + } +} + +__STATIC_INLINE__ float ggml_tensor_mean(struct ggml_tensor* src) { + float mean = 0.0f; + int64_t nelements = ggml_nelements(src); + float* data = (float*)src->data; + for (int i = 0; i < nelements; i++) { + mean += data[i] / nelements * 1.0f; + } + return mean; +} + +// a = a+b +__STATIC_INLINE__ void ggml_tensor_add(struct ggml_tensor* a, struct ggml_tensor* b) { + GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); + int64_t nelements = ggml_nelements(a); + float* vec_a = (float*)a->data; + float* vec_b = (float*)b->data; + for (int i = 0; i < nelements; i++) { + vec_a[i] = vec_a[i] + vec_b[i]; + } +} + +__STATIC_INLINE__ void ggml_tensor_scale(struct ggml_tensor* src, float scale) { + int64_t nelements = ggml_nelements(src); + float* data = (float*)src->data; + for (int i = 0; i < nelements; i++) { + data[i] = data[i] * scale; + } +} + +__STATIC_INLINE__ void ggml_tensor_clamp(struct ggml_tensor* src, float min, float max) { + int64_t nelements = ggml_nelements(src); + float* data = (float*)src->data; + for (int i = 0; i < nelements; i++) { + float val = data[i]; + data[i] = val < min ? min : (val > max ? 
max : val); + } +} + +// convert values from [0, 1] to [-1, 1] +__STATIC_INLINE__ void ggml_tensor_scale_input(struct ggml_tensor* src) { + int64_t nelements = ggml_nelements(src); + float* data = (float*)src->data; + for (int i = 0; i < nelements; i++) { + float val = data[i]; + data[i] = val * 2.0f - 1.0f; + } +} + +// convert values from [-1, 1] to [0, 1] +__STATIC_INLINE__ void ggml_tensor_scale_output(struct ggml_tensor* src) { + int64_t nelements = ggml_nelements(src); + float* data = (float*)src->data; + for (int i = 0; i < nelements; i++) { + float val = data[i]; + data[i] = (val + 1.0f) * 0.5f; + } +} + +typedef std::function on_tile_process; + +// Tiling +__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) { + int input_width = (int)input->ne[0]; + int input_height = (int)input->ne[1]; + int output_width = (int)output->ne[0]; + int output_height = (int)output->ne[1]; + GGML_ASSERT(input_width % 2 == 0 && input_height % 2 == 0 && output_width % 2 == 0 && output_height % 2 == 0); // should be multiple of 2 + + int tile_overlap = (int32_t)(tile_size * tile_overlap_factor); + int non_tile_overlap = tile_size - tile_overlap; + + struct ggml_init_params params = {}; + params.mem_size += tile_size * tile_size * input->ne[2] * sizeof(float); // input chunk + params.mem_size += (tile_size * scale) * (tile_size * scale) * output->ne[2] * sizeof(float); // output chunk + params.mem_size += 3 * ggml_tensor_overhead(); + params.mem_buffer = NULL; + params.no_alloc = false; + + LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f); + + // draft context + struct ggml_context* tiles_ctx = ggml_init(params); + if (!tiles_ctx) { + LOG_ERROR("ggml_init() failed"); + return; + } + + // tiling + ggml_tensor* input_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size, tile_size, input->ne[2], 1); + ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size * scale, tile_size * scale, output->ne[2], 1); + on_processing(input_tile, NULL, true); + int num_tiles = (input_width * input_height) / (non_tile_overlap * non_tile_overlap); + LOG_INFO("processing %i tiles", num_tiles); + pretty_progress(1, num_tiles, 0.0f); + int tile_count = 1; + bool last_y = false, last_x = false; + float last_time = 0.0f; + for (int y = 0; y < input_height && !last_y; y += non_tile_overlap) { + if (y + tile_size >= input_height) { + y = input_height - tile_size; + last_y = true; + } + for (int x = 0; x < input_width && !last_x; x += non_tile_overlap) { + if (x + tile_size >= input_width) { + x = input_width - tile_size; + last_x = true; + } + int64_t t1 = ggml_time_ms(); + ggml_split_tensor_2d(input, input_tile, x, y); + on_processing(input_tile, output_tile, false); + ggml_merge_tensor_2d(output_tile, output, x * scale, y * scale, tile_overlap * scale); + int64_t t2 = ggml_time_ms(); + last_time = (t2 - t1) / 1000.0f; + pretty_progress(tile_count, num_tiles, last_time); + tile_count++; + } + last_x = false; + } + if (tile_count < num_tiles) { + pretty_progress(num_tiles, num_tiles, last_time); + } +} + +__STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx, + struct ggml_tensor* a) { + return ggml_group_norm(ctx, a, 32); +} + +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_linear(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b) { + x = ggml_mul_mat(ctx, w, x); + if (b 
!= NULL) { + x = ggml_add(ctx, x, b); + } + return x; +} + +// w: [OC,IC, KH, KW] +// x: [N, IC, IH, IW] +// b: [OC,] +// result: [N, OC, OH, OW] +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b, + int s0 = 1, + int s1 = 1, + int p0 = 0, + int p1 = 0, + int d0 = 1, + int d1 = 1) { + x = ggml_conv_2d(ctx, w, x, s0, s1, p0, p1, d0, d1); + if (b != NULL) { + b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1); + // b = ggml_repeat(ctx, b, x); + x = ggml_add(ctx, x, b); + } + return x; +} + +// w: [OC,IC, KD, 1 * 1] +// x: [N, IC, IH, IW] +// b: [OC,] +// result: [N, OC, OH, OW] +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d_nx1x1_bak(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b, + int s2 = 1, + int p2 = 1, + int d2 = 1) { + GGML_ASSERT(w->ne[0] == 1); + // timesteps = x.shape[0] + // x = rearrange(x, "(b t) c h w -> b c t h w", t=timesteps) + // x = conv3d(x) + // return rearrange(x, "b c t h w -> (b t) c h w") + int64_t T = x->ne[3]; + int64_t B = x->ne[3] / T; + int64_t C = x->ne[2]; + int64_t H = x->ne[1]; + int64_t W = x->ne[0]; + + x = ggml_reshape_4d(ctx, x, W * H, C, T, B); // (b t) c h w -> b t c (h w) + x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // b t c (h w) -> b c t (h w) + x = ggml_conv_2d(ctx, w, x, 1, s2, 0, p2, 1, d2); // [B, OC, T, OH * OW] + if (b != NULL) { + b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1); + x = ggml_add(ctx, x, b); + } + x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w) + x = ggml_reshape_4d(ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w + return x; // [B*T, OC, OH, OW] +} + +// w: [OC,IC, KD, 1 * 1] +// x: [N, IC, ID, IH*IW] +// b: [OC,] +// result: [N, OC, OD, OH*OW] +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_3d_nx1x1(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b, + int s2 = 1, + int p2 = 1, + int d2 = 1) { + x = ggml_conv_2d(ctx, w, x, 1, s2, 0, p2, 1, d2); // [N, OC, T, OH * OW] + if (b != NULL) { + b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1); + x = ggml_add(ctx, x, b); + } + return x; // [N, OC, T, OH * OW] +} + +// q: [N * n_head, n_token, d_head] +// k: [N * n_head, n_k, d_head] +// v: [N * n_head, d_head, n_k] +// return: [N * n_head, n_token, d_head] +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx, + struct ggml_tensor* q, + struct ggml_tensor* k, + struct ggml_tensor* v, + bool mask = false) { +#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) + struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head] +#else + float d_head = (float)q->ne[0]; + + struct ggml_tensor* kq = ggml_mul_mat(ctx, k, q); // [N * n_head, n_token, n_k] + kq = ggml_scale_inplace(ctx, kq, 1.0f / sqrt(d_head)); + if (mask) { + kq = ggml_diag_mask_inf_inplace(ctx, kq, 0); + } + kq = ggml_soft_max_inplace(ctx, kq); + + struct ggml_tensor* kqv = ggml_mul_mat(ctx, v, kq); // [N * n_head, n_token, d_head] +#endif + return kqv; +} + +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_layer_norm(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b, + float eps = EPS) { + x = ggml_norm(ctx, x, eps); + if (w != NULL) { + x = ggml_mul(ctx, x, w); + if (b != NULL) { + x = ggml_add(ctx, x, b); + } + } + return x; +} + +__STATIC_INLINE__ struct ggml_tensor* 
ggml_nn_group_norm(struct ggml_context* ctx, + struct ggml_tensor* x, + struct ggml_tensor* w, + struct ggml_tensor* b, + int num_groups = 32) { + if (ggml_n_dims(x) >= 3 && w != NULL && b != NULL) { + w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1); + b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1); + } + + x = ggml_group_norm(ctx, x, num_groups); + if (w != NULL && b != NULL) { + x = ggml_mul(ctx, x, w); + // b = ggml_repeat(ctx, b, x); + x = ggml_add(ctx, x, b); + } + return x; +} + +__STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) { +#ifdef SD_USE_CUBLAS + if (!ggml_backend_is_cpu(backend)) { + ggml_backend_tensor_get_async(backend, tensor, data, offset, size); + ggml_backend_synchronize(backend); + } else { + ggml_backend_tensor_get(tensor, data, offset, size); + } +#else + ggml_backend_tensor_get(tensor, data, offset, size); +#endif +} + +__STATIC_INLINE__ float ggml_backend_tensor_get_f32(ggml_tensor* tensor) { + GGML_ASSERT(tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_F16); + float value; + if (tensor->type == GGML_TYPE_F32) { + ggml_backend_tensor_get(tensor, &value, 0, sizeof(value)); + } else { // GGML_TYPE_F16 + ggml_fp16_t f16_value; + ggml_backend_tensor_get(tensor, &f16_value, 0, sizeof(f16_value)); + value = ggml_fp16_to_fp32(f16_value); + } + return value; +} + +__STATIC_INLINE__ struct ggml_tensor* vector_to_ggml_tensor(struct ggml_context* ctx, + const std::vector& vec) { + struct ggml_tensor* t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, vec.size()); + memcpy(t->data, (const void*)vec.data(), ggml_nbytes(t)); + return t; +} + +__STATIC_INLINE__ struct ggml_tensor* vector_to_ggml_tensor_i32(struct ggml_context* ctx, + const std::vector& vec) { + struct ggml_tensor* t = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, vec.size()); + memcpy(t->data, (const void*)vec.data(), ggml_nbytes(t)); + return t; +} + +__STATIC_INLINE__ std::vector arange(float start, float end, float step = 1.f) { + std::vector result; + + for (float value = start; value < end; value += step) { + result.push_back(value); + } + + return result; +} + +// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151 +__STATIC_INLINE__ std::vector timestep_embedding(std::vector timesteps, + int dim, + int max_period = 10000) { + // timesteps: [N,] + // embedding: [N, dim] + size_t N = timesteps.size(); + int acutual_dim = dim; + if (dim % 2 != 0) { + acutual_dim = dim + 1; + } + std::vector embedding(N * acutual_dim, 0.f); + int half = dim / 2; + std::vector freqs(half); + for (int i = 0; i < half; ++i) { + freqs[i] = (float)std::exp(-std::log(max_period) * i / half); + } + for (int i = 0; i < N; ++i) { + for (int j = 0; j < half; ++j) { + float arg = timesteps[i] * freqs[j]; + embedding[i * acutual_dim + j] = std::cos(arg); + embedding[i * acutual_dim + j + half] = std::sin(arg); + } + } + return embedding; +} + +__STATIC_INLINE__ void set_timestep_embedding(std::vector timesteps, + struct ggml_tensor* embedding, + int dim, + int max_period = 10000) { + std::vector embedding_vec = timestep_embedding(timesteps, dim, max_period); + memcpy(((char*)embedding->data), ((char*)embedding_vec.data()), ggml_nbytes(embedding)); +} + +__STATIC_INLINE__ struct ggml_tensor* new_timestep_embedding(struct ggml_context* ctx, + std::vector timesteps, + int dim, + int max_period = 10000) { + // timesteps: [N,] + // embedding: [N, dim] + std::vector embedding_vec = 
timestep_embedding(timesteps, dim, max_period); + int acutual_dim = dim; + if (dim % 2 != 0) { + acutual_dim = dim + 1; + } + struct ggml_tensor* embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, acutual_dim, timesteps.size()); + if (embedding->data != NULL) { + memcpy(((char*)embedding->data), ((char*)embedding_vec.data()), ggml_nbytes(embedding)); + } else { + ggml_backend_tensor_set(embedding, embedding_vec.data(), 0, ggml_nbytes(embedding)); + } + return embedding; +} + +__STATIC_INLINE__ struct ggml_tensor* ggml_nn_timestep_embedding( + struct ggml_context* ctx, + struct ggml_tensor* timesteps, + int dim, + int max_period = 10000) { + return ggml_timestep_embedding(ctx, timesteps, dim, max_period); +} + +// struct GGMLComputeGraph { +// virtual void init(struct ggml_context* ctx, ggml_type wtype) = 0; +// virtual std::string get_desc() = 0; +// virtual size_t get_params_mem_size() = 0; +// virtual size_t get_params_num() = 0; +// virtual struct ggml_cgraph* get_ggml_cgraph() = 0; +// }; + +#define MAX_PARAMS_TENSOR_NUM 10240 +#define MAX_GRAPH_SIZE 10240 + +struct GGMLModule { +protected: + typedef std::function get_graph_cb_t; + + struct ggml_context* params_ctx = NULL; + ggml_backend_buffer_t params_buffer = NULL; + + struct ggml_context* compute_ctx = NULL; + struct ggml_gallocr* compute_allocr = NULL; + + std::map backend_tensor_data_map; + + ggml_type wtype = GGML_TYPE_F32; + ggml_backend_t backend = NULL; + + void alloc_params_ctx() { + struct ggml_init_params params; + params.mem_size = static_cast(MAX_PARAMS_TENSOR_NUM * ggml_tensor_overhead()); + params.mem_buffer = NULL; + params.no_alloc = true; + + params_ctx = ggml_init(params); + GGML_ASSERT(params_ctx != NULL); + } + + void free_params_ctx() { + if (params_ctx != NULL) { + ggml_free(params_ctx); + params_ctx = NULL; + } + } + + void alloc_compute_ctx() { + struct ggml_init_params params; + params.mem_size = static_cast(ggml_tensor_overhead() * MAX_GRAPH_SIZE + ggml_graph_overhead()); + params.mem_buffer = NULL; + params.no_alloc = true; + + compute_ctx = ggml_init(params); + GGML_ASSERT(compute_ctx != NULL); + } + + void free_compute_ctx() { + if (compute_ctx != NULL) { + ggml_free(compute_ctx); + compute_ctx = NULL; + } + } + + bool alloc_compute_buffer(get_graph_cb_t get_graph) { + if (compute_allocr != NULL) { + return true; + } + reset_compute_ctx(); + struct ggml_cgraph* gf = get_graph(); + backend_tensor_data_map.clear(); + compute_allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend)); + + if (!ggml_gallocr_reserve(compute_allocr, gf)) { + // failed to allocate the compute buffer + LOG_ERROR("%s: failed to allocate the compute buffer\n", get_desc().c_str()); + free_compute_buffer(); + return false; + } + + // compute the required memory + size_t compute_buffer_size = ggml_gallocr_get_buffer_size(compute_allocr, 0); + LOG_DEBUG("%s compute buffer size: %.2f MB", get_desc().c_str(), compute_buffer_size / 1024.0 / 1024.0); + return true; + } + + void cpy_data_to_backend_tensor() { + for (auto& kv : backend_tensor_data_map) { + auto tensor = kv.first; + auto data = kv.second; + + ggml_backend_tensor_set(tensor, data, 0, ggml_nbytes(tensor)); + } + + backend_tensor_data_map.clear(); + } + +public: + virtual size_t get_params_mem_size() = 0; + virtual size_t get_params_num() = 0; + virtual std::string get_desc() = 0; + + GGMLModule(ggml_backend_t backend, ggml_type wtype = GGML_TYPE_F32) + : backend(backend), wtype(wtype) { + alloc_params_ctx(); + } + + virtual ~GGMLModule() { + free_params_buffer(); 
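+ // release the backend buffers first, then the ggml contexts that hold the tensor metadata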
+ free_compute_buffer(); + free_params_ctx(); + free_compute_ctx(); + } + + void reset_compute_ctx() { + free_compute_ctx(); + alloc_compute_ctx(); + } + + bool alloc_params_buffer() { + size_t num_tensors = get_params_num(); + params_buffer = ggml_backend_alloc_ctx_tensors(params_ctx, backend); + if (params_buffer == NULL) { + LOG_ERROR("%s alloc params backend buffer failed", get_desc().c_str()); + return false; + } + size_t params_buffer_size = ggml_backend_buffer_get_size(params_buffer); + LOG_DEBUG("%s params backend buffer size = % 6.2f MB (%i tensors)", + get_desc().c_str(), params_buffer_size / (1024.0 * 1024.0), num_tensors); + return true; + } + + void free_params_buffer() { + if (params_buffer != NULL) { + ggml_backend_buffer_free(params_buffer); + params_buffer = NULL; + } + } + + void free_compute_buffer() { + if (compute_allocr != NULL) { + ggml_gallocr_free(compute_allocr); + compute_allocr = NULL; + } + } + + // do copy after alloc graph + void set_backend_tensor_data(struct ggml_tensor* tensor, const void* data) { + backend_tensor_data_map[tensor] = data; + } + + struct ggml_tensor* to_backend(struct ggml_tensor* tensor) { + GGML_ASSERT(compute_ctx != NULL); + if (tensor == NULL) { + return NULL; + } + // it's performing a compute, check if backend isn't cpu + if (!ggml_backend_is_cpu(backend) && tensor->backend == GGML_BACKEND_CPU) { + // pass input tensors to gpu memory + auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor); + + set_backend_tensor_data(backend_tensor, tensor->data); + return backend_tensor; + } else { + return tensor; + } + } + + void compute(get_graph_cb_t get_graph, + int n_threads, + bool free_compute_buffer_immediately = true, + struct ggml_tensor** output = NULL, + struct ggml_context* output_ctx = NULL) { + alloc_compute_buffer(get_graph); + reset_compute_ctx(); + struct ggml_cgraph* gf = get_graph(); + + GGML_ASSERT(ggml_gallocr_alloc_graph(compute_allocr, gf)); + + cpy_data_to_backend_tensor(); + + if (ggml_backend_is_cpu(backend)) { + ggml_backend_cpu_set_n_threads(backend, n_threads); + } + +#ifdef SD_USE_METAL + if (ggml_backend_is_metal(backend)) { + ggml_backend_metal_set_n_cb(backend, n_threads); + } +#endif + + ggml_backend_graph_compute(backend, gf); + +#ifdef GGML_PERF + ggml_graph_print(gf); +#endif + + if (output != NULL) { + auto result = gf->nodes[gf->n_nodes - 1]; + if (*output == NULL && output_ctx != NULL) { + *output = ggml_dup_tensor(output_ctx, result); + } + if (*output != NULL) { + ggml_backend_tensor_get_and_sync(backend, result, (*output)->data, 0, ggml_nbytes(*output)); + } + } + + if (free_compute_buffer_immediately) { + free_compute_buffer(); + } + } +}; + +class GGMLBlock { +private: + static char temp_buffer[1024 * 1024 * 10]; + ggml_context* get_temp_ctx() { + struct ggml_init_params params; + params.mem_size = sizeof(temp_buffer); + params.mem_buffer = temp_buffer; + params.no_alloc = true; + + ggml_context* temp_ctx = ggml_init(params); + GGML_ASSERT(temp_ctx != NULL); + return temp_ctx; + } + +protected: + typedef std::unordered_map ParameterMap; + typedef std::unordered_map> GGMLBlockMap; + GGMLBlockMap blocks; + ParameterMap params; + + void init_blocks(struct ggml_context* ctx, ggml_type wtype) { + for (auto& pair : blocks) { + auto& block = pair.second; + + block->init(ctx, wtype); + } + } + + virtual void init_params(struct ggml_context* ctx, ggml_type wtype) {} + +public: + void init(struct ggml_context* ctx, ggml_type wtype) { + init_blocks(ctx, wtype); + init_params(ctx, wtype); + } + + std::tuple 
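Editor's note: the compute() path above follows a measure-then-allocate pattern — the graph is built once so ggml_gallocr can reserve a compute buffer, rebuilt after reset_compute_ctx(), allocated, and only then are the host pointers recorded via set_backend_tensor_data() copied into backend memory by cpy_data_to_backend_tensor(). A minimal, library-free sketch of that deferred-upload idea follows; Tensor, pending_uploads and flush_uploads are illustrative stand-ins, not ggml API.

#include <cstdio>
#include <cstring>
#include <map>
#include <vector>

// Stand-in for a backend tensor: device storage only exists after allocation.
struct Tensor {
    std::vector<float> device;   // pretend this lives on the backend
    size_t nbytes() const { return device.size() * sizeof(float); }
};

// Record host pointers now; nothing is copied yet, because the tensor may not
// have backend memory until the graph allocator has run.
static std::map<Tensor*, const void*> pending_uploads;

void set_backend_tensor_data(Tensor* t, const void* host) {
    pending_uploads[t] = host;
}

// Called once the allocator has assigned real backend memory,
// mirroring cpy_data_to_backend_tensor() above.
void flush_uploads() {
    for (auto& kv : pending_uploads) {
        std::memcpy(kv.first->device.data(), kv.second, kv.first->nbytes());
    }
    pending_uploads.clear();
}

int main() {
    float host_input[4] = {1, 2, 3, 4};
    Tensor t;
    set_backend_tensor_data(&t, host_input); // 1) record
    t.device.resize(4);                      // 2) "allocate" backend memory
    flush_uploads();                         // 3) upload after allocation
    printf("uploaded %.0f %.0f %.0f %.0f\n", t.device[0], t.device[1], t.device[2], t.device[3]);
    return 0;
}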
get_params_info(ggml_type wtype) { + ggml_context* temp_ctx = get_temp_ctx(); + init(temp_ctx, wtype); + size_t num_tensors = get_params_num(); + size_t mem_size = get_params_mem_size(); + return {num_tensors, mem_size}; + } + + size_t get_params_num() { + size_t num_tensors = params.size(); + for (auto& pair : blocks) { + auto& block = pair.second; + + num_tensors += block->get_params_num(); + } + return num_tensors; + }; + + size_t get_params_mem_size() { + size_t mem_size = 0; + for (auto& pair : blocks) { + auto& block = pair.second; + + mem_size += block->get_params_mem_size(); + } + + for (auto& pair : params) { + mem_size += ggml_nbytes(pair.second); + } + + return mem_size; + } + + void get_param_tensors(std::map& tensors, std::string prefix = "") { + if (prefix.size() > 0) { + prefix = prefix + "."; + } + for (auto& pair : blocks) { + auto& block = pair.second; + + block->get_param_tensors(tensors, prefix + pair.first); + } + + for (auto& pair : params) { + struct ggml_tensor* param = pair.second; + + tensors[prefix + pair.first] = pair.second; + } + } +}; + +class UnaryBlock : public GGMLBlock { +public: + virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) = 0; +}; + +class Linear : public UnaryBlock { +protected: + int64_t in_features; + int64_t out_features; + bool bias; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features); + if (bias) { + params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_features); + } + } + +public: + Linear(int64_t in_features, + int64_t out_features, + bool bias = true) + : in_features(in_features), + out_features(out_features), + bias(bias) {} + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* w = params["weight"]; + struct ggml_tensor* b = NULL; + if (bias) { + b = params["bias"]; + } + return ggml_nn_linear(ctx, x, w, b); + } +}; + +class Conv2d : public UnaryBlock { +protected: + int64_t in_channels; + int64_t out_channels; + std::pair kernel_size; + std::pair stride; + std::pair padding; + std::pair dilation; + bool bias; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["weight"] = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kernel_size.second, kernel_size.first, in_channels, out_channels); + if (bias) { + params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_channels); + } + } + +public: + Conv2d(int64_t in_channels, + int64_t out_channels, + std::pair kernel_size, + std::pair stride = {1, 1}, + std::pair padding = {0, 0}, + std::pair dilation = {1, 1}, + bool bias = true) + : in_channels(in_channels), + out_channels(out_channels), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + bias(bias) {} + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* w = params["weight"]; + struct ggml_tensor* b = NULL; + if (bias) { + b = params["bias"]; + } + return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first); + } +}; + +class Conv3dnx1x1 : public UnaryBlock { +protected: + int64_t in_channels; + int64_t out_channels; + int64_t kernel_size; + int64_t stride; + int64_t padding; + int64_t dilation; + bool bias; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + params["weight"] = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, kernel_size, in_channels, out_channels); // 5d => 4d + 
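Editor's note: get_param_tensors() above builds fully qualified tensor names by joining block names with '.' as it recurses, which is how parameters later match checkpoint keys such as "...q_proj.weight". A ggml-free sketch of that naming scheme; Block and the example prefix "text_model.layers.0" are illustrative only.

#include <cstdio>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for GGMLBlock: named parameters plus named sub-blocks.
struct Block {
    std::map<std::string, int> params;                    // name -> dummy tensor id
    std::map<std::string, std::shared_ptr<Block>> blocks; // name -> sub-block

    void get_param_tensors(std::map<std::string, int>& out, std::string prefix = "") {
        if (!prefix.empty()) prefix += ".";
        for (auto& kv : blocks) kv.second->get_param_tensors(out, prefix + kv.first);
        for (auto& kv : params) out[prefix + kv.first] = kv.second;
    }
};

int main() {
    auto linear = std::make_shared<Block>();
    linear->params["weight"] = 1;
    linear->params["bias"]   = 2;

    Block attn;
    attn.blocks["q_proj"] = linear;

    std::map<std::string, int> tensors;
    attn.get_param_tensors(tensors, "text_model.layers.0");
    for (auto& kv : tensors) printf("%s\n", kv.first.c_str());
    // prints: text_model.layers.0.q_proj.bias
    //         text_model.layers.0.q_proj.weight
    return 0;
}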
if (bias) { + params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, out_channels); + } + } + +public: + Conv3dnx1x1(int64_t in_channels, + int64_t out_channels, + int64_t kernel_size, + int64_t stride = 1, + int64_t padding = 0, + int64_t dilation = 1, + bool bias = true) + : in_channels(in_channels), + out_channels(out_channels), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + bias(bias) {} + + // x: [N, IC, ID, IH*IW] + // result: [N, OC, OD, OH*OW] + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* w = params["weight"]; + struct ggml_tensor* b = NULL; + if (bias) { + b = params["bias"]; + } + return ggml_nn_conv_3d_nx1x1(ctx, x, w, b, stride, padding, dilation); + } +}; + +class LayerNorm : public UnaryBlock { +protected: + int64_t normalized_shape; + float eps; + bool elementwise_affine; + bool bias; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + if (elementwise_affine) { + params["weight"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, normalized_shape); + if (bias) { + params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, normalized_shape); + } + } + } + +public: + LayerNorm(int64_t normalized_shape, + float eps = 1e-05f, + bool elementwise_affine = true, + bool bias = true) + : normalized_shape(normalized_shape), + eps(eps), + elementwise_affine(elementwise_affine), + bias(bias) {} + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* w = NULL; + struct ggml_tensor* b = NULL; + + if (elementwise_affine) { + w = params["weight"]; + if (bias) { + b = params["bias"]; + } + } + return ggml_nn_layer_norm(ctx, x, w, b, eps); + } +}; + +class GroupNorm : public GGMLBlock { +protected: + int64_t num_groups; + int64_t num_channels; + float eps; + bool affine; + + void init_params(struct ggml_context* ctx, ggml_type wtype) { + if (affine) { + params["weight"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, num_channels); + params["bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, num_channels); + } + } + +public: + GroupNorm(int64_t num_groups, + int64_t num_channels, + float eps = 1e-05f, + bool affine = true) + : num_groups(num_groups), + num_channels(num_channels), + eps(eps), + affine(affine) {} + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* w = NULL; + struct ggml_tensor* b = NULL; + if (affine) { + w = params["weight"]; + b = params["bias"]; + } + return ggml_nn_group_norm(ctx, x, w, b, num_groups); + } +}; + +class GroupNorm32 : public GroupNorm { +public: + GroupNorm32(int64_t num_channels) + : GroupNorm(32, num_channels, 1e-06f) {} +}; + +class MultiheadAttention : public GGMLBlock { +protected: + int64_t embed_dim; + int64_t n_head; + bool bias; + bool mask; + +public: + MultiheadAttention(int64_t embed_dim, + int64_t n_head, + bool bias = true) + : embed_dim(embed_dim), + n_head(n_head), + bias(bias) { + blocks["q_proj"] = std::shared_ptr(new Linear(embed_dim, embed_dim, bias)); + blocks["k_proj"] = std::shared_ptr(new Linear(embed_dim, embed_dim, bias)); + blocks["v_proj"] = std::shared_ptr(new Linear(embed_dim, embed_dim, bias)); + blocks["out_proj"] = std::shared_ptr(new Linear(embed_dim, embed_dim, bias)); + } + + // x: [N, n_token, embed_dim] + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, bool mask = false) { + auto q_proj = std::dynamic_pointer_cast(blocks["q_proj"]); + auto k_proj = std::dynamic_pointer_cast(blocks["k_proj"]); + auto 
v_proj = std::dynamic_pointer_cast(blocks["v_proj"]); + auto out_proj = std::dynamic_pointer_cast(blocks["out_proj"]); + + int64_t N = x->ne[2]; + int64_t n_token = x->ne[1]; + int64_t d_head = embed_dim / n_head; + + struct ggml_tensor* q = q_proj->forward(ctx, x); + q = ggml_reshape_4d(ctx, q, d_head, n_head, n_token, N); // [N, n_token, n_head, d_head] + q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3)); // [N, n_head, n_token, d_head] + q = ggml_reshape_3d(ctx, q, d_head, n_token, n_head * N); // [N * n_head, n_token, d_head] + + struct ggml_tensor* k = k_proj->forward(ctx, x); + k = ggml_reshape_4d(ctx, k, d_head, n_head, n_token, N); // [N, n_token, n_head, d_head] + k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3)); // [N, n_head, n_token, d_head] + k = ggml_reshape_3d(ctx, k, d_head, n_token, n_head); // [N * n_head, n_token, d_head] + + struct ggml_tensor* v = v_proj->forward(ctx, x); + v = ggml_reshape_4d(ctx, v, d_head, n_head, n_token, N); // [N, n_token, n_head, d_head] + v = ggml_cont(ctx, ggml_permute(ctx, v, 1, 2, 0, 3)); // [N, n_head, d_head, n_token] + v = ggml_reshape_3d(ctx, v, n_token, d_head, n_head * N); // [N * n_head, d_head, n_token] + + struct ggml_tensor* kqv = ggml_nn_attention(ctx, q, k, v, mask); // [N * n_head, n_token, d_head] + + kqv = ggml_reshape_4d(ctx, kqv, d_head, n_token, n_head, N); + kqv = ggml_cont(ctx, ggml_permute(ctx, kqv, 0, 2, 1, 3)); // [N, n_token, n_head, d_head] + + x = ggml_reshape_2d(ctx, kqv, d_head * n_head, n_token * N); // [N * n_token, d_head * n_head] + + x = out_proj->forward(ctx, x); + return x; + } +}; + +#endif // __GGML_EXTEND__HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/lora.hpp b/otherarch/sdcpp/lora.hpp new file mode 100644 index 000000000..734635b66 --- /dev/null +++ b/otherarch/sdcpp/lora.hpp @@ -0,0 +1,164 @@ +#ifndef __LORA_HPP__ +#define __LORA_HPP__ + +#include "ggml_extend.hpp" + +#define LORA_GRAPH_SIZE 10240 + +struct LoraModel : public GGMLModule { + float multiplier = 1.0f; + std::map lora_tensors; + std::string file_path; + ModelLoader model_loader; + bool load_failed = false; + + LoraModel(ggml_backend_t backend, + ggml_type wtype, + const std::string file_path = "") + : file_path(file_path), GGMLModule(backend, wtype) { + if (!model_loader.init_from_file(file_path)) { + load_failed = true; + } + } + + std::string get_desc() { + return "lora"; + } + + size_t get_params_num() { + return LORA_GRAPH_SIZE; + } + + size_t get_params_mem_size() { + return model_loader.get_params_mem_size(NULL); + } + + bool load_from_file() { + LOG_INFO("loading LoRA from '%s'", file_path.c_str()); + + if (load_failed) { + LOG_ERROR("init lora model loader from file failed: '%s'", file_path.c_str()); + return false; + } + + bool dry_run = true; + auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { + const std::string& name = tensor_storage.name; + + if (dry_run) { + struct ggml_tensor* real = ggml_new_tensor(params_ctx, + tensor_storage.type, + tensor_storage.n_dims, + tensor_storage.ne); + lora_tensors[name] = real; + } else { + auto real = lora_tensors[name]; + *dst_tensor = real; + } + return true; + }; + + model_loader.load_tensors(on_new_tensor_cb, backend); + alloc_params_buffer(); + + dry_run = false; + model_loader.load_tensors(on_new_tensor_cb, backend); + + LOG_DEBUG("finished loaded lora"); + return true; + } + + struct ggml_cgraph* build_graph(std::map model_tensors) { + struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, LORA_GRAPH_SIZE, 
false); + + std::set applied_lora_tensors; + for (auto it : model_tensors) { + std::string k_tensor = it.first; + struct ggml_tensor* weight = model_tensors[it.first]; + + size_t k_pos = k_tensor.find(".weight"); + if (k_pos == std::string::npos) { + continue; + } + k_tensor = k_tensor.substr(0, k_pos); + replace_all_chars(k_tensor, '.', '_'); + std::string lora_up_name = "lora." + k_tensor + ".lora_up.weight"; + std::string lora_down_name = "lora." + k_tensor + ".lora_down.weight"; + std::string alpha_name = "lora." + k_tensor + ".alpha"; + std::string scale_name = "lora." + k_tensor + ".scale"; + + ggml_tensor* lora_up = NULL; + ggml_tensor* lora_down = NULL; + + if (lora_tensors.find(lora_up_name) != lora_tensors.end()) { + lora_up = lora_tensors[lora_up_name]; + } + + if (lora_tensors.find(lora_down_name) != lora_tensors.end()) { + lora_down = lora_tensors[lora_down_name]; + } + + if (lora_up == NULL || lora_down == NULL) { + continue; + } + + applied_lora_tensors.insert(lora_up_name); + applied_lora_tensors.insert(lora_down_name); + applied_lora_tensors.insert(alpha_name); + applied_lora_tensors.insert(scale_name); + + // calc_cale + int64_t dim = lora_down->ne[ggml_n_dims(lora_down) - 1]; + float scale_value = 1.0f; + if (lora_tensors.find(scale_name) != lora_tensors.end()) { + scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]); + } else if (lora_tensors.find(alpha_name) != lora_tensors.end()) { + float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]); + scale_value = alpha / dim; + } + scale_value *= multiplier; + + // flat lora tensors to multiply it + int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1]; + lora_up = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows); + int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1]; + lora_down = ggml_reshape_2d(compute_ctx, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows); + + // ggml_mul_mat requires tensor b transposed + lora_down = ggml_cont(compute_ctx, ggml_transpose(compute_ctx, lora_down)); + struct ggml_tensor* updown = ggml_mul_mat(compute_ctx, lora_up, lora_down); + updown = ggml_cont(compute_ctx, ggml_transpose(compute_ctx, updown)); + updown = ggml_reshape(compute_ctx, updown, weight); + GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight)); + updown = ggml_scale_inplace(compute_ctx, updown, scale_value); + ggml_tensor* final_weight; + // if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) { + // final_weight = ggml_new_tensor(compute_ctx, GGML_TYPE_F32, weight->n_dims, weight->ne); + // final_weight = ggml_cpy_inplace(compute_ctx, weight, final_weight); + // final_weight = ggml_add_inplace(compute_ctx, final_weight, updown); + // final_weight = ggml_cpy_inplace(compute_ctx, final_weight, weight); + // } else { + // final_weight = ggml_add_inplace(compute_ctx, weight, updown); + // } + final_weight = ggml_add_inplace(compute_ctx, weight, updown); // apply directly + ggml_build_forward_expand(gf, final_weight); + } + + for (auto& kv : lora_tensors) { + if (applied_lora_tensors.find(kv.first) == applied_lora_tensors.end()) { + LOG_WARN("unused lora tensor %s", kv.first.c_str()); + } + } + + return gf; + } + + void apply(std::map model_tensors, int n_threads) { + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(model_tensors); + }; + GGMLModule::compute(get_graph, n_threads, true); + } +}; + +#endif // __LORA_HPP__ \ No newline at end of file diff --git 
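Editor's note: build_graph() above encodes the usual LoRA merge — the low-rank factors are multiplied back into a full-size delta, scaled by alpha/rank (or an explicit scale tensor) and by the global multiplier, then added to the original weight in place. A small CPU-only sketch of that arithmetic on plain float matrices; shapes and values are made up for illustration.

#include <cstdio>
#include <vector>

// W' = W + multiplier * (alpha / rank) * (up x down)
// up:   out_features x rank
// down: rank x in_features
void apply_lora(std::vector<float>& W,
                const std::vector<float>& up, const std::vector<float>& down,
                int out_features, int in_features, int rank,
                float alpha, float multiplier) {
    float scale = multiplier * (alpha / rank);
    for (int o = 0; o < out_features; o++) {
        for (int i = 0; i < in_features; i++) {
            float delta = 0.0f;
            for (int r = 0; r < rank; r++) {
                delta += up[o * rank + r] * down[r * in_features + i];
            }
            W[o * in_features + i] += scale * delta;
        }
    }
}

int main() {
    int out = 2, in = 3, rank = 1;
    std::vector<float> W(out * in, 1.0f);          // original weight, all ones
    std::vector<float> up   = {0.5f, -0.5f};       // 2x1
    std::vector<float> down = {1.0f, 2.0f, 3.0f};  // 1x3
    apply_lora(W, up, down, out, in, rank, /*alpha=*/1.0f, /*multiplier=*/1.0f);
    for (int o = 0; o < out; o++) {
        for (int i = 0; i < in; i++) printf("%6.2f ", W[o * in + i]);
        printf("\n");
    }
    return 0;
}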
a/otherarch/sdcpp/main.cpp b/otherarch/sdcpp/main.cpp new file mode 100644 index 000000000..6b74b6923 --- /dev/null +++ b/otherarch/sdcpp/main.cpp @@ -0,0 +1,778 @@ +#include +#include +#include +#include +#include +#include +#include + +// #include "preprocessing.hpp" +#include "stable-diffusion.h" + +#define STB_IMAGE_IMPLEMENTATION +#include "stb_image.h" + +#define STB_IMAGE_WRITE_IMPLEMENTATION +#define STB_IMAGE_WRITE_STATIC +#include "stb_image_write.h" + +const char* rng_type_to_str[] = { + "std_default", + "cuda", +}; + +// Names of the sampler method, same order as enum sample_method in stable-diffusion.h +const char* sample_method_str[] = { + "euler_a", + "euler", + "heun", + "dpm2", + "dpm++2s_a", + "dpm++2m", + "dpm++2mv2", + "lcm", +}; + +// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h +const char* schedule_str[] = { + "default", + "discrete", + "karras", +}; + +const char* modes_str[] = { + "txt2img", + "img2img", + "img2vid", + "convert", +}; + +enum SDMode { + TXT2IMG, + IMG2IMG, + IMG2VID, + CONVERT, + MODE_COUNT +}; + +struct SDParams { + int n_threads = -1; + SDMode mode = TXT2IMG; + + std::string model_path; + std::string vae_path; + std::string taesd_path; + std::string esrgan_path; + std::string controlnet_path; + std::string embeddings_path; + sd_type_t wtype = SD_TYPE_COUNT; + std::string lora_model_dir; + std::string output_path = "output.png"; + std::string input_path; + std::string control_image_path; + + std::string prompt; + std::string negative_prompt; + float min_cfg = 1.0f; + float cfg_scale = 7.0f; + int clip_skip = -1; // <= 0 represents unspecified + int width = 512; + int height = 512; + int batch_count = 1; + + int video_frames = 6; + int motion_bucket_id = 127; + int fps = 6; + float augmentation_level = 0.f; + + sample_method_t sample_method = EULER_A; + schedule_t schedule = DEFAULT; + int sample_steps = 20; + float strength = 0.75f; + float control_strength = 0.9f; + rng_type_t rng_type = CUDA_RNG; + int64_t seed = 42; + bool verbose = false; + bool vae_tiling = false; + bool control_net_cpu = false; + bool canny_preprocess = false; + int upscale_repeats = 1; +}; + +void print_params(SDParams params) { + printf("Option: \n"); + printf(" n_threads: %d\n", params.n_threads); + printf(" mode: %s\n", modes_str[params.mode]); + printf(" model_path: %s\n", params.model_path.c_str()); + printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified"); + printf(" vae_path: %s\n", params.vae_path.c_str()); + printf(" taesd_path: %s\n", params.taesd_path.c_str()); + printf(" esrgan_path: %s\n", params.esrgan_path.c_str()); + printf(" controlnet_path: %s\n", params.controlnet_path.c_str()); + printf(" embeddings_path: %s\n", params.embeddings_path.c_str()); + printf(" output_path: %s\n", params.output_path.c_str()); + printf(" init_img: %s\n", params.input_path.c_str()); + printf(" control_image: %s\n", params.control_image_path.c_str()); + printf(" controlnet cpu: %s\n", params.control_net_cpu ? 
"true" : "false"); + printf(" strength(control): %.2f\n", params.control_strength); + printf(" prompt: %s\n", params.prompt.c_str()); + printf(" negative_prompt: %s\n", params.negative_prompt.c_str()); + printf(" min_cfg: %.2f\n", params.min_cfg); + printf(" cfg_scale: %.2f\n", params.cfg_scale); + printf(" clip_skip: %d\n", params.clip_skip); + printf(" width: %d\n", params.width); + printf(" height: %d\n", params.height); + printf(" sample_method: %s\n", sample_method_str[params.sample_method]); + printf(" schedule: %s\n", schedule_str[params.schedule]); + printf(" sample_steps: %d\n", params.sample_steps); + printf(" strength(img2img): %.2f\n", params.strength); + printf(" rng: %s\n", rng_type_to_str[params.rng_type]); + printf(" seed: %ld\n", params.seed); + printf(" batch_count: %d\n", params.batch_count); + printf(" vae_tiling: %s\n", params.vae_tiling ? "true" : "false"); + printf(" upscale_repeats: %d\n", params.upscale_repeats); +} + +void print_usage(int argc, const char* argv[]) { + printf("usage: %s [arguments]\n", argv[0]); + printf("\n"); + printf("arguments:\n"); + printf(" -h, --help show this help message and exit\n"); + printf(" -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)\n"); + printf(" -t, --threads N number of threads to use during computation (default: -1).\n"); + printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n"); + printf(" -m, --model [MODEL] path to model\n"); + printf(" --vae [VAE] path to vae\n"); + printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n"); + printf(" --control-net [CONTROL_PATH] path to control net model\n"); + printf(" --embd-dir [EMBEDDING_PATH] path to embeddings.\n"); + printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. 
Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.\n"); + printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n"); + printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)\n"); + printf(" If not specified, the default is the type of the weight file.\n"); + printf(" --lora-model-dir [DIR] lora model directory\n"); + printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n"); + printf(" --control-image [IMAGE] path to image condition, control net\n"); + printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n"); + printf(" -p, --prompt [PROMPT] the prompt to render\n"); + printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n"); + printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n"); + printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n"); + printf(" --control-strength STRENGTH strength to apply Control Net (default: 0.9)\n"); + printf(" 1.0 corresponds to full destruction of information in init image\n"); + printf(" -H, --height H image height, in pixel space (default: 512)\n"); + printf(" -W, --width W image width, in pixel space (default: 512)\n"); + printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}\n"); + printf(" sampling method (default: \"euler_a\")\n"); + printf(" --steps STEPS number of sample steps (default: 20)\n"); + printf(" --rng {std_default, cuda} RNG (default: cuda)\n"); + printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n"); + printf(" -b, --batch-count COUNT number of images to generate.\n"); + printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n"); + printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n"); + printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n"); + printf(" --vae-tiling process vae in tiles to reduce memory usage\n"); + printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n"); + printf(" --canny apply canny preprocessor (edge detection)\n"); + printf(" -v, --verbose print extra info\n"); +} + +void parse_args(int argc, const char** argv, SDParams& params) { + bool invalid_arg = false; + std::string arg; + for (int i = 1; i < argc; i++) { + arg = argv[i]; + + if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.n_threads = std::stoi(argv[i]); + } else if (arg == "-M" || arg == "--mode") { + if (++i >= argc) { + invalid_arg = true; + break; + } + const char* mode_selected = argv[i]; + int mode_found = -1; + for (int d = 0; d < MODE_COUNT; d++) { + if (!strcmp(mode_selected, modes_str[d])) { + mode_found = d; + } + } + if (mode_found == -1) { + fprintf(stderr, + "error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n", + mode_selected); + exit(1); + } + params.mode = (SDMode)mode_found; + } else if (arg == "-m" || arg == "--model") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.model_path = argv[i]; + } else if (arg == "--vae") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.vae_path = argv[i]; + } else if (arg == "--taesd") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.taesd_path = argv[i]; + } else if (arg == "--control-net") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.controlnet_path = argv[i]; + } else if (arg == 
"--upscale-model") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.esrgan_path = argv[i]; + } else if (arg == "--embd-dir") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.embeddings_path = argv[i]; + } else if (arg == "--type") { + if (++i >= argc) { + invalid_arg = true; + break; + } + std::string type = argv[i]; + if (type == "f32") { + params.wtype = SD_TYPE_F32; + } else if (type == "f16") { + params.wtype = SD_TYPE_F16; + } else if (type == "q4_0") { + params.wtype = SD_TYPE_Q4_0; + } else if (type == "q4_1") { + params.wtype = SD_TYPE_Q4_1; + } else if (type == "q5_0") { + params.wtype = SD_TYPE_Q5_0; + } else if (type == "q5_1") { + params.wtype = SD_TYPE_Q5_1; + } else if (type == "q8_0") { + params.wtype = SD_TYPE_Q8_0; + } else { + fprintf(stderr, "error: invalid weight format %s, must be one of [f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0]\n", + type.c_str()); + exit(1); + } + } else if (arg == "--lora-model-dir") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.lora_model_dir = argv[i]; + } else if (arg == "-i" || arg == "--init-img") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.input_path = argv[i]; + } else if (arg == "--control-image") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.control_image_path = argv[i]; + } else if (arg == "-o" || arg == "--output") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.output_path = argv[i]; + } else if (arg == "-p" || arg == "--prompt") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.prompt = argv[i]; + } else if (arg == "--upscale-repeats") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.upscale_repeats = std::stoi(argv[i]); + if (params.upscale_repeats < 1) { + fprintf(stderr, "error: upscale multiplier must be at least 1\n"); + exit(1); + } + } else if (arg == "-n" || arg == "--negative-prompt") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.negative_prompt = argv[i]; + } else if (arg == "--cfg-scale") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.cfg_scale = std::stof(argv[i]); + } else if (arg == "--strength") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.strength = std::stof(argv[i]); + } else if (arg == "--control-strength") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.control_strength = std::stof(argv[i]); + } else if (arg == "-H" || arg == "--height") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.height = std::stoi(argv[i]); + } else if (arg == "-W" || arg == "--width") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.width = std::stoi(argv[i]); + } else if (arg == "--steps") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.sample_steps = std::stoi(argv[i]); + } else if (arg == "--clip-skip") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.clip_skip = std::stoi(argv[i]); + } else if (arg == "--vae-tiling") { + params.vae_tiling = true; + } else if (arg == "--control-net-cpu") { + params.control_net_cpu = true; + } else if (arg == "--canny") { + params.canny_preprocess = true; + } else if (arg == "-b" || arg == "--batch-count") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.batch_count = std::stoi(argv[i]); + } else if (arg == "--rng") { + if (++i >= argc) { + invalid_arg = true; + break; + } + std::string rng_type_str = argv[i]; + if (rng_type_str == "std_default") { + params.rng_type = STD_DEFAULT_RNG; + 
} else if (rng_type_str == "cuda") { + params.rng_type = CUDA_RNG; + } else { + invalid_arg = true; + break; + } + } else if (arg == "--schedule") { + if (++i >= argc) { + invalid_arg = true; + break; + } + const char* schedule_selected = argv[i]; + int schedule_found = -1; + for (int d = 0; d < N_SCHEDULES; d++) { + if (!strcmp(schedule_selected, schedule_str[d])) { + schedule_found = d; + } + } + if (schedule_found == -1) { + invalid_arg = true; + break; + } + params.schedule = (schedule_t)schedule_found; + } else if (arg == "-s" || arg == "--seed") { + if (++i >= argc) { + invalid_arg = true; + break; + } + params.seed = std::stoll(argv[i]); + } else if (arg == "--sampling-method") { + if (++i >= argc) { + invalid_arg = true; + break; + } + const char* sample_method_selected = argv[i]; + int sample_method_found = -1; + for (int m = 0; m < N_SAMPLE_METHODS; m++) { + if (!strcmp(sample_method_selected, sample_method_str[m])) { + sample_method_found = m; + } + } + if (sample_method_found == -1) { + invalid_arg = true; + break; + } + params.sample_method = (sample_method_t)sample_method_found; + } else if (arg == "-h" || arg == "--help") { + print_usage(argc, argv); + exit(0); + } else if (arg == "-v" || arg == "--verbose") { + params.verbose = true; + } else { + fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); + print_usage(argc, argv); + exit(1); + } + } + if (invalid_arg) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + print_usage(argc, argv); + exit(1); + } + if (params.n_threads <= 0) { + params.n_threads = get_num_physical_cores(); + } + + if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt.length() == 0) { + fprintf(stderr, "error: the following arguments are required: prompt\n"); + print_usage(argc, argv); + exit(1); + } + + if (params.model_path.length() == 0) { + fprintf(stderr, "error: the following arguments are required: model_path\n"); + print_usage(argc, argv); + exit(1); + } + + if ((params.mode == IMG2IMG || params.mode == IMG2VID) && params.input_path.length() == 0) { + fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n"); + print_usage(argc, argv); + exit(1); + } + + if (params.output_path.length() == 0) { + fprintf(stderr, "error: the following arguments are required: output_path\n"); + print_usage(argc, argv); + exit(1); + } + + if (params.width <= 0 || params.width % 64 != 0) { + fprintf(stderr, "error: the width must be a multiple of 64\n"); + exit(1); + } + + if (params.height <= 0 || params.height % 64 != 0) { + fprintf(stderr, "error: the height must be a multiple of 64\n"); + exit(1); + } + + if (params.sample_steps <= 0) { + fprintf(stderr, "error: the sample_steps must be greater than 0\n"); + exit(1); + } + + if (params.strength < 0.f || params.strength > 1.f) { + fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n"); + exit(1); + } + + if (params.seed < 0) { + srand((int)time(NULL)); + params.seed = rand(); + } + + if (params.mode == CONVERT) { + if (params.output_path == "output.png") { + params.output_path = "output.gguf"; + } + } +} + +static std::string sd_basename(const std::string& path) { + size_t pos = path.find_last_of('/'); + if (pos != std::string::npos) { + return path.substr(pos + 1); + } + pos = path.find_last_of('\\'); + if (pos != std::string::npos) { + return path.substr(pos + 1); + } + return path; +} + +std::string get_image_params(SDParams params, int64_t seed) { + std::string parameter_string = params.prompt 
+ "\n"; + if (params.negative_prompt.size() != 0) { + parameter_string += "Negative prompt: " + params.negative_prompt + "\n"; + } + parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", "; + parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", "; + parameter_string += "Seed: " + std::to_string(seed) + ", "; + parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", "; + parameter_string += "Model: " + sd_basename(params.model_path) + ", "; + parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", "; + parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]); + if (params.schedule == KARRAS) { + parameter_string += " karras"; + } + parameter_string += ", "; + parameter_string += "Version: stable-diffusion.cpp"; + return parameter_string; +} + +void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { + SDParams* params = (SDParams*)data; + if (!params->verbose && level <= SD_LOG_DEBUG) { + return; + } + if (level <= SD_LOG_INFO) { + fputs(log, stdout); + fflush(stdout); + } else { + fputs(log, stderr); + fflush(stderr); + } +} + +int main(int argc, const char* argv[]) { + SDParams params; + parse_args(argc, argv, params); + + sd_set_log_callback(sd_log_cb, (void*)¶ms); + + if (params.verbose) { + print_params(params); + printf("%s", sd_get_system_info()); + } + + if (params.mode == CONVERT) { + bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype); + if (!success) { + fprintf(stderr, + "convert '%s'/'%s' to '%s' failed\n", + params.model_path.c_str(), + params.vae_path.c_str(), + params.output_path.c_str()); + return 1; + } else { + printf("convert '%s'/'%s' to '%s' success\n", + params.model_path.c_str(), + params.vae_path.c_str(), + params.output_path.c_str()); + return 0; + } + } + + if (params.mode == IMG2VID) { + fprintf(stderr, "SVD support is broken, do not use it!!!\n"); + return 1; + } + + bool vae_decode_only = true; + uint8_t* input_image_buffer = NULL; + if (params.mode == IMG2IMG || params.mode == IMG2VID) { + vae_decode_only = false; + + int c = 0; + input_image_buffer = stbi_load(params.input_path.c_str(), ¶ms.width, ¶ms.height, &c, 3); + if (input_image_buffer == NULL) { + fprintf(stderr, "load image from '%s' failed\n", params.input_path.c_str()); + return 1; + } + if (c != 3) { + fprintf(stderr, "input image must be a 3 channels RGB image, but got %d channels\n", c); + free(input_image_buffer); + return 1; + } + if (params.width <= 0 || params.width % 64 != 0) { + fprintf(stderr, "error: the width of image must be a multiple of 64\n"); + free(input_image_buffer); + return 1; + } + if (params.height <= 0 || params.height % 64 != 0) { + fprintf(stderr, "error: the height of image must be a multiple of 64\n"); + free(input_image_buffer); + return 1; + } + } + + sd_ctx_t* sd_ctx = new_sd_ctx(params.model_path.c_str(), + params.vae_path.c_str(), + params.taesd_path.c_str(), + params.controlnet_path.c_str(), + params.lora_model_dir.c_str(), + params.embeddings_path.c_str(), + vae_decode_only, + params.vae_tiling, + true, + params.n_threads, + params.wtype, + params.rng_type, + params.schedule, + params.control_net_cpu); + + if (sd_ctx == NULL) { + printf("new_sd_ctx_t failed\n"); + return 1; + } + + sd_image_t* results; + if (params.mode == TXT2IMG) { + sd_image_t* control_image = NULL; + if (params.controlnet_path.size() > 0 && params.control_image_path.size() > 
0) { + int c = 0; + input_image_buffer = stbi_load(params.control_image_path.c_str(), ¶ms.width, ¶ms.height, &c, 3); + if (input_image_buffer == NULL) { + fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str()); + return 1; + } + control_image = new sd_image_t{(uint32_t)params.width, + (uint32_t)params.height, + 3, + input_image_buffer}; + if (params.canny_preprocess) { // apply preprocessor + control_image->data = preprocess_canny(control_image->data, + control_image->width, + control_image->height, + 0.08f, + 0.08f, + 0.8f, + 1.0f, + false); + } + } + results = txt2img(sd_ctx, + params.prompt.c_str(), + params.negative_prompt.c_str(), + params.clip_skip, + params.cfg_scale, + params.width, + params.height, + params.sample_method, + params.sample_steps, + params.seed, + params.batch_count, + control_image, + params.control_strength); + } else { + sd_image_t input_image = {(uint32_t)params.width, + (uint32_t)params.height, + 3, + input_image_buffer}; + + if (params.mode == IMG2VID) { + results = img2vid(sd_ctx, + input_image, + params.width, + params.height, + params.video_frames, + params.motion_bucket_id, + params.fps, + params.augmentation_level, + params.min_cfg, + params.cfg_scale, + params.sample_method, + params.sample_steps, + params.strength, + params.seed); + if (results == NULL) { + printf("generate failed\n"); + free_sd_ctx(sd_ctx); + return 1; + } + size_t last = params.output_path.find_last_of("."); + std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path; + for (int i = 0; i < params.video_frames; i++) { + if (results[i].data == NULL) { + continue; + } + std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png"; + stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel, + results[i].data, 0, get_image_params(params, params.seed + i).c_str()); + printf("save result image to '%s'\n", final_image_path.c_str()); + free(results[i].data); + results[i].data = NULL; + } + free(results); + free_sd_ctx(sd_ctx); + return 0; + } else { + results = img2img(sd_ctx, + input_image, + params.prompt.c_str(), + params.negative_prompt.c_str(), + params.clip_skip, + params.cfg_scale, + params.width, + params.height, + params.sample_method, + params.sample_steps, + params.strength, + params.seed, + params.batch_count); + } + } + + if (results == NULL) { + printf("generate failed\n"); + free_sd_ctx(sd_ctx); + return 1; + } + + int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth + if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { + upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), + params.n_threads, + params.wtype); + + if (upscaler_ctx == NULL) { + printf("new_upscaler_ctx failed\n"); + } else { + for (int i = 0; i < params.batch_count; i++) { + if (results[i].data == NULL) { + continue; + } + sd_image_t current_image = results[i]; + for (int u = 0; u < params.upscale_repeats; ++u) { + sd_image_t upscaled_image = upscale(upscaler_ctx, current_image, upscale_factor); + if (upscaled_image.data == NULL) { + printf("upscale failed\n"); + break; + } + free(current_image.data); + current_image = upscaled_image; + } + results[i] = current_image; // Set the final upscaled image as the result + } + } + } + + size_t last = params.output_path.find_last_of("."); + std::string dummy_name = last != std::string::npos ? 
params.output_path.substr(0, last) : params.output_path; + for (int i = 0; i < params.batch_count; i++) { + if (results[i].data == NULL) { + continue; + } + std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png"; + stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel, + results[i].data, 0, get_image_params(params, params.seed + i).c_str()); + printf("save result image to '%s'\n", final_image_path.c_str()); + free(results[i].data); + results[i].data = NULL; + } + free(results); + free_sd_ctx(sd_ctx); + + return 0; +} diff --git a/otherarch/sdcpp/model.cpp b/otherarch/sdcpp/model.cpp new file mode 100644 index 000000000..5925a7da5 --- /dev/null +++ b/otherarch/sdcpp/model.cpp @@ -0,0 +1,1609 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "model.h" +#include "stable-diffusion.h" +#include "util.h" +#include "vocab.hpp" + +#include "ggml/ggml-alloc.h" +#include "ggml/ggml-backend.h" +#include "ggml/ggml.h" + +#include "stable-diffusion.h" + +#ifdef SD_USE_METAL +#include "ggml-metal.h" +#endif + +#define ST_HEADER_SIZE_LEN 8 + +uint64_t read_u64(uint8_t* buffer) { + // little endian + uint64_t value = 0; + value |= static_cast(buffer[7]) << 56; + value |= static_cast(buffer[6]) << 48; + value |= static_cast(buffer[5]) << 40; + value |= static_cast(buffer[4]) << 32; + value |= static_cast(buffer[3]) << 24; + value |= static_cast(buffer[2]) << 16; + value |= static_cast(buffer[1]) << 8; + value |= static_cast(buffer[0]); + return value; +} + +int32_t read_int(uint8_t* buffer) { + // little endian + int value = 0; + value |= buffer[3] << 24; + value |= buffer[2] << 16; + value |= buffer[1] << 8; + value |= buffer[0]; + return value; +} + +uint16_t read_short(uint8_t* buffer) { + // little endian + uint16_t value = 0; + value |= buffer[1] << 8; + value |= buffer[0]; + return value; +} + +/*================================================= Preprocess ==================================================*/ + +std::string self_attn_names[] = { + "self_attn.q_proj.weight", + "self_attn.k_proj.weight", + "self_attn.v_proj.weight", + "self_attn.q_proj.bias", + "self_attn.k_proj.bias", + "self_attn.v_proj.bias", +}; + +const char* unused_tensors[] = { + "betas", + "alphas_cumprod_prev", + "sqrt_alphas_cumprod", + "sqrt_one_minus_alphas_cumprod", + "log_one_minus_alphas_cumprod", + "sqrt_recip_alphas_cumprod", + "sqrt_recipm1_alphas_cumprod", + "posterior_variance", + "posterior_log_variance_clipped", + "posterior_mean_coef1", + "posterior_mean_coef2", + "cond_stage_model.transformer.text_model.embeddings.position_ids", + "cond_stage_model.model.logit_scale", + "cond_stage_model.model.text_projection", + "conditioner.embedders.0.transformer.text_model.embeddings.position_ids", + "conditioner.embedders.0.model.logit_scale", + "conditioner.embedders.1.model.logit_scale", + "model.diffusion_model.time_embedding.cond_proj.weight", + "unet.time_embedding.cond_proj.weight", + "model_ema.decay", + "model_ema.num_updates", + "model_ema.diffusion_model", + "embedding_manager", + "denoiser.sigmas", +}; + +bool is_unused_tensor(std::string name) { + for (int i = 0; i < sizeof(unused_tensors) / sizeof(const char*); i++) { + if (starts_with(name, unused_tensors[i])) { + return true; + } + } + return false; +} + +std::unordered_map open_clip_to_hf_clip_model = { + {"model.ln_final.bias", "transformer.text_model.final_layer_norm.bias"}, + {"model.ln_final.weight", 
"transformer.text_model.final_layer_norm.weight"}, + {"model.positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"}, + {"model.token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"}, + {"model.text_projection", "transformer.text_model.text_projection"}, + {"model.visual.class_embedding", "transformer.visual_model.embeddings.class_embedding"}, + {"model.visual.conv1.weight", "transformer.visual_model.embeddings.patch_embedding.weight"}, + {"model.visual.ln_post.bias", "transformer.visual_model.post_layernorm.bias"}, + {"model.visual.ln_post.weight", "transformer.visual_model.post_layernorm.weight"}, + {"model.visual.ln_pre.bias", "transformer.visual_model.pre_layernorm.bias"}, + {"model.visual.ln_pre.weight", "transformer.visual_model.pre_layernorm.weight"}, + {"model.visual.positional_embedding", "transformer.visual_model.embeddings.position_embedding.weight"}, + {"model.visual.proj", "transformer.visual_model.visual_projection"}, +}; + +std::unordered_map open_clip_to_hk_clip_resblock = { + {"attn.out_proj.bias", "self_attn.out_proj.bias"}, + {"attn.out_proj.weight", "self_attn.out_proj.weight"}, + {"ln_1.bias", "layer_norm1.bias"}, + {"ln_1.weight", "layer_norm1.weight"}, + {"ln_2.bias", "layer_norm2.bias"}, + {"ln_2.weight", "layer_norm2.weight"}, + {"mlp.c_fc.bias", "mlp.fc1.bias"}, + {"mlp.c_fc.weight", "mlp.fc1.weight"}, + {"mlp.c_proj.bias", "mlp.fc2.bias"}, + {"mlp.c_proj.weight", "mlp.fc2.weight"}, +}; + +std::unordered_map vae_decoder_name_map = { + {"first_stage_model.decoder.mid.attn_1.to_k.bias", "first_stage_model.decoder.mid.attn_1.k.bias"}, + {"first_stage_model.decoder.mid.attn_1.to_k.weight", "first_stage_model.decoder.mid.attn_1.k.weight"}, + {"first_stage_model.decoder.mid.attn_1.to_out.0.bias", "first_stage_model.decoder.mid.attn_1.proj_out.bias"}, + {"first_stage_model.decoder.mid.attn_1.to_out.0.weight", "first_stage_model.decoder.mid.attn_1.proj_out.weight"}, + {"first_stage_model.decoder.mid.attn_1.to_q.bias", "first_stage_model.decoder.mid.attn_1.q.bias"}, + {"first_stage_model.decoder.mid.attn_1.to_q.weight", "first_stage_model.decoder.mid.attn_1.q.weight"}, + {"first_stage_model.decoder.mid.attn_1.to_v.bias", "first_stage_model.decoder.mid.attn_1.v.bias"}, + {"first_stage_model.decoder.mid.attn_1.to_v.weight", "first_stage_model.decoder.mid.attn_1.v.weight"}, +}; + +std::string convert_open_clip_to_hf_clip(const std::string& name) { + std::string new_name = name; + std::string prefix; + if (starts_with(new_name, "conditioner.embedders.0.open_clip.")) { + prefix = "cond_stage_model."; + new_name = new_name.substr(strlen("conditioner.embedders.0.open_clip.")); + } else if (starts_with(new_name, "conditioner.embedders.0.")) { + prefix = "cond_stage_model."; + new_name = new_name.substr(strlen("conditioner.embedders.0.")); + } else if (starts_with(new_name, "conditioner.embedders.1.")) { + prefix = "cond_stage_model.1."; + new_name = new_name.substr(strlen("conditioner.embedders.0.")); + } else if (starts_with(new_name, "cond_stage_model.")) { + prefix = "cond_stage_model."; + new_name = new_name.substr(strlen("cond_stage_model.")); + } else { + return new_name; + } + + if (open_clip_to_hf_clip_model.find(new_name) != open_clip_to_hf_clip_model.end()) { + new_name = open_clip_to_hf_clip_model[new_name]; + } + + std::string open_clip_resblock_prefix = "model.transformer.resblocks."; + std::string hf_clip_resblock_prefix = "transformer.text_model.encoder.layers."; + + auto replace_suffix = [&]() { + if 
(new_name.find(open_clip_resblock_prefix) == 0) { + std::string remain = new_name.substr(open_clip_resblock_prefix.length()); + std::string idx = remain.substr(0, remain.find(".")); + std::string suffix = remain.substr(idx.length() + 1); + + if (suffix == "attn.in_proj_weight" || suffix == "attn.in_proj_bias") { + new_name = hf_clip_resblock_prefix + idx + "." + suffix; + } else if (open_clip_to_hk_clip_resblock.find(suffix) != open_clip_to_hk_clip_resblock.end()) { + std::string new_suffix = open_clip_to_hk_clip_resblock[suffix]; + new_name = hf_clip_resblock_prefix + idx + "." + new_suffix; + } + } + }; + + replace_suffix(); + + open_clip_resblock_prefix = "model.visual.transformer.resblocks."; + hf_clip_resblock_prefix = "transformer.visual_model.encoder.layers."; + + replace_suffix(); + + return prefix + new_name; +} + +std::string convert_vae_decoder_name(const std::string& name) { + if (vae_decoder_name_map.find(name) != vae_decoder_name_map.end()) { + return vae_decoder_name_map[name]; + } + return name; +} + +std::unordered_map> suffix_conversion_underline = { + { + "attentions", + { + {"to_k", "k"}, + {"to_q", "q"}, + {"to_v", "v"}, + {"to_out_0", "proj_out"}, + {"group_norm", "norm"}, + }, + }, + { + "resnets", + { + {"conv1", "in_layers_2"}, + {"conv2", "out_layers_3"}, + {"norm1", "in_layers_0"}, + {"norm2", "out_layers_0"}, + {"time_emb_proj", "emb_layers_1"}, + {"conv_shortcut", "skip_connection"}, + }, + }, +}; + +std::unordered_map> suffix_conversion_dot = { + { + "attentions", + { + {"to_k", "k"}, + {"to_q", "q"}, + {"to_v", "v"}, + {"to_out.0", "proj_out"}, + {"group_norm", "norm"}, + }, + }, + { + "resnets", + { + {"conv1", "in_layers.2"}, + {"conv2", "out_layers.3"}, + {"norm1", "in_layers.0"}, + {"norm2", "out_layers.0"}, + {"time_emb_proj", "emb_layers.1"}, + {"conv_shortcut", "skip_connection"}, + }, + }, +}; + +std::string convert_diffusers_name_to_compvis(const std::string& key, char seq) { + std::vector m; + + auto match = [](std::vector& match_list, const std::regex& regex, const std::string& key) { + auto r = std::smatch{}; + if (!std::regex_match(key, r, regex)) { + return false; + } + + match_list.clear(); + for (size_t i = 1; i < r.size(); ++i) { + match_list.push_back(r.str(i)); + } + return true; + }; + + std::unordered_map> suffix_conversion; + if (seq == '_') { + suffix_conversion = suffix_conversion_underline; + } else { + suffix_conversion = suffix_conversion_dot; + } + + auto get_converted_suffix = [&suffix_conversion](const std::string& outer_key, const std::string& inner_key) { + auto outer_iter = suffix_conversion.find(outer_key); + if (outer_iter != suffix_conversion.end()) { + auto inner_iter = outer_iter->second.find(inner_key); + if (inner_iter != outer_iter->second.end()) { + return inner_iter->second; + } + } + return inner_key; + }; + + // unet + if (match(m, std::regex(format("unet%cconv_in(.*)", seq)), key)) { + return format("model%cdiffusion_model%cinput_blocks%c0%c0", seq, seq, seq, seq) + m[0]; + } + + if (match(m, std::regex(format("unet%cconv%cout(.*)", seq, seq)), key)) { + return format("model%cdiffusion_model%cout%c2", seq, seq, seq) + m[0]; + } + + if (match(m, std::regex(format("unet%cconv_norm_out(.*)", seq)), key)) { + return format("model%cdiffusion_model%cout%c0", seq, seq, seq) + m[0]; + } + + if (match(m, std::regex(format("unet%ctime_embedding%clinear_(\\d+)(.*)", seq, seq)), key)) { + return format("model%cdiffusion_model%ctime_embed%c", seq, seq, seq) + std::to_string(std::stoi(m[0]) * 2 - 2) + m[1]; + } + + if (match(m, 
std::regex(format("unet%cdown_blocks%c(\\d+)%c(attentions|resnets)%c(\\d+)%c(.+)", seq, seq, seq, seq, seq)), key)) { + std::string suffix = get_converted_suffix(m[1], m[3]); + // LOG_DEBUG("%s %s %s %s", m[0].c_str(), m[1].c_str(), m[2].c_str(), m[3].c_str()); + return format("model%cdiffusion_model%cinput_blocks%c", seq, seq, seq) + std::to_string(1 + std::stoi(m[0]) * 3 + std::stoi(m[2])) + seq + + (m[1] == "attentions" ? "1" : "0") + seq + suffix; + } + + if (match(m, std::regex(format("unet%cmid_block%c(attentions|resnets)%c(\\d+)%c(.+)", seq, seq, seq, seq)), key)) { + std::string suffix = get_converted_suffix(m[0], m[2]); + return format("model%cdiffusion_model%cmiddle_block%c", seq, seq, seq) + (m[0] == "attentions" ? "1" : std::to_string(std::stoi(m[1]) * 2)) + + seq + suffix; + } + + if (match(m, std::regex(format("unet%cup_blocks%c(\\d+)%c(attentions|resnets)%c(\\d+)%c(.+)", seq, seq, seq, seq, seq)), key)) { + std::string suffix = get_converted_suffix(m[1], m[3]); + return format("model%cdiffusion_model%coutput_blocks%c", seq, seq, seq) + std::to_string(std::stoi(m[0]) * 3 + std::stoi(m[2])) + seq + + (m[1] == "attentions" ? "1" : "0") + seq + suffix; + } + + if (match(m, std::regex(format("unet%cdown_blocks%c(\\d+)%cdownsamplers%c0%cconv", seq, seq, seq, seq, seq)), key)) { + return format("model%cdiffusion_model%cinput_blocks%c", seq, seq, seq) + std::to_string(3 + std::stoi(m[0]) * 3) + seq + "0" + seq + "op"; + } + + if (match(m, std::regex(format("unet%cup_blocks%c(\\d+)%cupsamplers%c0%cconv", seq, seq, seq, seq, seq)), key)) { + return format("model%cdiffusion_model%coutput_blocks%c", seq, seq, seq) + std::to_string(2 + std::stoi(m[0]) * 3) + seq + + (std::stoi(m[0]) > 0 ? "2" : "1") + seq + "conv"; + } + + // clip + if (match(m, std::regex(format("te%ctext_model%cencoder%clayers%c(\\d+)%c(.+)", seq, seq, seq, seq, seq)), key)) { + return format("cond_stage_model%ctransformer%ctext_model%cencoder%clayers%c", seq, seq, seq, seq, seq) + m[0] + seq + m[1]; + } + + if (match(m, std::regex(format("te%ctext_model(.*)", seq)), key)) { + return format("cond_stage_model%ctransformer%ctext_model", seq, seq) + m[0]; + } + + // vae + if (match(m, std::regex(format("vae%c(.*)%cconv_norm_out(.*)", seq, seq)), key)) { + return format("first_stage_model%c%s%cnorm_out%s", seq, m[0].c_str(), seq, m[1].c_str()); + } + + if (match(m, std::regex(format("vae%c(.*)%cmid_block%c(attentions|resnets)%c(\\d+)%c(.+)", seq, seq, seq, seq, seq)), key)) { + std::string suffix; + std::string block_name; + if (m[1] == "attentions") { + block_name = "attn"; + suffix = get_converted_suffix(m[1], m[3]); + } else { + block_name = "block"; + suffix = m[3]; + } + return format("first_stage_model%c%s%cmid%c%s_%d%c%s", + seq, m[0].c_str(), seq, seq, block_name.c_str(), std::stoi(m[2]) + 1, seq, suffix.c_str()); + } + + if (match(m, std::regex(format("vae%c(.*)%cup_blocks%c(\\d+)%cresnets%c(\\d+)%c(.+)", seq, seq, seq, seq, seq, seq)), key)) { + std::string suffix = m[3]; + if (suffix == "conv_shortcut") { + suffix = "nin_shortcut"; + } + return format("first_stage_model%c%s%cup%c%d%cblock%c%s%c%s", + seq, m[0].c_str(), seq, seq, 3 - std::stoi(m[1]), seq, seq, m[2].c_str(), seq, suffix.c_str()); + } + + if (match(m, std::regex(format("vae%c(.*)%cdown_blocks%c(\\d+)%cdownsamplers%c0%cconv", seq, seq, seq, seq, seq, seq)), key)) { + return format("first_stage_model%c%s%cdown%c%d%cdownsample%cconv", + seq, m[0].c_str(), seq, seq, std::stoi(m[1]), seq, seq); + } + + if (match(m, 
std::regex(format("vae%c(.*)%cdown_blocks%c(\\d+)%cresnets%c(\\d+)%c(.+)", seq, seq, seq, seq, seq, seq)), key)) { + std::string suffix = m[3]; + if (suffix == "conv_shortcut") { + suffix = "nin_shortcut"; + } + return format("first_stage_model%c%s%cdown%c%d%cblock%c%s%c%s", + seq, m[0].c_str(), seq, seq, std::stoi(m[1]), seq, seq, m[2].c_str(), seq, suffix.c_str()); + } + + if (match(m, std::regex(format("vae%c(.*)%cup_blocks%c(\\d+)%cupsamplers%c0%cconv", seq, seq, seq, seq, seq, seq)), key)) { + return format("first_stage_model%c%s%cup%c%d%cupsample%cconv", + seq, m[0].c_str(), seq, seq, 3 - std::stoi(m[1]), seq, seq); + } + + if (match(m, std::regex(format("vae%c(.*)", seq)), key)) { + return format("first_stage_model%c", seq) + m[0]; + } + + return key; +} + +std::string convert_tensor_name(const std::string& name) { + std::string new_name; + if (starts_with(name, "cond_stage_model.") || starts_with(name, "conditioner.embedders.")) { + new_name = convert_open_clip_to_hf_clip(name); + } else if (starts_with(name, "first_stage_model.decoder")) { + new_name = convert_vae_decoder_name(name); + } else if (starts_with(name, "control_model.")) { // for controlnet pth models + size_t pos = name.find('.'); + if (pos != std::string::npos) { + new_name = name.substr(pos + 1); + } + } else if (starts_with(name, "lora_")) { // for lora + size_t pos = name.find('.'); + if (pos != std::string::npos) { + std::string name_without_network_parts = name.substr(5, pos - 5); + std::string network_part = name.substr(pos + 1); + // LOG_DEBUG("%s %s", name_without_network_parts.c_str(), network_part.c_str()); + std::string new_key = convert_diffusers_name_to_compvis(name_without_network_parts, '_'); + if (new_key.empty()) { + new_name = name; + } else { + new_name = "lora." + new_key + "." + network_part; + } + } else { + new_name = name; + } + } else if (starts_with(name, "unet") || starts_with(name, "vae") || starts_with(name, "te")) { // for diffuser + size_t pos = name.find_last_of('.'); + if (pos != std::string::npos) { + std::string name_without_network_parts = name.substr(0, pos); + std::string network_part = name.substr(pos + 1); + // LOG_DEBUG("%s %s", name_without_network_parts.c_str(), network_part.c_str()); + std::string new_key = convert_diffusers_name_to_compvis(name_without_network_parts, '.'); + if (new_key.empty()) { + new_name = name; + } else { + new_name = new_key + "." 
+ network_part; + } + } else { + new_name = name; + } + } else { + new_name = name; + } + // if (new_name != name) { + // LOG_DEBUG("%s => %s", name.c_str(), new_name.c_str()); + // } + return new_name; +} + +void preprocess_tensor(TensorStorage tensor_storage, + std::vector& processed_tensor_storages) { + std::vector result; + std::string new_name = convert_tensor_name(tensor_storage.name); + + // convert unet transformer linear to conv2d 1x1 + if (starts_with(new_name, "model.diffusion_model.") && + (ends_with(new_name, "proj_in.weight") || ends_with(new_name, "proj_out.weight"))) { + tensor_storage.unsqueeze(); + } + + // convert vae attn block linear to conv2d 1x1 + if (starts_with(new_name, "first_stage_model.") && new_name.find("attn_1") != std::string::npos) { + tensor_storage.unsqueeze(); + } + + tensor_storage.name = new_name; + + if (new_name.find("cond_stage_model") != std::string::npos && + ends_with(new_name, "attn.in_proj_weight")) { + size_t prefix_size = new_name.find("attn.in_proj_weight"); + std::string prefix = new_name.substr(0, prefix_size); + + std::vector chunks = tensor_storage.chunk(3); + chunks[0].name = prefix + "self_attn.q_proj.weight"; + chunks[1].name = prefix + "self_attn.k_proj.weight"; + chunks[2].name = prefix + "self_attn.v_proj.weight"; + + processed_tensor_storages.insert(processed_tensor_storages.end(), chunks.begin(), chunks.end()); + + } else if (new_name.find("cond_stage_model") != std::string::npos && + ends_with(new_name, "attn.in_proj_bias")) { + size_t prefix_size = new_name.find("attn.in_proj_bias"); + std::string prefix = new_name.substr(0, prefix_size); + + std::vector chunks = tensor_storage.chunk(3); + chunks[0].name = prefix + "self_attn.q_proj.bias"; + chunks[1].name = prefix + "self_attn.k_proj.bias"; + chunks[2].name = prefix + "self_attn.v_proj.bias"; + + processed_tensor_storages.insert(processed_tensor_storages.end(), chunks.begin(), chunks.end()); + } else { + processed_tensor_storages.push_back(tensor_storage); + } +} + +float bf16_to_f32(uint16_t bfloat16) { + uint32_t val_bits = (static_cast(bfloat16) << 16); + return *reinterpret_cast(&val_bits); +} + +void bf16_to_f32_vec(uint16_t* src, float* dst, int64_t n) { + // support inplace op + for (int64_t i = n - 1; i >= 0; i--) { + dst[i] = bf16_to_f32(src[i]); + } +} + +void convert_tensor(void* src, + ggml_type src_type, + void* dst, + ggml_type dst_type, + int nrows, + int n_per_row) { + int n = nrows * n_per_row; + if (src_type == dst_type) { + size_t nbytes = n * ggml_type_size(src_type) / ggml_blck_size(src_type); + memcpy(((char*)dst), ((char*)src), nbytes); + } else if (src_type == GGML_TYPE_F32) { + if (dst_type == GGML_TYPE_F16) { + ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n); + } else { + int64_t hist[16]; + std::vector imatrix(n_per_row, 1.0f); // dummy importance matrix + const float* im = imatrix.data(); + ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, hist, im); + } + } else if (dst_type == GGML_TYPE_F32) { + if (src_type == GGML_TYPE_F16) { + ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n); + } else { + auto qtype = ggml_internal_get_type_traits(src_type); + if (qtype.to_float == NULL) { + throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", + ggml_type_name(src_type))); + } + qtype.to_float(src, (float*)dst, n); + } + } else { + // src_type == GGML_TYPE_F16 => dst_type is quantized + // src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized + 
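// Note (descriptive, not part of the patch itself): this final branch always goes through
// f32 as an intermediate. For example, converting a Q4_0 tensor to Q8_0 runs
//     Q4_0 src --(qtype.to_float)--> temporary f32 buffer --(ggml_quantize_chunk)--> Q8_0 dst
// and an F16 -> Q4_0 request takes the same route, because ggml only quantizes from f32.
// The importance matrix handed to ggml_quantize_chunk is a dummy vector of ones, so no
// activation statistics are required when requantizing here.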
auto qtype = ggml_internal_get_type_traits(src_type); + if (qtype.to_float == NULL) { + throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", + ggml_type_name(src_type))); + } + std::vector buf; + buf.resize(sizeof(float) * n); + char* src_data_f32 = buf.data(); + qtype.to_float(src, (float*)src_data_f32, n); + if (dst_type == GGML_TYPE_F16) { + ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n); + } else { + int64_t hist[16]; + std::vector imatrix(n_per_row, 1.0f); // dummy importance matrix + const float* im = imatrix.data(); + ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, hist, im); + } + } +} + +/*================================================= ModelLoader ==================================================*/ + +// ported from https://github.com/openai/CLIP/blob/main/clip/simple_tokenizer.py#L16 +std::map unicode_to_byte() { + std::map byte_to_unicode; + + // List of utf-8 byte ranges + for (int b = static_cast('!'); b <= static_cast('~'); ++b) { + byte_to_unicode[b] = static_cast(b); + } + + for (int b = 49825; b <= 49836; ++b) { + byte_to_unicode[b] = static_cast(b); + } + + for (int b = 49838; b <= 50111; ++b) { + byte_to_unicode[b] = static_cast(b); + } + // printf("%d %d %d %d\n", static_cast('¡'), static_cast('¬'), static_cast('®'), static_cast('ÿ')); + // exit(1); + + int n = 0; + for (int b = 0; b < 256; ++b) { + if (byte_to_unicode.find(b) == byte_to_unicode.end()) { + byte_to_unicode[b] = static_cast(256 + n); + n++; + } + } + + // byte_encoder = bytes_to_unicode() + // byte_decoder = {v: k for k, v in byte_encoder.items()} + std::map byte_decoder; + + for (const auto& entry : byte_to_unicode) { + byte_decoder[entry.second] = entry.first; + } + + byte_to_unicode.clear(); + + return byte_decoder; +} + +bool is_zip_file(const std::string& file_path) { + struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r'); + if (zip == NULL) { + return false; + } + zip_close(zip); + return true; +} + +bool is_gguf_file(const std::string& file_path) { + std::ifstream file(file_path, std::ios::binary); + if (!file.is_open()) { + return false; + } + + char magic[4]; + + file.read(magic, sizeof(magic)); + if (!file) { + return false; + } + for (uint32_t i = 0; i < sizeof(magic); i++) { + if (magic[i] != GGUF_MAGIC[i]) { + return false; + } + } + + return true; +} + +bool is_safetensors_file(const std::string& file_path) { + std::ifstream file(file_path, std::ios::binary); + if (!file.is_open()) { + return false; + } + + // get file size + file.seekg(0, file.end); + size_t file_size_ = file.tellg(); + file.seekg(0, file.beg); + + // read header size + if (file_size_ <= ST_HEADER_SIZE_LEN) { + return false; + } + + uint8_t header_size_buf[ST_HEADER_SIZE_LEN]; + file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN); + if (!file) { + return false; + } + + size_t header_size_ = read_u64(header_size_buf); + if (header_size_ >= file_size_ || header_size_ <= 2) { + return false; + } + + // read header + std::vector header_buf; + header_buf.resize(header_size_ + 1); + header_buf[header_size_] = '\0'; + file.read(header_buf.data(), header_size_); + if (!file) { + return false; + } + nlohmann::json header_ = nlohmann::json::parse(header_buf.data()); + if (header_.is_discarded()) { + return false; + } + return true; +} + +bool ModelLoader::init_from_file(const std::string& file_path, const std::string& prefix) { + if (is_directory(file_path)) { + LOG_INFO("load %s using diffusers format", 
file_path.c_str()); + return init_from_diffusers_file(file_path, prefix); + } else if (is_gguf_file(file_path)) { + LOG_INFO("load %s using gguf format", file_path.c_str()); + return init_from_gguf_file(file_path, prefix); + } else if (is_safetensors_file(file_path)) { + LOG_INFO("load %s using safetensors format", file_path.c_str()); + return init_from_safetensors_file(file_path, prefix); + } else if (is_zip_file(file_path)) { + LOG_INFO("load %s using checkpoint format", file_path.c_str()); + return init_from_ckpt_file(file_path, prefix); + } else { + LOG_WARN("unknown format %s", file_path.c_str()); + return false; + } +} + +/*================================================= GGUFModelLoader ==================================================*/ + +bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::string& prefix) { + LOG_DEBUG("init from '%s'", file_path.c_str()); + file_paths_.push_back(file_path); + size_t file_index = file_paths_.size() - 1; + + gguf_context* ctx_gguf_ = NULL; + ggml_context* ctx_meta_ = NULL; + ctx_gguf_ = gguf_init_from_file(file_path.c_str(), {true, &ctx_meta_}); + if (!ctx_gguf_) { + LOG_ERROR("failed to open '%s'", file_path.c_str()); + return false; + } + + int n_tensors = gguf_get_n_tensors(ctx_gguf_); + + size_t total_size = 0; + size_t data_offset = gguf_get_data_offset(ctx_gguf_); + for (int i = 0; i < n_tensors; i++) { + std::string name = gguf_get_tensor_name(ctx_gguf_, i); + struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str()); + size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i); + + // LOG_DEBUG("%s", name.c_str()); + + TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset); + + GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes()); + + tensor_storages.push_back(tensor_storage); + } + + gguf_free(ctx_gguf_); + ggml_free(ctx_meta_); + + return true; +} + +/*================================================= SafeTensorsModelLoader ==================================================*/ + +ggml_type str_to_ggml_type(const std::string& dtype) { + ggml_type ttype = GGML_TYPE_COUNT; + if (dtype == "F16") { + ttype = GGML_TYPE_F16; + } else if (dtype == "BF16") { + ttype = GGML_TYPE_F32; + } else if (dtype == "F32") { + ttype = GGML_TYPE_F32; + } + return ttype; +} + +// https://huggingface.co/docs/safetensors/index +bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const std::string& prefix) { + LOG_DEBUG("init from '%s'", file_path.c_str()); + file_paths_.push_back(file_path); + size_t file_index = file_paths_.size() - 1; + std::ifstream file(file_path, std::ios::binary); + if (!file.is_open()) { + LOG_ERROR("failed to open '%s'", file_path.c_str()); + return false; + } + + // get file size + file.seekg(0, file.end); + size_t file_size_ = file.tellg(); + file.seekg(0, file.beg); + + // read header size + if (file_size_ <= ST_HEADER_SIZE_LEN) { + LOG_ERROR("invalid safetensor file '%s'", file_path.c_str()); + return false; + } + + uint8_t header_size_buf[ST_HEADER_SIZE_LEN]; + file.read((char*)header_size_buf, ST_HEADER_SIZE_LEN); + if (!file) { + LOG_ERROR("read safetensors header size failed: '%s'", file_path.c_str()); + return false; + } + + size_t header_size_ = read_u64(header_size_buf); + if (header_size_ >= file_size_) { + LOG_ERROR("invalid safetensor file '%s'", file_path.c_str()); + return false; + } + + // read header + std::vector header_buf; + header_buf.resize(header_size_ + 1); + header_buf[header_size_] = 
'\0'; + file.read(header_buf.data(), header_size_); + if (!file) { + LOG_ERROR("read safetensors header failed: '%s'", file_path.c_str()); + return false; + } + + nlohmann::json header_ = nlohmann::json::parse(header_buf.data()); + + for (auto& item : header_.items()) { + std::string name = item.key(); + nlohmann::json tensor_info = item.value(); + // LOG_DEBUG("%s %s\n", name.c_str(), tensor_info.dump().c_str()); + + if (name == "__metadata__") { + continue; + } + + if (is_unused_tensor(name)) { + continue; + } + + std::string dtype = tensor_info["dtype"]; + nlohmann::json shape = tensor_info["shape"]; + + size_t begin = tensor_info["data_offsets"][0].get(); + size_t end = tensor_info["data_offsets"][1].get(); + + ggml_type type = str_to_ggml_type(dtype); + if (type == GGML_TYPE_COUNT) { + LOG_ERROR("unsupported dtype '%s'", dtype.c_str()); + return false; + } + + if (shape.size() > SD_MAX_DIMS) { + LOG_ERROR("invalid tensor '%s'", name.c_str()); + return false; + } + + int n_dims = (int)shape.size(); + int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1}; + for (int i = 0; i < n_dims; i++) { + ne[i] = shape[i].get(); + } + + if (n_dims == 5) { + if (ne[3] == 1 && ne[4] == 1) { + n_dims = 4; + } else { + LOG_ERROR("invalid tensor '%s'", name.c_str()); + return false; + } + } + + TensorStorage tensor_storage(prefix + name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin); + + tensor_storage.reverse_ne(); + + size_t tensor_data_size = end - begin; + + if (dtype == "BF16") { + tensor_storage.is_bf16 = true; + GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2); + } else { + GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size); + } + + tensor_storages.push_back(tensor_storage); + + // LOG_DEBUG("%s %s", tensor_storage.to_string().c_str(), dtype.c_str()); + } + + return true; +} + +/*================================================= DiffusersModelLoader ==================================================*/ + +bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const std::string& prefix) { + std::string unet_path = path_join(file_path, "unet/diffusion_pytorch_model.safetensors"); + std::string vae_path = path_join(file_path, "vae/diffusion_pytorch_model.safetensors"); + std::string clip_path = path_join(file_path, "text_encoder/model.safetensors"); + + if (!init_from_safetensors_file(unet_path, "unet.")) { + return false; + } + if (!init_from_safetensors_file(vae_path, "vae.")) { + return false; + } + if (!init_from_safetensors_file(clip_path, "te.")) { + return false; + } + return true; +} + +/*================================================= CkptModelLoader ==================================================*/ + +// $ python -m pickletools sd-v1-4/archive/data.pkl | head -n 100 +// 0: \x80 PROTO 2 +// 2: } EMPTY_DICT +// 3: q BINPUT 0 +// 5: ( MARK +// 6: X BINUNICODE 'epoch' +// 16: q BINPUT 1 +// 18: K BININT1 6 +// 20: X BINUNICODE 'global_step' +// 36: q BINPUT 2 +// 38: J BININT 470000 +// 43: X BINUNICODE 'pytorch-lightning_version' +// 73: q BINPUT 3 +// 75: X BINUNICODE '1.4.2' +// 85: q BINPUT 4 +// 87: X BINUNICODE 'state_dict' +// 102: q BINPUT 5 +// 104: } EMPTY_DICT +// 105: q BINPUT 6 +// 107: ( MARK +// 108: X BINUNICODE 'betas' +// 118: q BINPUT 7 +// 120: c GLOBAL 'torch._utils _rebuild_tensor_v2' +// 153: q BINPUT 8 +// 155: ( MARK +// 156: ( MARK +// 157: X BINUNICODE 'storage' +// 169: q BINPUT 9 +// 171: c GLOBAL 'torch FloatStorage' +// 191: q BINPUT 10 +// 193: X BINUNICODE '0' +// 199: q BINPUT 11 +// 201: X BINUNICODE 
'cpu' +// 209: q BINPUT 12 +// 211: M BININT2 1000 +// 214: t TUPLE (MARK at 156) +// 215: q BINPUT 13 +// 217: Q BINPERSID +// 218: K BININT1 0 +// 220: M BININT2 1000 +// ............................... +// 3201: q BINPUT 250 +// 3203: R REDUCE +// 3204: q BINPUT 251 +// 3206: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.weight' +// 3264: q BINPUT 252 +// 3266: h BINGET 8 +// 3268: ( MARK +// 3269: ( MARK +// 3270: h BINGET 9 +// 3272: h BINGET 10 +// 3274: X BINUNICODE '30' +// 3281: q BINPUT 253 +// 3283: h BINGET 12 +// 3285: J BININT 102400 +// 3290: t TUPLE (MARK at 3269) +// 3291: q BINPUT 254 +// 3293: Q BINPERSID +// 3294: K BININT1 0 +// 3296: ( MARK +// 3297: M BININT2 320 +// 3300: M BININT2 320 +// 3303: K BININT1 1 +// 3305: K BININT1 1 +// 3307: t TUPLE (MARK at 3296) +// 3308: q BINPUT 255 +// 3310: ( MARK +// 3311: M BININT2 320 +// 3314: K BININT1 1 +// 3316: K BININT1 1 +// 3318: K BININT1 1 +// 3320: t TUPLE (MARK at 3310) +// 3321: r LONG_BINPUT 256 +// 3326: \x89 NEWFALSE +// 3327: h BINGET 16 +// 3329: ) EMPTY_TUPLE +// 3330: R REDUCE +// 3331: r LONG_BINPUT 257 +// 3336: t TUPLE (MARK at 3268) +// 3337: r LONG_BINPUT 258 +// 3342: R REDUCE +// 3343: r LONG_BINPUT 259 +// 3348: X BINUNICODE 'model.diffusion_model.input_blocks.1.1.proj_in.bias' +// 3404: r LONG_BINPUT 260 +// 3409: h BINGET 8 +// 3411: ( MARK +// 3412: ( MARK +// 3413: h BINGET 9 +// 3415: h BINGET 10 +// 3417: X BINUNICODE '31' + +struct PickleTensorReader { + enum ReadPhase { + READ_NAME, + READ_DATA, + CHECK_SIZE, + READ_DIMENS + }; + ReadPhase phase = READ_NAME; + size_t entry_size = 0; + int32_t nelements = 0; + + TensorStorage tensor_storage; + + static ggml_type global_type; // all pickle_tensors data type + static bool read_global_type; + + bool read_int_value(uint32_t value) { + if (phase == CHECK_SIZE) { + if (entry_size == value * ggml_type_size(tensor_storage.type)) { + nelements = value; + phase = READ_DIMENS; + return true; + } else { + phase = READ_NAME; + } + } else if (phase == READ_DIMENS) { + if (tensor_storage.n_dims + 1 > SD_MAX_DIMS) { // too many dimens + phase = READ_NAME; + tensor_storage.n_dims = 0; + } + if (nelements % value == 0) { + tensor_storage.ne[tensor_storage.n_dims] = value; + tensor_storage.n_dims++; + } + } + return false; + } + + void read_global(const std::string& str) { + if (str == "FloatStorage") { + if (read_global_type) { + global_type = GGML_TYPE_F32; + read_global_type = false; + } + tensor_storage.type = GGML_TYPE_F32; + } else if (str == "HalfStorage") { + if (read_global_type) { + global_type = GGML_TYPE_F16; + read_global_type = false; + } + tensor_storage.type = GGML_TYPE_F16; + } + } + + void read_string(const std::string& str, struct zip_t* zip, std::string dir) { + if (str == "storage") { + read_global_type = true; + } else if (str != "state_dict") { + if (phase == READ_DATA) { + std::string entry_name = dir + "data/" + std::string(str); + + size_t i, n = zip_entries_total(zip); + for (i = 0; i < n; ++i) { + zip_entry_openbyindex(zip, i); + { + std::string name = zip_entry_name(zip); + if (name == entry_name) { + tensor_storage.index_in_zip = (int)i; + entry_size = zip_entry_size(zip); + zip_entry_close(zip); + break; + } + } + zip_entry_close(zip); + } + + phase = entry_size > 0 ? 
CHECK_SIZE : READ_NAME; + } + if (!read_global_type && phase == READ_NAME) { + tensor_storage.name = str; + phase = READ_DATA; + tensor_storage.type = global_type; + } + } + } +}; + +ggml_type PickleTensorReader::global_type = GGML_TYPE_F32; // all pickle_tensors data type +bool PickleTensorReader::read_global_type = false; + +int find_char(uint8_t* buffer, int len, char c) { + for (int pos = 0; pos < len; pos++) { + if (buffer[pos] == c) { + return pos; + } + } + return -1; +} + +#define MAX_STRING_BUFFER 512 + +bool ModelLoader::parse_data_pkl(uint8_t* buffer, + size_t buffer_size, + zip_t* zip, + std::string dir, + size_t file_index, + const std::string& prefix) { + uint8_t* buffer_end = buffer + buffer_size; + if (buffer[0] == 0x80) { // proto + if (buffer[1] != 2) { + LOG_ERROR("Unsupported protocol\n"); + return false; + } + buffer += 2; // 0x80 and version + char string_buffer[MAX_STRING_BUFFER]; + bool finish = false; + PickleTensorReader reader; + // read pickle binary file + while (!finish && buffer < buffer_end) { + uint8_t opcode = *buffer; + buffer++; + // https://github.com/python/cpython/blob/3.7/Lib/pickletools.py#L1048 + // https://github.com/python/cpython/blob/main/Lib/pickle.py#L105 + switch (opcode) { + case '}': // EMPTY_DICT = b'}' # push empty dict + break; + case ']': // EMPTY_LIST = b']' # push empty list + break; + // skip unused sections + case 'h': // BINGET = b'h' # " " " " " " ; " " 1-byte arg + case 'q': // BINPUT = b'q' # " " " " " ; " " 1-byte arg + case 'Q': // BINPERSID = b'Q' # " " " ; " " " " stack + buffer++; + break; + case 'r': // LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg + buffer += 4; + break; + case 0x95: // FRAME = b'\x95' # indicate the beginning of a new frame + buffer += 8; + break; + case 0x94: // MEMOIZE = b'\x94' # store top of the stack in memo + break; + case '(': // MARK = b'(' # push special markobject on stack + break; + case 'K': // BININT1 = b'K' # push 1-byte unsigned int + { + uint8_t value = *buffer; + if (reader.read_int_value(value)) { + buffer++; + } + buffer++; + } break; + case 'M': // BININT2 = b'M' # push 2-byte unsigned int + { + uint16_t value = read_short(buffer); + if (reader.read_int_value(value)) { + buffer++; + } + buffer += 2; + } break; + case 'J': // BININT = b'J' # push four-byte signed int + { + const int32_t value = read_int(buffer); + if (reader.read_int_value(value)) { + buffer++; // skip tuple after read num_elements + } + buffer += 4; + } break; + case 'X': // BINUNICODE = b'X' # " " " ; counted UTF-8 string argument + { + const int32_t len = read_int(buffer); + buffer += 4; + memset(string_buffer, 0, MAX_STRING_BUFFER); + if (len > MAX_STRING_BUFFER) { + LOG_WARN("tensor name very large"); + } + memcpy(string_buffer, buffer, len < MAX_STRING_BUFFER ? 
len : (MAX_STRING_BUFFER - 1)); + buffer += len; + reader.read_string(string_buffer, zip, dir); + } break; + case 0x8C: // SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes + { + const int8_t len = *buffer; + buffer++; + memset(string_buffer, 0, MAX_STRING_BUFFER); + memcpy(string_buffer, buffer, len); + buffer += len; + // printf("String: '%s'\n", string_buffer); + } break; + case 'c': // GLOBAL = b'c' # push self.find_class(modname, name); 2 string args + { + int len = find_char(buffer, MAX_STRING_BUFFER, '\n'); + + buffer += len + 1; + len = find_char(buffer, MAX_STRING_BUFFER, '\n'); + + memset(string_buffer, 0, MAX_STRING_BUFFER); + memcpy(string_buffer, buffer, len); + buffer += len + 1; + reader.read_global(string_buffer); + } break; + case 0x86: // TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items + case 0x85: // TUPLE1 = b'\x85' # build 1-tuple from stack top + case 't': // TUPLE = b't' # build tuple from topmost stack items + if (reader.phase == PickleTensorReader::READ_DIMENS) { + reader.tensor_storage.reverse_ne(); + reader.tensor_storage.file_index = file_index; + reader.tensor_storage.name = prefix + reader.tensor_storage.name; + tensor_storages.push_back(reader.tensor_storage); + // LOG_DEBUG("%s", reader.tensor_storage.name.c_str()); + // reset + reader = PickleTensorReader(); + } + break; + case '.': // STOP = b'.' # every pickle ends with STOP + finish = true; + break; + default: + break; + } + } + } + return true; +} + +bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::string& prefix) { + LOG_DEBUG("init from '%s'", file_path.c_str()); + file_paths_.push_back(file_path); + size_t file_index = file_paths_.size() - 1; + + struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r'); + if (zip == NULL) { + LOG_ERROR("failed to open '%s'", file_path.c_str()); + return false; + } + int n = (int)zip_entries_total(zip); + for (int i = 0; i < n; ++i) { + zip_entry_openbyindex(zip, i); + { + std::string name = zip_entry_name(zip); + size_t pos = name.find("data.pkl"); + if (pos != std::string::npos) { + std::string dir = name.substr(0, pos); + void* pkl_data = NULL; + size_t pkl_size; + zip_entry_read(zip, &pkl_data, &pkl_size); + + // LOG_DEBUG("%lld", pkl_size); + + parse_data_pkl((uint8_t*)pkl_data, pkl_size, zip, dir, file_index, prefix); + + free(pkl_data); + } + } + zip_entry_close(zip); + } + zip_close(zip); + return true; +} + +SDVersion ModelLoader::get_sd_version() { + TensorStorage token_embedding_weight; + for (auto& tensor_storage : tensor_storages) { + if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos) { + return VERSION_XL; + } + if (tensor_storage.name.find("cond_stage_model.1") != std::string::npos) { + return VERSION_XL; + } + if (tensor_storage.name.find("model.diffusion_model.input_blocks.8.0.time_mixer.mix_factor") != std::string::npos) { + return VERSION_SVD; + } + + if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" || + tensor_storage.name == "cond_stage_model.model.token_embedding.weight" || + tensor_storage.name == "text_model.embeddings.token_embedding.weight" || + tensor_storage.name == "te.text_model.embeddings.token_embedding.weight" || + tensor_storage.name == "conditioner.embedders.0.model.token_embedding.weight" || + tensor_storage.name == "conditioner.embedders.0.transformer.text_model.embeddings.token_embedding.weight") { + token_embedding_weight = tensor_storage; + // break; + } + } + if 
(token_embedding_weight.ne[0] == 768) { + return VERSION_1_x; + } else if (token_embedding_weight.ne[0] == 1024) { + return VERSION_2_x; + } + return VERSION_COUNT; +} + +ggml_type ModelLoader::get_sd_wtype() { + for (auto& tensor_storage : tensor_storages) { + if (is_unused_tensor(tensor_storage.name)) { + continue; + } + + if (tensor_storage.name.find(".weight") != std::string::npos && + tensor_storage.name.find("time_embed") != std::string::npos) { + return tensor_storage.type; + } + } + return GGML_TYPE_COUNT; +} + +std::string ModelLoader::load_merges() { + std::string merges_utf8_str(reinterpret_cast(merges_utf8_c_str), sizeof(merges_utf8_c_str)); + return merges_utf8_str; +} + +void remove_duplicates(std::vector& vec) { + std::unordered_map name_to_index_map; + + for (size_t i = 0; i < vec.size(); ++i) { + const std::string& current_name = vec[i].name; + auto it = name_to_index_map.find(current_name); + + if (it != name_to_index_map.end()) { + vec[it->second] = vec[i]; + } else { + name_to_index_map[current_name] = i; + } + } + + vec.resize(name_to_index_map.size()); +} + +bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) { + std::vector processed_tensor_storages; + for (auto& tensor_storage : tensor_storages) { + // LOG_DEBUG("%s", name.c_str()); + + if (is_unused_tensor(tensor_storage.name)) { + continue; + } + + preprocess_tensor(tensor_storage, processed_tensor_storages); + } + remove_duplicates(processed_tensor_storages); + bool success = true; + for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) { + std::string file_path = file_paths_[file_index]; + LOG_DEBUG("loading tensors from %s", file_path.c_str()); + + std::ifstream file(file_path, std::ios::binary); + if (!file.is_open()) { + LOG_ERROR("failed to open '%s'", file_path.c_str()); + return false; + } + + bool is_zip = false; + for (auto& tensor_storage : tensor_storages) { + if (tensor_storage.file_index != file_index) { + continue; + } + if (tensor_storage.index_in_zip >= 0) { + is_zip = true; + break; + } + } + + struct zip_t* zip = NULL; + if (is_zip) { + zip = zip_open(file_path.c_str(), 0, 'r'); + if (zip == NULL) { + LOG_ERROR("failed to open zip '%s'", file_path.c_str()); + return false; + } + } + + std::vector read_buffer; + std::vector convert_buffer; + + auto read_data = [&](const TensorStorage& tensor_storage, char* buf, size_t n) { + if (zip != NULL) { + zip_entry_openbyindex(zip, tensor_storage.index_in_zip); + size_t entry_size = zip_entry_size(zip); + if (entry_size != n) { + read_buffer.resize(entry_size); + zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size); + memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n); + } else { + zip_entry_noallocread(zip, (void*)buf, n); + } + zip_entry_close(zip); + } else { + file.seekg(tensor_storage.offset); + file.read(buf, n); + if (!file) { + LOG_ERROR("read tensor data failed: '%s'", file_path.c_str()); + return false; + } + } + return true; + }; + + for (auto& tensor_storage : processed_tensor_storages) { + if (tensor_storage.file_index != file_index) { + continue; + } + + ggml_tensor* dst_tensor = NULL; + + success = on_new_tensor_cb(tensor_storage, &dst_tensor); + if (!success) { + LOG_WARN("process tensor failed: '%s'", tensor_storage.name.c_str()); + break; + } + + if (dst_tensor == NULL) { + continue; + } + + size_t nbytes_to_read = tensor_storage.nbytes_to_read(); + + if (dst_tensor->buffer == NULL || ggml_backend_buffer_is_host(dst_tensor->buffer)) { + // 
for the CPU and Metal backend, we can copy directly into the tensor + if (tensor_storage.type == dst_tensor->type) { + GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes()); + read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read); + + if (tensor_storage.is_bf16) { + // inplace op + bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements()); + } + } else { + read_buffer.resize(tensor_storage.nbytes()); + read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read); + + if (tensor_storage.is_bf16) { + // inplace op + bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements()); + } + + convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data, + dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]); + } + } else { + read_buffer.resize(tensor_storage.nbytes()); + read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read); + + if (tensor_storage.is_bf16) { + // inplace op + bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements()); + } + + if (tensor_storage.type == dst_tensor->type) { + // copy to device memory + ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor)); + } else { + // convert first, then copy to device memory + convert_buffer.resize(ggml_nbytes(dst_tensor)); + convert_tensor((void*)read_buffer.data(), tensor_storage.type, + (void*)convert_buffer.data(), dst_tensor->type, + (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]); + ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor)); + } + } + } + + if (zip != NULL) { + zip_close(zip); + } + + if (!success) { + break; + } + } + return success; +} + +bool ModelLoader::load_tensors(std::map& tensors, + ggml_backend_t backend, + std::set ignore_tensors) { + std::set tensor_names_in_file; + auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { + const std::string& name = tensor_storage.name; + // LOG_DEBUG("%s", tensor_storage.to_string().c_str()); + tensor_names_in_file.insert(name); + + struct ggml_tensor* real; + if (tensors.find(name) != tensors.end()) { + real = tensors[name]; + } else { + for (auto& ignore_tensor : ignore_tensors) { + if (starts_with(name, ignore_tensor)) { + return true; + } + } + LOG_INFO("unknown tensor '%s' in model file", tensor_storage.to_string().c_str()); + return true; + } + + if ( + real->ne[0] != tensor_storage.ne[0] || + real->ne[1] != tensor_storage.ne[1] || + real->ne[2] != tensor_storage.ne[2] || + real->ne[3] != tensor_storage.ne[3]) { + LOG_ERROR( + "tensor '%s' has wrong shape in model file: " + "got [%d, %d, %d, %d], expected [%d, %d, %d, %d]", + name.c_str(), + (int)tensor_storage.ne[0], (int)tensor_storage.ne[1], (int)tensor_storage.ne[2], (int)tensor_storage.ne[3], + (int)real->ne[0], (int)real->ne[1], (int)real->ne[2], (int)real->ne[3]); + return false; + } + + *dst_tensor = real; + + return true; + }; + + bool success = load_tensors(on_new_tensor_cb, backend); + if (!success) { + LOG_ERROR("load tensors from file failed"); + return false; + } + + bool some_tensor_not_init = false; + + for (auto pair : tensors) { + if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) { + continue; + } + if (pair.first.find("alphas_cumprod") != std::string::npos) { + continue; 
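// Note (descriptive summary, not part of the patch itself): the read/convert logic in the
// callback-based load_tensors() overload above boils down to:
//   host-visible destination buffer:
//     same type      -> read straight into dst_tensor->data (bf16 widened in place)
//     different type -> read into read_buffer, then convert_tensor() into dst_tensor->data
//   device destination buffer (e.g. CUDA/Metal):
//     read into read_buffer (bf16 widened in place), then either copy with
//     ggml_backend_tensor_set() (same type) or convert_tensor() into convert_buffer
//     first and copy that (different type)
// bf16 values are just the top 16 bits of an f32, so bf16_to_f32_vec() can widen a buffer
// in place by iterating from the end: each 4-byte store only overwrites source elements
// that have already been read (e.g. bf16 0x3F80 becomes f32 bits 0x3F800000 == 1.0f).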
+ } + + if (pair.first.find("alphas_cumprod") != std::string::npos) { + continue; + } + + if (tensor_names_in_file.find(pair.first) == tensor_names_in_file.end()) { + LOG_ERROR("tensor '%s' not in model file", pair.first.c_str()); + some_tensor_not_init = true; + } + } + + if (some_tensor_not_init) { + return false; + } + return true; +} + +bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) { + auto backend = ggml_backend_cpu_init(); + size_t mem_size = 1 * 1024 * 1024; // for padding + mem_size += tensor_storages.size() * ggml_tensor_overhead(); + mem_size += get_params_mem_size(backend, type); + LOG_INFO("model tensors mem size: %.2fMB", mem_size / 1024.f / 1024.f); + ggml_context* ggml_ctx = ggml_init({mem_size, NULL, false}); + + gguf_context* gguf_ctx = gguf_init_empty(); + + auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool { + const std::string& name = tensor_storage.name; + + ggml_type tensor_type = tensor_storage.type; + if (type != GGML_TYPE_COUNT) { + if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) { + tensor_type = GGML_TYPE_F16; + } else { + tensor_type = type; + } + } + + ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne); + if (tensor == NULL) { + LOG_ERROR("ggml_new_tensor failed"); + return false; + } + ggml_set_name(tensor, name.c_str()); + + // LOG_DEBUG("%s %d %s %d[%d %d %d %d] %d[%d %d %d %d]", name.c_str(), + // ggml_nbytes(tensor), ggml_type_name(tensor_type), + // tensor_storage.n_dims, + // tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3], + // tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + + *dst_tensor = tensor; + + gguf_add_tensor(gguf_ctx, tensor); + + return true; + }; + + bool success = load_tensors(on_new_tensor_cb, backend); + ggml_backend_free(backend); + LOG_INFO("load tensors done"); + LOG_INFO("trying to save tensors to %s", file_path.c_str()); + if (success) { + gguf_write_to_file(gguf_ctx, file_path.c_str(), false); + } + ggml_free(ggml_ctx); + gguf_free(gguf_ctx); + return success; +} + +int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) { + size_t alignment = 128; + if (backend != NULL) { + alignment = ggml_backend_get_alignment(backend); + } + int64_t mem_size = 0; + std::vector processed_tensor_storages; + for (auto& tensor_storage : tensor_storages) { + if (is_unused_tensor(tensor_storage.name)) { + continue; + } + preprocess_tensor(tensor_storage, processed_tensor_storages); + } + + for (auto& tensor_storage : processed_tensor_storages) { + ggml_type tensor_type = tensor_storage.type; + if (type != GGML_TYPE_COUNT) { + if (ggml_is_quantized(type) && tensor_storage.ne[0] % 32 != 0) { + tensor_type = GGML_TYPE_F16; + } else { + tensor_type = type; + } + } + tensor_storage.type = tensor_type; + mem_size += tensor_storage.nbytes() + alignment; + } + + return mem_size; +} + +bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) { + ModelLoader model_loader; + + if (!model_loader.init_from_file(input_path)) { + LOG_ERROR("init model loader from file failed: '%s'", input_path); + return false; + } + + if (vae_path != NULL && strlen(vae_path) > 0) { + if (!model_loader.init_from_file(vae_path, "vae.")) { + LOG_ERROR("init model loader from file failed: '%s'", vae_path); + return false; + } + } + bool success = model_loader.save_to_gguf_file(output_path, 
(ggml_type)output_type); + return success; +} \ No newline at end of file diff --git a/otherarch/sdcpp/model.h b/otherarch/sdcpp/model.h new file mode 100644 index 000000000..833aadd5b --- /dev/null +++ b/otherarch/sdcpp/model.h @@ -0,0 +1,154 @@ +#ifndef __MODEL_H__ +#define __MODEL_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include "ggml-backend.h" +#include "ggml.h" +#include "json.hpp" +#include "zip.h" + +#define SD_MAX_DIMS 5 + +enum SDVersion { + VERSION_1_x, + VERSION_2_x, + VERSION_XL, + VERSION_SVD, + VERSION_COUNT, +}; + +struct TensorStorage { + std::string name; + ggml_type type = GGML_TYPE_F32; + bool is_bf16 = false; + int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1}; + int n_dims = 0; + + size_t file_index = 0; + int index_in_zip = -1; // >= means stored in a zip file + size_t offset = 0; // offset in file + + TensorStorage() = default; + + TensorStorage(const std::string& name, ggml_type type, int64_t* ne, int n_dims, size_t file_index, size_t offset = 0) + : name(name), type(type), n_dims(n_dims), file_index(file_index), offset(offset) { + for (int i = 0; i < n_dims; i++) { + this->ne[i] = ne[i]; + } + } + + int64_t nelements() const { + int64_t n = 1; + for (int i = 0; i < SD_MAX_DIMS; i++) { + n *= ne[i]; + } + return n; + } + + int64_t nbytes() const { + return nelements() * ggml_type_size(type) / ggml_blck_size(type); + } + + int64_t nbytes_to_read() const { + if (is_bf16) { + return nbytes() / 2; + } else { + return nbytes(); + } + } + + void unsqueeze() { + if (n_dims == 2) { + n_dims = 4; + ne[3] = ne[1]; + ne[2] = ne[0]; + ne[1] = 1; + ne[0] = 1; + } + } + + std::vector chunk(size_t n) { + std::vector chunks; + size_t chunk_size = nbytes_to_read() / n; + // printf("%d/%d\n", chunk_size, nbytes_to_read()); + reverse_ne(); + for (int i = 0; i < n; i++) { + TensorStorage chunk_i = *this; + chunk_i.ne[0] = ne[0] / n; + chunk_i.offset = offset + i * chunk_size; + chunk_i.reverse_ne(); + chunks.push_back(chunk_i); + } + reverse_ne(); + return chunks; + } + + void reverse_ne() { + int64_t new_ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1}; + for (int i = 0; i < n_dims; i++) { + new_ne[i] = ne[n_dims - 1 - i]; + } + for (int i = 0; i < n_dims; i++) { + ne[i] = new_ne[i]; + } + } + + std::string to_string() const { + std::stringstream ss; + const char* type_name = ggml_type_name(type); + if (is_bf16) { + type_name = "bf16"; + } + ss << name << " | " << type_name << " | "; + ss << n_dims << " ["; + for (int i = 0; i < SD_MAX_DIMS; i++) { + ss << ne[i]; + if (i != SD_MAX_DIMS - 1) { + ss << ", "; + } + } + ss << "]"; + return ss.str(); + } +}; + +typedef std::function on_new_tensor_cb_t; + +class ModelLoader { +protected: + std::vector file_paths_; + std::vector tensor_storages; + + bool parse_data_pkl(uint8_t* buffer, + size_t buffer_size, + zip_t* zip, + std::string dir, + size_t file_index, + const std::string& prefix); + + bool init_from_gguf_file(const std::string& file_path, const std::string& prefix = ""); + bool init_from_safetensors_file(const std::string& file_path, const std::string& prefix = ""); + bool init_from_ckpt_file(const std::string& file_path, const std::string& prefix = ""); + bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = ""); + +public: + bool init_from_file(const std::string& file_path, const std::string& prefix = ""); + SDVersion get_sd_version(); + ggml_type get_sd_wtype(); + std::string load_merges(); + bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend); + bool 
load_tensors(std::map& tensors, + ggml_backend_t backend, + std::set ignore_tensors = {}); + bool save_to_gguf_file(const std::string& file_path, ggml_type type); + int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT); + ~ModelLoader() = default; +}; +#endif // __MODEL_H__ \ No newline at end of file diff --git a/otherarch/sdcpp/preprocessing.hpp b/otherarch/sdcpp/preprocessing.hpp new file mode 100644 index 000000000..4ea1dbab9 --- /dev/null +++ b/otherarch/sdcpp/preprocessing.hpp @@ -0,0 +1,227 @@ +#ifndef __PREPROCESSING_HPP__ +#define __PREPROCESSING_HPP__ + +#include "ggml_extend.hpp" +#define M_PI_ 3.14159265358979323846 + +void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) { + struct ggml_init_params params; + params.mem_size = 20 * 1024 * 1024; // 10 + params.mem_buffer = NULL; + params.no_alloc = false; + struct ggml_context* ctx0 = ggml_init(params); + struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1); + ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel)); + ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1); + ggml_cgraph* gf = ggml_new_graph(ctx0); + ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output)); + ggml_graph_compute_with_ctx(ctx0, gf, 1); + ggml_free(ctx0); +} + +void gaussian_kernel(struct ggml_tensor* kernel) { + int ks_mid = kernel->ne[0] / 2; + float sigma = 1.4f; + float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f)); + for (int y = 0; y < kernel->ne[0]; y++) { + float gx = -ks_mid + y; + for (int x = 0; x < kernel->ne[1]; x++) { + float gy = -ks_mid + x; + float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal; + ggml_tensor_set_f32(kernel, k_, x, y); + } + } +} + +void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) { + for (int iy = 0; iy < rgb_img->ne[1]; iy++) { + for (int ix = 0; ix < rgb_img->ne[0]; ix++) { + float r = ggml_tensor_get_f32(rgb_img, ix, iy); + float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1); + float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2); + float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b; + ggml_tensor_set_f32(grayscale, gray, ix, iy); + } + } +} + +void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) { + int n_elements = ggml_nelements(h); + float* dx = (float*)x->data; + float* dy = (float*)y->data; + float* dh = (float*)h->data; + for (int i = 0; i < n_elements; i++) { + dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]); + } +} + +void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) { + int n_elements = ggml_nelements(h); + float* dx = (float*)x->data; + float* dy = (float*)y->data; + float* dh = (float*)h->data; + for (int i = 0; i < n_elements; i++) { + dh[i] = atan2f(dy[i], dx[i]); + } +} + +void normalize_tensor(struct ggml_tensor* g) { + int n_elements = ggml_nelements(g); + float* dg = (float*)g->data; + float max = -INFINITY; + for (int i = 0; i < n_elements; i++) { + max = dg[i] > max ? dg[i] : max; + } + max = 1.0f / max; + for (int i = 0; i < n_elements; i++) { + dg[i] *= max; + } +} + +void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) { + for (int iy = 1; iy < result->ne[1] - 1; iy++) { + for (int ix = 1; ix < result->ne[0] - 1; ix++) { + float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_; + angle = angle < 0.0f ? 
angle += 180.0f : angle; + float q = 1.0f; + float r = 1.0f; + + // angle 0 + if ((0 <= angle && angle < 22.5f) || (157.5f <= angle && angle <= 180)) { + q = ggml_tensor_get_f32(G, ix, iy + 1); + r = ggml_tensor_get_f32(G, ix, iy - 1); + } + // angle 45 + else if (22.5f <= angle && angle < 67.5f) { + q = ggml_tensor_get_f32(G, ix + 1, iy - 1); + r = ggml_tensor_get_f32(G, ix - 1, iy + 1); + } + // angle 90 + else if (67.5f <= angle && angle < 112.5f) { + q = ggml_tensor_get_f32(G, ix + 1, iy); + r = ggml_tensor_get_f32(G, ix - 1, iy); + } + // angle 135 + else if (112.5f <= angle && angle < 157.5f) { + q = ggml_tensor_get_f32(G, ix - 1, iy - 1); + r = ggml_tensor_get_f32(G, ix + 1, iy + 1); + } + + float cur = ggml_tensor_get_f32(G, ix, iy); + if ((cur >= q) && (cur >= r)) { + ggml_tensor_set_f32(result, cur, ix, iy); + } else { + ggml_tensor_set_f32(result, 0.0f, ix, iy); + } + } + } +} + +void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) { + int n_elements = ggml_nelements(img); + float* imd = (float*)img->data; + float max = -INFINITY; + for (int i = 0; i < n_elements; i++) { + max = imd[i] > max ? imd[i] : max; + } + float ht = max * high_threshold; + float lt = ht * low_threshold; + for (int i = 0; i < n_elements; i++) { + float img_v = imd[i]; + if (img_v >= ht) { // strong pixel + imd[i] = strong; + } else if (img_v <= ht && img_v >= lt) { // weak pixel + imd[i] = weak; + } + } + + for (int iy = 0; iy < img->ne[1]; iy++) { + for (int ix = 0; ix < img->ne[0]; ix++) { + if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) { + ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy); + } else { + ggml_tensor_set_f32(img, 0.0f, ix, iy); + } + } + } + + // hysteresis + for (int iy = 1; iy < img->ne[1] - 1; iy++) { + for (int ix = 1; ix < img->ne[0] - 1; ix++) { + float imd_v = ggml_tensor_get_f32(img, ix, iy); + if (imd_v == weak) { + if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong || + ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong || + ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) { + ggml_tensor_set_f32(img, strong, ix, iy); + } else { + ggml_tensor_set_f32(img, 0.0f, ix, iy); + } + } + } + } +} + +uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) { + struct ggml_init_params params; + params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB + params.mem_buffer = NULL; + params.no_alloc = false; + struct ggml_context* work_ctx = ggml_init(params); + + if (!work_ctx) { + LOG_ERROR("ggml_init() failed"); + return NULL; + } + + float kX[9] = { + -1, 0, 1, + -2, 0, 2, + -1, 0, 1}; + + float kY[9] = { + 1, 2, 1, + 0, 0, 0, + -1, -2, -1}; + + // generate kernel + int kernel_size = 5; + struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1); + struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1); + memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx)); + struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1); + memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky)); + gaussian_kernel(gkernel); + struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); + struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx,
GGML_TYPE_F32, width, height, 1, 1); + struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray); + sd_image_to_tensor(img, image); + grayscale(image, image_gray); + convolve(image_gray, image_gray, gkernel, 2); + convolve(image_gray, iX, sf_kx, 1); + convolve(image_gray, iY, sf_ky, 1); + prop_hypot(iX, iY, G); + normalize_tensor(G); + prop_arctan2(iX, iY, tetha); + non_max_supression(image_gray, G, tetha); + threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong); + // to RGB channels + for (int iy = 0; iy < height; iy++) { + for (int ix = 0; ix < width; ix++) { + float gray = ggml_tensor_get_f32(image_gray, ix, iy); + gray = inverse ? 1.0f - gray : gray; + ggml_tensor_set_f32(image, gray, ix, iy); + ggml_tensor_set_f32(image, gray, ix, iy, 1); + ggml_tensor_set_f32(image, gray, ix, iy, 2); + } + } + free(img); + uint8_t* output = sd_tensor_to_image(image); + ggml_free(work_ctx); + return output; +} + +#endif // __PREPROCESSING_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/rng.hpp b/otherarch/sdcpp/rng.hpp new file mode 100644 index 000000000..3340be618 --- /dev/null +++ b/otherarch/sdcpp/rng.hpp @@ -0,0 +1,35 @@ +#ifndef __RNG_H__ +#define __RNG_H__ + +#include +#include + +class RNG { +public: + virtual void manual_seed(uint64_t seed) = 0; + virtual std::vector randn(uint32_t n) = 0; +}; + +class STDDefaultRNG : public RNG { +private: + std::default_random_engine generator; + +public: + void manual_seed(uint64_t seed) { + generator.seed((unsigned int)seed); + } + + std::vector randn(uint32_t n) { + std::vector result; + float mean = 0.0; + float stddev = 1.0; + std::normal_distribution distribution(mean, stddev); + for (uint32_t i = 0; i < n; i++) { + float random_number = distribution(generator); + result.push_back(random_number); + } + return result; + } +}; + +#endif // __RNG_H__ \ No newline at end of file diff --git a/otherarch/sdcpp/rng_philox.hpp b/otherarch/sdcpp/rng_philox.hpp new file mode 100644 index 000000000..33fea9c5b --- /dev/null +++ b/otherarch/sdcpp/rng_philox.hpp @@ -0,0 +1,125 @@ +#ifndef __RNG_PHILOX_H__ +#define __RNG_PHILOX_H__ + +#include +#include + +#include "rng.hpp" + +// RNG imitiating torch cuda randn on CPU. +// Port from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/5ef669de080814067961f28357256e8fe27544f4/modules/rng_philox.py +class PhiloxRNG : public RNG { +private: + uint64_t seed; + uint32_t offset; + +private: + std::vector philox_m = {0xD2511F53, 0xCD9E8D57}; + std::vector philox_w = {0x9E3779B9, 0xBB67AE85}; + float two_pow32_inv = 2.3283064e-10f; + float two_pow32_inv_2pi = 2.3283064e-10f * 6.2831855f; + + std::vector uint32(uint64_t x) { + std::vector result(2); + result[0] = static_cast(x & 0xFFFFFFFF); + result[1] = static_cast(x >> 32); + return result; + } + + std::vector> uint32(const std::vector& x) { + uint32_t N = (uint32_t)x.size(); + std::vector> result(2, std::vector(N)); + + for (uint32_t i = 0; i < N; ++i) { + result[0][i] = static_cast(x[i] & 0xFFFFFFFF); + result[1][i] = static_cast(x[i] >> 32); + } + + return result; + } + + // A single round of the Philox 4x32 random number generator. 
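// Note (explanatory, not part of the patch itself): Philox is a counter-based generator,
// so each output is a pure function of (seed, offset, lane index) rather than of hidden
// generator state. randn(n) below fills a 4xN counter block with counter[0] = offset and
// counter[2] = lane index, puts the seed into a 2xN key, runs ten rounds of this mixing
// function, and turns pairs of 32-bit outputs into normally distributed floats via the
// Box-Muller transform. One round multiplies counter[0] and counter[2] by the philox_m
// constants, splits the 64-bit products into lo/hi halves, XORs the hi halves with the
// remaining counter lanes and the key, and the key is bumped by philox_w between rounds.
// A minimal usage sketch through the RNG interface from rng.hpp (mirroring how
// StableDiffusionGGML stores its rng) might look like:
//     std::shared_ptr<RNG> rng = std::make_shared<PhiloxRNG>();
//     rng->manual_seed(42);
//     std::vector<float> noise = rng->randn(4 * 64 * 64);  // noise for one 64x64 latent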
+ void philox4_round(std::vector>& counter, + const std::vector>& key) { + uint32_t N = (uint32_t)counter[0].size(); + for (uint32_t i = 0; i < N; i++) { + std::vector v1 = uint32(static_cast(counter[0][i]) * static_cast(philox_m[0])); + std::vector v2 = uint32(static_cast(counter[2][i]) * static_cast(philox_m[1])); + + counter[0][i] = v2[1] ^ counter[1][i] ^ key[0][i]; + counter[1][i] = v2[0]; + counter[2][i] = v1[1] ^ counter[3][i] ^ key[1][i]; + counter[3][i] = v1[0]; + } + } + + // Generates 32-bit random numbers using the Philox 4x32 random number generator. + // Parameters: + // counter : A 4xN array of 32-bit integers representing the counter values (offset into generation). + // key : A 2xN array of 32-bit integers representing the key values (seed). + // rounds : The number of rounds to perform. + // Returns: + // std::vector>: A 4xN array of 32-bit integers containing the generated random numbers. + std::vector> philox4_32(std::vector>& counter, + std::vector>& key, + int rounds = 10) { + uint32_t N = (uint32_t)counter[0].size(); + for (int i = 0; i < rounds - 1; ++i) { + philox4_round(counter, key); + + for (uint32_t j = 0; j < N; ++j) { + key[0][j] += philox_w[0]; + key[1][j] += philox_w[1]; + } + } + + philox4_round(counter, key); + return counter; + } + + float box_muller(float x, float y) { + float u = x * two_pow32_inv + two_pow32_inv / 2; + float v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2; + + float s = sqrt(-2.0f * log(u)); + + float r1 = s * sin(v); + return r1; + } + +public: + PhiloxRNG(uint64_t seed = 0) { + this->seed = seed; + this->offset = 0; + } + + void manual_seed(uint64_t seed) { + this->seed = seed; + this->offset = 0; + } + + std::vector randn(uint32_t n) { + std::vector> counter(4, std::vector(n, 0)); + for (uint32_t i = 0; i < n; i++) { + counter[0][i] = this->offset; + } + + for (uint32_t i = 0; i < n; i++) { + counter[2][i] = i; + } + this->offset += 1; + + std::vector key(n, this->seed); + std::vector> key_uint32 = uint32(key); + + std::vector> g = philox4_32(counter, key_uint32); + + std::vector result; + for (uint32_t i = 0; i < n; ++i) { + result.push_back(box_muller((float)g[0][i], (float)g[1][i])); + } + return result; + } +}; + +#endif // __RNG_PHILOX_H__ \ No newline at end of file diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp new file mode 100644 index 000000000..5313e7dba --- /dev/null +++ b/otherarch/sdcpp/stable-diffusion.cpp @@ -0,0 +1,1778 @@ +#include "ggml_extend.hpp" + +#include "model.h" +#include "rng.hpp" +#include "rng_philox.hpp" +#include "stable-diffusion.h" +#include "util.h" + +#include "clip.hpp" +#include "control.hpp" +#include "denoiser.hpp" +#include "esrgan.hpp" +#include "lora.hpp" +#include "tae.hpp" +#include "unet.hpp" +#include "vae.hpp" + +const char* model_version_to_str[] = { + "1.x", + "2.x", + "XL", + "SVD", +}; + +const char* sampling_methods_str[] = { + "Euler A", + "Euler", + "Heun", + "DPM2", + "DPM++ (2s)", + "DPM++ (2M)", + "modified DPM++ (2M)", + "LCM", +}; + +char GGMLBlock::temp_buffer[1024 * 1024 * 10]; + +/*================================================== Helper Functions ================================================*/ + +void calculate_alphas_cumprod(float* alphas_cumprod, + float linear_start = 0.00085f, + float linear_end = 0.0120, + int timesteps = TIMESTEPS) { + float ls_sqrt = sqrtf(linear_start); + float le_sqrt = sqrtf(linear_end); + float amount = le_sqrt - ls_sqrt; + float product = 1.0f; + for (int i = 0; i < timesteps; i++) { + float 
beta = ls_sqrt + amount * ((float)i / (timesteps - 1)); + product *= 1.0f - powf(beta, 2.0f); + alphas_cumprod[i] = product; + } +} + +/*=============================================== StableDiffusionGGML ================================================*/ + +class StableDiffusionGGML { +public: + ggml_backend_t backend = NULL; // general backend + ggml_type model_data_type = GGML_TYPE_COUNT; + + SDVersion version; + bool vae_decode_only = false; + bool free_params_immediately = false; + + std::shared_ptr rng = std::make_shared(); + int n_threads = -1; + float scale_factor = 0.18215f; + + std::shared_ptr cond_stage_model; + std::shared_ptr clip_vision; // for svd + std::shared_ptr diffusion_model; + std::shared_ptr first_stage_model; + std::shared_ptr tae_first_stage; + std::shared_ptr control_net; + + std::string taesd_path; + bool use_tiny_autoencoder = false; + bool vae_tiling = false; + + std::map tensors; + + std::string lora_model_dir; + // lora_name => multiplier + std::unordered_map curr_lora_state; + + std::shared_ptr denoiser = std::make_shared(); + + StableDiffusionGGML() = default; + + StableDiffusionGGML(int n_threads, + bool vae_decode_only, + bool free_params_immediately, + std::string lora_model_dir, + rng_type_t rng_type) + : n_threads(n_threads), + vae_decode_only(vae_decode_only), + free_params_immediately(free_params_immediately), + lora_model_dir(lora_model_dir) { + if (rng_type == STD_DEFAULT_RNG) { + rng = std::make_shared(); + } else if (rng_type == CUDA_RNG) { + rng = std::make_shared(); + } + } + + ~StableDiffusionGGML() { + ggml_backend_free(backend); + } + + bool load_from_file(const std::string& model_path, + const std::string& vae_path, + const std::string control_net_path, + const std::string embeddings_path, + const std::string& taesd_path, + bool vae_tiling_, + ggml_type wtype, + schedule_t schedule, + bool control_net_cpu) { + use_tiny_autoencoder = taesd_path.size() > 0; +#ifdef SD_USE_CUBLAS + LOG_DEBUG("Using CUDA backend"); + backend = ggml_backend_cuda_init(0); +#endif +#ifdef SD_USE_METAL + LOG_DEBUG("Using Metal backend"); + ggml_metal_log_set_callback(ggml_log_callback_default, nullptr); + backend = ggml_backend_metal_init(); +#endif + + if (!backend) { + LOG_DEBUG("Using CPU backend"); + backend = ggml_backend_cpu_init(); + } +#ifdef SD_USE_FLASH_ATTENTION +#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) + LOG_WARN("Flash Attention not supported with GPU Backend"); +#else + LOG_INFO("Flash Attention enabled"); +#endif +#endif + LOG_INFO("loading model from '%s'", model_path.c_str()); + ModelLoader model_loader; + + vae_tiling = vae_tiling_; + + if (!model_loader.init_from_file(model_path)) { + LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str()); + return false; + } + + if (vae_path.size() > 0) { + LOG_INFO("loading vae from '%s'", vae_path.c_str()); + if (!model_loader.init_from_file(vae_path, "vae.")) { + LOG_WARN("loading vae from '%s' failed", vae_path.c_str()); + } + } + + version = model_loader.get_sd_version(); + if (version == VERSION_COUNT) { + LOG_ERROR("get sd version from file failed: '%s'", model_path.c_str()); + return false; + } + LOG_INFO("Stable Diffusion %s ", model_version_to_str[version]); + if (wtype == GGML_TYPE_COUNT) { + model_data_type = model_loader.get_sd_wtype(); + } else { + model_data_type = wtype; + } + LOG_INFO("Stable Diffusion weight type: %s", ggml_type_name(model_data_type)); + LOG_DEBUG("ggml tensor size = %d bytes", (int)sizeof(ggml_tensor)); + + if (version == VERSION_XL) { + 
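// Note (not part of the patch itself): scale_factor is the VAE latent scaling constant
// from the original model configs, roughly the reciprocal of the latent standard
// deviation. SD 1.x/2.x use 0.18215 (the member default above), while SDXL ships with
// 0.13025, so encoded latents are multiplied/divided by a different constant at
// encode/decode time.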
scale_factor = 0.13025f; + if (vae_path.size() == 0 && taesd_path.size() == 0) { + LOG_WARN( + "!!!It looks like you are using SDXL model. " + "If you find that the generated images are completely black, " + "try specifying SDXL VAE FP16 Fix with the --vae parameter. " + "You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors"); + } + } + + if (version == VERSION_SVD) { + clip_vision = std::make_shared(backend, model_data_type); + clip_vision->alloc_params_buffer(); + clip_vision->get_param_tensors(tensors, "cond_stage_model."); + + diffusion_model = std::make_shared(backend, model_data_type, version); + diffusion_model->alloc_params_buffer(); + diffusion_model->get_param_tensors(tensors, "model.diffusion_model"); + + first_stage_model = std::make_shared(backend, model_data_type, vae_decode_only, true); + LOG_DEBUG("vae_decode_only %d", vae_decode_only); + first_stage_model->alloc_params_buffer(); + first_stage_model->get_param_tensors(tensors, "first_stage_model"); + } else { + cond_stage_model = std::make_shared(backend, model_data_type, version); + cond_stage_model->alloc_params_buffer(); + cond_stage_model->get_param_tensors(tensors, "cond_stage_model."); + + cond_stage_model->embd_dir = embeddings_path; + + diffusion_model = std::make_shared(backend, model_data_type, version); + diffusion_model->alloc_params_buffer(); + diffusion_model->get_param_tensors(tensors, "model.diffusion_model"); + + ggml_type vae_type = model_data_type; + if (version == VERSION_XL) { + vae_type = GGML_TYPE_F32; // avoid nan, not work... + } + + if (!use_tiny_autoencoder) { + first_stage_model = std::make_shared(backend, vae_type, vae_decode_only); + first_stage_model->alloc_params_buffer(); + first_stage_model->get_param_tensors(tensors, "first_stage_model"); + } else { + tae_first_stage = std::make_shared(backend, model_data_type, vae_decode_only); + } + + if (control_net_path.size() > 0) { + ggml_backend_t cn_backend = NULL; + if (control_net_cpu && !ggml_backend_is_cpu(backend)) { + LOG_DEBUG("ControlNet: Using CPU backend"); + cn_backend = ggml_backend_cpu_init(); + } else { + cn_backend = backend; + } + control_net = std::make_shared(cn_backend, model_data_type, version); + } + + LOG_DEBUG("loading vocab"); + std::string merges_utf8_str = model_loader.load_merges(); + if (merges_utf8_str.size() == 0) { + LOG_ERROR("get merges failed: '%s'", model_path.c_str()); + return false; + } + cond_stage_model->tokenizer.load_from_merges(merges_utf8_str); + } + + struct ggml_init_params params; + params.mem_size = static_cast(10 * 1024) * 1024; // 10M + params.mem_buffer = NULL; + params.no_alloc = false; + // LOG_DEBUG("mem_size %u ", params.mem_size); + struct ggml_context* ctx = ggml_init(params); // for alphas_cumprod and is_using_v_parameterization check + GGML_ASSERT(ctx != NULL); + ggml_tensor* alphas_cumprod_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, TIMESTEPS); + calculate_alphas_cumprod((float*)alphas_cumprod_tensor->data); + + // load weights + LOG_DEBUG("loading weights"); + int64_t t0 = ggml_time_ms(); + + std::set ignore_tensors; + tensors["alphas_cumprod"] = alphas_cumprod_tensor; + if (use_tiny_autoencoder) { + ignore_tensors.insert("first_stage_model."); + } + if (vae_decode_only) { + ignore_tensors.insert("first_stage_model.encoder"); + ignore_tensors.insert("first_stage_model.quant"); + } + if (version == VERSION_SVD) { + ignore_tensors.insert("conditioner.embedders.3"); + } + bool success = model_loader.load_tensors(tensors, backend, 
ignore_tensors); + if (!success) { + LOG_ERROR("load tensors from model loader failed"); + ggml_free(ctx); + return false; + } + + // LOG_DEBUG("model size = %.2fMB", total_size / 1024.0 / 1024.0); + + if (version == VERSION_SVD) { + // diffusion_model->test(); + // first_stage_model->test(); + // return false; + } else { + size_t clip_params_mem_size = cond_stage_model->get_params_mem_size(); + size_t unet_params_mem_size = diffusion_model->get_params_mem_size(); + size_t vae_params_mem_size = 0; + if (!use_tiny_autoencoder) { + vae_params_mem_size = first_stage_model->get_params_mem_size(); + } else { + if (!tae_first_stage->load_from_file(taesd_path)) { + return false; + } + vae_params_mem_size = tae_first_stage->get_params_mem_size(); + } + size_t control_net_params_mem_size = 0; + if (control_net) { + if (!control_net->load_from_file(control_net_path)) { + return false; + } + control_net_params_mem_size = control_net->get_params_mem_size(); + } + + size_t total_params_size = clip_params_mem_size + clip_params_mem_size + clip_params_mem_size + control_net_params_mem_size; + LOG_INFO("total params memory size = %.2fMB (clip %.2fMB, unet %.2fMB, vae %.2fMB, controlnet %.2fMB)", + total_params_size / 1024.0 / 1024.0, + clip_params_mem_size / 1024.0 / 1024.0, + unet_params_mem_size / 1024.0 / 1024.0, + vae_params_mem_size / 1024.0 / 1024.0, + control_net_params_mem_size / 1024.0 / 1024.0); + } + + int64_t t1 = ggml_time_ms(); + LOG_INFO("loading model from '%s' completed, taking %.2fs", model_path.c_str(), (t1 - t0) * 1.0f / 1000); + + // check is_using_v_parameterization_for_sd2 + bool is_using_v_parameterization = false; + if (version == VERSION_2_x) { + if (is_using_v_parameterization_for_sd2(ctx)) { + is_using_v_parameterization = true; + } + } else if (version == VERSION_SVD) { + // TODO: V_PREDICTION_EDM + is_using_v_parameterization = true; + } + + if (is_using_v_parameterization) { + denoiser = std::make_shared(); + LOG_INFO("running in v-prediction mode"); + } else { + LOG_INFO("running in eps-prediction mode"); + } + + if (schedule != DEFAULT) { + switch (schedule) { + case DISCRETE: + LOG_INFO("running with discrete schedule"); + denoiser->schedule = std::make_shared(); + break; + case KARRAS: + LOG_INFO("running with Karras schedule"); + denoiser->schedule = std::make_shared(); + break; + case DEFAULT: + // Don't touch anything. 
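The total printed above adds clip_params_mem_size three times; counting each component once, which appears to be the intent, looks like this sketch (helper name hypothetical):

```cpp
#include <cstddef>
#include <cstdio>

// Per-component parameter memory report, each component counted exactly once.
void report_params_mem(size_t clip_b, size_t unet_b, size_t vae_b, size_t control_net_b) {
    const double MB = 1024.0 * 1024.0;
    size_t total = clip_b + unet_b + vae_b + control_net_b;
    printf("total params memory size = %.2fMB (clip %.2fMB, unet %.2fMB, vae %.2fMB, controlnet %.2fMB)\n",
           total / MB, clip_b / MB, unet_b / MB, vae_b / MB, control_net_b / MB);
}
```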
+ break; + default: + LOG_ERROR("Unknown schedule %i", schedule); + abort(); + } + } + + for (int i = 0; i < TIMESTEPS; i++) { + denoiser->schedule->alphas_cumprod[i] = ((float*)alphas_cumprod_tensor->data)[i]; + denoiser->schedule->sigmas[i] = std::sqrt((1 - denoiser->schedule->alphas_cumprod[i]) / denoiser->schedule->alphas_cumprod[i]); + denoiser->schedule->log_sigmas[i] = std::log(denoiser->schedule->sigmas[i]); + } + + LOG_DEBUG("finished loaded file"); + ggml_free(ctx); + return true; + } + + bool is_using_v_parameterization_for_sd2(ggml_context* work_ctx) { + struct ggml_tensor* x_t = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 4, 1); + ggml_set_f32(x_t, 0.5); + struct ggml_tensor* c = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 1024, 2, 1, 1); + ggml_set_f32(c, 0.5); + + struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1); + ggml_set_f32(timesteps, 999); + int64_t t0 = ggml_time_ms(); + struct ggml_tensor* out = ggml_dup_tensor(work_ctx, x_t); + diffusion_model->compute(n_threads, x_t, timesteps, c, NULL, NULL, -1, {}, 0.f, &out); + diffusion_model->free_compute_buffer(); + + double result = 0.f; + { + float* vec_x = (float*)x_t->data; + float* vec_out = (float*)out->data; + + int64_t n = ggml_nelements(out); + + for (int i = 0; i < n; i++) { + result += ((double)vec_out[i] - (double)vec_x[i]); + } + result /= n; + } + int64_t t1 = ggml_time_ms(); + LOG_DEBUG("check is_using_v_parameterization_for_sd2, taking %.2fs", (t1 - t0) * 1.0f / 1000); + return result < -1; + } + + void apply_lora(const std::string& lora_name, float multiplier) { + int64_t t0 = ggml_time_ms(); + std::string st_file_path = path_join(lora_model_dir, lora_name + ".safetensors"); + std::string ckpt_file_path = path_join(lora_model_dir, lora_name + ".ckpt"); + std::string file_path; + if (file_exists(st_file_path)) { + file_path = st_file_path; + } else if (file_exists(ckpt_file_path)) { + file_path = ckpt_file_path; + } else { + LOG_WARN("can not find %s or %s for lora %s", st_file_path.c_str(), ckpt_file_path.c_str(), lora_name.c_str()); + return; + } + LoraModel lora(backend, model_data_type, file_path); + if (!lora.load_from_file()) { + LOG_WARN("load lora tensors from %s failed", file_path.c_str()); + return; + } + + lora.multiplier = multiplier; + lora.apply(tensors, n_threads); + lora.free_params_buffer(); + + int64_t t1 = ggml_time_ms(); + + LOG_INFO("lora '%s' applied, taking %.2fs", + lora_name.c_str(), + (t1 - t0) * 1.0f / 1000); + } + + void apply_loras(const std::unordered_map& lora_state) { + if (lora_state.size() > 0 && model_data_type != GGML_TYPE_F16 && model_data_type != GGML_TYPE_F32) { + LOG_WARN("In quantized models when applying LoRA, the images have poor quality."); + } + std::unordered_map lora_state_diff; + for (auto& kv : lora_state) { + const std::string& lora_name = kv.first; + float multiplier = kv.second; + + if (curr_lora_state.find(lora_name) != curr_lora_state.end()) { + float curr_multiplier = curr_lora_state[lora_name]; + float multiplier_diff = multiplier - curr_multiplier; + if (multiplier_diff != 0.f) { + lora_state_diff[lora_name] = multiplier_diff; + } + } else { + lora_state_diff[lora_name] = multiplier; + } + } + + for (auto& kv : lora_state_diff) { + apply_lora(kv.first, kv.second); + } + + curr_lora_state = lora_state; + } + + std::pair get_learned_condition(ggml_context* work_ctx, + const std::string& text, + int clip_skip, + int width, + int height, + bool force_zero_embeddings = false) { + cond_stage_model->set_clip_skip(clip_skip); + 
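apply_loras only re-applies the difference between the requested multiplier and the one already baked into the weights, so raising a LoRA from 0.5 to 0.8 costs one incremental apply instead of a reload. The bookkeeping, isolated:

```cpp
#include <string>
#include <unordered_map>

// Compute the multiplier delta that still needs to be applied per LoRA.
std::unordered_map<std::string, float> lora_deltas(
        const std::unordered_map<std::string, float>& requested,
        const std::unordered_map<std::string, float>& applied) {
    std::unordered_map<std::string, float> diff;
    for (const auto& kv : requested) {
        auto it = applied.find(kv.first);
        float delta = (it == applied.end()) ? kv.second : kv.second - it->second;
        if (delta != 0.0f) {
            diff[kv.first] = delta;  // e.g. requested 0.8, applied 0.5 -> apply 0.3 more
        }
    }
    return diff;
}
```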
auto tokens_and_weights = cond_stage_model->tokenize(text, true); + std::vector& tokens = tokens_and_weights.first; + std::vector& weights = tokens_and_weights.second; + int64_t t0 = ggml_time_ms(); + struct ggml_tensor* hidden_states = NULL; // [N, n_token, hidden_size] + struct ggml_tensor* pooled = NULL; + + auto input_ids = vector_to_ggml_tensor_i32(work_ctx, tokens); + struct ggml_tensor* input_ids2 = NULL; + size_t max_token_idx = 0; + if (version == VERSION_XL) { + auto it = std::find(tokens.begin(), tokens.end(), EOS_TOKEN_ID); + if (it != tokens.end()) { + std::fill(std::next(it), tokens.end(), 0); + } + + max_token_idx = std::min(std::distance(tokens.begin(), it), tokens.size() - 1); + + input_ids2 = vector_to_ggml_tensor_i32(work_ctx, tokens); + + // for (int i = 0; i < tokens.size(); i++) { + // printf("%d ", tokens[i]); + // } + // printf("\n"); + } + + cond_stage_model->compute(n_threads, input_ids, input_ids2, max_token_idx, false, &hidden_states, work_ctx); + if (version == VERSION_XL) { + cond_stage_model->compute(n_threads, input_ids, input_ids2, max_token_idx, true, &pooled, work_ctx); + } + // if (pooled != NULL) { + // print_ggml_tensor(hidden_states); + // print_ggml_tensor(pooled); + // } + + int64_t t1 = ggml_time_ms(); + LOG_DEBUG("computing condition graph completed, taking %" PRId64 " ms", t1 - t0); + ggml_tensor* result = ggml_dup_tensor(work_ctx, hidden_states); + { + float original_mean = ggml_tensor_mean(hidden_states); + for (int i2 = 0; i2 < hidden_states->ne[2]; i2++) { + for (int i1 = 0; i1 < hidden_states->ne[1]; i1++) { + for (int i0 = 0; i0 < hidden_states->ne[0]; i0++) { + float value = ggml_tensor_get_f32(hidden_states, i0, i1, i2); + value *= weights[i1]; + ggml_tensor_set_f32(result, value, i0, i1, i2); + } + } + } + float new_mean = ggml_tensor_mean(result); + ggml_tensor_scale(result, (original_mean / new_mean)); + } + if (force_zero_embeddings) { + float* vec = (float*)result->data; + for (int i = 0; i < ggml_nelements(result); i++) { + vec[i] = 0; + } + } + + ggml_tensor* vec = NULL; + if (version == VERSION_XL) { + int out_dim = 256; + vec = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, diffusion_model->unet.adm_in_channels); + // [0:1280] + size_t offset = 0; + memcpy(vec->data, pooled->data, ggml_nbytes(pooled)); + offset += ggml_nbytes(pooled); + + // original_size_as_tuple + float orig_width = (float)width; + float orig_height = (float)height; + std::vector timesteps = {orig_height, orig_width}; + + ggml_tensor* embed_view = ggml_view_2d(work_ctx, vec, out_dim, 2, ggml_type_size(GGML_TYPE_F32) * out_dim, offset); + offset += ggml_nbytes(embed_view); + set_timestep_embedding(timesteps, embed_view, out_dim); + // print_ggml_tensor(ggml_reshape_1d(work_ctx, embed_view, out_dim * 2)); + // crop_coords_top_left + float crop_coord_top = 0.f; + float crop_coord_left = 0.f; + timesteps = {crop_coord_top, crop_coord_left}; + embed_view = ggml_view_2d(work_ctx, vec, out_dim, 2, ggml_type_size(GGML_TYPE_F32) * out_dim, offset); + offset += ggml_nbytes(embed_view); + set_timestep_embedding(timesteps, embed_view, out_dim); + // print_ggml_tensor(ggml_reshape_1d(work_ctx, embed_view, out_dim * 2)); + // target_size_as_tuple + float target_width = (float)width; + float target_height = (float)height; + timesteps = {target_height, target_width}; + embed_view = ggml_view_2d(work_ctx, vec, out_dim, 2, ggml_type_size(GGML_TYPE_F32) * out_dim, offset); + offset += ggml_nbytes(embed_view); + set_timestep_embedding(timesteps, embed_view, out_dim); + // 
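The loop above multiplies each token's hidden state by its prompt weight and then rescales the whole tensor so its mean is unchanged, so emphasis does not shift the overall embedding magnitude. A sketch on a flat buffer (that the weights come from "(word:1.3)"-style prompt syntax is an assumption about the tokenizer, which is not shown here):

```cpp
#include <cstddef>
#include <vector>

// hidden:  n_token * hidden_size floats, laid out token-major
// weights: one weight per token
void weight_embeddings(std::vector<float>& hidden,
                       const std::vector<float>& weights,
                       size_t hidden_size) {
    double sum_before = 0.0, sum_after = 0.0;
    for (float v : hidden) sum_before += v;
    for (size_t t = 0; t < weights.size(); t++) {
        for (size_t k = 0; k < hidden_size; k++) {
            hidden[t * hidden_size + k] *= weights[t];
        }
    }
    for (float v : hidden) sum_after += v;
    if (sum_after != 0.0) {
        // rescale so the mean (equivalently the sum) matches the unweighted tensor
        float scale = (float)(sum_before / sum_after);
        for (float& v : hidden) v *= scale;
    }
}
```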
print_ggml_tensor(ggml_reshape_1d(work_ctx, embed_view, out_dim * 2)); + GGML_ASSERT(offset == ggml_nbytes(vec)); + } + // print_ggml_tensor(result); + return {result, vec}; + } + + std::tuple get_svd_condition(ggml_context* work_ctx, + sd_image_t init_image, + int width, + int height, + int fps = 6, + int motion_bucket_id = 127, + float augmentation_level = 0.f, + bool force_zero_embeddings = false) { + // c_crossattn + int64_t t0 = ggml_time_ms(); + struct ggml_tensor* c_crossattn = NULL; + { + if (force_zero_embeddings) { + c_crossattn = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, clip_vision->vision_model.projection_dim); + ggml_set_f32(c_crossattn, 0.f); + } else { + sd_image_f32_t image = sd_image_t_to_sd_image_f32_t(init_image); + sd_image_f32_t resized_image = clip_preprocess(image, clip_vision->vision_model.image_size); + free(image.data); + image.data = NULL; + + ggml_tensor* pixel_values = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, resized_image.width, resized_image.height, 3, 1); + sd_image_f32_to_tensor(resized_image.data, pixel_values, false); + free(resized_image.data); + resized_image.data = NULL; + + // print_ggml_tensor(pixel_values); + clip_vision->compute(n_threads, pixel_values, &c_crossattn, work_ctx); + // print_ggml_tensor(c_crossattn); + } + } + + // c_concat + struct ggml_tensor* c_concat = NULL; + { + if (force_zero_embeddings) { + c_concat = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width / 8, height / 8, 4, 1); + ggml_set_f32(c_concat, 0.f); + } else { + ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); + + if (width != init_image.width || height != init_image.height) { + sd_image_f32_t image = sd_image_t_to_sd_image_f32_t(init_image); + sd_image_f32_t resized_image = resize_sd_image_f32_t(image, width, height); + free(image.data); + image.data = NULL; + sd_image_f32_to_tensor(resized_image.data, init_img, false); + free(resized_image.data); + resized_image.data = NULL; + } else { + sd_image_to_tensor(init_image.data, init_img); + } + if (augmentation_level > 0.f) { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, init_img); + ggml_tensor_set_f32_randn(noise, rng); + // encode_pixels += torch.randn_like(pixels) * augmentation_level + ggml_tensor_scale(noise, augmentation_level); + ggml_tensor_add(init_img, noise); + } + print_ggml_tensor(init_img); + ggml_tensor* moments = encode_first_stage(work_ctx, init_img); + print_ggml_tensor(moments); + c_concat = get_first_stage_encoding(work_ctx, moments); + } + print_ggml_tensor(c_concat); + } + + // y + struct ggml_tensor* y = NULL; + { + y = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, diffusion_model->unet.adm_in_channels); + int out_dim = 256; + int fps_id = fps - 1; + std::vector timesteps = {(float)fps_id, (float)motion_bucket_id, augmentation_level}; + set_timestep_embedding(timesteps, y, out_dim); + print_ggml_tensor(y); + } + int64_t t1 = ggml_time_ms(); + LOG_DEBUG("computing svd condition graph completed, taking %" PRId64 " ms", t1 - t0); + return {c_crossattn, c_concat, y}; + } + + ggml_tensor* sample(ggml_context* work_ctx, + ggml_tensor* x_t, + ggml_tensor* noise, + ggml_tensor* c, + ggml_tensor* c_concat, + ggml_tensor* c_vector, + ggml_tensor* uc, + ggml_tensor* uc_concat, + ggml_tensor* uc_vector, + ggml_tensor* control_hint, + float control_strength, + float min_cfg, + float cfg_scale, + sample_method_t method, + const std::vector& sigmas) { + size_t steps = sigmas.size() - 1; + // x_t = load_tensor_from_file(work_ctx, "./rand0.bin"); + // 
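Both the SDXL size/crop conditioning and the SVD fps/motion conditioning above are written through set_timestep_embedding. A sketch of a standard sinusoidal embedding, which is what that helper is assumed to compute (out_dim = 256 in the callers above, even dims only):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Sinusoidal embedding of a scalar t into `dim` floats: cos terms first, then sin.
std::vector<float> timestep_embedding(float t, int dim, float max_period = 10000.0f) {
    int half = dim / 2;
    std::vector<float> emb((size_t)dim, 0.0f);
    for (int i = 0; i < half; i++) {
        float freq = expf(-logf(max_period) * (float)i / (float)half);
        emb[(size_t)i]          = cosf(t * freq);
        emb[(size_t)(half + i)] = sinf(t * freq);
    }
    return emb;
}
```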
print_ggml_tensor(x_t); + struct ggml_tensor* x = ggml_dup_tensor(work_ctx, x_t); + copy_ggml_tensor(x, x_t); + + struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, x_t); + struct ggml_tensor* guided_hint = NULL; + + bool has_unconditioned = cfg_scale != 1.0 && uc != NULL; + + if (noise == NULL) { + // x = x * sigmas[0] + ggml_tensor_scale(x, sigmas[0]); + } else { + // xi = x + noise * sigma_sched[0] + ggml_tensor_scale(noise, sigmas[0]); + ggml_tensor_add(x, noise); + } + + // denoise wrapper + struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* out_uncond = NULL; + if (has_unconditioned) { + out_uncond = ggml_dup_tensor(work_ctx, x); + } + struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x); + + auto denoise = [&](ggml_tensor* input, float sigma, int step) { + if (step == 1) { + pretty_progress(0, (int)steps, 0); + } + int64_t t0 = ggml_time_us(); + + float c_skip = 1.0f; + float c_out = 1.0f; + float c_in = 1.0f; + std::vector scaling = denoiser->get_scalings(sigma); + + if (scaling.size() == 3) { // CompVisVDenoiser + c_skip = scaling[0]; + c_out = scaling[1]; + c_in = scaling[2]; + } else { // CompVisDenoiser + c_out = scaling[0]; + c_in = scaling[1]; + } + + float t = denoiser->schedule->sigma_to_t(sigma); + std::vector timesteps_vec(x->ne[3], t); // [N, ] + auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec); + + copy_ggml_tensor(noised_input, input); + // noised_input = noised_input * c_in + ggml_tensor_scale(noised_input, c_in); + + std::vector controls; + + if (control_hint != NULL) { + control_net->compute(n_threads, noised_input, control_hint, timesteps, c, c_vector); + controls = control_net->controls; + // print_ggml_tensor(controls[12]); + // GGML_ASSERT(0); + } + + // cond + diffusion_model->compute(n_threads, + noised_input, + timesteps, + c, + c_concat, + c_vector, + -1, + controls, + control_strength, + &out_cond); + + float* negative_data = NULL; + if (has_unconditioned) { + // uncond + if (control_hint != NULL) { + control_net->compute(n_threads, noised_input, control_hint, timesteps, uc, uc_vector); + controls = control_net->controls; + } + diffusion_model->compute(n_threads, + noised_input, + timesteps, + uc, + uc_concat, + uc_vector, + -1, + controls, + control_strength, + &out_uncond); + negative_data = (float*)out_uncond->data; + } + float* vec_denoised = (float*)denoised->data; + float* vec_input = (float*)input->data; + float* positive_data = (float*)out_cond->data; + int ne_elements = (int)ggml_nelements(denoised); + for (int i = 0; i < ne_elements; i++) { + float latent_result = positive_data[i]; + if (has_unconditioned) { + // out_uncond + cfg_scale * (out_cond - out_uncond) + int64_t ne3 = out_cond->ne[3]; + if (min_cfg != cfg_scale && ne3 != 1) { + int64_t i3 = i / out_cond->ne[0] * out_cond->ne[1] * out_cond->ne[2]; + float scale = min_cfg + (cfg_scale - min_cfg) * (i3 * 1.0f / ne3); + } else { + latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]); + } + } + // v = latent_result, eps = latent_result + // denoised = (v * c_out + input * c_skip) or (input + eps * c_out) + vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip; + } + int64_t t1 = ggml_time_us(); + if (step > 0) { + pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f); + // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000); + } + }; + + // sample_euler_ancestral + switch (method) { + case EULER_A: { + struct ggml_tensor* noise = 
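The denoise lambda mixes the conditional and unconditional predictions with classifier-free guidance; for video the strength is meant to ramp from min_cfg toward cfg_scale across frames, although in the branch above the per-frame scale appears to be computed without being applied to latent_result. A sketch of the mixing, assuming that ramp is the intent:

```cpp
// out = uncond + scale * (cond - uncond), applied per element.
inline float cfg_mix(float cond, float uncond, float scale) {
    return uncond + scale * (cond - uncond);
}

// Per-frame guidance strength: frame 0 gets min_cfg, later frames move toward cfg_scale.
inline float frame_cfg(float min_cfg, float cfg_scale, int frame, int n_frames) {
    if (n_frames <= 1) return cfg_scale;
    return min_cfg + (cfg_scale - min_cfg) * ((float)frame / (float)n_frames);
}
```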
ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + denoise(x, sigma, i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int i = 0; i < ggml_nelements(d); i++) { + vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma; + } + } + + // get_ancestral_step + float sigma_up = std::min(sigmas[i + 1], + std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); + float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); + + // Euler method + float dt = sigma_down - sigmas[i]; + // x = x + d * dt + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_d[i] * dt; + } + } + + if (sigmas[i + 1] > 0) { + // x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + ggml_tensor_set_f32_randn(noise, rng); + // noise = load_tensor_from_file(work_ctx, "./rand" + std::to_string(i+1) + ".bin"); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; + } + } + } + } + } break; + case EULER: // Implemented without any sigma churn + { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + denoise(x, sigma, i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma; + } + } + + float dt = sigmas[i + 1] - sigma; + // x = x + d * dt + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } + } + } break; + case HEUN: { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + denoise(x, sigmas[i], -(i + 1)); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + } + + float dt = sigmas[i + 1] - sigmas[i]; + if (sigmas[i + 1] == 0) { + // Euler step + // x = x + d * dt + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // Heun step + float* vec_d = (float*)d->data; + float* vec_d2 = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = vec_x[j] + vec_d[j] * dt; + } + + denoise(x2, sigmas[i + 1], i + 1); + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1]; + vec_d[j] = (vec_d[j] + d2) / 2; + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } + } + } break; + case DPM2: { + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = 
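The EULER_A branch above performs one ancestral Euler step per element; condensed into a standalone helper for reference:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// One Euler-ancestral step:
//   d        = (x - denoised) / sigma
//   sigma_up = min(sigma_next, sqrt(sigma_next^2 * (sigma^2 - sigma_next^2) / sigma^2))
//   sigma_dn = sqrt(sigma_next^2 - sigma_up^2)
//   x       <- x + d * (sigma_dn - sigma) + noise * sigma_up   (noise only if sigma_next > 0)
void euler_ancestral_step(std::vector<float>& x,
                          const std::vector<float>& denoised,
                          const std::vector<float>& noise,
                          float sigma, float sigma_next) {
    float sigma_up = std::min(sigma_next,
        std::sqrt(sigma_next * sigma_next * (sigma * sigma - sigma_next * sigma_next) / (sigma * sigma)));
    float sigma_down = std::sqrt(sigma_next * sigma_next - sigma_up * sigma_up);
    float dt = sigma_down - sigma;
    for (size_t j = 0; j < x.size(); j++) {
        float d = (x[j] - denoised[j]) / sigma;
        x[j] += d * dt;
        if (sigma_next > 0.0f) {
            x[j] += noise[j] * sigma_up;
        }
    }
}
```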
ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + denoise(x, sigmas[i], i + 1); + + // d = (x - denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + } + + if (sigmas[i + 1] == 0) { + // Euler step + // x = x + d * dt + float dt = sigmas[i + 1] - sigmas[i]; + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // DPM-Solver-2 + float sigma_mid = exp(0.5f * (log(sigmas[i]) + log(sigmas[i + 1]))); + float dt_1 = sigma_mid - sigmas[i]; + float dt_2 = sigmas[i + 1] - sigmas[i]; + + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = vec_x[j] + vec_d[j] * dt_1; + } + + denoise(x2, sigma_mid, i + 1); + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid; + vec_x[j] = vec_x[j] + d2 * dt_2; + } + } + } + + } break; + case DPMPP2S_A: { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + denoise(x, sigmas[i], i + 1); + + // get_ancestral_step + float sigma_up = std::min(sigmas[i + 1], + std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); + float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + auto sigma_fn = [](float t) -> float { return exp(-t); }; + + if (sigma_down == 0) { + // Euler step + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; + } + + // TODO: If sigma_down == 0, isn't this wrong? + // But + // https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py#L525 + // has this exactly the same way. 
+ float dt = sigma_down - sigmas[i]; + for (int j = 0; j < ggml_nelements(d); j++) { + vec_x[j] = vec_x[j] + vec_d[j] * dt; + } + } else { + // DPM-Solver++(2S) + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigma_down); + float h = t_next - t; + float s = t + 0.5f * h; + + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_x2 = (float*)x2->data; + float* vec_denoised = (float*)denoised->data; + + // First half-step + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x2[j] = (sigma_fn(s) / sigma_fn(t)) * vec_x[j] - (exp(-h * 0.5f) - 1) * vec_denoised[j]; + } + + denoise(x2, sigmas[i + 1], i + 1); + + // Second half-step + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = (sigma_fn(t_next) / sigma_fn(t)) * vec_x[j] - (exp(-h) - 1) * vec_denoised[j]; + } + } + + // Noise addition + if (sigmas[i + 1] > 0) { + ggml_tensor_set_f32_randn(noise, rng); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int i = 0; i < ggml_nelements(x); i++) { + vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; + } + } + } + } + } break; + case DPMPP2M: // DPM++ (2M) from Karras et al (2022) + { + struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + denoise(x, sigmas[i], i + 1); + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + float b = exp(-h) - 1.f; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + float* vec_old_denoised = (float*)old_denoised->data; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; + } + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float r = h_last / h; + for (int j = 0; j < ggml_nelements(x); j++) { + float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; + vec_x[j] = a * vec_x[j] - b * denoised_d; + } + } + + // old_denoised = denoised + for (int j = 0; j < ggml_nelements(x); j++) { + vec_old_denoised[j] = vec_denoised[j]; + } + } + } break; + case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 + { + struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + denoise(x, sigmas[i], i + 1); + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + float* vec_old_denoised = (float*)old_denoised->data; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + float b = exp(-h) - 1.f; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; + } + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float h_min = std::min(h_last, h); + float h_max = std::max(h_last, h); + float r = h_max / h_min; + float h_d = (h_max + h_min) / 2.f; + float b = exp(-h_d) - 1.f; + for (int j = 0; j < ggml_nelements(x); j++) { + float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; + vec_x[j] = a * vec_x[j] - b * denoised_d; + } + } + + // old_denoised = denoised + for 
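The DPMPP2M branch above works in t = -log(sigma) space and reuses the previous step's denoised prediction for a second-order correction; per element:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// One DPM++ (2M) update. Pass old_denoised = nullptr on the first step.
void dpmpp2m_step(std::vector<float>& x,
                  const std::vector<float>& denoised,
                  const std::vector<float>* old_denoised,
                  float sigma, float sigma_next, float sigma_prev) {
    float t      = -std::log(sigma);
    float t_next = -std::log(sigma_next);
    float h = t_next - t;
    float a = sigma_next / sigma;
    float b = std::exp(-h) - 1.0f;
    if (old_denoised == nullptr || sigma_next == 0.0f) {
        // first step / final step: plain exponential-integrator update
        for (size_t j = 0; j < x.size(); j++) {
            x[j] = a * x[j] - b * denoised[j];
        }
    } else {
        float h_last = t - (-std::log(sigma_prev));
        float r = h_last / h;
        for (size_t j = 0; j < x.size(); j++) {
            float denoised_d = (1.0f + 1.0f / (2.0f * r)) * denoised[j]
                             - (1.0f / (2.0f * r)) * (*old_denoised)[j];
            x[j] = a * x[j] - b * denoised_d;
        }
    }
}
```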
(int j = 0; j < ggml_nelements(x); j++) { + vec_old_denoised[j] = vec_denoised[j]; + } + } + } break; + case LCM: // Latent Consistency Models + { + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + struct ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + denoise(x, sigma, i + 1); + + // x = denoised + { + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_denoised[j]; + } + } + + if (sigmas[i + 1] > 0) { + // x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) + ggml_tensor_set_f32_randn(noise, rng); + // noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin"); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + sigmas[i + 1] * vec_noise[j]; + } + } + } + } + } break; + + default: + LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); + abort(); + } + if (control_net) { + control_net->free_control_ctx(); + control_net->free_compute_buffer(); + } + diffusion_model->free_compute_buffer(); + return x; + } + + // ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding + ggml_tensor* get_first_stage_encoding(ggml_context* work_ctx, ggml_tensor* moments) { + // ldm.modules.distributions.distributions.DiagonalGaussianDistribution.sample + ggml_tensor* latent = ggml_new_tensor_4d(work_ctx, moments->type, moments->ne[0], moments->ne[1], moments->ne[2] / 2, moments->ne[3]); + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, latent); + ggml_tensor_set_f32_randn(noise, rng); + // noise = load_tensor_from_file(work_ctx, "noise.bin"); + { + float mean = 0; + float logvar = 0; + float value = 0; + float std_ = 0; + for (int i = 0; i < latent->ne[3]; i++) { + for (int j = 0; j < latent->ne[2]; j++) { + for (int k = 0; k < latent->ne[1]; k++) { + for (int l = 0; l < latent->ne[0]; l++) { + mean = ggml_tensor_get_f32(moments, l, k, j, i); + logvar = ggml_tensor_get_f32(moments, l, k, j + (int)latent->ne[2], i); + logvar = std::max(-30.0f, std::min(logvar, 20.0f)); + std_ = std::exp(0.5f * logvar); + value = mean + std_ * ggml_tensor_get_f32(noise, l, k, j, i); + value = value * scale_factor; + // printf("%d %d %d %d -> %f\n", i, j, k, l, value); + ggml_tensor_set_f32(latent, value, l, k, j, i); + } + } + } + } + } + return latent; + } + + ggml_tensor* compute_first_stage(ggml_context* work_ctx, ggml_tensor* x, bool decode) { + int64_t W = x->ne[0]; + int64_t H = x->ne[1]; + ggml_tensor* result = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, + decode ? (W * 8) : (W / 8), // width + decode ? (H * 8) : (H / 8), // height + decode ? 3 : (use_tiny_autoencoder ? 
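get_first_stage_encoding above draws a reparameterized sample from the encoder's (mean, logvar) moments and applies scale_factor (0.18215 for SD 1.x/2.x, 0.13025 for SDXL). The per-element rule, isolated:

```cpp
#include <algorithm>
#include <cmath>

// latent = (mean + exp(0.5 * clamp(logvar, -30, 20)) * noise) * scale_factor
inline float sample_latent(float mean, float logvar, float noise, float scale_factor) {
    logvar = std::max(-30.0f, std::min(logvar, 20.0f));
    float std_dev = std::exp(0.5f * logvar);
    return (mean + std_dev * noise) * scale_factor;
}
```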
4 : 8), + x->ne[3]); // channels + int64_t t0 = ggml_time_ms(); + if (!use_tiny_autoencoder) { + if (decode) { + ggml_tensor_scale(x, 1.0f / scale_factor); + } else { + ggml_tensor_scale_input(x); + } + if (vae_tiling && decode) { // TODO: support tiling vae encode + // split latent in 32x32 tiles and compute in several steps + auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { + first_stage_model->compute(n_threads, in, decode, &out); + }; + sd_tiling(x, result, 8, 32, 0.5f, on_tiling); + } else { + first_stage_model->compute(n_threads, x, decode, &result); + } + first_stage_model->free_compute_buffer(); + if (decode) { + ggml_tensor_scale_output(result); + } + } else { + if (vae_tiling && decode) { // TODO: support tiling vae encode + // split latent in 64x64 tiles and compute in several steps + auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { + tae_first_stage->compute(n_threads, in, decode, &out); + }; + sd_tiling(x, result, 8, 64, 0.5f, on_tiling); + } else { + tae_first_stage->compute(n_threads, x, decode, &result); + } + tae_first_stage->free_compute_buffer(); + } + + int64_t t1 = ggml_time_ms(); + LOG_DEBUG("computing vae [mode: %s] graph completed, taking %.2fs", decode ? "DECODE" : "ENCODE", (t1 - t0) * 1.0f / 1000); + if (decode) { + ggml_tensor_clamp(result, 0.0f, 1.0f); + } + return result; + } + + ggml_tensor* encode_first_stage(ggml_context* work_ctx, ggml_tensor* x) { + return compute_first_stage(work_ctx, x, false); + } + + ggml_tensor* decode_first_stage(ggml_context* work_ctx, ggml_tensor* x) { + return compute_first_stage(work_ctx, x, true); + } +}; + +/*================================================= SD API ==================================================*/ + +struct sd_ctx_t { + StableDiffusionGGML* sd = NULL; +}; + +sd_ctx_t* new_sd_ctx(const char* model_path_c_str, + const char* vae_path_c_str, + const char* taesd_path_c_str, + const char* control_net_path_c_str, + const char* lora_model_dir_c_str, + const char* embed_dir_c_str, + bool vae_decode_only, + bool vae_tiling, + bool free_params_immediately, + int n_threads, + enum sd_type_t wtype, + enum rng_type_t rng_type, + enum schedule_t s, + bool keep_control_net_cpu) { + sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t)); + if (sd_ctx == NULL) { + return NULL; + } + std::string model_path(model_path_c_str); + std::string vae_path(vae_path_c_str); + std::string taesd_path(taesd_path_c_str); + std::string control_net_path(control_net_path_c_str); + std::string embd_path(embed_dir_c_str); + std::string lora_model_dir(lora_model_dir_c_str); + + sd_ctx->sd = new StableDiffusionGGML(n_threads, + vae_decode_only, + free_params_immediately, + lora_model_dir, + rng_type); + if (sd_ctx->sd == NULL) { + return NULL; + } + + if (!sd_ctx->sd->load_from_file(model_path, + vae_path, + control_net_path, + embd_path, + taesd_path, + vae_tiling, + (ggml_type)wtype, + s, + keep_control_net_cpu)) { + delete sd_ctx->sd; + sd_ctx->sd = NULL; + free(sd_ctx); + return NULL; + } + return sd_ctx; +} + +void free_sd_ctx(sd_ctx_t* sd_ctx) { + if (sd_ctx->sd != NULL) { + delete sd_ctx->sd; + sd_ctx->sd = NULL; + } + free(sd_ctx); +} + +sd_image_t* txt2img(sd_ctx_t* sd_ctx, + const char* prompt_c_str, + const char* negative_prompt_c_str, + int clip_skip, + float cfg_scale, + int width, + int height, + enum sample_method_t sample_method, + int sample_steps, + int64_t seed, + int batch_count, + const sd_image_t* control_cond, + float control_strength) { + LOG_DEBUG("txt2img %dx%d", width, height); + 
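compute_first_stage above can run the VAE decode tile-by-tile (32x32 latent tiles for the full VAE, 64x64 for TAESD, 0.5 overlap) to cap memory use. A purely illustrative sketch of how overlapping tile origins could be laid out; the real sd_tiling also blends the overlapping regions and is not shown here:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Returns (start, end) ranges along one axis so that consecutive tiles overlap.
std::vector<std::pair<int, int>> tile_ranges(int extent, int tile = 32, float overlap = 0.5f) {
    std::vector<std::pair<int, int>> out;
    int step = std::max(1, (int)(tile * (1.0f - overlap)));
    for (int x = 0;; x += step) {
        int x0 = std::min(x, std::max(0, extent - tile));  // clamp the last tile to the edge
        out.push_back({x0, std::min(x0 + tile, extent)});
        if (x0 + tile >= extent) break;
    }
    return out;
}
```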
if (sd_ctx == NULL) { + return NULL; + } + // LOG_DEBUG("%s %s %f %d %d %d", prompt_c_str, negative_prompt_c_str, cfg_scale, sample_steps, seed, batch_count); + std::string prompt(prompt_c_str); + std::string negative_prompt(negative_prompt_c_str); + + // extract and remove lora + auto result_pair = extract_and_remove_lora(prompt); + std::unordered_map lora_f2m = result_pair.first; // lora_name -> multiplier + + for (auto& kv : lora_f2m) { + LOG_DEBUG("lora %s:%.2f", kv.first.c_str(), kv.second); + } + + prompt = result_pair.second; + LOG_DEBUG("prompt after extract and remove lora: \"%s\"", prompt.c_str()); + + int64_t t0 = ggml_time_ms(); + sd_ctx->sd->apply_loras(lora_f2m); + int64_t t1 = ggml_time_ms(); + LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); + struct ggml_init_params params; + params.mem_size = static_cast(10 * 1024 * 1024); // 10 MB + params.mem_size += width * height * 3 * sizeof(float); + params.mem_size *= batch_count; + params.mem_buffer = NULL; + params.no_alloc = false; + // LOG_DEBUG("mem_size %u ", params.mem_size); + + struct ggml_context* work_ctx = ggml_init(params); + if (!work_ctx) { + LOG_ERROR("ggml_init() failed"); + return NULL; + } + + if (seed < 0) { + // Generally, when using the provided command line, the seed is always >0. + // However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library + // by a third party with a seed <0, let's incorporate randomization here. + srand((int)time(NULL)); + seed = rand(); + } + + t0 = ggml_time_ms(); + auto cond_pair = sd_ctx->sd->get_learned_condition(work_ctx, prompt, clip_skip, width, height); + ggml_tensor* c = cond_pair.first; + ggml_tensor* c_vector = cond_pair.second; // [adm_in_channels, ] + struct ggml_tensor* uc = NULL; + struct ggml_tensor* uc_vector = NULL; + if (cfg_scale != 1.0) { + bool force_zero_embeddings = false; + if (sd_ctx->sd->version == VERSION_XL && negative_prompt.size() == 0) { + force_zero_embeddings = true; + } + auto uncond_pair = sd_ctx->sd->get_learned_condition(work_ctx, negative_prompt, clip_skip, width, height, force_zero_embeddings); + uc = uncond_pair.first; + uc_vector = uncond_pair.second; // [adm_in_channels, ] + } + t1 = ggml_time_ms(); + LOG_INFO("get_learned_condition completed, taking %" PRId64 " ms", t1 - t0); + + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->cond_stage_model->free_params_buffer(); + } + + struct ggml_tensor* image_hint = NULL; + if (control_cond != NULL) { + image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); + sd_image_to_tensor(control_cond->data, image_hint); + } + + std::vector final_latents; // collect latents to decode + int C = 4; + int W = width / 8; + int H = height / 8; + LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + for (int b = 0; b < batch_count; b++) { + int64_t sampling_start = ggml_time_ms(); + int64_t cur_seed = seed + b; + LOG_INFO("generating image: %i/%i - seed %i", b + 1, batch_count, cur_seed); + + sd_ctx->sd->rng->manual_seed(cur_seed); + struct ggml_tensor* x_t = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1); + ggml_tensor_set_f32_randn(x_t, sd_ctx->sd->rng); + + std::vector sigmas = sd_ctx->sd->denoiser->schedule->get_sigmas(sample_steps); + + struct ggml_tensor* x_0 = sd_ctx->sd->sample(work_ctx, + x_t, + NULL, + c, + NULL, + c_vector, + uc, + NULL, + uc_vector, + image_hint, + control_strength, + cfg_scale, + cfg_scale, + sample_method, + sigmas); + // struct ggml_tensor* x_0 = 
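txt2img sizes its scratch ggml context from a fixed base plus one float RGB image per batch item; the same budget as a helper:

```cpp
#include <cstddef>

// ~10 MB of headroom for intermediates plus the decoded f32 image, per batch item.
inline size_t txt2img_work_mem(int width, int height, int batch_count) {
    size_t mem = (size_t)10 * 1024 * 1024;
    mem += (size_t)width * (size_t)height * 3 * sizeof(float);
    return mem * (size_t)batch_count;
}
```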
load_tensor_from_file(ctx, "samples_ddim.bin"); + // print_ggml_tensor(x_0); + int64_t sampling_end = ggml_time_ms(); + LOG_INFO("sampling completed, taking %.2fs", (sampling_end - sampling_start) * 1.0f / 1000); + final_latents.push_back(x_0); + } + + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->diffusion_model->free_params_buffer(); + } + int64_t t3 = ggml_time_ms(); + LOG_INFO("generating %" PRId64 " latent images completed, taking %.2fs", final_latents.size(), (t3 - t1) * 1.0f / 1000); + + LOG_INFO("decoding %zu latents", final_latents.size()); + std::vector decoded_images; // collect decoded images + for (size_t i = 0; i < final_latents.size(); i++) { + t1 = ggml_time_ms(); + struct ggml_tensor* img = sd_ctx->sd->decode_first_stage(work_ctx, final_latents[i] /* x_0 */); + // print_ggml_tensor(img); + if (img != NULL) { + decoded_images.push_back(img); + } + int64_t t2 = ggml_time_ms(); + LOG_INFO("latent %" PRId64 " decoded, taking %.2fs", i + 1, (t2 - t1) * 1.0f / 1000); + } + + int64_t t4 = ggml_time_ms(); + LOG_INFO("decode_first_stage completed, taking %.2fs", (t4 - t3) * 1.0f / 1000); + if (sd_ctx->sd->free_params_immediately && !sd_ctx->sd->use_tiny_autoencoder) { + sd_ctx->sd->first_stage_model->free_params_buffer(); + } + sd_image_t* result_images = (sd_image_t*)calloc(batch_count, sizeof(sd_image_t)); + if (result_images == NULL) { + ggml_free(work_ctx); + return NULL; + } + + for (size_t i = 0; i < decoded_images.size(); i++) { + result_images[i].width = width; + result_images[i].height = height; + result_images[i].channel = 3; + result_images[i].data = sd_tensor_to_image(decoded_images[i]); + } + ggml_free(work_ctx); + LOG_INFO( + "txt2img completed in %.2fs", + (t4 - t0) * 1.0f / 1000); + + return result_images; +} + +sd_image_t* img2img(sd_ctx_t* sd_ctx, + sd_image_t init_image, + const char* prompt_c_str, + const char* negative_prompt_c_str, + int clip_skip, + float cfg_scale, + int width, + int height, + sample_method_t sample_method, + int sample_steps, + float strength, + int64_t seed, + int batch_count) { + if (sd_ctx == NULL) { + return NULL; + } + std::string prompt(prompt_c_str); + std::string negative_prompt(negative_prompt_c_str); + + LOG_INFO("img2img %dx%d", width, height); + + std::vector sigmas = sd_ctx->sd->denoiser->schedule->get_sigmas(sample_steps); + size_t t_enc = static_cast(sample_steps * strength); + LOG_INFO("target t_enc is %zu steps", t_enc); + std::vector sigma_sched; + sigma_sched.assign(sigmas.begin() + sample_steps - t_enc - 1, sigmas.end()); + + struct ggml_init_params params; + params.mem_size = static_cast(10 * 1024) * 1024; // 10 MB + params.mem_size += width * height * 3 * sizeof(float) * 2; + params.mem_buffer = NULL; + params.no_alloc = false; + // LOG_DEBUG("mem_size %u ", params.mem_size); + + // draft context + struct ggml_context* work_ctx = ggml_init(params); + if (!work_ctx) { + LOG_ERROR("ggml_init() failed"); + return NULL; + } + + if (seed < 0) { + seed = (int)time(NULL); + } + + sd_ctx->sd->rng->manual_seed(seed); + + // extract and remove lora + auto result_pair = extract_and_remove_lora(prompt); + std::unordered_map lora_f2m = result_pair.first; // lora_name -> multiplier + for (auto& kv : lora_f2m) { + LOG_DEBUG("lora %s:%.2f", kv.first.c_str(), kv.second); + } + prompt = result_pair.second; + LOG_DEBUG("prompt after extract and remove lora: \"%s\"", prompt.c_str()); + + // load lora from file + int64_t t0 = ggml_time_ms(); + sd_ctx->sd->apply_loras(lora_f2m); + int64_t t1 = ggml_time_ms(); + 
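img2img converts strength into the number of denoising steps actually run by keeping only the tail of the sigma schedule; a sketch, with the slice clamped so strength = 1.0 stays in range (the code above does not clamp explicitly):

```cpp
#include <cstddef>
#include <vector>

// sigmas holds sample_steps + 1 descending values (the last one is 0).
std::vector<float> img2img_sigma_sched(const std::vector<float>& sigmas,
                                       int sample_steps, float strength /* 0..1 */) {
    size_t t_enc = (size_t)(sample_steps * strength);
    if (t_enc + 1 > (size_t)sample_steps) {
        t_enc = (size_t)sample_steps - 1;  // keep the start index >= 0
    }
    size_t start = (size_t)sample_steps - t_enc - 1;
    return std::vector<float>(sigmas.begin() + (std::ptrdiff_t)start, sigmas.end());
}
```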
LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); + + ggml_tensor* init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); + sd_image_to_tensor(init_image.data, init_img); + t0 = ggml_time_ms(); + ggml_tensor* init_latent = NULL; + if (!sd_ctx->sd->use_tiny_autoencoder) { + ggml_tensor* moments = sd_ctx->sd->encode_first_stage(work_ctx, init_img); + init_latent = sd_ctx->sd->get_first_stage_encoding(work_ctx, moments); + } else { + init_latent = sd_ctx->sd->encode_first_stage(work_ctx, init_img); + } + // print_ggml_tensor(init_latent); + t1 = ggml_time_ms(); + LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); + + auto cond_pair = sd_ctx->sd->get_learned_condition(work_ctx, prompt, clip_skip, width, height); + ggml_tensor* c = cond_pair.first; + ggml_tensor* c_vector = cond_pair.second; // [adm_in_channels, ] + struct ggml_tensor* uc = NULL; + struct ggml_tensor* uc_vector = NULL; + if (cfg_scale != 1.0) { + bool force_zero_embeddings = false; + if (sd_ctx->sd->version == VERSION_XL && negative_prompt.size() == 0) { + force_zero_embeddings = true; + } + auto uncond_pair = sd_ctx->sd->get_learned_condition(work_ctx, negative_prompt, clip_skip, width, height, force_zero_embeddings); + uc = uncond_pair.first; + uc_vector = uncond_pair.second; // [adm_in_channels, ] + } + int64_t t2 = ggml_time_ms(); + LOG_INFO("get_learned_condition completed, taking %" PRId64 " ms", t2 - t1); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->cond_stage_model->free_params_buffer(); + } + + sd_ctx->sd->rng->manual_seed(seed); + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, init_latent); + ggml_tensor_set_f32_randn(noise, sd_ctx->sd->rng); + + LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + struct ggml_tensor* x_0 = sd_ctx->sd->sample(work_ctx, + init_latent, + noise, + c, + NULL, + c_vector, + uc, + NULL, + uc_vector, + {}, + 0.f, + cfg_scale, + cfg_scale, + sample_method, + sigma_sched); + // struct ggml_tensor *x_0 = load_tensor_from_file(ctx, "samples_ddim.bin"); + // print_ggml_tensor(x_0); + int64_t t3 = ggml_time_ms(); + LOG_INFO("sampling completed, taking %.2fs", (t3 - t2) * 1.0f / 1000); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->diffusion_model->free_params_buffer(); + } + + struct ggml_tensor* img = sd_ctx->sd->decode_first_stage(work_ctx, x_0); + if (sd_ctx->sd->free_params_immediately && !sd_ctx->sd->use_tiny_autoencoder) { + sd_ctx->sd->first_stage_model->free_params_buffer(); + } + if (img == NULL) { + ggml_free(work_ctx); + return NULL; + } + + sd_image_t* result_images = (sd_image_t*)calloc(1, sizeof(sd_image_t)); + if (result_images == NULL) { + ggml_free(work_ctx); + return NULL; + } + + for (size_t i = 0; i < 1; i++) { + result_images[i].width = width; + result_images[i].height = height; + result_images[i].channel = 3; + result_images[i].data = sd_tensor_to_image(img); + } + ggml_free(work_ctx); + + int64_t t4 = ggml_time_ms(); + LOG_INFO("decode_first_stage completed, taking %.2fs", (t4 - t3) * 1.0f / 1000); + + LOG_INFO("img2img completed in %.2fs", (t4 - t0) * 1.0f / 1000); + + return result_images; +} + +SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, + sd_image_t init_image, + int width, + int height, + int video_frames, + int motion_bucket_id, + int fps, + float augmentation_level, + float min_cfg, + float cfg_scale, + enum sample_method_t sample_method, + int sample_steps, + float strength, + int64_t seed) { + if (sd_ctx == NULL) { + return NULL; + } 
+ + LOG_INFO("img2vid %dx%d", width, height); + + std::vector sigmas = sd_ctx->sd->denoiser->schedule->get_sigmas(sample_steps); + + struct ggml_init_params params; + params.mem_size = static_cast(10 * 1024) * 1024; // 10 MB + params.mem_size += width * height * 3 * sizeof(float) * video_frames; + params.mem_buffer = NULL; + params.no_alloc = false; + // LOG_DEBUG("mem_size %u ", params.mem_size); + + // draft context + struct ggml_context* work_ctx = ggml_init(params); + if (!work_ctx) { + LOG_ERROR("ggml_init() failed"); + return NULL; + } + + if (seed < 0) { + seed = (int)time(NULL); + } + + sd_ctx->sd->rng->manual_seed(seed); + + int64_t t0 = ggml_time_ms(); + + ggml_tensor* c_crossattn = NULL; + ggml_tensor* c_concat = NULL; + ggml_tensor* c_vector = NULL; + + ggml_tensor* uc_crossattn = NULL; + ggml_tensor* uc_concat = NULL; + ggml_tensor* uc_vector = NULL; + + std::tie(c_crossattn, c_concat, c_vector) = sd_ctx->sd->get_svd_condition(work_ctx, + init_image, + width, + height, + fps, + motion_bucket_id, + augmentation_level); + + uc_crossattn = ggml_dup_tensor(work_ctx, c_crossattn); + ggml_set_f32(uc_crossattn, 0.f); + + uc_concat = ggml_dup_tensor(work_ctx, c_concat); + ggml_set_f32(uc_concat, 0.f); + + uc_vector = ggml_dup_tensor(work_ctx, c_vector); + + int64_t t1 = ggml_time_ms(); + LOG_INFO("get_learned_condition completed, taking %" PRId64 " ms", t1 - t0); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->clip_vision->free_params_buffer(); + } + + sd_ctx->sd->rng->manual_seed(seed); + int C = 4; + int W = width / 8; + int H = height / 8; + struct ggml_tensor* x_t = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, video_frames); + ggml_tensor_set_f32_randn(x_t, sd_ctx->sd->rng); + + LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + struct ggml_tensor* x_0 = sd_ctx->sd->sample(work_ctx, + x_t, + NULL, + c_crossattn, + c_concat, + c_vector, + uc_crossattn, + uc_concat, + uc_vector, + {}, + 0.f, + min_cfg, + cfg_scale, + sample_method, + sigmas); + + int64_t t2 = ggml_time_ms(); + LOG_INFO("sampling completed, taking %.2fs", (t2 - t1) * 1.0f / 1000); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->diffusion_model->free_params_buffer(); + } + + struct ggml_tensor* img = sd_ctx->sd->decode_first_stage(work_ctx, x_0); + if (sd_ctx->sd->free_params_immediately) { + sd_ctx->sd->first_stage_model->free_params_buffer(); + } + if (img == NULL) { + ggml_free(work_ctx); + return NULL; + } + + sd_image_t* result_images = (sd_image_t*)calloc(video_frames, sizeof(sd_image_t)); + if (result_images == NULL) { + ggml_free(work_ctx); + return NULL; + } + + for (size_t i = 0; i < video_frames; i++) { + auto img_i = ggml_view_3d(work_ctx, img, img->ne[0], img->ne[1], img->ne[2], img->nb[1], img->nb[2], img->nb[3] * i); + + result_images[i].width = width; + result_images[i].height = height; + result_images[i].channel = 3; + result_images[i].data = sd_tensor_to_image(img_i); + } + ggml_free(work_ctx); + + int64_t t3 = ggml_time_ms(); + + LOG_INFO("img2vid completed in %.2fs", (t3 - t0) * 1.0f / 1000); + + return result_images; +} \ No newline at end of file diff --git a/otherarch/sdcpp/stable-diffusion.h b/otherarch/sdcpp/stable-diffusion.h new file mode 100644 index 000000000..99eba4330 --- /dev/null +++ b/otherarch/sdcpp/stable-diffusion.h @@ -0,0 +1,193 @@ +#ifndef __STABLE_DIFFUSION_H__ +#define __STABLE_DIFFUSION_H__ + +#if defined(_WIN32) || defined(__CYGWIN__) +#ifndef SD_BUILD_SHARED_LIB +#define SD_API +#else +#ifdef SD_BUILD_DLL +#define 
SD_API __declspec(dllexport) +#else +#define SD_API __declspec(dllimport) +#endif +#endif +#else +#if __GNUC__ >= 4 +#define SD_API __attribute__((visibility("default"))) +#else +#define SD_API +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +enum rng_type_t { + STD_DEFAULT_RNG, + CUDA_RNG +}; + +enum sample_method_t { + EULER_A, + EULER, + HEUN, + DPM2, + DPMPP2S_A, + DPMPP2M, + DPMPP2Mv2, + LCM, + N_SAMPLE_METHODS +}; + +enum schedule_t { + DEFAULT, + DISCRETE, + KARRAS, + N_SCHEDULES +}; + +// same as enum ggml_type +enum sd_type_t { + SD_TYPE_F32 = 0, + SD_TYPE_F16 = 1, + SD_TYPE_Q4_0 = 2, + SD_TYPE_Q4_1 = 3, + // SD_TYPE_Q4_2 = 4, support has been removed + // SD_TYPE_Q4_3 (5) support has been removed + SD_TYPE_Q5_0 = 6, + SD_TYPE_Q5_1 = 7, + SD_TYPE_Q8_0 = 8, + SD_TYPE_Q8_1 = 9, + // k-quantizations + SD_TYPE_Q2_K = 10, + SD_TYPE_Q3_K = 11, + SD_TYPE_Q4_K = 12, + SD_TYPE_Q5_K = 13, + SD_TYPE_Q6_K = 14, + SD_TYPE_Q8_K = 15, + SD_TYPE_IQ2_XXS = 16, + SD_TYPE_IQ2_XS = 17, + SD_TYPE_IQ3_XXS = 18, + SD_TYPE_IQ1_S = 19, + SD_TYPE_IQ4_NL = 20, + SD_TYPE_I8, + SD_TYPE_I16, + SD_TYPE_I32, + SD_TYPE_COUNT, +}; + +SD_API const char* sd_type_name(enum sd_type_t type); + +enum sd_log_level_t { + SD_LOG_DEBUG, + SD_LOG_INFO, + SD_LOG_WARN, + SD_LOG_ERROR +}; + +typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data); + +SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data); +SD_API int32_t get_num_physical_cores(); +SD_API const char* sd_get_system_info(); + +typedef struct { + uint32_t width; + uint32_t height; + uint32_t channel; + uint8_t* data; +} sd_image_t; + +typedef struct sd_ctx_t sd_ctx_t; + +SD_API sd_ctx_t* new_sd_ctx(const char* model_path, + const char* vae_path, + const char* taesd_path, + const char* control_net_path_c_str, + const char* lora_model_dir, + const char* embed_dir_c_str, + bool vae_decode_only, + bool vae_tiling, + bool free_params_immediately, + int n_threads, + enum sd_type_t wtype, + enum rng_type_t rng_type, + enum schedule_t s, + bool keep_control_net_cpu); + +SD_API void free_sd_ctx(sd_ctx_t* sd_ctx); + +SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx, + const char* prompt, + const char* negative_prompt, + int clip_skip, + float cfg_scale, + int width, + int height, + enum sample_method_t sample_method, + int sample_steps, + int64_t seed, + int batch_count, + const sd_image_t* control_cond, + float control_strength); + +SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, + sd_image_t init_image, + const char* prompt, + const char* negative_prompt, + int clip_skip, + float cfg_scale, + int width, + int height, + enum sample_method_t sample_method, + int sample_steps, + float strength, + int64_t seed, + int batch_count); + +SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx, + sd_image_t init_image, + int width, + int height, + int video_frames, + int motion_bucket_id, + int fps, + float augmentation_level, + float min_cfg, + float cfg_scale, + enum sample_method_t sample_method, + int sample_steps, + float strength, + int64_t seed); + +typedef struct upscaler_ctx_t upscaler_ctx_t; + +SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path, + int n_threads, + enum sd_type_t wtype); +SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx); + +SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor); + +SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type); + +SD_API 
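A hypothetical caller sketch for the C API declared above (the model path, clip_skip = -1 meaning "model default", and freeing the returned buffers with free() are assumptions; the header does not ship an image-free helper):

```cpp
#include <cstdio>
#include <cstdlib>
#include "stable-diffusion.h"

int main() {
    sd_ctx_t* ctx = new_sd_ctx("sd-v1-5.safetensors", "", "", "", "", "",
                               /*vae_decode_only*/ true, /*vae_tiling*/ false,
                               /*free_params_immediately*/ true,
                               get_num_physical_cores(),
                               SD_TYPE_F16, STD_DEFAULT_RNG, DEFAULT,
                               /*keep_control_net_cpu*/ false);
    if (!ctx) return 1;

    sd_image_t* imgs = txt2img(ctx, "a photo of a cat", "", /*clip_skip*/ -1,
                               /*cfg_scale*/ 7.0f, 512, 512, EULER_A,
                               /*sample_steps*/ 20, /*seed*/ 42, /*batch_count*/ 1,
                               /*control_cond*/ NULL, /*control_strength*/ 0.9f);
    if (imgs) {
        printf("got %ux%u image, %u channels\n",
               (unsigned)imgs[0].width, (unsigned)imgs[0].height, (unsigned)imgs[0].channel);
        free(imgs[0].data);  // assumption: pixel buffers are heap-allocated by the library
        free(imgs);
    }
    free_sd_ctx(ctx);
    return 0;
}
```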
uint8_t* preprocess_canny(uint8_t* img, + int width, + int height, + float high_threshold, + float low_threshold, + float weak, + float strong, + bool inverse); + +#ifdef __cplusplus +} +#endif + +#endif // __STABLE_DIFFUSION_H__ \ No newline at end of file diff --git a/otherarch/sdcpp/tae.hpp b/otherarch/sdcpp/tae.hpp new file mode 100644 index 000000000..d54205477 --- /dev/null +++ b/otherarch/sdcpp/tae.hpp @@ -0,0 +1,259 @@ +#ifndef __TAE_HPP__ +#define __TAE_HPP__ + +#include "ggml_extend.hpp" + +#include "model.h" + +/* + =================================== TinyAutoEncoder =================================== + References: + https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/autoencoders/vae.py + https://github.com/madebyollin/taesd/blob/main/taesd.py + +*/ + +class TAEBlock : public UnaryBlock { +protected: + int n_in; + int n_out; + +public: + TAEBlock(int n_in, int n_out) + : n_in(n_in), n_out(n_out) { + blocks["conv.0"] = std::shared_ptr(new Conv2d(n_in, n_out, {3, 3}, {1, 1}, {1, 1})); + blocks["conv.2"] = std::shared_ptr(new Conv2d(n_out, n_out, {3, 3}, {1, 1}, {1, 1})); + blocks["conv.4"] = std::shared_ptr(new Conv2d(n_out, n_out, {3, 3}, {1, 1}, {1, 1})); + if (n_in != n_out) { + blocks["skip"] = std::shared_ptr(new Conv2d(n_in, n_out, {1, 1}, {1, 1}, {1, 1}, {1, 1}, false)); + } + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [n, n_in, h, w] + // return: [n, n_out, h, w] + + auto conv_0 = std::dynamic_pointer_cast(blocks["conv.0"]); + auto conv_2 = std::dynamic_pointer_cast(blocks["conv.2"]); + auto conv_4 = std::dynamic_pointer_cast(blocks["conv.4"]); + + auto h = conv_0->forward(ctx, x); + h = ggml_relu_inplace(ctx, h); + h = conv_2->forward(ctx, h); + h = ggml_relu_inplace(ctx, h); + h = conv_4->forward(ctx, h); + + if (n_in != n_out) { + auto skip = std::dynamic_pointer_cast(blocks["skip"]); + LOG_DEBUG("skip"); + x = skip->forward(ctx, x); + } + + h = ggml_add(ctx, h, x); + h = ggml_relu_inplace(ctx, h); + return h; + } +}; + +class TinyEncoder : public UnaryBlock { + int in_channels = 3; + int channels = 64; + int z_channels = 4; + int num_blocks = 3; + +public: + TinyEncoder() { + int index = 0; + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(in_channels, channels, {3, 3}, {1, 1}, {1, 1})); + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {2, 2}, {1, 1}, {1, 1}, false)); + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {2, 2}, {1, 1}, {1, 1}, false)); + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {2, 2}, {1, 1}, {1, 1}, false)); + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, z_channels, {3, 3}, {1, 1}, {1, 1})); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) { + // x: [n, in_channels, h, w] + // return: [n, z_channels, h/8, w/8] + + for (int i = 0; i < num_blocks * 3 + 6; i++) { + auto block = 
std::dynamic_pointer_cast(blocks[std::to_string(i)]); + + x = block->forward(ctx, x); + } + + return x; + } +}; + +class TinyDecoder : public UnaryBlock { + int z_channels = 4; + int channels = 64; + int out_channels = 3; + int num_blocks = 3; + +public: + TinyDecoder(int index = 0) { + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(z_channels, channels, {3, 3}, {1, 1}, {1, 1})); + index++; // nn.ReLU() + + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + index++; // nn.Upsample() + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, false)); + + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + index++; // nn.Upsample() + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, false)); + + for (int i = 0; i < num_blocks; i++) { + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + } + index++; // nn.Upsample() + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, channels, {3, 3}, {1, 1}, {1, 1}, {1, 1}, false)); + + blocks[std::to_string(index++)] = std::shared_ptr(new TAEBlock(channels, channels)); + blocks[std::to_string(index++)] = std::shared_ptr(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1})); + } + + struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* z) { + // z: [n, z_channels, h, w] + // return: [n, out_channels, h*8, w*8] + + auto h = ggml_scale(ctx, z, 1.0f / 3.0f); + h = ggml_tanh_inplace(ctx, h); + h = ggml_scale(ctx, h, 3.0f); + + for (int i = 0; i < num_blocks * 3 + 10; i++) { + if (blocks.find(std::to_string(i)) == blocks.end()) { + if (i == 1) { + h = ggml_relu_inplace(ctx, h); + } else { + h = ggml_upscale(ctx, h, 2); + } + continue; + } + auto block = std::dynamic_pointer_cast(blocks[std::to_string(i)]); + + h = block->forward(ctx, h); + } + + return h; + } +}; + +class TAESD : public GGMLBlock { +protected: + bool decode_only; + +public: + TAESD(bool decode_only = true) + : decode_only(decode_only) { + blocks["decoder.layers"] = std::shared_ptr(new TinyDecoder()); + + if (!decode_only) { + blocks["encoder.layers"] = std::shared_ptr(new TinyEncoder()); + } + } + + struct ggml_tensor* decode(struct ggml_context* ctx, struct ggml_tensor* z) { + auto decoder = std::dynamic_pointer_cast(blocks["decoder.layers"]); + return decoder->forward(ctx, z); + } + + struct ggml_tensor* encode(struct ggml_context* ctx, struct ggml_tensor* x) { + auto encoder = std::dynamic_pointer_cast(blocks["encoder.layers"]); + return encoder->forward(ctx, x); + } +}; + +struct TinyAutoEncoder : public GGMLModule { + TAESD taesd; + bool decode_only = false; + + TinyAutoEncoder(ggml_backend_t backend, + ggml_type wtype, + bool decoder_only = true) + : decode_only(decoder_only), + taesd(decode_only), + GGMLModule(backend, wtype) { + taesd.init(params_ctx, wtype); + } + + std::string get_desc() { + return "taesd"; + } + + size_t get_params_mem_size() { + return taesd.get_params_mem_size(); + } + + size_t get_params_num() { + return taesd.get_params_num(); + } + + bool load_from_file(const std::string& file_path) { + LOG_INFO("loading taesd from '%s'", file_path.c_str()); + alloc_params_buffer(); + std::map taesd_tensors; + taesd.get_param_tensors(taesd_tensors); + std::set ignore_tensors; + if (decode_only) { + 
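TinyDecoder above soft-clamps incoming latents with a scaled tanh before the convolutional stack runs, since TAESD expects latents roughly in [-3, 3]; per element:

```cpp
#include <cmath>

// h = tanh(z / 3) * 3, bounding the latent to (-3, 3) before decoding.
inline float taesd_squash_latent(float z) {
    return std::tanh(z / 3.0f) * 3.0f;
}
```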
ignore_tensors.insert("encoder."); + } + + ModelLoader model_loader; + if (!model_loader.init_from_file(file_path)) { + LOG_ERROR("init taesd model loader from file failed: '%s'", file_path.c_str()); + return false; + } + + bool success = model_loader.load_tensors(taesd_tensors, backend, ignore_tensors); + + if (!success) { + LOG_ERROR("load tae tensors from model loader failed"); + return false; + } + + LOG_INFO("taesd model loaded"); + return success; + } + + struct ggml_cgraph* build_graph(struct ggml_tensor* z, bool decode_graph) { + struct ggml_cgraph* gf = ggml_new_graph(compute_ctx); + z = to_backend(z); + struct ggml_tensor* out = decode_graph ? taesd.decode(compute_ctx, z) : taesd.encode(compute_ctx, z); + ggml_build_forward_expand(gf, out); + return gf; + } + + void compute(const int n_threads, + struct ggml_tensor* z, + bool decode_graph, + struct ggml_tensor** output, + struct ggml_context* output_ctx = NULL) { + auto get_graph = [&]() -> struct ggml_cgraph* { + return build_graph(z, decode_graph); + }; + + GGMLModule::compute(get_graph, n_threads, false, output, output_ctx); + } +}; + +#endif // __TAE_HPP__ \ No newline at end of file diff --git a/otherarch/sdcpp/thirdparty/README.md b/otherarch/sdcpp/thirdparty/README.md new file mode 100644 index 000000000..4813054b4 --- /dev/null +++ b/otherarch/sdcpp/thirdparty/README.md @@ -0,0 +1,2 @@ +- json.hpp library from: https://github.com/nlohmann/json +- ZIP Library from: https://github.com/kuba--/zip \ No newline at end of file diff --git a/otherarch/sdcpp/thirdparty/json.hpp b/otherarch/sdcpp/thirdparty/json.hpp new file mode 100644 index 000000000..4d1a37ad7 --- /dev/null +++ b/otherarch/sdcpp/thirdparty/json.hpp @@ -0,0 +1,24596 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. 
* +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // accumulate +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 2 + #warning "Already included a different version of the library!" + #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 2 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) + 
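+// Editor's sketch (not part of the upstream header): under the default
+// configuration above (JSON_DIAGNOSTICS == 0, legacy discarded-value
+// comparison off, versioned namespace enabled) the ABI-tag and version
+// macros combine into the inline namespace nlohmann::json_abi_v3_11_2,
+// which a translation unit that includes this header could check with
+// something like:
+//
+//     #include <type_traits>
+//     static_assert(std::is_same<nlohmann::json,
+//                                nlohmann::json_abi_v3_11_2::json>::value,
+//                   "ABI tag + version namespace under default options");
+//
+// The concrete namespace name is an assumption derived from the macro
+// chain shown here; it changes whenever the library version or the
+// ABI-affecting options change, which is exactly the point: translation
+// units built with different options land in distinct namespaces instead
+// of silently violating the ODR.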
+#ifndef NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... 
Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define 
JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if 
defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, 
(__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if 
defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION 
JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) 
(0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + 
#define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define 
JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif 
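+/* Editor's sketch (not part of upstream Hedley): the PUSH/POP pair above and
+   the DISABLE_* macros defined below are meant to bracket code that has to
+   touch deprecated APIs without tripping warnings, roughly:
+
+       JSON_HEDLEY_DEPRECATED(3.0) int old_api(void);
+
+       int call_old_api(void) {
+           JSON_HEDLEY_DIAGNOSTIC_PUSH
+           JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED
+           int r = old_api();   // no deprecation warning inside the bracket
+           JSON_HEDLEY_DIAGNOSTIC_POP
+           return r;
+       }
+
+   On compilers with no known pragma every macro expands to nothing, so the
+   same source still builds, only without the suppression. old_api here is a
+   hypothetical function used purely for illustration. */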
+#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif 
JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + 
JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) 
__attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + 
JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if !defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) 
+#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ 
+ JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) 
|| \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if 
defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ?
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL
JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + 
(JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions (except those affecting ABI) +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// #include + + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +// if the user manually specified the used c++ version this is skipped +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 + #endif + // the cpp 11 flag is 
always specified because it is the minimal required version + #define JSON_HAS_CPP_11 +#endif + +#ifdef __has_include + #if __has_include(<version>) + #include <version> + #endif +#endif + +#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) + #ifdef JSON_HAS_CPP_17 + #if defined(__cpp_lib_filesystem) + #define JSON_HAS_FILESYSTEM 1 + #elif defined(__cpp_lib_experimental_filesystem) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif !defined(__has_include) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif __has_include(<filesystem>) + #define JSON_HAS_FILESYSTEM 1 + #elif __has_include(<experimental/filesystem>) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #endif + + // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/ + #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__clang_major__) && __clang_major__ < 7 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support + #if defined(_MSC_VER) && _MSC_VER < 1914 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before iOS 13 + #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before macOS Catalina + #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + #endif +#endif + +#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_FILESYSTEM + #define JSON_HAS_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_THREE_WAY_COMPARISON + #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ + && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L + #define JSON_HAS_THREE_WAY_COMPARISON 1 + #else + #define JSON_HAS_THREE_WAY_COMPARISON 0 + #endif +#endif + +#ifndef JSON_HAS_RANGES + // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error + #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 + #define JSON_HAS_RANGES 0 + #elif defined(__cpp_lib_ranges) + #define JSON_HAS_RANGES 1 + #else + #define JSON_HAS_RANGES 0 + #endif +#endif + +#ifdef JSON_HAS_CPP_17 + #define JSON_INLINE_VARIABLE inline +#else + #define JSON_INLINE_VARIABLE +#endif + +#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) + #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else + #define JSON_NO_UNIQUE_ADDRESS +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdocumentation" + #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" +#endif + +// allow disabling exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY
try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include <cstdlib> + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// allow overriding assert +#if !defined(JSON_ASSERT) + #include <cassert> // assert + #define JSON_ASSERT(x) assert(x) +#endif + +// allow to access some private functions (needed by the test suite) +#if defined(JSON_TESTS_PRIVATE) + #define JSON_PRIVATE_UNLESS_TESTED public +#else + #define JSON_PRIVATE_UNLESS_TESTED private +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template<typename BasicJsonType> \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template<typename BasicJsonType> \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template<template<typename, typename, typename...> class ObjectType, \ + template<typename, typename...> class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template<typename> class AllocatorType, \ + template<typename, typename = void> class JSONSerializer, \ + class BinaryType> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json<ObjectType, ArrayType, StringType, BooleanType, \ + NumberIntegerType, NumberUnsignedType, NumberFloatType, \ + AllocatorType, JSONSerializer, BinaryType> + +// Macros to simplify conversion from/to types + +#define NLOHMANN_JSON_EXPAND( x ) x +#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME +#define NLOHMANN_JSON_PASTE(...)
NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) 
NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define 
NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, 
v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) 
\ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + + +// inspired from https://stackoverflow.com/a/26745591 +// allows to call any std function as if (e.g. with begin): +// using std::begin; begin(x); +// +// it allows using the detected idiom to retrieve the return type +// of such an expression +#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ + namespace detail { \ + using std::std_name; \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + } \ + \ + namespace detail2 { \ + struct std_name##_tag \ + { \ + }; \ + \ + template \ + std_name##_tag std_name(T&&...); \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + \ + template \ + struct would_call_std_##std_name \ + { \ + static constexpr auto const value = ::nlohmann::detail:: \ + is_detected_exact::value; \ + }; \ + } /* namespace detail2 */ \ + \ + template \ + struct would_call_std_##std_name : detail2::would_call_std_##std_name \ + { \ + } + +#ifndef JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_USE_IMPLICIT_CONVERSIONS 1 +#endif + +#if JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_EXPLICIT +#else + #define JSON_EXPLICIT explicit +#endif + +#ifndef JSON_DISABLE_ENUM_SERIALIZATION + #define JSON_DISABLE_ENUM_SERIALIZATION 0 +#endif + +#ifndef JSON_USE_GLOBAL_UDLS + #define JSON_USE_GLOBAL_UDLS 1 +#endif + +#if JSON_HAS_THREE_WAY_COMPARISON + #include // partial_ordering +#endif + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. + +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. 
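As a usage sketch (not part of this patch), the NLOHMANN_DEFINE_TYPE_* convenience macros defined a few lines above expand, through the NLOHMANN_JSON_PASTE chain, into to_json/from_json overloads for a user type. The include path and the Person struct below are illustrative assumptions only:

    #include <string>
    #include "json.hpp"   // assumed path to the vendored header; adjust as needed

    struct Person {
        std::string name;
        int age = 0;
    };
    // Generates free to_json/from_json overloads covering each listed member.
    NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Person, name, age)

    // nlohmann::json j = Person{"Ada", 36};   // serializes to {"age":36,"name":"Ada"}
    // Person p = j.get<Person>();             // deserializes via the generated from_json

The _WITH_DEFAULT variants differ only in that a missing key falls back to the corresponding member of a default-constructed object instead of throwing, per the value(#v1, ...) call in their expansion.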
+ +@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + +@since version 1.0.0 +*/ +enum class value_t : std::uint8_t +{ + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) + discarded ///< discarded by the parser callback function +}; + +/*! +@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string < binary +- furthermore, each type is not smaller than itself +- discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. + +@since version 1.0.0 +*/ +#if JSON_HAS_THREE_WAY_COMPARISON + inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* +#else + inline bool operator<(const value_t lhs, const value_t rhs) noexcept +#endif +{ + static constexpr std::array order = {{ + 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ + } + }; + + const auto l_index = static_cast(lhs); + const auto r_index = static_cast(rhs); +#if JSON_HAS_THREE_WAY_COMPARISON + if (l_index < order.size() && r_index < order.size()) + { + return order[l_index] <=> order[r_index]; // *NOPAD* + } + return std::partial_ordering::unordered; +#else + return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; +#endif +} + +// GCC selects the built-in operator< over an operator rewritten from +// a user-defined spaceship operator +// Clang, MSVC, and ICC select the rewritten candidate +// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) +#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + return std::is_lt(lhs <=> rhs); // *NOPAD* +} +#endif + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/*! +@brief replace all occurrences of a substring by another string + +@param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t +@param[in] f the substring to replace with @a t +@param[in] t the string to replace @a f + +@pre The search string @a f must not be empty. 
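As an aside on the comparison operator above: it gives value_t the Python-like rank order described in its comment (null < boolean < number < object < array < string < binary), and this rank is what relational comparisons between JSON values of different types fall back to. A small illustrative sketch, not part of this patch:

    nlohmann::json null_v = nullptr;
    nlohmann::json bool_v = true;
    nlohmann::json num_v  = 3.14;
    // Under the ordering above, null_v < bool_v and bool_v < num_v both hold,
    // because null ranks below boolean, which ranks below number.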
**This precondition is +enforced with an assertion.** + +@since version 2.0.0 +*/ +template +inline void replace_substring(StringType& s, const StringType& f, + const StringType& t) +{ + JSON_ASSERT(!f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != StringType::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} +} + +/*! + * @brief string escaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to escape + * @return escaped string + * + * Note the order of escaping "~" to "~0" and "/" to "~1" is important. + */ +template +inline StringType escape(StringType s) +{ + replace_substring(s, StringType{"~"}, StringType{"~0"}); + replace_substring(s, StringType{"/"}, StringType{"~1"}); + return s; +} + +/*! + * @brief string unescaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to unescape + * @return unescaped string + * + * Note the order of escaping "~1" to "/" and "~0" to "~" is important. + */ +template +static void unescape(StringType& s) +{ + replace_substring(s, StringType{"~1"}, StringType{"/"}); + replace_substring(s, StringType{"~0"}, StringType{"~"}); +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // size_t + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type +#include // index_sequence, make_index_sequence, index_sequence_for + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +using uncvref_t = typename std::remove_cv::type>::type; + +#ifdef JSON_HAS_CPP_14 + +// the following utilities are natively available in C++14 +using std::enable_if_t; +using std::index_sequence; +using std::make_index_sequence; +using std::index_sequence_for; + +#else + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. 
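The escape/unescape helpers above implement the RFC 6901 token encoding ("~" becomes "~0", "/" becomes "~1"), which is what json_pointer relies on when resolving object keys. A short usage sketch, not part of this patch (include path assumed as before):

    #include "json.hpp"   // assumed path to the vendored header

    // A key that itself contains '/' must be spelled with "~1" inside a pointer.
    nlohmann::json j = {{"a/b", 1}};
    int v = j.at(nlohmann::json::json_pointer("/a~1b")).get<int>();  // v == 1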
+ +//// START OF CODE FROM GOOGLE ABSEIL + +// integer_sequence +// +// Class template representing a compile-time integer sequence. An instantiation +// of `integer_sequence` has a sequence of integers encoded in its +// type through its template arguments (which is a common need when +// working with C++11 variadic templates). `absl::integer_sequence` is designed +// to be a drop-in replacement for C++14's `std::integer_sequence`. +// +// Example: +// +// template< class T, T... Ints > +// void user_function(integer_sequence); +// +// int main() +// { +// // user_function's `T` will be deduced to `int` and `Ints...` +// // will be deduced to `0, 1, 2, 3, 4`. +// user_function(make_integer_sequence()); +// } +template +struct integer_sequence +{ + using value_type = T; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +// index_sequence +// +// A helper template for an `integer_sequence` of `size_t`, +// `absl::index_sequence` is designed to be a drop-in replacement for C++14's +// `std::index_sequence`. +template +using index_sequence = integer_sequence; + +namespace utility_internal +{ + +template +struct Extend; + +// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. +template +struct Extend, SeqSize, 0> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; +}; + +template +struct Extend, SeqSize, 1> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; +}; + +// Recursion helper for 'make_integer_sequence'. +// 'Gen::type' is an alias for 'integer_sequence'. +template +struct Gen +{ + using type = + typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; +}; + +template +struct Gen +{ + using type = integer_sequence; +}; + +} // namespace utility_internal + +// Compile-time sequences of integers + +// make_integer_sequence +// +// This template alias is equivalent to +// `integer_sequence`, and is designed to be a drop-in +// replacement for C++14's `std::make_integer_sequence`. +template +using make_integer_sequence = typename utility_internal::Gen::type; + +// make_index_sequence +// +// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, +// and is designed to be a drop-in replacement for C++14's +// `std::make_index_sequence`. +template +using make_index_sequence = make_integer_sequence; + +// index_sequence_for +// +// Converts a typename pack into an index sequence of the same length, and +// is designed to be a drop-in replacement for C++14's +// `std::index_sequence_for()` +template +using index_sequence_for = make_index_sequence; + +//// END OF CODE FROM GOOGLE ABSEIL + +#endif + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static JSON_INLINE_VARIABLE constexpr T value{}; +}; + +#ifndef JSON_HAS_CPP_17 + template + constexpr T static_const::value; +#endif + +template +inline constexpr std::array make_array(Args&& ... 
args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval +#include // tuple + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // random_access_iterator_tag + +// #include + +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +struct iterator_types {}; + +template +struct iterator_types < + It, + void_t> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template +struct iterator_traits +{ +}; + +template +struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> + : iterator_types +{ +}; + +template +struct iterator_traits::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); + +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); + +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ + #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + #include // int64_t, uint64_t + #include // map + #include // allocator + #include // string + #include // vector + + // #include + + + /*! + @brief namespace for Niels Lohmann + @see https://github.com/nlohmann + @since version 1.0.0 + */ + NLOHMANN_JSON_NAMESPACE_BEGIN + + /*! 
+ @brief default JSONSerializer template argument + + This serializer ignores the template arguments and uses ADL + ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) + for serialization. + */ + template + struct adl_serializer; + + /// a class to store JSON values + /// @sa https://json.nlohmann.me/api/basic_json/ + template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer, + class BinaryType = std::vector> + class basic_json; + + /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document + /// @sa https://json.nlohmann.me/api/json_pointer/ + template + class json_pointer; + + /*! + @brief default specialization + @sa https://json.nlohmann.me/api/json/ + */ + using json = basic_json<>; + + /// @brief a minimal map-like container that preserves insertion order + /// @sa https://json.nlohmann.me/api/ordered_map/ + template + struct ordered_map; + + /// @brief specialization that maintains the insertion order of object keys + /// @sa https://json.nlohmann.me/api/ordered_json/ + using ordered_json = basic_json; + + NLOHMANN_JSON_NAMESPACE_END + +#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + +NLOHMANN_JSON_NAMESPACE_BEGIN +/*! +@brief detail namespace with internal helper functions + +This namespace collects functions that should not be exposed, +implementations of some @ref basic_json methods, and meta-programming helpers. + +@since version 2.1.0 +*/ +namespace detail +{ + +///////////// +// helpers // +///////////// + +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. +// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval()))) +// +// In this case, T has to be properly CV-qualified to constraint the function arguments +// (e.g. 
to_json(BasicJsonType&, const T&)) + +template struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json : std::true_type {}; + +// used by exceptions create() member functions +// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t +// false_type otherwise +template +struct is_basic_json_context : + std::integral_constant < bool, + is_basic_json::type>::type>::value + || std::is_same::value > +{}; + +////////////////////// +// json_ref helpers // +////////////////////// + +template +class json_ref; + +template +struct is_json_ref : std::false_type {}; + +template +struct is_json_ref> : std::true_type {}; + +////////////////////////// +// aliases for detected // +////////////////////////// + +template +using mapped_type_t = typename T::mapped_type; + +template +using key_type_t = typename T::key_type; + +template +using value_type_t = typename T::value_type; + +template +using difference_type_t = typename T::difference_type; + +template +using pointer_t = typename T::pointer; + +template +using reference_t = typename T::reference; + +template +using iterator_category_t = typename T::iterator_category; + +template +using to_json_function = decltype(T::to_json(std::declval()...)); + +template +using from_json_function = decltype(T::from_json(std::declval()...)); + +template +using get_template_function = decltype(std::declval().template get()); + +// trait checking if JSONSerializer::from_json(json const&, udt&) exists +template +struct has_from_json : std::false_type {}; + +// trait checking if j.get is valid +// use this trait instead of std::is_constructible or std::is_convertible, +// both rely on, or make use of implicit conversions, and thus fail when T +// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) +template +struct is_getable +{ + static constexpr bool value = is_detected::value; +}; + +template +struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if JSONSerializer::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template +struct has_non_default_from_json : std::false_type {}; + +template +struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if BasicJsonType::json_serializer::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
+template +struct has_to_json : std::false_type {}; + +template +struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +template +using detect_key_compare = typename T::key_compare; + +template +struct has_key_compare : std::integral_constant::value> {}; + +// obtains the actual object key comparator +template +struct actual_object_comparator +{ + using object_t = typename BasicJsonType::object_t; + using object_comparator_t = typename BasicJsonType::default_object_comparator_t; + using type = typename std::conditional < has_key_compare::value, + typename object_t::key_compare, object_comparator_t>::type; +}; + +template +using actual_object_comparator_t = typename actual_object_comparator::type; + +/////////////////// +// is_ functions // +/////////////////// + +// https://en.cppreference.com/w/cpp/types/conjunction +template struct conjunction : std::true_type { }; +template struct conjunction : B { }; +template +struct conjunction +: std::conditional(B::value), conjunction, B>::type {}; + +// https://en.cppreference.com/w/cpp/types/negation +template struct negation : std::integral_constant < bool, !B::value > { }; + +// Reimplementation of is_constructible and is_default_constructible, due to them being broken for +// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). +// This causes compile errors in e.g. clang 3.5 or gcc 4.9. +template +struct is_default_constructible : std::is_default_constructible {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + + +template +struct is_constructible : std::is_constructible {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + + +template +struct is_iterator_traits : std::false_type {}; + +template +struct is_iterator_traits> +{ + private: + using traits = iterator_traits; + + public: + static constexpr auto value = + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value; +}; + +template +struct is_range +{ + private: + using t_ref = typename std::add_lvalue_reference::type; + + using iterator = detected_t; + using sentinel = detected_t; + + // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator + // and https://en.cppreference.com/w/cpp/iterator/sentinel_for + // but reimplementing these would be too much work, as a lot of other concepts are used underneath + static constexpr auto is_iterator_begin = + is_iterator_traits>::value; + + public: + static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; +}; + +template +using iterator_t = enable_if_t::value, result_of_begin())>>; + +template +using range_value_t = value_type_t>>; + +// The following implementation of is_complete_type is taken from +// 
https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ +// and is written by Xiang Fan who agreed to using it in this library. + +template +struct is_complete_type : std::false_type {}; + +template +struct is_complete_type : std::true_type {}; + +template +struct is_compatible_object_type_impl : std::false_type {}; + +template +struct is_compatible_object_type_impl < + BasicJsonType, CompatibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + // macOS's is_constructible does not play well with nonesuch... + static constexpr bool value = + is_constructible::value && + is_constructible::value; +}; + +template +struct is_compatible_object_type + : is_compatible_object_type_impl {}; + +template +struct is_constructible_object_type_impl : std::false_type {}; + +template +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (is_default_constructible::value && + (std::is_move_assignable::value || + std::is_copy_assignable::value) && + (is_constructible::value && + std::is_same < + typename object_t::mapped_type, + typename ConstructibleObjectType::mapped_type >::value)) || + (has_from_json::value || + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value); +}; + +template +struct is_constructible_object_type + : is_constructible_object_type_impl {}; + +template +struct is_compatible_string_type +{ + static constexpr auto value = + is_constructible::value; +}; + +template +struct is_constructible_string_type +{ + // launder type through decltype() to fix compilation failure on ICPC +#ifdef __INTEL_COMPILER + using laundered_type = decltype(std::declval()); +#else + using laundered_type = ConstructibleStringType; +#endif + + static constexpr auto value = + conjunction < + is_constructible, + is_detected_exact>::value; +}; + +template +struct is_compatible_array_type_impl : std::false_type {}; + +template +struct is_compatible_array_type_impl < + BasicJsonType, CompatibleArrayType, + enable_if_t < + is_detected::value&& + is_iterator_traits>>::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 + !std::is_same>::value >> +{ + static constexpr bool value = + is_constructible>::value; +}; + +template +struct is_compatible_array_type + : is_compatible_array_type_impl {}; + +template +struct is_constructible_array_type_impl : std::false_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t::value >> + : std::true_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t < !std::is_same::value&& + !is_compatible_string_type::value&& + is_default_constructible::value&& +(std::is_move_assignable::value || + std::is_copy_assignable::value)&& +is_detected::value&& +is_iterator_traits>>::value&& +is_detected::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. 
https://github.com/nlohmann/json/pull/3073 +!std::is_same>::value&& + is_complete_type < + detected_t>::value >> +{ + using value_type = range_value_t; + + static constexpr bool value = + std::is_same::value || + has_from_json::value || + has_non_default_from_json < + BasicJsonType, + value_type >::value; +}; + +template +struct is_constructible_array_type + : is_constructible_array_type_impl {}; + +template +struct is_compatible_integer_type_impl : std::false_type {}; + +template +struct is_compatible_integer_type_impl < + RealIntegerType, CompatibleNumberIntegerType, + enable_if_t < std::is_integral::value&& + std::is_integral::value&& + !std::is_same::value >> +{ + // is there an assert somewhere on overflows? + using RealLimits = std::numeric_limits; + using CompatibleLimits = std::numeric_limits; + + static constexpr auto value = + is_constructible::value && + CompatibleLimits::is_integer && + RealLimits::is_signed == CompatibleLimits::is_signed; +}; + +template +struct is_compatible_integer_type + : is_compatible_integer_type_impl {}; + +template +struct is_compatible_type_impl: std::false_type {}; + +template +struct is_compatible_type_impl < + BasicJsonType, CompatibleType, + enable_if_t::value >> +{ + static constexpr bool value = + has_to_json::value; +}; + +template +struct is_compatible_type + : is_compatible_type_impl {}; + +template +struct is_constructible_tuple : std::false_type {}; + +template +struct is_constructible_tuple> : conjunction...> {}; + +template +struct is_json_iterator_of : std::false_type {}; + +template +struct is_json_iterator_of : std::true_type {}; + +template +struct is_json_iterator_of : std::true_type +{}; + +// checks if a given type T is a template specialization of Primary +template