#ifndef __MODEL_CONFIG_HPP_ #define __MODEL_CONFIG_HPP_ #include "nlohmann/json.hpp" #include #include #include using DimSize = size_t; using URL = std::string; using ModelName = std::string; // We must assure this can be load by config.json class ModelConfig { public: DimSize hidden_size; DimSize intermediate_size; size_t max_position_embeddings; std::string model_type; size_t num_attention_heads; size_t num_hidden_layers; size_t num_key_value_heads; size_t vocab_size; NLOHMANN_DEFINE_TYPE_INTRUSIVE(ModelConfig, hidden_size, intermediate_size, max_position_embeddings, model_type, num_attention_heads, num_hidden_layers, num_key_value_heads, vocab_size); void load_from(std::filesystem::path path) { std::cout << "Load from " << path << std::endl; std::ifstream i(path); nlohmann::json j; i >> j; *this = j.get(); } }; using QuantType = std::string; static const QuantType NoQuantType = ""; class QuantConfig { public: QuantType name; // For GEMV QuantType type_of_dot_vector = NoQuantType; inline bool can_be_used_as_matrix() { return type_of_dot_vector != NoQuantType; } bool can_be_used_as_vector; double bytes_per_element; bool has_scale; bool has_min; size_t block_element_count; size_t block_element_size; URL reference = ""; NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(QuantConfig, name, type_of_dot_vector, can_be_used_as_vector, bytes_per_element, has_scale, has_min, block_element_count, block_element_size, reference); }; inline std::map quant_configs; inline std::map model_configs; inline void load_quant_configs(std::filesystem::path path) { nlohmann::json j; if (std::filesystem::exists(path)) { std::cout << __FUNCTION__ << " from " << path << std::endl; std::ifstream i(path); i >> j; quant_configs = j.get>(); std::cout << "Loaded Quant Configs" << std::endl; for (auto &[k, v] : quant_configs) { std::cout << " - " << k << std::endl; } } else { std::cout << __FUNCTION__ << " no file at " << path << std::endl; } } inline void dump_quant_configs(std::filesystem::path path) { std::ofstream o(path); nlohmann::json j = quant_configs; o << j.dump(4); } inline void load_model_configs(std::filesystem::path path) { nlohmann::json j; if (std::filesystem::exists(path)) { std::cout << __FUNCTION__ << " from " << path << std::endl; std::ifstream i(path); i >> j; model_configs = j.get>(); std::cout << "Loaded Model Configs" << std::endl; for (auto &[k, v] : model_configs) { std::cout << " - " << k << std::endl; } } else { std::cout << __FUNCTION__ << " no file at " << path << std::endl; } } inline void dump_model_configs(std::filesystem::path path) { std::ofstream o(path); nlohmann::json j = model_configs; o << j.dump(4); } #endif