// Convert one FP8 E5M2 value (1 sign bit, 5 exponent bits, 2 mantissa bits)
// to the bit pattern of an IEEE 754 binary16 (fp16) value.
//
// E5M2 uses the same sign position, the same 5-bit exponent field and the same
// exponent bias (15) as fp16; it simply has 8 fewer mantissa bits. Widening is
// therefore exact and amounts to placing the 8 input bits in the upper byte of
// the 16-bit result:
//   - zero        (exp == 0,    mant == 0) -> signed zero
//   - subnormal   (exp == 0,    mant != 0) -> fp16 subnormal with the same value
//   - normal      (0 < exp < 0x1F)         -> same exponent, mantissa << 8
//   - infinity    (exp == 0x1F, mant == 0) -> fp16 infinity
//   - NaN         (exp == 0x1F, mant != 0) -> fp16 NaN (payload kept in top bits)
// No re-biasing, clamping or rounding is needed, so no per-class branching is
// required.
//
// @param fp8  raw E5M2 byte
// @return     raw fp16 bit pattern representing the same value
uint16_t f8_e5m2_to_f16(uint8_t fp8) {
    return static_cast<uint16_t>(static_cast<uint16_t>(fp8) << 8);
}

// Convert `n` FP8 E5M2 bytes from `src` into `n` fp16 bit patterns in `dst`.
//
// `src` and `dst` may refer to the same buffer (in-place widening): elements
// are processed from the last to the first, so the two bytes written for
// element i (offsets 2*i and 2*i + 1) never overwrite an input byte at an
// offset below i that has not been read yet.
//
// @param src  pointer to at least `n` input bytes
// @param dst  pointer to room for at least `n` 16-bit outputs
// @param n    number of elements; values <= 0 perform no work
void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
    // support inplace op
    for (int64_t i = n - 1; i >= 0; i--) {
        dst[i] = f8_e5m2_to_f16(src[i]);
    }
}
return ttype; } @@ -980,6 +1030,10 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const tensor_storage.is_f8_e4m3 = true; // f8 -> f16 GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2); + } else if (dtype == "F8_E5M2") { + tensor_storage.is_f8_e5m2 = true; + // f8 -> f16 + GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2); } else { GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size); } @@ -1646,6 +1700,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend } else if (tensor_storage.is_f8_e4m3) { // inplace op f8_e4m3_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements()); + } else if (tensor_storage.is_f8_e5m2) { + // inplace op + f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements()); } } else { read_buffer.resize(tensor_storage.nbytes()); @@ -1657,6 +1714,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend } else if (tensor_storage.is_f8_e4m3) { // inplace op f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements()); + } else if (tensor_storage.is_f8_e5m2) { + // inplace op + f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements()); } convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data, @@ -1672,6 +1732,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend } else if (tensor_storage.is_f8_e4m3) { // inplace op f8_e4m3_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements()); + } else if (tensor_storage.is_f8_e5m2) { + // inplace op + f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements()); } if (tensor_storage.type == dst_tensor->type) { diff --git a/otherarch/sdcpp/model.h b/otherarch/sdcpp/model.h 
index f890db67f..a9a046c21 100644 --- a/otherarch/sdcpp/model.h +++ b/otherarch/sdcpp/model.h @@ -35,6 +35,7 @@ struct TensorStorage { ggml_type type = GGML_TYPE_F32; bool is_bf16 = false; bool is_f8_e4m3 = false; + bool is_f8_e5m2 = false; int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1}; int n_dims = 0; @@ -64,7 +65,7 @@ struct TensorStorage { } int64_t nbytes_to_read() const { - if (is_bf16 || is_f8_e4m3) { + if (is_bf16 || is_f8_e4m3 || is_f8_e5m2) { return nbytes() / 2; } else { return nbytes(); @@ -114,6 +115,8 @@ struct TensorStorage { type_name = "bf16"; } else if (is_f8_e4m3) { type_name = "f8_e4m3"; + } else if (is_f8_e5m2) { + type_name = "f8_e5m2"; } ss << name << " | " << type_name << " | "; ss << n_dims << " [";