// Origin: commit f9bc7245abc01445b28f0c0b686b46ba237d8f69 ("b64 decoder").
// The commit appends the definitions below to otherarch/utils.cpp and adds
// the declaration of kcpp_base64_decode to otherarch/utils.h.

// Public declaration (exported through otherarch/utils.h).
std::vector<uint8_t> kcpp_base64_decode(const std::string & encoded_string);

// The 64 symbols of the standard base64 alphabet. A symbol's position in this
// string is the 6-bit value it encodes.
static const std::string kcpp_base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

// Returns true when `c` is one of the 64 base64 alphabet symbols.
// The padding character '=' is not part of the alphabet and yields false.
// Membership is tested against the alphabet itself rather than isalnum(), so
// the answer does not depend on the active C locale.
static inline bool kcpp_is_base64(uint8_t c)
{
    return kcpp_base64_chars.find(static_cast<char>(c)) != std::string::npos;
}

// Decodes a base64 string into raw bytes.
//
// Decoding stops at the first character that is not in the base64 alphabet
// (this includes the '=' padding character, whitespace and embedded NULs);
// everything after that character is ignored.
//
// A trailing incomplete group contributes as many whole bytes as its bits
// allow: 2 symbols -> 1 byte, 3 symbols -> 2 bytes, 1 symbol -> nothing.
//
// @param encoded_string  base64 text, with or without '=' padding.
// @return the decoded bytes; empty when the input is empty or starts with a
//         character outside the alphabet.
std::vector<uint8_t> kcpp_base64_decode(const std::string & encoded_string)
{
    std::vector<uint8_t> ret;
    // Every 4 input symbols produce at most 3 output bytes.
    ret.reserve((encoded_string.size() / 4) * 3 + 2);

    uint32_t bit_buffer = 0; // most recently read bits sit in the low end
    int bit_count = 0;       // number of not-yet-emitted bits in bit_buffer

    for (const char ch : encoded_string)
    {
        // One lookup both validates the symbol and yields its 6-bit value.
        const std::string::size_type value = kcpp_base64_chars.find(ch);
        if (value == std::string::npos)
        {
            break; // '=' padding or any foreign character ends the payload
        }

        // Unsigned shift: stale high bits simply fall off the top.
        bit_buffer = (bit_buffer << 6) | static_cast<uint32_t>(value);
        bit_count += 6;

        if (bit_count >= 8)
        {
            bit_count -= 8;
            ret.push_back(static_cast<uint8_t>((bit_buffer >> bit_count) & 0xFFu));
        }
    }

    // Any leftover bits (fewer than 8) belong to an incomplete byte and are dropped.
    return ret;
}