From 9735d726ad71dfad91ce987875ea0dc989d1cd04 Mon Sep 17 00:00:00 2001
From: Aaditya Srinivasan
Date: Wed, 15 Apr 2026 01:54:05 +0530
Subject: [PATCH 1/2] Optimize base64_decode validation using lookup table

---
Note: this copy of the series was recovered from a whitespace-collapsed
extraction that also dropped all text inside angle brackets. Include targets,
template arguments and cast target types below are reconstructed from context;
verify them against the original commits before applying.

 cpp/src/arrow/vendored/base64.cpp | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/vendored/base64.cpp b/cpp/src/arrow/vendored/base64.cpp
index db2f74ed98fc..d5f4638e3c1c 100644
--- a/cpp/src/arrow/vendored/base64.cpp
+++ b/cpp/src/arrow/vendored/base64.cpp
@@ -30,6 +30,8 @@
  */
 
 #include "arrow/util/base64.h"
+#include <array>
+#include <cstdint>
 #include <iostream>
 
 namespace arrow {
@@ -40,6 +42,17 @@ static const std::string base64_chars =
     "abcdefghijklmnopqrstuvwxyz"
     "0123456789+/";
 
+static const std::array<int8_t, 256> kBase64Lookup = [] {
+  std::array<int8_t, 256> table{};
+  table.fill(-1);
+
+  for (size_t i = 0; i < base64_chars.size(); ++i) {
+    table[static_cast<uint8_t>(base64_chars[i])] = static_cast<int8_t>(i);
+  }
+
+  return table;
+}();
+
 static std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
   std::string ret;
   int i = 0;
@@ -119,22 +132,18 @@ Result<std::string> base64_decode(std::string_view encoded_string) {
         return Status::Invalid("Invalid base64 input: padding in wrong position");
       }
 
-      if (base64_chars.find(c) == std::string::npos) {
+      int8_t val = kBase64Lookup[static_cast<uint8_t>(c)];
+
+      if (val == -1) {
         return Status::Invalid("Invalid base64 input: character is not valid base64 character");
       }
 
-      char_array_4[i++] = c;
+      char_array_4[i++] = val;
     }
 
     in_++;
 
     if (i == 4) {
-      for (i = 0; i < 4; i++) {
-        if (char_array_4[i] != 0) {
-          char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
-        }
-      }
-
       char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
       char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
       char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

From a4e7a084c77f170d19b7434ff4b2df702e30ca30 Mon Sep 17 00:00:00 2001
From: Aaditya Srinivasan
Date: Sat, 18 Apr 2026 11:27:29 +0530
Subject: [PATCH 2/2] Use constexpr std::string_view for base64_chars

---
 cpp/src/arrow/vendored/base64.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/vendored/base64.cpp b/cpp/src/arrow/vendored/base64.cpp
index d5f4638e3c1c..f7b105e53303 100644
--- a/cpp/src/arrow/vendored/base64.cpp
+++ b/cpp/src/arrow/vendored/base64.cpp
@@ -37,7 +37,7 @@
 namespace arrow {
 namespace util {
 
-static const std::string base64_chars =
+constexpr std::string_view base64_chars =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     "abcdefghijklmnopqrstuvwxyz"
     "0123456789+/";