/*
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org>
*/
27
28#ifndef PUBLIC_DOMAIN_BASE64_HPP_
29#define PUBLIC_DOMAIN_BASE64_HPP_
30
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <stdexcept>
#include <string>
35
/**
 The exception type thrown by `base64` when decoding fails, i.e. when a character outside of the active alphabet is
 encountered or when something other than padding follows a padding character.

 All constructors of `std::runtime_error` are inherited.
*/
class base64_error : public std::runtime_error
{
public:
    using std::runtime_error::runtime_error;
};

/**
 A collection of static functions for encoding to and decoding from base64. Both the standard and the URL and
 filename safe alphabet are supported.
*/
class base64
{
public:
    enum class alphabet
    {
        /** the alphabet is detected automatically */
        auto_,
        /** the standard base64 alphabet is used */
        standard,
        /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively */
        url_filename_safe
    };

    enum class decoding_behavior
    {
        /**
         decoding stops at the first padding character and every character after it must be a padding character
         too, otherwise a `base64_error` is thrown; if the input is not padded, the remaining bits are ignored
        */
        moderate,
        /** if a padding character is encountered decoding is finished; everything after it is ignored */
        loose
    };

    /**
     Encodes all the elements from `in_begin` to `in_end` to `out`. The output is always padded with `=` to a multiple
     of four characters.

     @warning The source and destination cannot overlap. The destination must be able to hold at least
     `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
     8 bits
     @tparam Output_iterator the destination; the elements written to it are from the type `char`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used; everything except `url_filename_safe` selects the standard
     alphabet
     @returns the iterator to the next element past the last element copied
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet = alphabet::standard)
    {
        constexpr auto pad = '=';
        const char* alpha  = alphabet == alphabet::url_filename_safe
                                 ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
                                 : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

        // every iteration consumes up to three input bytes and emits exactly four output characters
        while (in_begin != in_end) {
            std::uint8_t i0 = 0, i1 = 0, i2 = 0;

            // first byte: its upper six bits form the first character
            i0 = static_cast<std::uint8_t>(*in_begin);
            ++in_begin;

            *out = alpha[i0 >> 2 & 0x3f];
            ++out;

            // lower two bits of the first byte and upper four bits of the second
            if (in_begin != in_end) {
                i1 = static_cast<std::uint8_t>(*in_begin);
                ++in_begin;

                *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)];
                ++out;
            } else {
                // only one byte was left: flush its two remaining bits and pad twice
                *out = alpha[(i0 & 0x3) << 4];
                ++out;

                *out = pad;
                ++out;

                *out = pad;
                ++out;

                break;
            }

            // lower four bits of the second byte and upper two bits of the third
            if (in_begin != in_end) {
                i2 = static_cast<std::uint8_t>(*in_begin);
                ++in_begin;

                *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)];
                ++out;
            } else {
                // only two bytes were left: flush the four remaining bits and pad once
                *out = alpha[(i1 & 0xf) << 2];
                ++out;

                *out = pad;
                ++out;

                break;
            }

            // lower six bits of the third byte
            *out = alpha[i2 & 0x3f];
            ++out;
        }

        return out;
    }
    /**
     Encodes a string.

     @param str the string that should be encoded
     @param alphabet which alphabet should be used
     @returns the encoded base64 string
     @throws see base64::encode()
    */
    static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(str.length()) + 1);

        encode(str.begin(), str.end(), std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Encodes a char array.

     @param buffer the char array
     @param size the size of the array
     @param alphabet which alphabet should be used
     @returns the encoded string
     @throws see base64::encode()
    */
    static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(size) + 1);

        encode(buffer, buffer + size, std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
     in other words: inplace decoding is possible.

     @warning The destination must be able to hold at least `max_decode_size(std::distance(in_begin, in_end))`,
     otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are converted to `char`
     @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used; with `auto_` the alphabet is fixed by the first of the characters
     `+`, `/`, `-` or `_` that is encountered
     @param behavior the behavior when a padding character was detected
     @returns the iterator to the next element past the last element copied
     @throws base64_error if a character is not part of the alphabet or, unless `behavior` is `loose`, if a
     non-padding character follows a padding character
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet          = alphabet::auto_,
                                  decoding_behavior behavior = decoding_behavior::moderate)
    {
        constexpr auto pad = '=';
        // the previously decoded 6 bit value; its lowest `bits` bits have not been written yet
        std::uint8_t last = 0;
        int bits          = 0;

        while (in_begin != in_end) {
            // the element is copied before anything is written, which is what makes inplace decoding safe
            const auto c = *in_begin;
            ++in_begin;

            if (c == pad) {
                break;
            }

            const std::uint8_t part = _base64_value(alphabet, c);

            // enough bits for one byte
            if (bits + 6 >= 8) {
                // the shift of `last` exceeds 8 bits, so the value has to be truncated explicitly; otherwise
                // destinations with an element type wider than one byte would receive the stale upper bits
                *out = static_cast<std::uint8_t>((last << (8 - bits)) | (part >> (bits - 2)));
                ++out;

                bits -= 2;
            } else {
                bits += 6;
            }

            last = part;
        }

        // check padding: only padding characters may follow the first one
        if (behavior != decoding_behavior::loose) {
            while (in_begin != in_end) {
                const auto c = *in_begin;
                ++in_begin;

                if (c != pad) {
                    throw base64_error("invalid base64 character.");
                }
            }
        }

        return out;
    }
    /**
     Decodes a string.

     @param str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when a padding character was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(str.length()));

        decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a char array.

     @param buffer the base64 encoded buffer
     @param size the size of the buffer
     @param alphabet which alphabet should be used
     @param behavior the behavior when a padding character was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(size));

        decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a string inplace. On success the string is shrunk to the decoded length.

     @param[in,out] str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when a padding character was detected
     @throws see base64::decode(); if an exception is thrown the beginning of `str` may already be overwritten
    */
    static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
                               decoding_behavior behavior = decoding_behavior::moderate)
    {
        const auto end = decode(str.begin(), str.end(), str.begin(), alphabet, behavior);

        // the output never overtakes the input, so the distance is never negative
        str.resize(static_cast<std::size_t>(end - str.begin()));
    }
    /**
     Decodes a char array inplace.

     @param[in,out] str the char array
     @param size the length of the array
     @param alphabet which alphabet should be used
     @param behavior the behavior when a padding character was detected
     @returns the pointer to the next element past the last element decoded
     @throws see base64::decode(); if an exception is thrown the beginning of `str` may already be overwritten
    */
    static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
    {
        return decode(str, str + size, str, alphabet, behavior);
    }
    /**
     Returns the required decoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{4} \rceil \cdot 3
     $$

     @param size the size of the encoded input
     @returns the size of the resulting decoded buffer; this is the absolute maximum
    */
    static std::size_t max_decode_size(std::size_t size) noexcept
    {
        return (size / 4 + (size % 4 ? 1 : 0)) * 3;
    }
    /**
     Returns the required encoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{3} \rceil \cdot 4
     $$

     @param size the size of the decoded input
     @returns the size of the resulting encoded buffer
    */
    static std::size_t required_encode_size(std::size_t size) noexcept
    {
        return (size / 3 + (size % 3 ? 1 : 0)) * 4;
    }

private:
    /**
     Maps a base64 character to its 6 bit value.

     @param[in,out] alphabet the active alphabet; if it is `auto_` and `c` is one of `+`, `/`, `-` or `_`, it is
     updated to the alphabet that character belongs to
     @param c the character
     @returns the value of the character in the range [0, 63]
     @throws base64_error if `c` is not part of the active alphabet
    */
    static std::uint8_t _base64_value(alphabet& alphabet, char c)
    {
        // these 62 characters are shared by both alphabets
        if (c >= 'A' && c <= 'Z') {
            return c - 'A';
        } else if (c >= 'a' && c <= 'z') {
            return c - 'a' + 26;
        } else if (c >= '0' && c <= '9') {
            return c - '0' + 52;
        }

        // comes down to alphabet
        if (alphabet == alphabet::standard) {
            if (c == '+') {
                return 62;
            } else if (c == '/') {
                return 63;
            }
        } else if (alphabet == alphabet::url_filename_safe) {
            if (c == '-') {
                return 62;
            } else if (c == '_') {
                return 63;
            }
        } // auto detect: the first alphabet specific character decides for the rest of the input
        else {
            if (c == '+') {
                alphabet = alphabet::standard;

                return 62;
            } else if (c == '/') {
                alphabet = alphabet::standard;

                return 63;
            } else if (c == '-') {
                alphabet = alphabet::url_filename_safe;

                return 62;
            } else if (c == '_') {
                alphabet = alphabet::url_filename_safe;

                return 63;
            }
        }

        throw base64_error("invalid base64 character.");
    }
};
391
392#endif // !PUBLIC_DOMAIN_BASE64_HPP_
393