base64.hpp source code [llama.cpp/common/base64.hpp]

1	/*
2	This is free and unencumbered software released into the public domain.
3
4	Anyone is free to copy, modify, publish, use, compile, sell, or
5	distribute this software, either in source code form or as a compiled
6	binary, for any purpose, commercial or non-commercial, and by any
7	means.
8
9	In jurisdictions that recognize copyright laws, the author or authors
10	of this software dedicate any and all copyright interest in the
11	software to the public domain. We make this dedication for the benefit
12	of the public at large and to the detriment of our heirs and
13	successors. We intend this dedication to be an overt act of
14	relinquishment in perpetuity of all present and future rights to this
15	software under copyright law.
16
17	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20	IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21	OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22	ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23	OTHER DEALINGS IN THE SOFTWARE.
24
25	For more information, please refer to <http://unlicense.org>
26	*/
27
28	#ifndef PUBLIC_DOMAIN_BASE64_HPP_
29	#define PUBLIC_DOMAIN_BASE64_HPP_
30
31	#include <cstdint>
32	#include <iterator>
33	#include <stdexcept>
34	#include <string>
35
/**
Exception type used to report base64 failures.

It derives from `std::runtime_error`, so it can be caught either by its own type or through any of the standard
exception base classes, and `what()` returns the message it was constructed with.
*/
class base64_error : public std::runtime_error
{
public:
    /**
    Creates the error from a string message.

    @param message the text returned by `what()`
    */
    explicit base64_error(const std::string& message) : std::runtime_error(message)
    {
    }

    /**
    Creates the error from a null-terminated message.

    @param message the text returned by `what()`
    */
    explicit base64_error(const char* message) : std::runtime_error(message)
    {
    }
};
41
42	class base64
43	{
44	public:
45	enum class alphabet
46	{
47	/* the alphabet is detected automatically /
48	auto_,
49	/* the standard base64 alphabet is used /
50	standard,
51	/* like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively/
52	url_filename_safe
53	};
54
55	enum class decoding_behavior
56	{
57	/* if the input is not padded, the remaining bits are ignored /
58	moderate,
59	/* if a padding character is encounter decoding is finished /
60	loose
61	};
62
63	/**
64	Encodes all the elements from `in_begin` to `in_end` to `out`.
65
66	@warning The source and destination cannot overlap. The destination must be able to hold at least
67	`required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.
68
69	@tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
70	8 bits
71	@tparam Output_iterator the destination; the elements written to it are from the type `char`
72	@param in_begin the beginning of the source
73	@param in_end the ending of the source
74	@param out the destination iterator
75	@param alphabet which alphabet should be used
76	@returns the iterator to the next element past the last element copied
77	@throws see `Input_iterator` and `Output_iterator`
78	*/
79	template<typename Input_iterator, typename Output_iterator>
80	static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
81	alphabet alphabet = alphabet::standard)
82	{
83	constexpr auto pad = `'='`;
84	const char* alpha = alphabet == alphabet::url_filename_safe
85	? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
86	: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
87
88	while (in_begin != in_end) {
89	std::uint8_t i0 = `0`, i1 = `0`, i2 = `0`;
90
91	// first character
92	i0 = static_cast<std::uint8_t>(*in_begin);
93	++in_begin;
94
95	*out = alpha[i0 >> `2` & `0x3f`];
96	++out;
97
98	// part of first character and second
99	if (in_begin != in_end) {
100	i1 = static_cast<std::uint8_t>(*in_begin);
101	++in_begin;
102
103	*out = alpha[((i0 & `0x3`) << `4`) \| (i1 >> `4` & `0x0f`)];
104	++out;
105	} else {
106	*out = alpha[(i0 & `0x3`) << `4`];
107	++out;
108
109	// last padding
110	*out = pad;
111	++out;
112
113	// last padding
114	*out = pad;
115	++out;
116
117	break;
118	}
119
120	// part of second character and third
121	if (in_begin != in_end) {
122	i2 = static_cast<std::uint8_t>(*in_begin);
123	++in_begin;
124
125	*out = alpha[((i1 & `0xf`) << `2`) \| (i2 >> `6` & `0x03`)];
126	++out;
127	} else {
128	*out = alpha[(i1 & `0xf`) << `2`];
129	++out;
130
131	// last padding
132	*out = pad;
133	++out;
134
135	break;
136	}
137
138	// rest of third
139	*out = alpha[i2 & `0x3f`];
140	++out;
141	}
142
143	return out;
144	}
145	/**
146	Encodes a string.
147
148	@param str the string that should be encoded
149	@param alphabet which alphabet should be used
150	@returns the encoded base64 string
151	@throws see base64::encode()
152	*/
153	static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
154	{
155	std::string result;
156
157	result.reserve(res_arg: required_encode_size(size: str.length()) + `1`);
158
159	encode(in_begin: str.begin(), in_end: str.end(), out: std::back_inserter(x&: result), alphabet);
160
161	return result;
162	}
163	/**
164	Encodes a char array.
165
166	@param buffer the char array
167	@param size the size of the array
168	@param alphabet which alphabet should be used
169	@returns the encoded string
170	*/
171	static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
172	{
173	std::string result;
174
175	result.reserve(res_arg: required_encode_size(size) + `1`);
176
177	encode(in_begin: buffer, in_end: buffer + size, out: std::back_inserter(x&: result), alphabet);
178
179	return result;
180	}
181	/**
182	Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
183	in other words: inplace decoding is possible.
184
185	@warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`,
186	otherwise the behavior depends on the output iterator.
187
188	@tparam Input_iterator the source; the returned elements are cast to `char`
189	@tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
190	@param in_begin the beginning of the source
191	@param in_end the ending of the source
192	@param out the destination iterator
193	@param alphabet which alphabet should be used
194	@param behavior the behavior when an error was detected
195	@returns the iterator to the next element past the last element copied
196	@throws base64_error depending on the set behavior
197	@throws see `Input_iterator` and `Output_iterator`
198	*/
199	template<typename Input_iterator, typename Output_iterator>
200	static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
201	alphabet alphabet = alphabet::auto_,
202	decoding_behavior behavior = decoding_behavior::moderate)
203	{
204	//constexpr auto pad = '=';
205	std::uint8_t last = `0`;
206	auto bits = `0`;
207
208	while (in_begin != in_end) {
209	auto c = *in_begin;
210	++in_begin;
211
212	if (c == `'='`) {
213	break;
214	}
215
216	auto part = _base64_value(alphabet, c);
217
218	// enough bits for one byte
219	if (bits + `6` >= `8`) {
220	*out = (last << (`8` - bits)) \| (part >> (bits - `2`));
221	++out;
222
223	bits -= `2`;
224	} else {
225	bits += `6`;
226	}
227
228	last = part;
229	}
230
231	// check padding
232	if (behavior != decoding_behavior::loose) {
233	while (in_begin != in_end) {
234	auto c = *in_begin;
235	++in_begin;
236
237	if (c != `'='`) {
238	throw base64_error ("invalid base64 character.");
239	}
240	}
241	}
242
243	return out;
244	}
245	/**
246	Decodes a string.
247
248	@param str the base64 encoded string
249	@param alphabet which alphabet should be used
250	@param behavior the behavior when an error was detected
251	@returns the decoded string
252	@throws see base64::decode()
253	*/
254	static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
255	decoding_behavior behavior = decoding_behavior::moderate)
256	{
257	std::string result;
258
259	result.reserve(res_arg: max_decode_size(size: str.length()));
260
261	decode(in_begin: str.begin(), in_end: str.end(), out: std::back_inserter(x&: result), alphabet, behavior);
262
263	return result;
264	}
265	/**
266	Decodes a string.
267
268	@param buffer the base64 encoded buffer
269	@param size the size of the buffer
270	@param alphabet which alphabet should be used
271	@param behavior the behavior when an error was detected
272	@returns the decoded string
273	@throws see base64::decode()
274	*/
275	static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
276	decoding_behavior behavior = decoding_behavior::moderate)
277	{
278	std::string result;
279
280	result.reserve(res_arg: max_decode_size(size));
281
282	decode(in_begin: buffer, in_end: buffer + size, out: std::back_inserter(x&: result), alphabet, behavior);
283
284	return result;
285	}
286	/**
287	Decodes a string inplace.
288
289	@param[in,out] str the base64 encoded string
290	@param alphabet which alphabet should be used
291	@param behavior the behavior when an error was detected
292	@throws base64::decode_inplace()
293	*/
294	static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
295	decoding_behavior behavior = decoding_behavior::moderate)
296	{
297	str.resize(n: decode(in_begin: str.begin(), in_end: str.end(), out: str.begin(), alphabet, behavior) - str.begin());
298	}
299	/**
300	Decodes a char array inplace.
301
302	@param[in,out] str the string array
303	@param size the length of the array
304	@param alphabet which alphabet should be used
305	@param behavior the behavior when an error was detected
306	@returns the pointer to the next element past the last element decoded
307	@throws base64::decode_inplace()
308	*/
309	static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
310	decoding_behavior behavior = decoding_behavior::moderate)
311	{
312	return decode(in_begin: str, in_end: str + size, out: str, alphabet, behavior);
313	}
314	/**
315	Returns the required decoding size for a given size. The value is calculated with the following formula:
316
317	$$
318	\lceil \frac{size}{4} \rceil \cdot 3
319	$$
320
321	@param size the size of the encoded input
322	@returns the size of the resulting decoded buffer; this the absolute maximum
323	*/
324	static std::size_t max_decode_size(std::size_t size) noexcept
325	{
326	return (size / `4` + (size % `4` ? `1` : `0`)) * `3`;
327	}
328	/**
329	Returns the required encoding size for a given size. The value is calculated with the following formula:
330
331	$$
332	\lceil \frac{size}{3} \rceil \cdot 4
333	$$
334
335	@param size the size of the decoded input
336	@returns the size of the resulting encoded buffer
337	*/
338	static std::size_t required_encode_size(std::size_t size) noexcept
339	{
340	return (size / `3` + (size % `3` ? `1` : `0`)) * `4`;
341	}
342
343	private:
344	static std::uint8_t _base64_value(alphabet& alphabet, char c)
345	{
346	if (c >= `'A'` && c <= `'Z'`) {
347	return c - `'A'`;
348	} else if (c >= `'a'` && c <= `'z'`) {
349	return c - `'a'` + `26`;
350	} else if (c >= `'0'` && c <= `'9'`) {
351	return c - `'0'` + `52`;
352	}
353
354	// comes down to alphabet
355	if (alphabet == alphabet::standard) {
356	if (c == `'+'`) {
357	return `62`;
358	} else if (c == `'/'`) {
359	return `63`;
360	}
361	} else if (alphabet == alphabet::url_filename_safe) {
362	if (c == `'-'`) {
363	return `62`;
364	} else if (c == `'_'`) {
365	return `63`;
366	}
367	} // auto detect
368	else {
369	if (c == `'+'`) {
370	alphabet = alphabet::standard;
371
372	return `62`;
373	} else if (c == `'/'`) {
374	alphabet = alphabet::standard;
375
376	return `63`;
377	} else if (c == `'-'`) {
378	alphabet = alphabet::url_filename_safe;
379
380	return `62`;
381	} else if (c == `'_'`) {
382	alphabet = alphabet::url_filename_safe;
383
384	return `63`;
385	}
386	}
387
388	throw base64_error ("invalid base64 character.");
389	}
390	};
391
392	#endif // !PUBLIC_DOMAIN_BASE64_HPP_
393

Browse the source code of llama.cpp/common/base64.hpp