1#include <Common/hex.h>
2#include <Functions/FunctionFactory.h>
3#include <Functions/FunctionStringToString.h>
4#include <common/find_symbols.h>
5
6
7namespace DB
8{
9
10/// We assume that size of the dst buf isn't less than src_size.
11static size_t decodeURL(const char * src, size_t src_size, char * dst)
12{
13 const char * src_prev_pos = src;
14 const char * src_curr_pos = src;
15 const char * src_end = src + src_size;
16 char * dst_pos = dst;
17
18 while (true)
19 {
20 src_curr_pos = find_first_symbols<'%'>(src_curr_pos, src_end);
21
22 if (src_curr_pos == src_end)
23 {
24 break;
25 }
26 else if (src_end - src_curr_pos < 3)
27 {
28 src_curr_pos = src_end;
29 break;
30 }
31 else
32 {
33 unsigned char high = unhex(src_curr_pos[1]);
34 unsigned char low = unhex(src_curr_pos[2]);
35
36 if (high != 0xFF && low != 0xFF)
37 {
38 unsigned char octet = (high << 4) + low;
39
40 size_t bytes_to_copy = src_curr_pos - src_prev_pos;
41 memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
42 dst_pos += bytes_to_copy;
43
44 *dst_pos = octet;
45 ++dst_pos;
46
47 src_prev_pos = src_curr_pos + 3;
48 }
49
50 src_curr_pos += 3;
51 }
52 }
53
54 if (src_prev_pos < src_curr_pos)
55 {
56 size_t bytes_to_copy = src_curr_pos - src_prev_pos;
57 memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy);
58 dst_pos += bytes_to_copy;
59 }
60
61 return dst_pos - dst;
62}
63
64
65/// Percent decode of URL data.
66struct DecodeURLComponentImpl
67{
68 static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
69 ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
70 {
71 res_data.resize(data.size());
72 size_t size = offsets.size();
73 res_offsets.resize(size);
74
75 size_t prev_offset = 0;
76 size_t res_offset = 0;
77
78 for (size_t i = 0; i < size; ++i)
79 {
80 const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
81 size_t src_size = offsets[i] - prev_offset;
82 size_t dst_size = decodeURL(src_data, src_size, reinterpret_cast<char *>(res_data.data() + res_offset));
83
84 res_offset += dst_size;
85 res_offsets[i] = res_offset;
86 prev_offset = offsets[i];
87 }
88
89 res_data.resize(res_offset);
90 }
91
92 [[noreturn]] static void vector_fixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
93 {
94 throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN);
95 }
96};
97
98
/// Tag type that carries the function's name as a compile-time constant.
struct NameDecodeURLComponent
{
    static constexpr auto name = "decodeURLComponent";
};
100using FunctionDecodeURLComponent = FunctionStringToString<DecodeURLComponentImpl, NameDecodeURLComponent>;
101
102void registerFunctionDecodeURLComponent(FunctionFactory & factory)
103{
104 factory.registerFunction<FunctionDecodeURLComponent>();
105}
106
107}
108