1#pragma once
2
3#include <DataTypes/DataTypesNumber.h>
4#include <DataTypes/DataTypeString.h>
5#include <DataTypes/DataTypeFixedString.h>
6#include <Columns/ColumnString.h>
7#include <Common/Volnitsky.h>
8#include <Functions/IFunctionImpl.h>
9#include <Functions/FunctionHelpers.h>
10#include <IO/ReadBufferFromMemory.h>
11#include <IO/ReadHelpers.h>
12
13
14/** Functions for retrieving "visit parameters".
 * Visit parameters in Yandex.Metrika are a special kind of JSON.
 * These functions are applicable to almost any JSON.
17 * Implemented via templates from FunctionsStringSearch.h.
18 *
19 * Check if there is a parameter
20 * visitParamHas
21 *
22 * Retrieve the numeric value of the parameter
23 * visitParamExtractUInt
24 * visitParamExtractInt
25 * visitParamExtractFloat
26 * visitParamExtractBool
27 *
28 * Retrieve the string value of the parameter
29 * visitParamExtractString - unescape value
30 * visitParamExtractRaw
31 */
32
33namespace DB
34{
35
/// Error codes referenced by this header. Only declared here (`extern`); the definition is not in this file.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}
40
41
42template <typename NumericType>
43struct ExtractNumericType
44{
45 using ResultType = NumericType;
46
47 static ResultType extract(const UInt8 * begin, const UInt8 * end)
48 {
49 ReadBufferFromMemory in(begin, end - begin);
50
51 /// Read numbers in double quotes
52 if (!in.eof() && *in.position() == '"')
53 ++in.position();
54
55 ResultType x = 0;
56 if (!in.eof())
57 {
58 if constexpr (std::is_floating_point_v<NumericType>)
59 tryReadFloatText(x, in);
60 else
61 tryReadIntText(x, in);
62 }
63 return x;
64 }
65};
66
67
68/** Searches for occurrences of a field in the visit parameter and calls ParamExtractor
69 * for each occurrence of the field, passing it a pointer to the part of the string,
70 * where the occurrence of the field value begins.
71 * ParamExtractor must parse and return the value of the desired type.
72 *
73 * If a field was not found or an incorrect value is associated with the field,
74 * then the default value used - 0.
75 */
76template <typename ParamExtractor>
77struct ExtractParamImpl
78{
79 using ResultType = typename ParamExtractor::ResultType;
80
81 /// It is assumed that `res` is the correct size and initialized with zeros.
82 static void vector_constant(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
83 std::string needle,
84 PaddedPODArray<ResultType> & res)
85 {
86 /// We are looking for a parameter simply as a substring of the form "name"
87 needle = "\"" + needle + "\":";
88
89 const UInt8 * begin = data.data();
90 const UInt8 * pos = begin;
91 const UInt8 * end = pos + data.size();
92
93 /// The current index in the string array.
94 size_t i = 0;
95
96 Volnitsky searcher(needle.data(), needle.size(), end - pos);
97
98 /// We will search for the next occurrence in all strings at once.
99 while (pos < end && end != (pos = searcher.search(pos, end - pos)))
100 {
101 /// Let's determine which index it belongs to.
102 while (begin + offsets[i] <= pos)
103 {
104 res[i] = 0;
105 ++i;
106 }
107
108 /// We check that the entry does not pass through the boundaries of strings.
109 if (pos + needle.size() < begin + offsets[i])
110 res[i] = ParamExtractor::extract(pos + needle.size(), begin + offsets[i] - 1); /// don't include terminating zero
111 else
112 res[i] = 0;
113
114 pos = begin + offsets[i];
115 ++i;
116 }
117
118 if (res.size() > i)
119 memset(&res[i], 0, (res.size() - i) * sizeof(res[0]));
120 }
121
122 static void constant_constant(const std::string & data, std::string needle, ResultType & res)
123 {
124 needle = "\"" + needle + "\":";
125 size_t pos = data.find(needle);
126 if (pos == std::string::npos)
127 res = 0;
128 else
129 res = ParamExtractor::extract(
130 reinterpret_cast<const UInt8 *>(data.data() + pos + needle.size()),
131 reinterpret_cast<const UInt8 *>(data.data() + data.size())
132 );
133 }
134
135 template <typename... Args> static void vector_vector(Args &&...)
136 {
137 throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
138 }
139
140 template <typename... Args> static void constant_vector(Args &&...)
141 {
142 throw Exception("Functions 'visitParamHas' and 'visitParamExtract*' doesn't support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
143 }
144};
145
146
147/** For the case where the type of field to extract is a string.
148 */
149template <typename ParamExtractor>
150struct ExtractParamToStringImpl
151{
152 static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
153 std::string needle,
154 ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
155 {
156 /// Constant 5 is taken from a function that performs a similar task FunctionsStringSearch.h::ExtractImpl
157 res_data.reserve(data.size() / 5);
158 res_offsets.resize(offsets.size());
159
160 /// We are looking for a parameter simply as a substring of the form "name"
161 needle = "\"" + needle + "\":";
162
163 const UInt8 * begin = data.data();
164 const UInt8 * pos = begin;
165 const UInt8 * end = pos + data.size();
166
167 /// The current index in the string array.
168 size_t i = 0;
169
170 Volnitsky searcher(needle.data(), needle.size(), end - pos);
171
172 /// We will search for the next occurrence in all strings at once.
173 while (pos < end && end != (pos = searcher.search(pos, end - pos)))
174 {
175 /// Determine which index it belongs to.
176 while (begin + offsets[i] <= pos)
177 {
178 res_data.push_back(0);
179 res_offsets[i] = res_data.size();
180 ++i;
181 }
182
183 /// We check that the entry does not pass through the boundaries of strings.
184 if (pos + needle.size() < begin + offsets[i])
185 ParamExtractor::extract(pos + needle.size(), begin + offsets[i], res_data);
186
187 pos = begin + offsets[i];
188
189 res_data.push_back(0);
190 res_offsets[i] = res_data.size();
191 ++i;
192 }
193
194 while (i < res_offsets.size())
195 {
196 res_data.push_back(0);
197 res_offsets[i] = res_data.size();
198 ++i;
199 }
200 }
201};
202
203}
204