1 | #include "duckdb/function/cast/cast_function_set.hpp" |
2 | #include "duckdb/function/cast/default_casts.hpp" |
3 | #include "duckdb/function/cast/bound_cast_data.hpp" |
4 | |
5 | #include <algorithm> // for std::sort |
6 | |
7 | namespace duckdb { |
8 | |
9 | //-------------------------------------------------------------------------------------------------- |
10 | // ??? -> UNION |
11 | //-------------------------------------------------------------------------------------------------- |
12 | // if the source can be implicitly cast to a member of the target union, the cast is valid |
13 | |
14 | struct ToUnionBoundCastData : public BoundCastData { |
15 | ToUnionBoundCastData(union_tag_t member_idx, string name, LogicalType type, int64_t cost, |
16 | BoundCastInfo member_cast_info) |
17 | : tag(member_idx), name(std::move(name)), type(std::move(type)), cost(cost), |
18 | member_cast_info(std::move(member_cast_info)) { |
19 | } |
20 | |
21 | union_tag_t tag; |
22 | string name; |
23 | LogicalType type; |
24 | int64_t cost; |
25 | BoundCastInfo member_cast_info; |
26 | |
27 | public: |
28 | unique_ptr<BoundCastData> Copy() const override { |
29 | return make_uniq<ToUnionBoundCastData>(args: tag, args: name, args: type, args: cost, args: member_cast_info.Copy()); |
30 | } |
31 | |
32 | static bool SortByCostAscending(const ToUnionBoundCastData &left, const ToUnionBoundCastData &right) { |
33 | return left.cost < right.cost; |
34 | } |
35 | }; |
36 | |
37 | unique_ptr<BoundCastData> BindToUnionCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) { |
38 | D_ASSERT(target.id() == LogicalTypeId::UNION); |
39 | |
40 | vector<ToUnionBoundCastData> candidates; |
41 | |
42 | for (idx_t member_idx = 0; member_idx < UnionType::GetMemberCount(type: target); member_idx++) { |
43 | auto member_type = UnionType::GetMemberType(type: target, index: member_idx); |
44 | auto member_name = UnionType::GetMemberName(type: target, index: member_idx); |
45 | auto member_cast_cost = input.function_set.ImplicitCastCost(source, target: member_type); |
46 | if (member_cast_cost != -1) { |
47 | auto member_cast_info = input.GetCastFunction(source, target: member_type); |
48 | candidates.emplace_back(args&: member_idx, args&: member_name, args&: member_type, args&: member_cast_cost, |
49 | args: std::move(member_cast_info)); |
50 | } |
51 | }; |
52 | |
53 | // no possible casts found! |
54 | if (candidates.empty()) { |
55 | auto message = StringUtil::Format( |
56 | fmt_str: "Type %s can't be cast as %s. %s can't be implicitly cast to any of the union member types: " , |
57 | params: source.ToString(), params: target.ToString(), params: source.ToString()); |
58 | |
59 | auto member_count = UnionType::GetMemberCount(type: target); |
60 | for (idx_t member_idx = 0; member_idx < member_count; member_idx++) { |
61 | auto member_type = UnionType::GetMemberType(type: target, index: member_idx); |
62 | message += member_type.ToString(); |
63 | if (member_idx < member_count - 1) { |
64 | message += ", " ; |
65 | } |
66 | } |
67 | throw CastException(message); |
68 | } |
69 | |
70 | // sort the candidate casts by cost |
71 | std::sort(first: candidates.begin(), last: candidates.end(), comp: ToUnionBoundCastData::SortByCostAscending); |
72 | |
73 | // select the lowest possible cost cast |
74 | auto &selected_cast = candidates[0]; |
75 | auto selected_cost = candidates[0].cost; |
76 | |
77 | // check if the cast is ambiguous (2 or more casts have the same cost) |
78 | if (candidates.size() > 1 && candidates[1].cost == selected_cost) { |
79 | |
80 | // collect all the ambiguous types |
81 | auto message = StringUtil::Format( |
82 | fmt_str: "Type %s can't be cast as %s. The cast is ambiguous, multiple possible members in target: " , params: source, |
83 | params: target); |
84 | for (size_t i = 0; i < candidates.size(); i++) { |
85 | if (candidates[i].cost == selected_cost) { |
86 | message += StringUtil::Format(fmt_str: "'%s (%s)'" , params: candidates[i].name, params: candidates[i].type.ToString()); |
87 | if (i < candidates.size() - 1) { |
88 | message += ", " ; |
89 | } |
90 | } |
91 | } |
92 | message += ". Disambiguate the target type by using the 'union_value(<tag> := <arg>)' function to promote the " |
93 | "source value to a single member union before casting." ; |
94 | throw CastException(message); |
95 | } |
96 | |
97 | // otherwise, return the selected cast |
98 | return make_uniq<ToUnionBoundCastData>(args: std::move(selected_cast)); |
99 | } |
100 | |
101 | unique_ptr<FunctionLocalState> InitToUnionLocalState(CastLocalStateParameters ¶meters) { |
102 | auto &cast_data = parameters.cast_data->Cast<ToUnionBoundCastData>(); |
103 | if (!cast_data.member_cast_info.init_local_state) { |
104 | return nullptr; |
105 | } |
106 | CastLocalStateParameters child_parameters(parameters, cast_data.member_cast_info.cast_data); |
107 | return cast_data.member_cast_info.init_local_state(child_parameters); |
108 | } |
109 | |
110 | static bool ToUnionCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { |
111 | D_ASSERT(result.GetType().id() == LogicalTypeId::UNION); |
112 | auto &cast_data = parameters.cast_data->Cast<ToUnionBoundCastData>(); |
113 | auto &selected_member_vector = UnionVector::GetMember(vector&: result, member_index: cast_data.tag); |
114 | |
115 | CastParameters child_parameters(parameters, cast_data.member_cast_info.cast_data, parameters.local_state); |
116 | if (!cast_data.member_cast_info.function(source, selected_member_vector, count, child_parameters)) { |
117 | return false; |
118 | } |
119 | |
120 | // cast succeeded, create union vector |
121 | UnionVector::SetToMember(vector&: result, tag: cast_data.tag, member_vector&: selected_member_vector, count, keep_tags_for_null: true); |
122 | |
123 | result.Verify(count); |
124 | |
125 | return true; |
126 | } |
127 | |
128 | BoundCastInfo DefaultCasts::ImplicitToUnionCast(BindCastInput &input, const LogicalType &source, |
129 | const LogicalType &target) { |
130 | return BoundCastInfo(&ToUnionCast, BindToUnionCast(input, source, target), InitToUnionLocalState); |
131 | } |
132 | |
133 | //-------------------------------------------------------------------------------------------------- |
134 | // UNION -> UNION |
135 | //-------------------------------------------------------------------------------------------------- |
136 | // if the source member tags is a subset of the target member tags, and all the source members can be |
137 | // implicitly cast to the corresponding target members, the cast is valid. |
138 | // |
139 | // VALID: UNION(A, B) -> UNION(A, B, C) |
140 | // VALID: UNION(A, B) -> UNION(A, C) if B can be implicitly cast to C |
141 | // |
142 | // INVALID: UNION(A, B, C) -> UNION(A, B) |
143 | // INVALID: UNION(A, B) -> UNION(A, C) if B can't be implicitly cast to C |
144 | // INVALID: UNION(A, B, D) -> UNION(A, B, C) |
145 | |
146 | struct UnionToUnionBoundCastData : public BoundCastData { |
147 | |
148 | // mapping from source member index to target member index |
149 | // these are always the same size as the source member count |
150 | // (since all source members must be present in the target) |
151 | vector<idx_t> tag_map; |
152 | vector<BoundCastInfo> member_casts; |
153 | |
154 | LogicalType target_type; |
155 | |
156 | UnionToUnionBoundCastData(vector<idx_t> tag_map, vector<BoundCastInfo> member_casts, LogicalType target_type) |
157 | : tag_map(std::move(tag_map)), member_casts(std::move(member_casts)), target_type(std::move(target_type)) { |
158 | } |
159 | |
160 | public: |
161 | unique_ptr<BoundCastData> Copy() const override { |
162 | vector<BoundCastInfo> member_casts_copy; |
163 | for (auto &member_cast : member_casts) { |
164 | member_casts_copy.push_back(x: member_cast.Copy()); |
165 | } |
166 | return make_uniq<UnionToUnionBoundCastData>(args: tag_map, args: std::move(member_casts_copy), args: target_type); |
167 | } |
168 | }; |
169 | |
170 | unique_ptr<BoundCastData> BindUnionToUnionCast(BindCastInput &input, const LogicalType &source, |
171 | const LogicalType &target) { |
172 | D_ASSERT(source.id() == LogicalTypeId::UNION); |
173 | D_ASSERT(target.id() == LogicalTypeId::UNION); |
174 | |
175 | auto source_member_count = UnionType::GetMemberCount(type: source); |
176 | |
177 | auto tag_map = vector<idx_t>(source_member_count); |
178 | auto member_casts = vector<BoundCastInfo>(); |
179 | |
180 | for (idx_t source_idx = 0; source_idx < source_member_count; source_idx++) { |
181 | auto &source_member_type = UnionType::GetMemberType(type: source, index: source_idx); |
182 | auto &source_member_name = UnionType::GetMemberName(type: source, index: source_idx); |
183 | |
184 | bool found = false; |
185 | for (idx_t target_idx = 0; target_idx < UnionType::GetMemberCount(type: target); target_idx++) { |
186 | auto &target_member_name = UnionType::GetMemberName(type: target, index: target_idx); |
187 | |
188 | // found a matching member |
189 | if (source_member_name == target_member_name) { |
190 | auto &target_member_type = UnionType::GetMemberType(type: target, index: target_idx); |
191 | tag_map[source_idx] = target_idx; |
192 | member_casts.push_back(x: input.GetCastFunction(source: source_member_type, target: target_member_type)); |
193 | found = true; |
194 | break; |
195 | } |
196 | } |
197 | if (!found) { |
198 | // no matching member tag found in the target set |
199 | auto message = |
200 | StringUtil::Format(fmt_str: "Type %s can't be cast as %s. The member '%s' is not present in target union" , |
201 | params: source.ToString(), params: target.ToString(), params: source_member_name); |
202 | throw CastException(message); |
203 | } |
204 | } |
205 | |
206 | return make_uniq<UnionToUnionBoundCastData>(args&: tag_map, args: std::move(member_casts), args: target); |
207 | } |
208 | |
209 | unique_ptr<FunctionLocalState> InitUnionToUnionLocalState(CastLocalStateParameters ¶meters) { |
210 | auto &cast_data = parameters.cast_data->Cast<UnionToUnionBoundCastData>(); |
211 | auto result = make_uniq<StructCastLocalState>(); |
212 | |
213 | for (auto &entry : cast_data.member_casts) { |
214 | unique_ptr<FunctionLocalState> child_state; |
215 | if (entry.init_local_state) { |
216 | CastLocalStateParameters child_params(parameters, entry.cast_data); |
217 | child_state = entry.init_local_state(child_params); |
218 | } |
219 | result->local_states.push_back(x: std::move(child_state)); |
220 | } |
221 | return std::move(result); |
222 | } |
223 | |
224 | static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { |
225 | auto &cast_data = parameters.cast_data->Cast<UnionToUnionBoundCastData>(); |
226 | auto &lstate = parameters.local_state->Cast<StructCastLocalState>(); |
227 | |
228 | auto source_member_count = UnionType::GetMemberCount(type: source.GetType()); |
229 | auto target_member_count = UnionType::GetMemberCount(type: result.GetType()); |
230 | |
231 | auto target_member_is_mapped = vector<bool>(target_member_count); |
232 | |
233 | // Perform the casts from source to target members |
234 | for (idx_t member_idx = 0; member_idx < source_member_count; member_idx++) { |
235 | auto target_member_idx = cast_data.tag_map[member_idx]; |
236 | |
237 | auto &source_member_vector = UnionVector::GetMember(vector&: source, member_index: member_idx); |
238 | auto &target_member_vector = UnionVector::GetMember(vector&: result, member_index: target_member_idx); |
239 | auto &member_cast = cast_data.member_casts[member_idx]; |
240 | |
241 | CastParameters child_parameters(parameters, member_cast.cast_data, lstate.local_states[member_idx]); |
242 | if (!member_cast.function(source_member_vector, target_member_vector, count, child_parameters)) { |
243 | return false; |
244 | } |
245 | |
246 | target_member_is_mapped[target_member_idx] = true; |
247 | } |
248 | |
249 | // All member casts succeeded! |
250 | |
251 | // Set the unmapped target members to constant NULL. |
252 | // If we cast UNION(A, B) -> UNION(A, B, C) we need to invalidate C so that |
253 | // the invariants of the result union hold. (only member columns "selected" |
254 | // by the rowwise corresponding tag in the tag vector should be valid) |
255 | for (idx_t target_member_idx = 0; target_member_idx < target_member_count; target_member_idx++) { |
256 | if (!target_member_is_mapped[target_member_idx]) { |
257 | auto &target_member_vector = UnionVector::GetMember(vector&: result, member_index: target_member_idx); |
258 | target_member_vector.SetVectorType(VectorType::CONSTANT_VECTOR); |
259 | ConstantVector::SetNull(vector&: target_member_vector, is_null: true); |
260 | } |
261 | } |
262 | |
263 | // Update the tags in the result vector |
264 | auto &source_tag_vector = UnionVector::GetTags(v&: source); |
265 | auto &result_tag_vector = UnionVector::GetTags(v&: result); |
266 | |
267 | if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) { |
268 | // Constant vector case optimization |
269 | result.SetVectorType(VectorType::CONSTANT_VECTOR); |
270 | if (ConstantVector::IsNull(vector: source)) { |
271 | ConstantVector::SetNull(vector&: result, is_null: true); |
272 | } else { |
273 | // map the tag |
274 | auto source_tag = ConstantVector::GetData<union_tag_t>(vector&: source_tag_vector)[0]; |
275 | auto mapped_tag = cast_data.tag_map[source_tag]; |
276 | ConstantVector::GetData<union_tag_t>(vector&: result_tag_vector)[0] = mapped_tag; |
277 | } |
278 | } else { |
279 | // Otherwise, use the unified vector format to access the source vector. |
280 | |
281 | // Ensure that all the result members are flat vectors |
282 | // This is not always the case, e.g. when a member is cast using the default TryNullCast function |
283 | // the resulting member vector will be a constant null vector. |
284 | for (idx_t target_member_idx = 0; target_member_idx < target_member_count; target_member_idx++) { |
285 | UnionVector::GetMember(vector&: result, member_index: target_member_idx).Flatten(count); |
286 | } |
287 | |
288 | // We assume that a union tag vector validity matches the union vector validity. |
289 | UnifiedVectorFormat source_tag_format; |
290 | source_tag_vector.ToUnifiedFormat(count, data&: source_tag_format); |
291 | |
292 | for (idx_t row_idx = 0; row_idx < count; row_idx++) { |
293 | auto source_row_idx = source_tag_format.sel->get_index(idx: row_idx); |
294 | if (source_tag_format.validity.RowIsValid(row_idx: source_row_idx)) { |
295 | // map the tag |
296 | auto source_tag = (UnifiedVectorFormat::GetData<union_tag_t>(format: source_tag_format))[source_row_idx]; |
297 | auto target_tag = cast_data.tag_map[source_tag]; |
298 | FlatVector::GetData<union_tag_t>(vector&: result_tag_vector)[row_idx] = target_tag; |
299 | } else { |
300 | |
301 | // Issue: The members of the result is not always flatvectors |
302 | // In the case of TryNullCast, the result member is constant. |
303 | FlatVector::SetNull(vector&: result, idx: row_idx, is_null: true); |
304 | } |
305 | } |
306 | } |
307 | |
308 | result.Verify(count); |
309 | |
310 | return true; |
311 | } |
312 | |
313 | static bool UnionToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { |
314 | auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR; |
315 | // first cast all union members to varchar |
316 | auto &cast_data = parameters.cast_data->Cast<UnionToUnionBoundCastData>(); |
317 | Vector varchar_union(cast_data.target_type, count); |
318 | |
319 | UnionToUnionCast(source, result&: varchar_union, count, parameters); |
320 | |
321 | // now construct the actual varchar vector |
322 | varchar_union.Flatten(count); |
323 | auto &tag_vector = UnionVector::GetTags(v&: source); |
324 | auto tags = FlatVector::GetData<union_tag_t>(vector&: tag_vector); |
325 | |
326 | auto &validity = FlatVector::Validity(vector&: varchar_union); |
327 | auto result_data = FlatVector::GetData<string_t>(vector&: result); |
328 | |
329 | for (idx_t i = 0; i < count; i++) { |
330 | if (!validity.RowIsValid(row_idx: i)) { |
331 | FlatVector::SetNull(vector&: result, idx: i, is_null: true); |
332 | continue; |
333 | } |
334 | |
335 | auto &member = UnionVector::GetMember(vector&: varchar_union, member_index: tags[i]); |
336 | UnifiedVectorFormat member_vdata; |
337 | member.ToUnifiedFormat(count, data&: member_vdata); |
338 | |
339 | auto mapped_idx = member_vdata.sel->get_index(idx: i); |
340 | auto member_valid = member_vdata.validity.RowIsValid(row_idx: mapped_idx); |
341 | if (member_valid) { |
342 | auto member_str = (UnifiedVectorFormat::GetData<string_t>(format: member_vdata))[mapped_idx]; |
343 | result_data[i] = StringVector::AddString(vector&: result, data: member_str); |
344 | } else { |
345 | result_data[i] = StringVector::AddString(vector&: result, data: "NULL" ); |
346 | } |
347 | } |
348 | |
349 | if (constant) { |
350 | result.SetVectorType(VectorType::CONSTANT_VECTOR); |
351 | } |
352 | |
353 | result.Verify(count); |
354 | return true; |
355 | } |
356 | |
357 | BoundCastInfo DefaultCasts::UnionCastSwitch(BindCastInput &input, const LogicalType &source, |
358 | const LogicalType &target) { |
359 | switch (target.id()) { |
360 | case LogicalTypeId::VARCHAR: { |
361 | // bind a cast in which we convert all members to VARCHAR first |
362 | child_list_t<LogicalType> varchar_members; |
363 | for (idx_t member_idx = 0; member_idx < UnionType::GetMemberCount(type: source); member_idx++) { |
364 | varchar_members.push_back(x: make_pair(x: UnionType::GetMemberName(type: source, index: member_idx), y: LogicalType::VARCHAR)); |
365 | } |
366 | auto varchar_type = LogicalType::UNION(members: std::move(varchar_members)); |
367 | return BoundCastInfo(UnionToVarcharCast, BindUnionToUnionCast(input, source, target: varchar_type), |
368 | InitUnionToUnionLocalState); |
369 | } |
370 | case LogicalTypeId::UNION: |
371 | return BoundCastInfo(UnionToUnionCast, BindUnionToUnionCast(input, source, target), InitUnionToUnionLocalState); |
372 | default: |
373 | return TryVectorNullCast; |
374 | } |
375 | } |
376 | |
377 | } // namespace duckdb |
378 | |