1 | #include "duckdb/parser/query_error_context.hpp" |
2 | #include "duckdb/parser/sql_statement.hpp" |
3 | #include "duckdb/common/string_util.hpp" |
4 | #include "duckdb/common/to_string.hpp" |
5 | |
6 | #include "utf8proc_wrapper.hpp" |
7 | |
8 | namespace duckdb { |
9 | |
10 | string QueryErrorContext::Format(const string &query, const string &error_message, int error_loc) { |
11 | if (error_loc < 0 || size_t(error_loc) >= query.size()) { |
12 | // no location in query provided |
13 | return error_message; |
14 | } |
15 | idx_t error_location = idx_t(error_loc); |
16 | // count the line numbers until the error location |
17 | // and set the start position as the first character of that line |
18 | idx_t start_pos = 0; |
19 | idx_t line_number = 1; |
20 | for (idx_t i = 0; i < error_location; i++) { |
21 | if (StringUtil::CharacterIsNewline(c: query[i])) { |
22 | line_number++; |
23 | start_pos = i + 1; |
24 | } |
25 | } |
26 | // now find either the next newline token after the query, or find the end of string |
27 | // this is the initial end position |
28 | idx_t end_pos = query.size(); |
29 | for (idx_t i = error_location; i < query.size(); i++) { |
30 | if (StringUtil::CharacterIsNewline(c: query[i])) { |
31 | end_pos = i; |
32 | break; |
33 | } |
34 | } |
35 | // now start scanning from the start pos |
36 | // we want to figure out the start and end pos of what we are going to render |
37 | // we want to render at most 80 characters in total, with the error_location located in the middle |
38 | const char *buf = query.c_str() + start_pos; |
39 | idx_t len = end_pos - start_pos; |
40 | vector<idx_t> render_widths; |
41 | vector<idx_t> positions; |
42 | if (Utf8Proc::IsValid(s: buf, len)) { |
43 | // for unicode awareness, we traverse the graphemes of the current line and keep track of their render widths |
44 | // and of their position in the string |
45 | for (idx_t cpos = 0; cpos < len;) { |
46 | auto char_render_width = Utf8Proc::RenderWidth(s: buf, len, pos: cpos); |
47 | positions.push_back(x: cpos); |
48 | render_widths.push_back(x: char_render_width); |
49 | cpos = Utf8Proc::NextGraphemeCluster(s: buf, len, pos: cpos); |
50 | } |
51 | } else { // LCOV_EXCL_START |
52 | // invalid utf-8, we can't do much at this point |
53 | // we just assume every character is a character, and every character has a render width of 1 |
54 | for (idx_t cpos = 0; cpos < len; cpos++) { |
55 | positions.push_back(x: cpos); |
56 | render_widths.push_back(x: 1); |
57 | } |
58 | } // LCOV_EXCL_STOP |
59 | // now we want to find the (unicode aware) start and end position |
60 | idx_t epos = 0; |
61 | // start by finding the error location inside the array |
62 | for (idx_t i = 0; i < positions.size(); i++) { |
63 | if (positions[i] >= (error_location - start_pos)) { |
64 | epos = i; |
65 | break; |
66 | } |
67 | } |
68 | bool truncate_beginning = false; |
69 | bool truncate_end = false; |
70 | idx_t spos = 0; |
71 | // now we iterate backwards from the error location |
72 | // we show max 40 render width before the error location |
73 | idx_t current_render_width = 0; |
74 | for (idx_t i = epos; i > 0; i--) { |
75 | current_render_width += render_widths[i]; |
76 | if (current_render_width >= 40) { |
77 | truncate_beginning = true; |
78 | start_pos = positions[i]; |
79 | spos = i; |
80 | break; |
81 | } |
82 | } |
83 | // now do the same, but going forward |
84 | current_render_width = 0; |
85 | for (idx_t i = epos; i < positions.size(); i++) { |
86 | current_render_width += render_widths[i]; |
87 | if (current_render_width >= 40) { |
88 | truncate_end = true; |
89 | end_pos = positions[i]; |
90 | break; |
91 | } |
92 | } |
93 | string line_indicator = "LINE " + to_string(val: line_number) + ": " ; |
94 | string begin_trunc = truncate_beginning ? "..." : "" ; |
95 | string end_trunc = truncate_end ? "..." : "" ; |
96 | |
97 | // get the render width of the error indicator (i.e. how many spaces we need to insert before the ^) |
98 | idx_t error_render_width = 0; |
99 | for (idx_t i = spos; i < epos; i++) { |
100 | error_render_width += render_widths[i]; |
101 | } |
102 | error_render_width += line_indicator.size() + begin_trunc.size(); |
103 | |
104 | // now first print the error message plus the current line (or a subset of the line) |
105 | string result = error_message; |
106 | result += "\n" + line_indicator + begin_trunc + query.substr(pos: start_pos, n: end_pos - start_pos) + end_trunc; |
107 | // print an arrow pointing at the error location |
108 | result += "\n" + string(error_render_width, ' ') + "^" ; |
109 | return result; |
110 | } |
111 | |
112 | string QueryErrorContext::FormatErrorRecursive(const string &msg, vector<ExceptionFormatValue> &values) { |
113 | string error_message = values.empty() ? msg : ExceptionFormatValue::Format(msg, values); |
114 | if (!statement || query_location >= statement->query.size()) { |
115 | // no statement provided or query location out of range |
116 | return error_message; |
117 | } |
118 | return Format(query: statement->query, error_message, error_loc: query_location); |
119 | } |
120 | |
121 | } // namespace duckdb |
122 | |