1#include "duckdb/parser/query_error_context.hpp"
2#include "duckdb/parser/sql_statement.hpp"
3#include "duckdb/common/string_util.hpp"
4#include "duckdb/common/to_string.hpp"
5
6#include "utf8proc_wrapper.hpp"
7
8namespace duckdb {
9
10string QueryErrorContext::Format(const string &query, const string &error_message, int error_loc) {
11 if (error_loc < 0 || size_t(error_loc) >= query.size()) {
12 // no location in query provided
13 return error_message;
14 }
15 idx_t error_location = idx_t(error_loc);
16 // count the line numbers until the error location
17 // and set the start position as the first character of that line
18 idx_t start_pos = 0;
19 idx_t line_number = 1;
20 for (idx_t i = 0; i < error_location; i++) {
21 if (StringUtil::CharacterIsNewline(c: query[i])) {
22 line_number++;
23 start_pos = i + 1;
24 }
25 }
26 // now find either the next newline token after the query, or find the end of string
27 // this is the initial end position
28 idx_t end_pos = query.size();
29 for (idx_t i = error_location; i < query.size(); i++) {
30 if (StringUtil::CharacterIsNewline(c: query[i])) {
31 end_pos = i;
32 break;
33 }
34 }
35 // now start scanning from the start pos
36 // we want to figure out the start and end pos of what we are going to render
37 // we want to render at most 80 characters in total, with the error_location located in the middle
38 const char *buf = query.c_str() + start_pos;
39 idx_t len = end_pos - start_pos;
40 vector<idx_t> render_widths;
41 vector<idx_t> positions;
42 if (Utf8Proc::IsValid(s: buf, len)) {
43 // for unicode awareness, we traverse the graphemes of the current line and keep track of their render widths
44 // and of their position in the string
45 for (idx_t cpos = 0; cpos < len;) {
46 auto char_render_width = Utf8Proc::RenderWidth(s: buf, len, pos: cpos);
47 positions.push_back(x: cpos);
48 render_widths.push_back(x: char_render_width);
49 cpos = Utf8Proc::NextGraphemeCluster(s: buf, len, pos: cpos);
50 }
51 } else { // LCOV_EXCL_START
52 // invalid utf-8, we can't do much at this point
53 // we just assume every character is a character, and every character has a render width of 1
54 for (idx_t cpos = 0; cpos < len; cpos++) {
55 positions.push_back(x: cpos);
56 render_widths.push_back(x: 1);
57 }
58 } // LCOV_EXCL_STOP
59 // now we want to find the (unicode aware) start and end position
60 idx_t epos = 0;
61 // start by finding the error location inside the array
62 for (idx_t i = 0; i < positions.size(); i++) {
63 if (positions[i] >= (error_location - start_pos)) {
64 epos = i;
65 break;
66 }
67 }
68 bool truncate_beginning = false;
69 bool truncate_end = false;
70 idx_t spos = 0;
71 // now we iterate backwards from the error location
72 // we show max 40 render width before the error location
73 idx_t current_render_width = 0;
74 for (idx_t i = epos; i > 0; i--) {
75 current_render_width += render_widths[i];
76 if (current_render_width >= 40) {
77 truncate_beginning = true;
78 start_pos = positions[i];
79 spos = i;
80 break;
81 }
82 }
83 // now do the same, but going forward
84 current_render_width = 0;
85 for (idx_t i = epos; i < positions.size(); i++) {
86 current_render_width += render_widths[i];
87 if (current_render_width >= 40) {
88 truncate_end = true;
89 end_pos = positions[i];
90 break;
91 }
92 }
93 string line_indicator = "LINE " + to_string(val: line_number) + ": ";
94 string begin_trunc = truncate_beginning ? "..." : "";
95 string end_trunc = truncate_end ? "..." : "";
96
97 // get the render width of the error indicator (i.e. how many spaces we need to insert before the ^)
98 idx_t error_render_width = 0;
99 for (idx_t i = spos; i < epos; i++) {
100 error_render_width += render_widths[i];
101 }
102 error_render_width += line_indicator.size() + begin_trunc.size();
103
104 // now first print the error message plus the current line (or a subset of the line)
105 string result = error_message;
106 result += "\n" + line_indicator + begin_trunc + query.substr(pos: start_pos, n: end_pos - start_pos) + end_trunc;
107 // print an arrow pointing at the error location
108 result += "\n" + string(error_render_width, ' ') + "^";
109 return result;
110}
111
112string QueryErrorContext::FormatErrorRecursive(const string &msg, vector<ExceptionFormatValue> &values) {
113 string error_message = values.empty() ? msg : ExceptionFormatValue::Format(msg, values);
114 if (!statement || query_location >= statement->query.size()) {
115 // no statement provided or query location out of range
116 return error_message;
117 }
118 return Format(query: statement->query, error_message, error_loc: query_location);
119}
120
121} // namespace duckdb
122