| 1 | #include "duckdb/parser/query_error_context.hpp" |
| 2 | #include "duckdb/parser/sql_statement.hpp" |
| 3 | #include "duckdb/common/string_util.hpp" |
| 4 | #include "duckdb/common/to_string.hpp" |
| 5 | |
| 6 | #include "utf8proc_wrapper.hpp" |
| 7 | |
| 8 | namespace duckdb { |
| 9 | |
| 10 | string QueryErrorContext::Format(const string &query, const string &error_message, int error_loc) { |
| 11 | if (error_loc < 0 || size_t(error_loc) >= query.size()) { |
| 12 | // no location in query provided |
| 13 | return error_message; |
| 14 | } |
| 15 | idx_t error_location = idx_t(error_loc); |
| 16 | // count the line numbers until the error location |
| 17 | // and set the start position as the first character of that line |
| 18 | idx_t start_pos = 0; |
| 19 | idx_t line_number = 1; |
| 20 | for (idx_t i = 0; i < error_location; i++) { |
| 21 | if (StringUtil::CharacterIsNewline(c: query[i])) { |
| 22 | line_number++; |
| 23 | start_pos = i + 1; |
| 24 | } |
| 25 | } |
| 26 | // now find either the next newline token after the query, or find the end of string |
| 27 | // this is the initial end position |
| 28 | idx_t end_pos = query.size(); |
| 29 | for (idx_t i = error_location; i < query.size(); i++) { |
| 30 | if (StringUtil::CharacterIsNewline(c: query[i])) { |
| 31 | end_pos = i; |
| 32 | break; |
| 33 | } |
| 34 | } |
| 35 | // now start scanning from the start pos |
| 36 | // we want to figure out the start and end pos of what we are going to render |
| 37 | // we want to render at most 80 characters in total, with the error_location located in the middle |
| 38 | const char *buf = query.c_str() + start_pos; |
| 39 | idx_t len = end_pos - start_pos; |
| 40 | vector<idx_t> render_widths; |
| 41 | vector<idx_t> positions; |
| 42 | if (Utf8Proc::IsValid(s: buf, len)) { |
| 43 | // for unicode awareness, we traverse the graphemes of the current line and keep track of their render widths |
| 44 | // and of their position in the string |
| 45 | for (idx_t cpos = 0; cpos < len;) { |
| 46 | auto char_render_width = Utf8Proc::RenderWidth(s: buf, len, pos: cpos); |
| 47 | positions.push_back(x: cpos); |
| 48 | render_widths.push_back(x: char_render_width); |
| 49 | cpos = Utf8Proc::NextGraphemeCluster(s: buf, len, pos: cpos); |
| 50 | } |
| 51 | } else { // LCOV_EXCL_START |
| 52 | // invalid utf-8, we can't do much at this point |
| 53 | // we just assume every character is a character, and every character has a render width of 1 |
| 54 | for (idx_t cpos = 0; cpos < len; cpos++) { |
| 55 | positions.push_back(x: cpos); |
| 56 | render_widths.push_back(x: 1); |
| 57 | } |
| 58 | } // LCOV_EXCL_STOP |
| 59 | // now we want to find the (unicode aware) start and end position |
| 60 | idx_t epos = 0; |
| 61 | // start by finding the error location inside the array |
| 62 | for (idx_t i = 0; i < positions.size(); i++) { |
| 63 | if (positions[i] >= (error_location - start_pos)) { |
| 64 | epos = i; |
| 65 | break; |
| 66 | } |
| 67 | } |
| 68 | bool truncate_beginning = false; |
| 69 | bool truncate_end = false; |
| 70 | idx_t spos = 0; |
| 71 | // now we iterate backwards from the error location |
| 72 | // we show max 40 render width before the error location |
| 73 | idx_t current_render_width = 0; |
| 74 | for (idx_t i = epos; i > 0; i--) { |
| 75 | current_render_width += render_widths[i]; |
| 76 | if (current_render_width >= 40) { |
| 77 | truncate_beginning = true; |
| 78 | start_pos = positions[i]; |
| 79 | spos = i; |
| 80 | break; |
| 81 | } |
| 82 | } |
| 83 | // now do the same, but going forward |
| 84 | current_render_width = 0; |
| 85 | for (idx_t i = epos; i < positions.size(); i++) { |
| 86 | current_render_width += render_widths[i]; |
| 87 | if (current_render_width >= 40) { |
| 88 | truncate_end = true; |
| 89 | end_pos = positions[i]; |
| 90 | break; |
| 91 | } |
| 92 | } |
| 93 | string line_indicator = "LINE " + to_string(val: line_number) + ": " ; |
| 94 | string begin_trunc = truncate_beginning ? "..." : "" ; |
| 95 | string end_trunc = truncate_end ? "..." : "" ; |
| 96 | |
| 97 | // get the render width of the error indicator (i.e. how many spaces we need to insert before the ^) |
| 98 | idx_t error_render_width = 0; |
| 99 | for (idx_t i = spos; i < epos; i++) { |
| 100 | error_render_width += render_widths[i]; |
| 101 | } |
| 102 | error_render_width += line_indicator.size() + begin_trunc.size(); |
| 103 | |
| 104 | // now first print the error message plus the current line (or a subset of the line) |
| 105 | string result = error_message; |
| 106 | result += "\n" + line_indicator + begin_trunc + query.substr(pos: start_pos, n: end_pos - start_pos) + end_trunc; |
| 107 | // print an arrow pointing at the error location |
| 108 | result += "\n" + string(error_render_width, ' ') + "^" ; |
| 109 | return result; |
| 110 | } |
| 111 | |
| 112 | string QueryErrorContext::FormatErrorRecursive(const string &msg, vector<ExceptionFormatValue> &values) { |
| 113 | string error_message = values.empty() ? msg : ExceptionFormatValue::Format(msg, values); |
| 114 | if (!statement || query_location >= statement->query.size()) { |
| 115 | // no statement provided or query location out of range |
| 116 | return error_message; |
| 117 | } |
| 118 | return Format(query: statement->query, error_message, error_loc: query_location); |
| 119 | } |
| 120 | |
| 121 | } // namespace duckdb |
| 122 | |