1 | #include "catch.hpp" |
2 | #include "test_helpers.hpp" |
3 | |
4 | using namespace duckdb; |
5 | using namespace std; |
6 | |
7 | /* Test Case disclaimer |
8 | * |
9 | * Assertions built using the Domain Testing technique |
10 | * at: https://bbst.courses/wp-content/uploads/2018/01/Kaner-Intro-to-Domain-Testing-2018.pdf |
11 | * |
12 | */ |
13 | TEST_CASE("Instr test" , "[function]" ) { |
14 | unique_ptr<QueryResult> result; |
15 | DuckDB db(nullptr); |
16 | Connection con(db); |
17 | con.EnableQueryVerification(); |
18 | |
19 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE strings(s VARCHAR, off INTEGER, length INTEGER);" )); |
20 | REQUIRE_NO_FAIL(con.Query("INSERT INTO strings VALUES ('hello', 1, 2), " |
21 | "('world', 2, 3), ('b', 1, 1), (NULL, 2, 2)" )); |
22 | |
23 | // Test first letter |
24 | result = con.Query("SELECT instr(s,'h') FROM strings" ); |
25 | REQUIRE(CHECK_COLUMN(result, 0, {1, 0, 0, Value()})); |
26 | |
27 | // Test second letter |
28 | result = con.Query("SELECT instr(s,'e') FROM strings" ); |
29 | REQUIRE(CHECK_COLUMN(result, 0, {2, 0, 0, Value()})); |
30 | |
31 | // Test last letter |
32 | result = con.Query("SELECT instr(s,'d') FROM strings" ); |
33 | REQUIRE(CHECK_COLUMN(result, 0, {0, 5, 0, Value()})); |
34 | |
35 | // Test multiple letters |
36 | result = con.Query("SELECT instr(s,'he') FROM strings" ); |
37 | REQUIRE(CHECK_COLUMN(result, 0, {1, 0, 0, Value()})); |
38 | |
39 | // Test multiple letters in the middle |
40 | result = con.Query("SELECT instr(s,'ello') FROM strings" ); |
41 | REQUIRE(CHECK_COLUMN(result, 0, {2, 0, 0, Value()})); |
42 | |
43 | // Test multiple letters at the end |
44 | result = con.Query("SELECT instr(s,'lo') FROM strings" ); |
45 | REQUIRE(CHECK_COLUMN(result, 0, {4, 0, 0, Value()})); |
46 | |
47 | // Test no match |
48 | result = con.Query("SELECT instr(s,'he-man') FROM strings" ); |
49 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 0, Value()})); |
50 | |
51 | // Test matching needle in multiple rows |
52 | result = con.Query("SELECT instr(s,'o'),s FROM strings" ); |
53 | REQUIRE(CHECK_COLUMN(result, 0, {5, 2, 0, Value()})); |
54 | REQUIRE(CHECK_COLUMN(result, 1, {"hello" , "world" , "b" , Value()})); |
55 | |
56 | // Test NULL constant in different places |
57 | result = con.Query("SELECT instr(NULL,'o') FROM strings" ); |
58 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), Value(), Value(), Value()})); |
59 | result = con.Query("SELECT instr(s,NULL) FROM strings" ); |
60 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), Value(), Value(), Value()})); |
61 | result = con.Query("SELECT instr(NULL,NULL) FROM strings" ); |
62 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), Value(), Value(), Value()})); |
63 | } |
64 | |
65 | /* Inspired by the substring test case and C language UTF-8 tests |
66 | * |
67 | */ |
68 | TEST_CASE("Instr test with UTF8" , "[function]" ) { |
69 | unique_ptr<QueryResult> result; |
70 | DuckDB db(nullptr); |
71 | Connection con(db); |
72 | con.EnableQueryVerification(); |
73 | string atomo = "\xc3\xa1tomo" ; // length 6 |
74 | string portg = "ol\xc3\xa1 mundo" ; // olá mundo length 9 |
75 | string nihao = "\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c" ; //你好世界 length 4 |
76 | string potpourri = "two \xc3\xb1 three \xE2\x82\xA1 four \xF0\x9F\xA6\x86 end" ; |
77 | |
78 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE strings(s VARCHAR);" )); |
79 | REQUIRE_NO_FAIL(con.Query("INSERT INTO strings VALUES ('" + atomo + "')" )); |
80 | REQUIRE_NO_FAIL(con.Query("INSERT INTO strings VALUES ('" + portg + "')" )); |
81 | REQUIRE_NO_FAIL(con.Query("INSERT INTO strings VALUES ('" + nihao + "')" )); |
82 | REQUIRE_NO_FAIL(con.Query("INSERT INTO strings VALUES ('" + potpourri + "')" )); |
83 | |
84 | // Test one matching UTF8 letter |
85 | result = con.Query("SELECT INSTR(s,'\xc3\xa1') FROM strings" ); |
86 | REQUIRE(CHECK_COLUMN(result, 0, {1, 3, 0, 0})); |
87 | |
88 | // Test a sentence with an UTF-8 |
89 | result = con.Query("SELECT INSTR(s,'ol\xc3\xa1 mundo') FROM strings" ); |
90 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1, 0, 0})); |
91 | |
92 | // Test an entire UTF-8 word |
93 | result = con.Query("SELECT INSTR(s,'\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c') FROM strings" ); |
94 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 1, 0})); |
95 | |
96 | // Test a substring of the haystack from the beginning |
97 | result = con.Query("SELECT instr(s,'two \xc3\xb1 thr') FROM strings" ); |
98 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 0, 1})); |
99 | |
100 | // Test a single UTF8 substring of the haystack in the middle |
101 | result = con.Query("SELECT instr(s,'\xc3\xb1') FROM strings" ); |
102 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 0, 5})); |
103 | |
104 | // Test a multiple UTF8 substring of the haystack in the middle |
105 | result = con.Query("SELECT instr(s,'\xE2\x82\xA1 four \xF0\x9F\xA6\x86 e') FROM strings" ); |
106 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 0, 13})); |
107 | |
108 | // Test a substring of the haystack from the middle to the end |
109 | result = con.Query("SELECT instr(s,'\xF0\x9F\xA6\x86 end') FROM strings" ); |
110 | REQUIRE(CHECK_COLUMN(result, 0, {0, 0, 0, 20})); |
111 | } |
112 | |