1#include "catch.hpp"
2#include "duckdb/common/file_system.hpp"
3#include "dbgen.hpp"
4#include "test_helpers.hpp"
5
6using namespace duckdb;
7using namespace std;
8
9TEST_CASE("Test LEFT OUTER JOIN", "[join]") {
10 unique_ptr<QueryResult> result;
11 DuckDB db(nullptr);
12 Connection con(db);
13 con.EnableQueryVerification();
14
15 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER)"));
16 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1, 2), (2, 3), (3, 4)"));
17 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers2(k INTEGER, l INTEGER)"));
18 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers2 VALUES (1, 10), (2, 20)"));
19
20 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON "
21 "integers.i=integers2.k ORDER BY i");
22 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
23 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
24 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, Value()}));
25 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, Value()}));
26
27 // RIGHT OUTER JOIN is just LEFT OUTER JOIN but with arguments reversed
28 // with one caveat: SELECT * will project the columns of the LHS first!
29 result = con.Query("SELECT * FROM integers2 RIGHT OUTER JOIN integers ON "
30 "integers.i=integers2.k ORDER BY i");
31 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, Value()}));
32 REQUIRE(CHECK_COLUMN(result, 1, {10, 20, Value()}));
33 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, 3}));
34 REQUIRE(CHECK_COLUMN(result, 3, {2, 3, 4}));
35
36 // WHERE happens AFTER the join, thus [where k IS NOT NULL] filters out any tuples with generated NULL values from
37 // the LEFT OUTER JOIN. Because of this, this join is equivalent to an inner join.
38 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON "
39 "integers.i=integers2.k WHERE k IS NOT NULL ORDER BY i");
40 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
41 REQUIRE(CHECK_COLUMN(result, 1, {2, 3}));
42 REQUIRE(CHECK_COLUMN(result, 2, {1, 2}));
43 REQUIRE(CHECK_COLUMN(result, 3, {10, 20}));
44
45 // however, any conditions in the ON clause happen BEFORE the join, thus the condition [integers2.k IS NOT NULL]
46 // happens BEFORE any NULL values are generated by the LEFT OUTER JOIN.
47 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON "
48 "integers.i=integers2.k AND integers2.k IS NOT NULL ORDER BY i");
49 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
50 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
51 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, Value()}));
52 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, Value()}));
53
54 // filter on LHS
55 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i=1 ORDER BY i, k;");
56 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2, 3}));
57 REQUIRE(CHECK_COLUMN(result, 1, {2, 2, 3, 4}));
58 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, Value(), Value()}));
59 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, Value(), Value()}));
60
61 // // left outer join on "true" is cross product
62 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON 1=1 ORDER BY i, k;");
63 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2, 2, 3, 3}));
64 REQUIRE(CHECK_COLUMN(result, 1, {2, 2, 3, 3, 4, 4}));
65 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, 1, 2, 1, 2}));
66 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, 10, 20, 10, 20}));
67
68 // except if RHS is empty; then it is the LHS with NULl values appended
69 result = con.Query(
70 "SELECT * FROM integers LEFT OUTER JOIN (SELECT * FROM integers2 WHERE 1<>1) tbl2 ON 1=2 ORDER BY i;");
71 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
72 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
73 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
74 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
75
76 // left outer join on "false" gives the LHS with the RHS filled as NULL
77 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON 1=2 ORDER BY i;");
78 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
79 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
80 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
81 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
82
83 // left outer join on NULL constant gives the LHS with the RHS filled as null as well
84 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON NULL<>NULL ORDER BY i;");
85 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
86 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
87 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
88 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
89
90 // left outer join on condition that only concerns the LHS
91 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i=1 ORDER BY i, k;");
92 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2, 3}));
93 REQUIRE(CHECK_COLUMN(result, 1, {2, 2, 3, 4}));
94 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, Value(), Value()}));
95 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, Value(), Value()}));
96
97 // // left outer join on condition that only concerns the RHS
98 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON l=20 ORDER BY i, k;");
99 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
100 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
101 REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 2}));
102 REQUIRE(CHECK_COLUMN(result, 3, {20, 20, 20}));
103
104 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON l>0 ORDER BY i, k;");
105 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2, 2, 3, 3}));
106 REQUIRE(CHECK_COLUMN(result, 1, {2, 2, 3, 3, 4, 4}));
107 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, 1, 2, 1, 2}));
108 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, 10, 20, 10, 20}));
109
110 // // left outer join on condition that affects both, but is not a simple comparison
111 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i=1 OR l=20 ORDER BY i, k;");
112 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2, 3}));
113 REQUIRE(CHECK_COLUMN(result, 1, {2, 2, 3, 4}));
114 REQUIRE(CHECK_COLUMN(result, 2, {1, 2, 2, 2}));
115 REQUIRE(CHECK_COLUMN(result, 3, {10, 20, 20, 20}));
116
117 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i=4 OR l=17 ORDER BY i;");
118 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
119 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
120 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
121 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
122
123 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i+l=21 ORDER BY i;");
124 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
125 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
126 REQUIRE(CHECK_COLUMN(result, 2, {2, Value(), Value()}));
127 REQUIRE(CHECK_COLUMN(result, 3, {20, Value(), Value()}));
128
129 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON i+l>12 ORDER BY i, k;");
130 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3, 3}));
131 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4, 4}));
132 REQUIRE(CHECK_COLUMN(result, 2, {2, 2, 1, 2}));
133 REQUIRE(CHECK_COLUMN(result, 3, {20, 20, 10, 20}));
134
135 // range join
136 result = con.Query(
137 "SELECT * FROM integers LEFT OUTER JOIN integers2 ON integers.i<integers2.k WHERE integers.i <= 2 ORDER BY i");
138 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
139 REQUIRE(CHECK_COLUMN(result, 1, {2, 3}));
140 REQUIRE(CHECK_COLUMN(result, 2, {2, Value()}));
141 REQUIRE(CHECK_COLUMN(result, 3, {20, Value()}));
142
143 // multiple conditions
144 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN integers2 ON integers.i<integers2.k AND "
145 "integers.i<integers2.l WHERE integers.i <= 2 ORDER BY i");
146 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
147 REQUIRE(CHECK_COLUMN(result, 1, {2, 3}));
148 REQUIRE(CHECK_COLUMN(result, 2, {2, Value()}));
149 REQUIRE(CHECK_COLUMN(result, 3, {20, Value()}));
150
151 result = con.Query("SELECT * FROM integers LEFT OUTER JOIN (SELECT * FROM integers2 WHERE k=100) integers2 ON "
152 "integers.i<integers2.k ORDER BY i");
153 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
154 REQUIRE(CHECK_COLUMN(result, 1, {2, 3, 4}));
155 REQUIRE(CHECK_COLUMN(result, 2, {Value(), Value(), Value()}));
156 REQUIRE(CHECK_COLUMN(result, 3, {Value(), Value(), Value()}));
157}
158