1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <cstddef>
19#include <cstdint>
20#include <iostream>
21#include <memory>
22#include <sstream> // IWYU pragma: keep
23#include <string>
24#include <type_traits>
25#include <vector>
26
27#include "arrow/array.h"
28#include "arrow/pretty_print.h"
29#include "arrow/record_batch.h"
30#include "arrow/status.h"
31#include "arrow/table.h"
32#include "arrow/type.h"
33#include "arrow/type_traits.h"
34#include "arrow/util/checked_cast.h"
35#include "arrow/util/string.h"
36#include "arrow/visitor_inline.h"
37
38namespace arrow {
39
40using internal::checked_cast;
41
42class PrettyPrinter {
43 public:
44 PrettyPrinter(int indent, int indent_size, int window, bool skip_new_lines,
45 std::ostream* sink)
46 : indent_(indent),
47 indent_size_(indent_size),
48 window_(window),
49 skip_new_lines_(skip_new_lines),
50 sink_(sink) {}
51
52 void Write(const char* data);
53 void Write(const std::string& data);
54 void WriteIndented(const char* data);
55 void WriteIndented(const std::string& data);
56 void Newline();
57 void Indent();
58 void OpenArray(const Array& array);
59 void CloseArray(const Array& array);
60
61 void Flush() { (*sink_) << std::flush; }
62
63 protected:
64 int indent_;
65 int indent_size_;
66 int window_;
67 bool skip_new_lines_;
68 std::ostream* sink_;
69};
70
71void PrettyPrinter::OpenArray(const Array& array) {
72 Indent();
73 (*sink_) << "[";
74 if (array.length() > 0) {
75 (*sink_) << "\n";
76 indent_ += indent_size_;
77 }
78}
79
80void PrettyPrinter::CloseArray(const Array& array) {
81 if (array.length() > 0) {
82 indent_ -= indent_size_;
83 Indent();
84 }
85 (*sink_) << "]";
86}
87
88void PrettyPrinter::Write(const char* data) { (*sink_) << data; }
89void PrettyPrinter::Write(const std::string& data) { (*sink_) << data; }
90
91void PrettyPrinter::WriteIndented(const char* data) {
92 Indent();
93 Write(data);
94}
95
96void PrettyPrinter::WriteIndented(const std::string& data) {
97 Indent();
98 Write(data);
99}
100
101void PrettyPrinter::Newline() {
102 if (skip_new_lines_) {
103 return;
104 }
105 (*sink_) << "\n";
106 Indent();
107}
108
109void PrettyPrinter::Indent() {
110 for (int i = 0; i < indent_; ++i) {
111 (*sink_) << " ";
112 }
113}
114
115class ArrayPrinter : public PrettyPrinter {
116 public:
117 ArrayPrinter(const Array& array, int indent, int indent_size, int window,
118 const std::string& null_rep, bool skip_new_lines, std::ostream* sink)
119 : PrettyPrinter(indent, indent_size, window, skip_new_lines, sink),
120 array_(array),
121 null_rep_(null_rep) {}
122
123 template <typename FormatFunction>
124 void WriteValues(const Array& array, FormatFunction&& func) {
125 bool skip_comma = true;
126 for (int64_t i = 0; i < array.length(); ++i) {
127 if (skip_comma) {
128 skip_comma = false;
129 } else {
130 (*sink_) << ",\n";
131 }
132 Indent();
133 if ((i >= window_) && (i < (array.length() - window_))) {
134 (*sink_) << "...\n";
135 i = array.length() - window_ - 1;
136 skip_comma = true;
137 } else if (array.IsNull(i)) {
138 (*sink_) << null_rep_;
139 } else {
140 func(i);
141 }
142 }
143 (*sink_) << "\n";
144 }
145
146 template <typename T>
147 inline typename std::enable_if<IsInteger<T>::value, Status>::type WriteDataValues(
148 const T& array) {
149 const auto data = array.raw_values();
150 WriteValues(array, [&](int64_t i) { (*sink_) << static_cast<int64_t>(data[i]); });
151 return Status::OK();
152 }
153
154 template <typename T>
155 inline typename std::enable_if<IsFloatingPoint<T>::value, Status>::type WriteDataValues(
156 const T& array) {
157 const auto data = array.raw_values();
158 WriteValues(array, [&](int64_t i) { (*sink_) << data[i]; });
159 return Status::OK();
160 }
161
162 // String (Utf8)
163 template <typename T>
164 inline typename std::enable_if<std::is_same<StringArray, T>::value, Status>::type
165 WriteDataValues(const T& array) {
166 WriteValues(array, [&](int64_t i) { (*sink_) << "\"" << array.GetView(i) << "\""; });
167 return Status::OK();
168 }
169
170 // Binary
171 template <typename T>
172 inline typename std::enable_if<std::is_same<BinaryArray, T>::value, Status>::type
173 WriteDataValues(const T& array) {
174 WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); });
175 return Status::OK();
176 }
177
178 template <typename T>
179 inline
180 typename std::enable_if<std::is_same<FixedSizeBinaryArray, T>::value, Status>::type
181 WriteDataValues(const T& array) {
182 WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); });
183 return Status::OK();
184 }
185
186 template <typename T>
187 inline typename std::enable_if<std::is_same<Decimal128Array, T>::value, Status>::type
188 WriteDataValues(const T& array) {
189 WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); });
190 return Status::OK();
191 }
192
193 template <typename T>
194 inline typename std::enable_if<std::is_base_of<BooleanArray, T>::value, Status>::type
195 WriteDataValues(const T& array) {
196 WriteValues(array, [&](int64_t i) { Write(array.Value(i) ? "true" : "false"); });
197 return Status::OK();
198 }
199
200 template <typename T>
201 inline typename std::enable_if<std::is_base_of<ListArray, T>::value, Status>::type
202 WriteDataValues(const T& array) {
203 bool skip_comma = true;
204 for (int64_t i = 0; i < array.length(); ++i) {
205 if (skip_comma) {
206 skip_comma = false;
207 } else {
208 (*sink_) << ",\n";
209 }
210 if ((i >= window_) && (i < (array.length() - window_))) {
211 Indent();
212 (*sink_) << "...\n";
213 i = array.length() - window_ - 1;
214 skip_comma = true;
215 } else if (array.IsNull(i)) {
216 Indent();
217 (*sink_) << null_rep_;
218 } else {
219 std::shared_ptr<Array> slice =
220 array.values()->Slice(array.value_offset(i), array.value_length(i));
221 RETURN_NOT_OK(PrettyPrint(*slice, {indent_, window_}, sink_));
222 }
223 }
224 (*sink_) << "\n";
225 return Status::OK();
226 }
227
228 Status Visit(const NullArray& array) {
229 (*sink_) << array.length() << " nulls";
230 return Status::OK();
231 }
232
233 template <typename T>
234 typename std::enable_if<std::is_base_of<PrimitiveArray, T>::value ||
235 std::is_base_of<FixedSizeBinaryArray, T>::value ||
236 std::is_base_of<BinaryArray, T>::value ||
237 std::is_base_of<ListArray, T>::value,
238 Status>::type
239 Visit(const T& array) {
240 OpenArray(array);
241 if (array.length() > 0) {
242 RETURN_NOT_OK(WriteDataValues(array));
243 }
244 CloseArray(array);
245 return Status::OK();
246 }
247
248 Status Visit(const IntervalArray&) { return Status::NotImplemented("interval"); }
249
250 Status WriteValidityBitmap(const Array& array);
251
252 Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
253 int64_t length) {
254 for (size_t i = 0; i < fields.size(); ++i) {
255 Newline();
256 std::stringstream ss;
257 ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
258 Write(ss.str());
259
260 std::shared_ptr<Array> field = fields[i];
261 if (offset != 0) {
262 field = field->Slice(offset, length);
263 }
264
265 RETURN_NOT_OK(PrettyPrint(*field, indent_ + indent_size_, sink_));
266 }
267 return Status::OK();
268 }
269
270 Status Visit(const StructArray& array) {
271 RETURN_NOT_OK(WriteValidityBitmap(array));
272 std::vector<std::shared_ptr<Array>> children;
273 children.reserve(array.num_fields());
274 for (int i = 0; i < array.num_fields(); ++i) {
275 children.emplace_back(array.field(i));
276 }
277 return PrintChildren(children, 0, array.length());
278 }
279
280 Status Visit(const UnionArray& array) {
281 RETURN_NOT_OK(WriteValidityBitmap(array));
282
283 Newline();
284 Write("-- type_ids: ");
285 UInt8Array type_ids(array.length(), array.type_ids(), nullptr, 0, array.offset());
286 RETURN_NOT_OK(PrettyPrint(type_ids, indent_ + indent_size_, sink_));
287
288 if (array.mode() == UnionMode::DENSE) {
289 Newline();
290 Write("-- value_offsets: ");
291 Int32Array value_offsets(array.length(), array.value_offsets(), nullptr, 0,
292 array.offset());
293 RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + indent_size_, sink_));
294 }
295
296 // Print the children without any offset, because the type ids are absolute
297 std::vector<std::shared_ptr<Array>> children;
298 children.reserve(array.num_fields());
299 for (int i = 0; i < array.num_fields(); ++i) {
300 children.emplace_back(array.child(i));
301 }
302 return PrintChildren(children, 0, array.length() + array.offset());
303 }
304
305 Status Visit(const DictionaryArray& array) {
306 Newline();
307 Write("-- dictionary:\n");
308 RETURN_NOT_OK(PrettyPrint(*array.dictionary(), indent_ + indent_size_, sink_));
309
310 Newline();
311 Write("-- indices:\n");
312 return PrettyPrint(*array.indices(), indent_ + indent_size_, sink_);
313 }
314
315 Status Print() {
316 RETURN_NOT_OK(VisitArrayInline(array_, this));
317 Flush();
318 return Status::OK();
319 }
320
321 private:
322 const Array& array_;
323 std::string null_rep_;
324};
325
326Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
327 Indent();
328 Write("-- is_valid:");
329
330 if (array.null_count() > 0) {
331 Newline();
332 BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
333 array.offset());
334 return PrettyPrint(is_valid, indent_ + indent_size_, sink_);
335 } else {
336 Write(" all not null");
337 return Status::OK();
338 }
339}
340
341Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) {
342 ArrayPrinter printer(arr, indent, 2, 10, "null", false, sink);
343 return printer.Print();
344}
345
346Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
347 std::ostream* sink) {
348 ArrayPrinter printer(arr, options.indent, options.indent_size, options.window,
349 options.null_rep, options.skip_new_lines, sink);
350 return printer.Print();
351}
352
353Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options,
354 std::string* result) {
355 std::ostringstream sink;
356 RETURN_NOT_OK(PrettyPrint(arr, options, &sink));
357 *result = sink.str();
358 return Status::OK();
359}
360
361Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
362 std::ostream* sink) {
363 int num_chunks = chunked_arr.num_chunks();
364 int indent = options.indent;
365 int window = options.window;
366
367 for (int i = 0; i < indent; ++i) {
368 (*sink) << " ";
369 }
370 (*sink) << "[\n";
371 bool skip_comma = true;
372 for (int i = 0; i < num_chunks; ++i) {
373 if (skip_comma) {
374 skip_comma = false;
375 } else {
376 (*sink) << ",\n";
377 }
378 if ((i >= window) && (i < (num_chunks - window))) {
379 for (int i = 0; i < indent; ++i) {
380 (*sink) << " ";
381 }
382 (*sink) << "...\n";
383 i = num_chunks - window - 1;
384 skip_comma = true;
385 } else {
386 ArrayPrinter printer(*chunked_arr.chunk(i), indent + options.indent_size,
387 options.indent_size, window, options.null_rep,
388 options.skip_new_lines, sink);
389 RETURN_NOT_OK(printer.Print());
390 }
391 }
392 (*sink) << "\n";
393
394 for (int i = 0; i < indent; ++i) {
395 (*sink) << " ";
396 }
397 (*sink) << "]";
398
399 return Status::OK();
400}
401
402Status PrettyPrint(const Column& column, const PrettyPrintOptions& options,
403 std::ostream* sink) {
404 for (int i = 0; i < options.indent; ++i) {
405 (*sink) << " ";
406 }
407 (*sink) << column.field()->ToString() << "\n";
408
409 return PrettyPrint(*column.data(), options, sink);
410}
411
412Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options,
413 std::string* result) {
414 std::ostringstream sink;
415 RETURN_NOT_OK(PrettyPrint(chunked_arr, options, &sink));
416 *result = sink.str();
417 return Status::OK();
418}
419
420Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) {
421 for (int i = 0; i < batch.num_columns(); ++i) {
422 const std::string& name = batch.column_name(i);
423 (*sink) << name << ": ";
424 RETURN_NOT_OK(PrettyPrint(*batch.column(i), indent + 2, sink));
425 (*sink) << "\n";
426 }
427 (*sink) << std::flush;
428 return Status::OK();
429}
430
431Status PrettyPrint(const Table& table, const PrettyPrintOptions& options,
432 std::ostream* sink) {
433 RETURN_NOT_OK(PrettyPrint(*table.schema(), options, sink));
434 (*sink) << "\n";
435 (*sink) << "----\n";
436
437 PrettyPrintOptions column_options = options;
438 column_options.indent += 2;
439 for (int i = 0; i < table.num_columns(); ++i) {
440 for (int j = 0; j < options.indent; ++j) {
441 (*sink) << " ";
442 }
443 (*sink) << table.schema()->field(i)->name() << ":\n";
444 RETURN_NOT_OK(PrettyPrint(*table.column(i)->data(), column_options, sink));
445 (*sink) << "\n";
446 }
447 (*sink) << std::flush;
448 return Status::OK();
449}
450
451Status DebugPrint(const Array& arr, int indent) {
452 return PrettyPrint(arr, indent, &std::cout);
453}
454
455class SchemaPrinter : public PrettyPrinter {
456 public:
457 SchemaPrinter(const Schema& schema, int indent, int indent_size, int window,
458 bool skip_new_lines, std::ostream* sink)
459 : PrettyPrinter(indent, indent_size, window, skip_new_lines, sink),
460 schema_(schema) {}
461
462 Status PrintType(const DataType& type);
463 Status PrintField(const Field& field);
464
465 Status Print() {
466 for (int i = 0; i < schema_.num_fields(); ++i) {
467 if (i > 0) {
468 Newline();
469 }
470 RETURN_NOT_OK(PrintField(*schema_.field(i)));
471 }
472 Flush();
473 return Status::OK();
474 }
475
476 private:
477 const Schema& schema_;
478};
479
480Status SchemaPrinter::PrintType(const DataType& type) {
481 Write(type.ToString());
482 if (type.id() == Type::DICTIONARY) {
483 indent_ += indent_size_;
484 Newline();
485 Write("dictionary:\n");
486 const auto& dict_type = checked_cast<const DictionaryType&>(type);
487 RETURN_NOT_OK(PrettyPrint(*dict_type.dictionary(), indent_ + indent_size_, sink_));
488 indent_ -= indent_size_;
489 } else {
490 for (int i = 0; i < type.num_children(); ++i) {
491 Newline();
492
493 std::stringstream ss;
494 ss << "child " << i << ", ";
495
496 indent_ += indent_size_;
497 WriteIndented(ss.str());
498 RETURN_NOT_OK(PrintField(*type.child(i)));
499 indent_ -= indent_size_;
500 }
501 }
502 return Status::OK();
503}
504
505Status SchemaPrinter::PrintField(const Field& field) {
506 Write(field.name());
507 Write(": ");
508 return PrintType(*field.type());
509}
510
511Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
512 std::ostream* sink) {
513 SchemaPrinter printer(schema, options.indent, options.indent_size, options.window,
514 options.skip_new_lines, sink);
515 return printer.Print();
516}
517
518Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options,
519 std::string* result) {
520 std::ostringstream sink;
521 RETURN_NOT_OK(PrettyPrint(schema, options, &sink));
522 *result = sink.str();
523 return Status::OK();
524}
525
526} // namespace arrow
527