1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef ARROW_CSV_OPTIONS_H |
19 | #define ARROW_CSV_OPTIONS_H |
20 | |
21 | #include <cstdint> |
22 | #include <memory> |
23 | #include <string> |
24 | #include <unordered_map> |
25 | #include <vector> |
26 | |
27 | #include "arrow/util/visibility.h" |
28 | |
29 | namespace arrow { |
30 | |
31 | class DataType; |
32 | |
33 | namespace csv { |
34 | |
/// \brief Options describing how CSV text is parsed into rows and values.
///
/// Every member carries an in-class default, so a default-constructed
/// instance is directly usable.
struct ARROW_EXPORT ParseOptions {
  /// Field delimiter
  char delimiter{','};
  /// Whether quoting is used
  bool quoting{true};
  /// Quoting character (only meaningful if `quoting` is true)
  char quote_char{'"'};
  /// Whether a quote inside a value is double-quoted
  bool double_quote{true};
  /// Whether escaping is used
  bool escaping{false};
  /// Escaping character (only meaningful if `escaping` is true)
  char escape_char{'\\'};
  /// Whether values are allowed to contain CR (0x0d) and LF (0x0a) characters
  bool newlines_in_values{false};
  /// Whether empty lines are ignored. If false, an empty line represents
  /// a single empty value (assuming a one-column CSV file).
  bool ignore_empty_lines{true};

  // XXX Should this be in ReadOptions?
  /// Number of header rows to skip (including the first row containing
  /// column names)
  int32_t header_rows{1};

  /// Factory for a ParseOptions instance. Only declared in this header; the
  /// definition lives elsewhere (presumably it returns the defaults listed
  /// above -- confirm against the implementation).
  static ParseOptions Defaults();
};
62 | |
/// \brief Options describing how parsed CSV values are converted to typed
/// columns.
struct ARROW_EXPORT ConvertOptions {
  /// Whether to check UTF8 validity of string columns
  bool check_utf8{true};
  /// Optional per-column types, keyed by a string (presumably the column
  /// name -- confirm against the reader). Type inference is disabled on the
  /// columns listed here.
  std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
  /// Recognized spellings for null values. Empty when default-constructed.
  std::vector<std::string> null_values;

  /// Factory for a ConvertOptions instance. Only declared in this header;
  /// the definition lives elsewhere.
  static ConvertOptions Defaults();
};
75 | |
/// \brief Options describing how the CSV reader performs its reads.
struct ARROW_EXPORT ReadOptions {
  /// Whether to use the global CPU thread pool
  bool use_threads{true};
  /// Block size requested from the IO layer; it also determines the size of
  /// chunks when `use_threads` is true. Defaults to 1 MB.
  int32_t block_size{1 << 20};

  /// Factory for a ReadOptions instance. Only declared in this header; the
  /// definition lives elsewhere.
  static ReadOptions Defaults();
};
87 | |
88 | } // namespace csv |
89 | } // namespace arrow |
90 | |
91 | #endif // ARROW_CSV_OPTIONS_H |
92 | |