1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_CSV_OPTIONS_H
19#define ARROW_CSV_OPTIONS_H
20
21#include <cstdint>
22#include <memory>
23#include <string>
24#include <unordered_map>
25#include <vector>
26
27#include "arrow/util/visibility.h"
28
29namespace arrow {
30
31class DataType;
32
33namespace csv {
34
35struct ARROW_EXPORT ParseOptions {
36 // Parsing options
37
38 // Field delimiter
39 char delimiter = ',';
40 // Whether quoting is used
41 bool quoting = true;
42 // Quoting character (if `quoting` is true)
43 char quote_char = '"';
44 // Whether a quote inside a value is double-quoted
45 bool double_quote = true;
46 // Whether escaping is used
47 bool escaping = false;
48 // Escaping character (if `escaping` is true)
49 char escape_char = '\\';
50 // Whether values are allowed to contain CR (0x0d) and LF (0x0a) characters
51 bool newlines_in_values = false;
52 // Whether empty lines are ignored. If false, an empty line represents
53 // a single empty value (assuming a one-column CSV file).
54 bool ignore_empty_lines = true;
55
56 // XXX Should this be in ReadOptions?
57 // Number of header rows to skip (including the first row containing column names)
58 int32_t header_rows = 1;
59
60 static ParseOptions Defaults();
61};
62
63struct ARROW_EXPORT ConvertOptions {
64 // Conversion options
65
66 // Whether to check UTF8 validity of string columns
67 bool check_utf8 = true;
68 // Optional per-column types (disabling type inference on those columns)
69 std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
70 // Recognized spellings for null values
71 std::vector<std::string> null_values;
72
73 static ConvertOptions Defaults();
74};
75
76struct ARROW_EXPORT ReadOptions {
77 // Reader options
78
79 // Whether to use the global CPU thread pool
80 bool use_threads = true;
81 // Block size we request from the IO layer; also determines the size of
82 // chunks when use_threads is true
83 int32_t block_size = 1 << 20; // 1 MB
84
85 static ReadOptions Defaults();
86};
87
88} // namespace csv
89} // namespace arrow
90
91#endif // ARROW_CSV_OPTIONS_H
92