1 | #include "duckdb/execution/operator/persistent/csv_buffer.hpp" |
2 | #include "duckdb/common/string_util.hpp" |
3 | |
4 | namespace duckdb { |
5 | |
6 | CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, |
7 | idx_t &global_csv_current_position, idx_t file_number_p) |
8 | : context(context), first_buffer(true), file_number(file_number_p) { |
9 | this->handle = AllocateBuffer(buffer_size: buffer_size_p); |
10 | |
11 | auto buffer = Ptr(); |
12 | actual_size = file_handle.Read(buffer, nr_bytes: buffer_size_p); |
13 | global_csv_start = global_csv_current_position; |
14 | global_csv_current_position += actual_size; |
15 | if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') { |
16 | start_position += 3; |
17 | } |
18 | last_buffer = file_handle.FinishedReading(); |
19 | } |
20 | |
21 | CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p, |
22 | bool final_buffer, idx_t global_csv_current_position, idx_t file_number_p) |
23 | : context(context), handle(std::move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer), |
24 | global_csv_start(global_csv_current_position), file_number(file_number_p) { |
25 | } |
26 | |
27 | unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t &global_csv_current_position, |
28 | idx_t file_number_p) { |
29 | auto next_buffer = AllocateBuffer(buffer_size); |
30 | idx_t next_buffer_actual_size = file_handle.Read(buffer: next_buffer.Ptr(), nr_bytes: buffer_size); |
31 | if (next_buffer_actual_size == 0) { |
32 | // We are done reading |
33 | return nullptr; |
34 | } |
35 | |
36 | auto next_csv_buffer = |
37 | make_uniq<CSVBuffer>(args&: context, args: std::move(next_buffer), args&: buffer_size, args&: next_buffer_actual_size, |
38 | args: file_handle.FinishedReading(), args&: global_csv_current_position, args&: file_number_p); |
39 | global_csv_current_position += next_buffer_actual_size; |
40 | return next_csv_buffer; |
41 | } |
42 | |
43 | BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) { |
44 | auto &buffer_manager = BufferManager::GetBufferManager(context); |
45 | return buffer_manager.Allocate(block_size: MaxValue<idx_t>(a: Storage::BLOCK_SIZE, b: buffer_size)); |
46 | } |
47 | |
48 | idx_t CSVBuffer::GetBufferSize() { |
49 | return actual_size; |
50 | } |
51 | |
52 | idx_t CSVBuffer::GetStart() { |
53 | return start_position; |
54 | } |
55 | |
56 | bool CSVBuffer::IsCSVFileLastBuffer() { |
57 | return last_buffer; |
58 | } |
59 | |
60 | bool CSVBuffer::IsCSVFileFirstBuffer() { |
61 | return first_buffer; |
62 | } |
63 | |
64 | idx_t CSVBuffer::GetCSVGlobalStart() { |
65 | return global_csv_start; |
66 | } |
67 | |
68 | idx_t CSVBuffer::GetFileNumber() { |
69 | return file_number; |
70 | } |
71 | |
72 | } // namespace duckdb |
73 | |