| 1 | #include "duckdb/execution/operator/persistent/csv_buffer.hpp" |
| 2 | #include "duckdb/common/string_util.hpp" |
| 3 | |
| 4 | namespace duckdb { |
| 5 | |
| 6 | CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle, |
| 7 | idx_t &global_csv_current_position, idx_t file_number_p) |
| 8 | : context(context), first_buffer(true), file_number(file_number_p) { |
| 9 | this->handle = AllocateBuffer(buffer_size: buffer_size_p); |
| 10 | |
| 11 | auto buffer = Ptr(); |
| 12 | actual_size = file_handle.Read(buffer, nr_bytes: buffer_size_p); |
| 13 | global_csv_start = global_csv_current_position; |
| 14 | global_csv_current_position += actual_size; |
| 15 | if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') { |
| 16 | start_position += 3; |
| 17 | } |
| 18 | last_buffer = file_handle.FinishedReading(); |
| 19 | } |
| 20 | |
| 21 | CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p, |
| 22 | bool final_buffer, idx_t global_csv_current_position, idx_t file_number_p) |
| 23 | : context(context), handle(std::move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer), |
| 24 | global_csv_start(global_csv_current_position), file_number(file_number_p) { |
| 25 | } |
| 26 | |
| 27 | unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t &global_csv_current_position, |
| 28 | idx_t file_number_p) { |
| 29 | auto next_buffer = AllocateBuffer(buffer_size); |
| 30 | idx_t next_buffer_actual_size = file_handle.Read(buffer: next_buffer.Ptr(), nr_bytes: buffer_size); |
| 31 | if (next_buffer_actual_size == 0) { |
| 32 | // We are done reading |
| 33 | return nullptr; |
| 34 | } |
| 35 | |
| 36 | auto next_csv_buffer = |
| 37 | make_uniq<CSVBuffer>(args&: context, args: std::move(next_buffer), args&: buffer_size, args&: next_buffer_actual_size, |
| 38 | args: file_handle.FinishedReading(), args&: global_csv_current_position, args&: file_number_p); |
| 39 | global_csv_current_position += next_buffer_actual_size; |
| 40 | return next_csv_buffer; |
| 41 | } |
| 42 | |
| 43 | BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) { |
| 44 | auto &buffer_manager = BufferManager::GetBufferManager(context); |
| 45 | return buffer_manager.Allocate(block_size: MaxValue<idx_t>(a: Storage::BLOCK_SIZE, b: buffer_size)); |
| 46 | } |
| 47 | |
| 48 | idx_t CSVBuffer::GetBufferSize() { |
| 49 | return actual_size; |
| 50 | } |
| 51 | |
| 52 | idx_t CSVBuffer::GetStart() { |
| 53 | return start_position; |
| 54 | } |
| 55 | |
| 56 | bool CSVBuffer::IsCSVFileLastBuffer() { |
| 57 | return last_buffer; |
| 58 | } |
| 59 | |
| 60 | bool CSVBuffer::IsCSVFileFirstBuffer() { |
| 61 | return first_buffer; |
| 62 | } |
| 63 | |
| 64 | idx_t CSVBuffer::GetCSVGlobalStart() { |
| 65 | return global_csv_start; |
| 66 | } |
| 67 | |
| 68 | idx_t CSVBuffer::GetFileNumber() { |
| 69 | return file_number; |
| 70 | } |
| 71 | |
| 72 | } // namespace duckdb |
| 73 | |