1#include "duckdb/execution/operator/persistent/csv_buffer.hpp"
2#include "duckdb/common/string_util.hpp"
3
4namespace duckdb {
5
6CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
7 idx_t &global_csv_current_position, idx_t file_number_p)
8 : context(context), first_buffer(true), file_number(file_number_p) {
9 this->handle = AllocateBuffer(buffer_size: buffer_size_p);
10
11 auto buffer = Ptr();
12 actual_size = file_handle.Read(buffer, nr_bytes: buffer_size_p);
13 global_csv_start = global_csv_current_position;
14 global_csv_current_position += actual_size;
15 if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
16 start_position += 3;
17 }
18 last_buffer = file_handle.FinishedReading();
19}
20
21CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p,
22 bool final_buffer, idx_t global_csv_current_position, idx_t file_number_p)
23 : context(context), handle(std::move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer),
24 global_csv_start(global_csv_current_position), file_number(file_number_p) {
25}
26
27unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t &global_csv_current_position,
28 idx_t file_number_p) {
29 auto next_buffer = AllocateBuffer(buffer_size);
30 idx_t next_buffer_actual_size = file_handle.Read(buffer: next_buffer.Ptr(), nr_bytes: buffer_size);
31 if (next_buffer_actual_size == 0) {
32 // We are done reading
33 return nullptr;
34 }
35
36 auto next_csv_buffer =
37 make_uniq<CSVBuffer>(args&: context, args: std::move(next_buffer), args&: buffer_size, args&: next_buffer_actual_size,
38 args: file_handle.FinishedReading(), args&: global_csv_current_position, args&: file_number_p);
39 global_csv_current_position += next_buffer_actual_size;
40 return next_csv_buffer;
41}
42
43BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) {
44 auto &buffer_manager = BufferManager::GetBufferManager(context);
45 return buffer_manager.Allocate(block_size: MaxValue<idx_t>(a: Storage::BLOCK_SIZE, b: buffer_size));
46}
47
48idx_t CSVBuffer::GetBufferSize() {
49 return actual_size;
50}
51
52idx_t CSVBuffer::GetStart() {
53 return start_position;
54}
55
56bool CSVBuffer::IsCSVFileLastBuffer() {
57 return last_buffer;
58}
59
60bool CSVBuffer::IsCSVFileFirstBuffer() {
61 return first_buffer;
62}
63
64idx_t CSVBuffer::GetCSVGlobalStart() {
65 return global_csv_start;
66}
67
68idx_t CSVBuffer::GetFileNumber() {
69 return file_number;
70}
71
72} // namespace duckdb
73