1 | #include <errno.h> |
2 | #include <time.h> |
3 | #include <optional> |
4 | #include <Common/ProfileEvents.h> |
5 | #include <Common/Stopwatch.h> |
6 | #include <Common/Exception.h> |
7 | #include <Common/CurrentMetrics.h> |
8 | #include <IO/ReadBufferFromFileDescriptor.h> |
9 | #include <IO/WriteHelpers.h> |
10 | |
11 | |
12 | namespace ProfileEvents |
13 | { |
14 | extern const Event ReadBufferFromFileDescriptorRead; |
15 | extern const Event ReadBufferFromFileDescriptorReadFailed; |
16 | extern const Event ReadBufferFromFileDescriptorReadBytes; |
17 | extern const Event DiskReadElapsedMicroseconds; |
18 | extern const Event Seek; |
19 | } |
20 | |
21 | namespace CurrentMetrics |
22 | { |
23 | extern const Metric Read; |
24 | } |
25 | |
26 | namespace DB |
27 | { |
28 | |
29 | namespace ErrorCodes |
30 | { |
31 | extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; |
32 | extern const int ARGUMENT_OUT_OF_BOUND; |
33 | extern const int CANNOT_SEEK_THROUGH_FILE; |
34 | extern const int CANNOT_SELECT; |
35 | } |
36 | |
37 | |
38 | std::string ReadBufferFromFileDescriptor::getFileName() const |
39 | { |
40 | return "(fd = " + toString(fd) + ")" ; |
41 | } |
42 | |
43 | |
44 | bool ReadBufferFromFileDescriptor::nextImpl() |
45 | { |
46 | size_t bytes_read = 0; |
47 | while (!bytes_read) |
48 | { |
49 | ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorRead); |
50 | |
51 | Stopwatch watch(profile_callback ? clock_type : CLOCK_MONOTONIC); |
52 | |
53 | ssize_t res = 0; |
54 | { |
55 | CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; |
56 | res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); |
57 | } |
58 | if (!res) |
59 | break; |
60 | |
61 | if (-1 == res && errno != EINTR) |
62 | { |
63 | ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadFailed); |
64 | throwFromErrnoWithPath("Cannot read from file " + getFileName(), getFileName(), |
65 | ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); |
66 | } |
67 | |
68 | if (res > 0) |
69 | bytes_read += res; |
70 | |
71 | /// It reports real time spent including the time spent while thread was preempted doing nothing. |
72 | /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). |
73 | /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it (TaskStatsInfoGetter has about 500K RPS). |
74 | watch.stop(); |
75 | ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); |
76 | |
77 | if (profile_callback) |
78 | { |
79 | ProfileInfo info; |
80 | info.bytes_requested = internal_buffer.size(); |
81 | info.bytes_read = res; |
82 | info.nanoseconds = watch.elapsed(); |
83 | profile_callback(info); |
84 | } |
85 | } |
86 | |
87 | pos_in_file += bytes_read; |
88 | |
89 | if (bytes_read) |
90 | { |
91 | ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); |
92 | working_buffer.resize(bytes_read); |
93 | } |
94 | else |
95 | return false; |
96 | |
97 | return true; |
98 | } |
99 | |
100 | |
101 | /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen. |
102 | off_t ReadBufferFromFileDescriptor::doSeek(off_t offset, int whence) |
103 | { |
104 | off_t new_pos = offset; |
105 | if (whence == SEEK_CUR) |
106 | new_pos = pos_in_file - (working_buffer.end() - pos) + offset; |
107 | else if (whence != SEEK_SET) |
108 | throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence" , ErrorCodes::ARGUMENT_OUT_OF_BOUND); |
109 | |
110 | /// Position is unchanged. |
111 | if (new_pos + (working_buffer.end() - pos) == pos_in_file) |
112 | return new_pos; |
113 | |
114 | if (hasPendingData() && new_pos <= pos_in_file && new_pos >= pos_in_file - static_cast<off_t>(working_buffer.size())) |
115 | { |
116 | /// Position is still inside buffer. |
117 | pos = working_buffer.begin() + (new_pos - (pos_in_file - working_buffer.size())); |
118 | return new_pos; |
119 | } |
120 | else |
121 | { |
122 | ProfileEvents::increment(ProfileEvents::Seek); |
123 | Stopwatch watch(profile_callback ? clock_type : CLOCK_MONOTONIC); |
124 | |
125 | pos = working_buffer.end(); |
126 | off_t res = ::lseek(fd, new_pos, SEEK_SET); |
127 | if (-1 == res) |
128 | throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(), |
129 | ErrorCodes::CANNOT_SEEK_THROUGH_FILE); |
130 | pos_in_file = new_pos; |
131 | |
132 | watch.stop(); |
133 | ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); |
134 | |
135 | return res; |
136 | } |
137 | } |
138 | |
139 | |
140 | /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. |
141 | bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) |
142 | { |
143 | fd_set fds; |
144 | FD_ZERO(&fds); |
145 | FD_SET(fd, &fds); |
146 | timeval timeout = { time_t(timeout_microseconds / 1000000), suseconds_t(timeout_microseconds % 1000000) }; |
147 | |
148 | int res = select(1, &fds, nullptr, nullptr, &timeout); |
149 | |
150 | if (-1 == res) |
151 | throwFromErrno("Cannot select" , ErrorCodes::CANNOT_SELECT); |
152 | |
153 | return res > 0; |
154 | } |
155 | |
156 | } |
157 | |