1/*
2 * This file is part of the MicroPython project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
6 * Copyright (c) 2014 Damien P. George
7 * Copyright (c) 2014-2016 Paul Sokolovsky
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this software and associated documentation files (the "Software"), to deal
11 * in the Software without restriction, including without limitation the rights
12 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 * copies of the Software, and to permit persons to whom the Software is
14 * furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 * THE SOFTWARE.
26 */
27
28#include <string.h>
29#include <unistd.h>
30
31#include "py/objstr.h"
32#include "py/stream.h"
33#include "py/runtime.h"
34
35// This file defines generic Python stream read/write methods which
36// dispatch to the underlying stream interface of an object.
37
// Size in bytes of the initial buffer allocated by stream_readall(), and of
// each chunk by which that buffer is subsequently extended.
// TODO: should be in mpconfig.h
#define DEFAULT_BUFFER_SIZE 256

// Forward declaration: stream_read_generic() calls this before its definition.
STATIC mp_obj_t stream_readall(mp_obj_t self_in);

// Selects the type of object built from data read off a stream: str when the
// stream protocol's is_text field is set, bytes otherwise.
// `stream` is a pointer to the stream protocol structure (mp_stream_p_t).
#define STREAM_CONTENT_TYPE(stream) (((stream)->is_text) ? &mp_type_str : &mp_type_bytes)
44
45// Returns error condition in *errcode, if non-zero, return value is number of bytes written
46// before error condition occurred. If *errcode == 0, returns total bytes written (which will
47// be equal to input size).
48mp_uint_t mp_stream_rw(mp_obj_t stream, void *buf_, mp_uint_t size, int *errcode, byte flags) {
49 byte *buf = buf_;
50 typedef mp_uint_t (*io_func_t)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode);
51 io_func_t io_func;
52 const mp_stream_p_t *stream_p = mp_get_stream(stream);
53 if (flags & MP_STREAM_RW_WRITE) {
54 io_func = (io_func_t)stream_p->write;
55 } else {
56 io_func = stream_p->read;
57 }
58
59 *errcode = 0;
60 mp_uint_t done = 0;
61 while (size > 0) {
62 mp_uint_t out_sz = io_func(stream, buf, size, errcode);
63 // For read, out_sz == 0 means EOF. For write, it's unspecified
64 // what it means, but we don't make any progress, so returning
65 // is still the best option.
66 if (out_sz == 0) {
67 return done;
68 }
69 if (out_sz == MP_STREAM_ERROR) {
70 // If we read something before getting EAGAIN, don't leak it
71 if (mp_is_nonblocking_error(*errcode) && done != 0) {
72 *errcode = 0;
73 }
74 return done;
75 }
76 if (flags & MP_STREAM_RW_ONCE) {
77 return out_sz;
78 }
79
80 buf += out_sz;
81 size -= out_sz;
82 done += out_sz;
83 }
84 return done;
85}
86
87const mp_stream_p_t *mp_get_stream_raise(mp_obj_t self_in, int flags) {
88 const mp_obj_type_t *type = mp_obj_get_type(self_in);
89 const mp_stream_p_t *stream_p = type->protocol;
90 if (stream_p == NULL
91 || ((flags & MP_STREAM_OP_READ) && stream_p->read == NULL)
92 || ((flags & MP_STREAM_OP_WRITE) && stream_p->write == NULL)
93 || ((flags & MP_STREAM_OP_IOCTL) && stream_p->ioctl == NULL)) {
94 // CPython: io.UnsupportedOperation, OSError subclass
95 mp_raise_msg(&mp_type_OSError, MP_ERROR_TEXT("stream operation not supported"));
96 }
97 return stream_p;
98}
99
// Shared implementation behind stream_read() and stream_read1().
//
// args[0] is the stream object and args[1], when present, is the requested
// size. With no size argument, or a size of -1, the call is forwarded to
// stream_readall(). Otherwise:
//  - for text streams (only when MICROPY_PY_BUILTINS_STR_UNICODE is enabled)
//    the size counts characters and the result is a str;
//  - for every other stream the size counts bytes and the result type is
//    picked by STREAM_CONTENT_TYPE().
//
// Returns None when a non-blocking stream had nothing available, and raises
// OSError for any other stream error. `flags` is handed to mp_stream_rw() in
// the byte path only; the text path below does not consult it.
STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte flags) {
    // What to do if sz < -1? Python docs don't specify this case.
    // CPython does a readall, but here we silently let negatives through,
    // and they will cause a MemoryError.
    mp_int_t sz;
    if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) {
        return stream_readall(args[0]);
    }

    const mp_stream_p_t *stream_p = mp_get_stream(args[0]);

    #if MICROPY_PY_BUILTINS_STR_UNICODE
    if (stream_p->is_text) {
        // We need to read sz number of unicode characters. Because we don't have any
        // buffering, and because the stream API can only read bytes, we must read here
        // in units of bytes and must never over read. If we want sz chars, then reading
        // sz bytes will never over-read, so we follow this approach, in a loop to keep
        // reading until we have exactly enough chars. This will be 1 read for text
        // with ASCII-only chars, and about 2 reads for text with a couple of non-ASCII
        // chars. For text with lots of non-ASCII chars, it'll be pretty inefficient
        // in time and memory.

        vstr_t vstr;
        vstr_init(&vstr, sz);
        // Number of bytes to request on the next read.
        mp_uint_t more_bytes = sz;
        // Offset in vstr.buf of the first byte not yet counted as part of a
        // complete character.
        mp_uint_t last_buf_offset = 0;
        while (more_bytes > 0) {
            // Reserve room for the bytes about to be read; trimmed below if
            // fewer arrive.
            char *p = vstr_add_len(&vstr, more_bytes);
            int error;
            mp_uint_t out_sz = mp_stream_read_exactly(args[0], p, more_bytes, &error);
            if (error != 0) {
                // Drop the whole reservation made for this read.
                vstr_cut_tail_bytes(&vstr, more_bytes);
                if (mp_is_nonblocking_error(error)) {
                    // With non-blocking streams, we read as much as we can.
                    // If we read nothing, return None, just like read().
                    // Otherwise, return data read so far.
                    // TODO what if we have read only half a non-ASCII char?
                    if (vstr.len == 0) {
                        vstr_clear(&vstr);
                        return mp_const_none;
                    }
                    break;
                }
                mp_raise_OSError(error);
            }

            if (out_sz < more_bytes) {
                // Finish reading.
                // TODO what if we have read only half a non-ASCII char?
                // Trim the part of the reservation that was not filled.
                vstr_cut_tail_bytes(&vstr, more_bytes - out_sz);
                if (out_sz == 0) {
                    break;
                }
            }

            // count chars from bytes just read
            for (mp_uint_t off = last_buf_offset;;) {
                // Work out the encoded length n of the character starting at
                // `off` from its first byte.
                byte b = vstr.buf[off];
                int n;
                if (!UTF8_IS_NONASCII(b)) {
                    // 1-byte ASCII char
                    n = 1;
                } else if ((b & 0xe0) == 0xc0) {
                    // 2-byte char
                    n = 2;
                } else if ((b & 0xf0) == 0xe0) {
                    // 3-byte char
                    n = 3;
                } else if ((b & 0xf8) == 0xf0) {
                    // 4-byte char
                    n = 4;
                } else {
                    // TODO
                    // Any other first byte is treated as starting a 5-byte
                    // sequence.
                    n = 5;
                }
                if (off + n <= vstr.len) {
                    // got a whole char in n bytes
                    off += n;
                    sz -= 1;
                    last_buf_offset = off;
                    if (off >= vstr.len) {
                        // Buffer fully consumed: still need sz more chars,
                        // so ask for at least that many bytes next time.
                        more_bytes = sz;
                        break;
                    }
                } else {
                    // didn't get a whole char, so work out how many extra bytes are needed for
                    // this partial char, plus bytes for additional chars that we want
                    more_bytes = (off + n - vstr.len) + (sz - 1);
                    break;
                }
            }
        }

        return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
    }
    #endif

    // Byte-counted read: allocate sz bytes up front and let mp_stream_rw()
    // fill as much as it can.
    vstr_t vstr;
    vstr_init_len(&vstr, sz);
    int error;
    mp_uint_t out_sz = mp_stream_rw(args[0], vstr.buf, sz, &error, flags);
    if (error != 0) {
        vstr_clear(&vstr);
        if (mp_is_nonblocking_error(error)) {
            // https://docs.python.org/3.4/library/io.html#io.RawIOBase.read
            // "If the object is in non-blocking mode and no bytes are available,
            // None is returned."
            // This is actually very weird, as naive truth check will treat
            // this as EOF.
            return mp_const_none;
        }
        mp_raise_OSError(error);
    } else {
        // Shrink the logical length to the number of bytes actually read.
        vstr.len = out_sz;
        return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
    }
}
217
218STATIC mp_obj_t stream_read(size_t n_args, const mp_obj_t *args) {
219 return stream_read_generic(n_args, args, MP_STREAM_RW_READ);
220}
221MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_read_obj, 1, 2, stream_read);
222
223STATIC mp_obj_t stream_read1(size_t n_args, const mp_obj_t *args) {
224 return stream_read_generic(n_args, args, MP_STREAM_RW_READ | MP_STREAM_RW_ONCE);
225}
226MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_read1_obj, 1, 2, stream_read1);
227
228mp_obj_t mp_stream_write(mp_obj_t self_in, const void *buf, size_t len, byte flags) {
229 int error;
230 mp_uint_t out_sz = mp_stream_rw(self_in, (void *)buf, len, &error, flags);
231 if (error != 0) {
232 if (mp_is_nonblocking_error(error)) {
233 // http://docs.python.org/3/library/io.html#io.RawIOBase.write
234 // "None is returned if the raw stream is set not to block and
235 // no single byte could be readily written to it."
236 return mp_const_none;
237 }
238 mp_raise_OSError(error);
239 } else {
240 return MP_OBJ_NEW_SMALL_INT(out_sz);
241 }
242}
243
244// This is used to adapt a stream object to an mp_print_t interface
245void mp_stream_write_adaptor(void *self, const char *buf, size_t len) {
246 mp_stream_write(MP_OBJ_FROM_PTR(self), buf, len, MP_STREAM_RW_WRITE);
247}
248
249STATIC mp_obj_t stream_write_method(size_t n_args, const mp_obj_t *args) {
250 mp_buffer_info_t bufinfo;
251 mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
252 size_t max_len = (size_t)-1;
253 size_t off = 0;
254 if (n_args == 3) {
255 max_len = mp_obj_get_int_truncated(args[2]);
256 } else if (n_args == 4) {
257 off = mp_obj_get_int_truncated(args[2]);
258 max_len = mp_obj_get_int_truncated(args[3]);
259 if (off > bufinfo.len) {
260 off = bufinfo.len;
261 }
262 }
263 bufinfo.len -= off;
264 return mp_stream_write(args[0], (byte *)bufinfo.buf + off, MIN(bufinfo.len, max_len), MP_STREAM_RW_WRITE);
265}
266MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_write_obj, 2, 4, stream_write_method);
267
268STATIC mp_obj_t stream_write1_method(mp_obj_t self_in, mp_obj_t arg) {
269 mp_buffer_info_t bufinfo;
270 mp_get_buffer_raise(arg, &bufinfo, MP_BUFFER_READ);
271 return mp_stream_write(self_in, bufinfo.buf, bufinfo.len, MP_STREAM_RW_WRITE | MP_STREAM_RW_ONCE);
272}
273MP_DEFINE_CONST_FUN_OBJ_2(mp_stream_write1_obj, stream_write1_method);
274
275STATIC mp_obj_t stream_readinto(size_t n_args, const mp_obj_t *args) {
276 mp_buffer_info_t bufinfo;
277 mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_WRITE);
278
279 // CPython extension: if 2nd arg is provided, that's max len to read,
280 // instead of full buffer. Similar to
281 // https://docs.python.org/3/library/socket.html#socket.socket.recv_into
282 mp_uint_t len = bufinfo.len;
283 if (n_args > 2) {
284 len = mp_obj_get_int(args[2]);
285 if (len > bufinfo.len) {
286 len = bufinfo.len;
287 }
288 }
289
290 int error;
291 mp_uint_t out_sz = mp_stream_read_exactly(args[0], bufinfo.buf, len, &error);
292 if (error != 0) {
293 if (mp_is_nonblocking_error(error)) {
294 return mp_const_none;
295 }
296 mp_raise_OSError(error);
297 } else {
298 return MP_OBJ_NEW_SMALL_INT(out_sz);
299 }
300}
301MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_readinto_obj, 2, 3, stream_readinto);
302
303STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
304 const mp_stream_p_t *stream_p = mp_get_stream(self_in);
305
306 mp_uint_t total_size = 0;
307 vstr_t vstr;
308 vstr_init(&vstr, DEFAULT_BUFFER_SIZE);
309 char *p = vstr.buf;
310 mp_uint_t current_read = DEFAULT_BUFFER_SIZE;
311 while (true) {
312 int error;
313 mp_uint_t out_sz = stream_p->read(self_in, p, current_read, &error);
314 if (out_sz == MP_STREAM_ERROR) {
315 if (mp_is_nonblocking_error(error)) {
316 // With non-blocking streams, we read as much as we can.
317 // If we read nothing, return None, just like read().
318 // Otherwise, return data read so far.
319 if (total_size == 0) {
320 return mp_const_none;
321 }
322 break;
323 }
324 mp_raise_OSError(error);
325 }
326 if (out_sz == 0) {
327 break;
328 }
329 total_size += out_sz;
330 if (out_sz < current_read) {
331 current_read -= out_sz;
332 p += out_sz;
333 } else {
334 p = vstr_extend(&vstr, DEFAULT_BUFFER_SIZE);
335 current_read = DEFAULT_BUFFER_SIZE;
336 }
337 }
338
339 vstr.len = total_size;
340 return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
341}
342
343// Unbuffered, inefficient implementation of readline() for raw I/O files.
344STATIC mp_obj_t stream_unbuffered_readline(size_t n_args, const mp_obj_t *args) {
345 const mp_stream_p_t *stream_p = mp_get_stream(args[0]);
346
347 mp_int_t max_size = -1;
348 if (n_args > 1) {
349 max_size = MP_OBJ_SMALL_INT_VALUE(args[1]);
350 }
351
352 vstr_t vstr;
353 if (max_size != -1) {
354 vstr_init(&vstr, max_size);
355 } else {
356 vstr_init(&vstr, 16);
357 }
358
359 while (max_size == -1 || max_size-- != 0) {
360 char *p = vstr_add_len(&vstr, 1);
361 int error;
362 mp_uint_t out_sz = stream_p->read(args[0], p, 1, &error);
363 if (out_sz == MP_STREAM_ERROR) {
364 if (mp_is_nonblocking_error(error)) {
365 if (vstr.len == 1) {
366 // We just incremented it, but otherwise we read nothing
367 // and immediately got EAGAIN. This case is not well
368 // specified in
369 // https://docs.python.org/3/library/io.html#io.IOBase.readline
370 // unlike similar case for read(). But we follow the latter's
371 // behavior - return None.
372 vstr_clear(&vstr);
373 return mp_const_none;
374 } else {
375 goto done;
376 }
377 }
378 mp_raise_OSError(error);
379 }
380 if (out_sz == 0) {
381 done:
382 // Back out previously added byte
383 // Consider, what's better - read a char and get OutOfMemory (so read
384 // char is lost), or allocate first as we do.
385 vstr_cut_tail_bytes(&vstr, 1);
386 break;
387 }
388 if (*p == '\n') {
389 break;
390 }
391 }
392
393 return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
394}
395MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_unbuffered_readline_obj, 1, 2, stream_unbuffered_readline);
396
397// TODO take an optional extra argument (what does it do exactly?)
398STATIC mp_obj_t stream_unbuffered_readlines(mp_obj_t self) {
399 mp_obj_t lines = mp_obj_new_list(0, NULL);
400 for (;;) {
401 mp_obj_t line = stream_unbuffered_readline(1, &self);
402 if (!mp_obj_is_true(line)) {
403 break;
404 }
405 mp_obj_list_append(lines, line);
406 }
407 return lines;
408}
409MP_DEFINE_CONST_FUN_OBJ_1(mp_stream_unbuffered_readlines_obj, stream_unbuffered_readlines);
410
411mp_obj_t mp_stream_unbuffered_iter(mp_obj_t self) {
412 mp_obj_t l_in = stream_unbuffered_readline(1, &self);
413 if (mp_obj_is_true(l_in)) {
414 return l_in;
415 }
416 return MP_OBJ_STOP_ITERATION;
417}
418
419mp_obj_t mp_stream_close(mp_obj_t stream) {
420 const mp_stream_p_t *stream_p = mp_get_stream(stream);
421 int error;
422 mp_uint_t res = stream_p->ioctl(stream, MP_STREAM_CLOSE, 0, &error);
423 if (res == MP_STREAM_ERROR) {
424 mp_raise_OSError(error);
425 }
426 return mp_const_none;
427}
428MP_DEFINE_CONST_FUN_OBJ_1(mp_stream_close_obj, mp_stream_close);
429
430STATIC mp_obj_t stream_seek(size_t n_args, const mp_obj_t *args) {
431 struct mp_stream_seek_t seek_s;
432 // TODO: Could be uint64
433 seek_s.offset = mp_obj_get_int(args[1]);
434 seek_s.whence = SEEK_SET;
435 if (n_args == 3) {
436 seek_s.whence = mp_obj_get_int(args[2]);
437 }
438
439 // In POSIX, it's error to seek before end of stream, we enforce it here.
440 if (seek_s.whence == SEEK_SET && seek_s.offset < 0) {
441 mp_raise_OSError(MP_EINVAL);
442 }
443
444 const mp_stream_p_t *stream_p = mp_get_stream(args[0]);
445 int error;
446 mp_uint_t res = stream_p->ioctl(args[0], MP_STREAM_SEEK, (mp_uint_t)(uintptr_t)&seek_s, &error);
447 if (res == MP_STREAM_ERROR) {
448 mp_raise_OSError(error);
449 }
450
451 // TODO: Could be uint64
452 return mp_obj_new_int_from_uint(seek_s.offset);
453}
454MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_seek_obj, 2, 3, stream_seek);
455
456STATIC mp_obj_t stream_tell(mp_obj_t self) {
457 mp_obj_t offset = MP_OBJ_NEW_SMALL_INT(0);
458 mp_obj_t whence = MP_OBJ_NEW_SMALL_INT(SEEK_CUR);
459 const mp_obj_t args[3] = {self, offset, whence};
460 return stream_seek(3, args);
461}
462MP_DEFINE_CONST_FUN_OBJ_1(mp_stream_tell_obj, stream_tell);
463
464STATIC mp_obj_t stream_flush(mp_obj_t self) {
465 const mp_stream_p_t *stream_p = mp_get_stream(self);
466 int error;
467 mp_uint_t res = stream_p->ioctl(self, MP_STREAM_FLUSH, 0, &error);
468 if (res == MP_STREAM_ERROR) {
469 mp_raise_OSError(error);
470 }
471 return mp_const_none;
472}
473MP_DEFINE_CONST_FUN_OBJ_1(mp_stream_flush_obj, stream_flush);
474
475STATIC mp_obj_t stream_ioctl(size_t n_args, const mp_obj_t *args) {
476 mp_buffer_info_t bufinfo;
477 uintptr_t val = 0;
478 if (n_args > 2) {
479 if (mp_get_buffer(args[2], &bufinfo, MP_BUFFER_WRITE)) {
480 val = (uintptr_t)bufinfo.buf;
481 } else {
482 val = mp_obj_get_int_truncated(args[2]);
483 }
484 }
485
486 const mp_stream_p_t *stream_p = mp_get_stream(args[0]);
487 int error;
488 mp_uint_t res = stream_p->ioctl(args[0], mp_obj_get_int(args[1]), val, &error);
489 if (res == MP_STREAM_ERROR) {
490 mp_raise_OSError(error);
491 }
492
493 return mp_obj_new_int(res);
494}
495MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_ioctl_obj, 2, 3, stream_ioctl);
496
497#if MICROPY_STREAMS_POSIX_API
498/*
499 * POSIX-like functions
500 *
501 * These functions have POSIX-compatible signature (except for "void *stream"
502 * first argument instead of "int fd"). They are useful to port existing
503 * POSIX-compatible software to work with MicroPython streams.
504 */
505
506#include <errno.h>
507
508ssize_t mp_stream_posix_write(void *stream, const void *buf, size_t len) {
509 mp_obj_base_t *o = stream;
510 const mp_stream_p_t *stream_p = o->type->protocol;
511 mp_uint_t out_sz = stream_p->write(MP_OBJ_FROM_PTR(stream), buf, len, &errno);
512 if (out_sz == MP_STREAM_ERROR) {
513 return -1;
514 } else {
515 return out_sz;
516 }
517}
518
519ssize_t mp_stream_posix_read(void *stream, void *buf, size_t len) {
520 mp_obj_base_t *o = stream;
521 const mp_stream_p_t *stream_p = o->type->protocol;
522 mp_uint_t out_sz = stream_p->read(MP_OBJ_FROM_PTR(stream), buf, len, &errno);
523 if (out_sz == MP_STREAM_ERROR) {
524 return -1;
525 } else {
526 return out_sz;
527 }
528}
529
530off_t mp_stream_posix_lseek(void *stream, off_t offset, int whence) {
531 const mp_obj_base_t *o = stream;
532 const mp_stream_p_t *stream_p = o->type->protocol;
533 struct mp_stream_seek_t seek_s;
534 seek_s.offset = offset;
535 seek_s.whence = whence;
536 mp_uint_t res = stream_p->ioctl(MP_OBJ_FROM_PTR(stream), MP_STREAM_SEEK, (mp_uint_t)(uintptr_t)&seek_s, &errno);
537 if (res == MP_STREAM_ERROR) {
538 return -1;
539 }
540 return seek_s.offset;
541}
542
543int mp_stream_posix_fsync(void *stream) {
544 mp_obj_base_t *o = stream;
545 const mp_stream_p_t *stream_p = o->type->protocol;
546 mp_uint_t res = stream_p->ioctl(MP_OBJ_FROM_PTR(stream), MP_STREAM_FLUSH, 0, &errno);
547 if (res == MP_STREAM_ERROR) {
548 return -1;
549 }
550 return res;
551}
552
553#endif
554