1 | /* $Id: CoinFileIO.cpp 1448 2011-06-19 15:34:41Z stefan $ */ |
2 | // Copyright (C) 2005, COIN-OR. All Rights Reserved. |
3 | // This code is licensed under the terms of the Eclipse Public License (EPL). |
4 | |
5 | #if defined(_MSC_VER) |
6 | // Turn off compiler warning about long names |
7 | # pragma warning(disable:4786) |
8 | #endif |
9 | |
10 | #include "CoinUtilsConfig.h" |
11 | #include "CoinFileIO.hpp" |
12 | |
13 | #include "CoinError.hpp" |
14 | #include "CoinHelperFunctions.hpp" |
15 | |
16 | #include <vector> |
17 | #include <cstring> |
18 | |
19 | // ------ CoinFileIOBase ------- |
20 | |
21 | CoinFileIOBase::CoinFileIOBase (const std::string &fileName): |
22 | fileName_ (fileName) |
23 | {} |
24 | |
25 | CoinFileIOBase::~CoinFileIOBase () |
26 | {} |
27 | |
28 | const char *CoinFileIOBase::getFileName () const |
29 | { |
30 | return fileName_.c_str (); |
31 | } |
32 | |
33 | |
34 | // ------------------------------------------------------ |
35 | // next we implement some subclasses of CoinFileInput |
36 | // for plain text and compressed files |
37 | // ------------------------------------------------------ |
38 | |
39 | // ------ Input for plain text ------ |
40 | |
41 | #include <stdio.h> |
42 | |
43 | // This reads plain text files |
44 | class CoinPlainFileInput: public CoinFileInput |
45 | { |
46 | public: |
47 | CoinPlainFileInput (const std::string &fileName): |
48 | CoinFileInput (fileName), f_ (0) |
49 | { |
50 | readType_="plain" ; |
51 | if (fileName!="stdin" ) { |
52 | f_ = fopen (fileName.c_str (), "r" ); |
53 | if (f_ == 0) |
54 | throw CoinError ("Could not open file for reading!" , |
55 | "CoinPlainFileInput" , |
56 | "CoinPlainFileInput" ); |
57 | } else { |
58 | f_ = stdin; |
59 | } |
60 | } |
61 | |
62 | virtual ~CoinPlainFileInput () |
63 | { |
64 | if (f_ != 0) |
65 | fclose (f_); |
66 | } |
67 | |
68 | virtual int read (void *buffer, int size) override |
69 | { |
70 | return static_cast<int>(fread (buffer, 1, size, f_)); |
71 | } |
72 | |
73 | virtual char *gets (char *buffer, int size) override |
74 | { |
75 | return fgets (buffer, size, f_); |
76 | } |
77 | |
78 | private: |
79 | FILE *f_; |
80 | }; |
81 | |
82 | // ------ helper class supporting buffered gets ------- |
83 | |
84 | // This is a CoinFileInput class to handle cases, where the gets method |
85 | // is not easy to implement (i.e. bzlib has no equivalent to gets, and |
86 | // zlib's gzgets is extremely slow). It's subclasses only have to implement |
87 | // the readRaw method, while the read and gets methods are handled by this |
88 | // class using an internal buffer. |
89 | class CoinGetslessFileInput: public CoinFileInput |
90 | { |
91 | public: |
92 | CoinGetslessFileInput (const std::string &fileName): |
93 | CoinFileInput (fileName), |
94 | dataBuffer_ (8*1024), |
95 | dataStart_ (&dataBuffer_[0]), |
96 | dataEnd_ (&dataBuffer_[0]) |
97 | {} |
98 | |
99 | virtual ~CoinGetslessFileInput () {} |
100 | |
101 | virtual int read (void *buffer, int size) override |
102 | { |
103 | if (size <= 0) |
104 | return 0; |
105 | |
106 | // return value |
107 | int r = 0; |
108 | |
109 | // treat destination as char * |
110 | char *dest = static_cast<char *>(buffer); |
111 | |
112 | // First consume data from buffer if available. |
113 | if (dataStart_ < dataEnd_) |
114 | { |
115 | int amount = static_cast<int>(dataEnd_ - dataStart_); |
116 | if (amount > size) |
117 | amount = size; |
118 | |
119 | CoinMemcpyN( dataStart_, amount, dest); |
120 | |
121 | dest += amount; |
122 | size -= amount; |
123 | |
124 | dataStart_ += amount; |
125 | |
126 | r = amount; |
127 | } |
128 | |
129 | // If we require more data, use readRaw. |
130 | // We don't use the buffer here, as readRaw is ecpected to be efficient. |
131 | if (size > 0) |
132 | r += readRaw (dest, size); |
133 | |
134 | return r; |
135 | } |
136 | |
137 | virtual char *gets (char *buffer, int size) override |
138 | { |
139 | if (size <= 1) |
140 | return 0; |
141 | |
142 | char *dest = buffer; |
143 | char *destLast = dest + size - 2; // last position allowed to be written |
144 | |
145 | bool initiallyEmpty = (dataStart_ == dataEnd_); |
146 | |
147 | for (;;) |
148 | { |
149 | // refill dataBuffer if needed |
150 | if (dataStart_ == dataEnd_) |
151 | { |
152 | dataStart_ = dataEnd_ = &dataBuffer_[0]; |
153 | int count = readRaw (dataStart_, static_cast<int>(dataBuffer_.size ())); |
154 | |
155 | // at EOF? |
156 | if (count <= 0) |
157 | { |
158 | *dest = 0; |
159 | // if it was initially empty we had nothing written and should |
160 | // return 0, otherwise at least the buffer contents were |
161 | // transfered and buffer has to be returned. |
162 | return initiallyEmpty ? 0 : buffer; |
163 | } |
164 | |
165 | dataEnd_ = dataStart_ + count; |
166 | } |
167 | |
168 | // copy character from buffer |
169 | *dest = *dataStart_++; |
170 | |
171 | // terminate, if character was \n or bufferEnd was reached |
172 | if (*dest == '\n' || dest == destLast) |
173 | { |
174 | *++dest = 0; |
175 | return buffer; |
176 | } |
177 | |
178 | ++dest; |
179 | } |
180 | |
181 | // we should never reach this place |
182 | throw CoinError ("Reached unreachable code!" , |
183 | "gets" , |
184 | "CoinGetslessFileInput" ); |
185 | } |
186 | |
187 | protected: |
188 | // This should be implemented by the subclasses. It essentially behaves |
189 | // like fread: the location pointed to by buffer should be filled with |
190 | // size bytes. Return value is the number of bytes written (0 indicates EOF). |
191 | virtual int readRaw (void *buffer, int size) = 0; |
192 | |
193 | private: |
194 | std::vector<char> dataBuffer_; // memory used for buffering |
195 | char *dataStart_; // pointer to currently buffered data |
196 | char *dataEnd_; // pointer to "one behind last data element" |
197 | }; |
198 | |
199 | |
200 | // -------- input for gzip compressed files ------- |
201 | |
202 | |
203 | #ifdef COIN_HAS_ZLIB |
204 | |
205 | #include <zlib.h> |
206 | |
207 | // This class handles gzip'ed files using libz. |
208 | // While zlib offers the gzread and gzgets functions which do all we want, |
209 | // the gzgets is _very_ slow as it gets single bytes via the complex gzread. |
210 | // So we use the CoinGetslessFileInput as base. |
211 | class CoinGzipFileInput: public CoinGetslessFileInput |
212 | { |
213 | public: |
214 | CoinGzipFileInput (const std::string &fileName): |
215 | CoinGetslessFileInput (fileName), gzf_ (0) |
216 | { |
217 | readType_="zlib" ; |
218 | gzf_ = gzopen (fileName.c_str (), "r" ); |
219 | if (gzf_ == 0) |
220 | throw CoinError ("Could not open file for reading!" , |
221 | "CoinGzipFileInput" , |
222 | "CoinGzipFileInput" ); |
223 | } |
224 | |
225 | virtual ~CoinGzipFileInput () |
226 | { |
227 | if (gzf_ != 0) |
228 | gzclose (gzf_); |
229 | } |
230 | |
231 | protected: |
232 | virtual int readRaw (void *buffer, int size) |
233 | { |
234 | return gzread (gzf_, buffer, size); |
235 | } |
236 | |
237 | private: |
238 | gzFile gzf_; |
239 | }; |
240 | |
241 | #endif // COIN_HAS_ZLIB |
242 | |
243 | |
244 | // ------- input for bzip2 compressed files ------ |
245 | |
246 | #ifdef COIN_HAS_BZLIB |
247 | |
248 | #include <bzlib.h> |
249 | |
250 | // This class handles files compressed by bzip2 using libbz. |
251 | // As bzlib has no builtin gets, we use the CoinGetslessFileInput. |
252 | class CoinBzip2FileInput: public CoinGetslessFileInput |
253 | { |
254 | public: |
255 | CoinBzip2FileInput (const std::string &fileName): |
256 | CoinGetslessFileInput (fileName), f_ (0), bzf_ (0) |
257 | { |
258 | int bzError = BZ_OK; |
259 | readType_="bzlib" ; |
260 | |
261 | f_ = fopen (fileName.c_str (), "r" ); |
262 | |
263 | if (f_ != 0) |
264 | bzf_ = BZ2_bzReadOpen (&bzError, f_, 0, 0, 0, 0); |
265 | |
266 | if (f_ == 0 || bzError != BZ_OK || bzf_ == 0) |
267 | throw CoinError ("Could not open file for reading!" , |
268 | "CoinBzip2FileInput" , |
269 | "CoinBzip2FileInput" ); |
270 | } |
271 | |
272 | virtual ~CoinBzip2FileInput () |
273 | { |
274 | int bzError = BZ_OK; |
275 | if (bzf_ != 0) |
276 | BZ2_bzReadClose (&bzError, bzf_); |
277 | |
278 | if (f_ != 0) |
279 | fclose (f_); |
280 | } |
281 | |
282 | protected: |
283 | virtual int readRaw (void *buffer, int size) |
284 | { |
285 | int bzError = BZ_OK; |
286 | int count = BZ2_bzRead (&bzError, bzf_, buffer, size); |
287 | |
288 | if (bzError == BZ_OK || bzError == BZ_STREAM_END) |
289 | return count; |
290 | |
291 | // Error? |
292 | return 0; |
293 | } |
294 | |
295 | private: |
296 | FILE *f_; |
297 | BZFILE *bzf_; |
298 | }; |
299 | |
300 | #endif // COIN_HAS_BZLIB |
301 | |
302 | |
303 | // ----- implementation of CoinFileInput's methods |
304 | |
305 | /// indicates whether CoinFileInput supports gzip'ed files |
306 | bool CoinFileInput::haveGzipSupport() { |
307 | #ifdef COIN_HAS_ZLIB |
308 | return true; |
309 | #else |
310 | return false; |
311 | #endif |
312 | } |
313 | |
314 | /// indicates whether CoinFileInput supports bzip2'ed files |
315 | bool CoinFileInput::haveBzip2Support() { |
316 | #ifdef COIN_HAS_BZLIB |
317 | return true; |
318 | #else |
319 | return false; |
320 | #endif |
321 | } |
322 | |
323 | CoinFileInput *CoinFileInput::create (const std::string &fileName) |
324 | { |
325 | // first try to open file, and read first bytes |
326 | unsigned char [4]; |
327 | size_t count ; // So stdin will be plain file |
328 | if (fileName!="stdin" ) { |
329 | FILE *f = fopen (fileName.c_str (), "r" ); |
330 | |
331 | if (f == 0) |
332 | throw CoinError ("Could not open file for reading!" , |
333 | "create" , |
334 | "CoinFileInput" ); |
335 | count = fread (header, 1, 4, f); |
336 | fclose (f); |
337 | } else { |
338 | // Reading from stdin - for moment not compressed |
339 | count=0 ; // So stdin will be plain file |
340 | } |
341 | // gzip files start with the magic numbers 0x1f 0x8b |
342 | if (count >= 2 && header[0] == 0x1f && header[1] == 0x8b) |
343 | { |
344 | #ifdef COIN_HAS_ZLIB |
345 | return new CoinGzipFileInput (fileName); |
346 | #else |
347 | throw CoinError ("Cannot read gzip'ed file because zlib was " |
348 | "not compiled into COIN!" , |
349 | "create" , |
350 | "CoinFileInput" ); |
351 | #endif |
352 | } |
353 | |
354 | // bzip2 files start with the string "BZh" |
355 | if (count >= 3 && header[0] == 'B' && header[1] == 'Z' && header[2] == 'h') |
356 | { |
357 | #ifdef COIN_HAS_BZLIB |
358 | return new CoinBzip2FileInput (fileName); |
359 | #else |
360 | throw CoinError ("Cannot read bzip2'ed file because bzlib was " |
361 | "not compiled into COIN!" , |
362 | "create" , |
363 | "CoinFileInput" ); |
364 | #endif |
365 | } |
366 | |
367 | // fallback: probably plain text file |
368 | return new CoinPlainFileInput (fileName); |
369 | } |
370 | |
371 | CoinFileInput::CoinFileInput (const std::string &fileName): |
372 | CoinFileIOBase (fileName) |
373 | {} |
374 | |
375 | CoinFileInput::~CoinFileInput () |
376 | {} |
377 | |
378 | |
379 | // ------------------------------------------------------ |
380 | // Some subclasses of CoinFileOutput |
381 | // for plain text and compressed files |
382 | // ------------------------------------------------------ |
383 | |
384 | |
385 | // -------- CoinPlainFileOutput --------- |
386 | |
387 | // Class to handle output to text files without compression. |
388 | class CoinPlainFileOutput: public CoinFileOutput |
389 | { |
390 | public: |
391 | CoinPlainFileOutput (const std::string &fileName): |
392 | CoinFileOutput (fileName), f_ (0) |
393 | { |
394 | if (fileName == "-" || fileName == "stdout" ) { |
395 | f_ = stdout; |
396 | } else { |
397 | f_ = fopen (fileName.c_str (), "w" ); |
398 | if (f_ == 0) |
399 | throw CoinError ("Could not open file for writing!" , |
400 | "CoinPlainFileOutput" , |
401 | "CoinPlainFileOutput" ); |
402 | } |
403 | } |
404 | |
405 | virtual ~CoinPlainFileOutput () |
406 | { |
407 | if (f_ != 0 && f_ != stdout) |
408 | fclose (f_); |
409 | } |
410 | |
411 | virtual int write (const void *buffer, int size) override |
412 | { |
413 | return static_cast<int>(fwrite (buffer, 1, size, f_)); |
414 | } |
415 | |
416 | // we have something better than the default implementation |
417 | virtual bool puts (const char *s) override |
418 | { |
419 | return fputs (s, f_) >= 0; |
420 | } |
421 | |
422 | private: |
423 | FILE *f_; |
424 | }; |
425 | |
426 | |
427 | // ------- CoinGzipFileOutput --------- |
428 | |
429 | #ifdef COIN_HAS_ZLIB |
430 | |
431 | // no need to include the header, as this was done for the input class |
432 | |
433 | // Handle output with gzip compression |
434 | class CoinGzipFileOutput: public CoinFileOutput |
435 | { |
436 | public: |
437 | CoinGzipFileOutput (const std::string &fileName): |
438 | CoinFileOutput (fileName), gzf_ (0) |
439 | { |
440 | gzf_ = gzopen (fileName.c_str (), "w" ); |
441 | if (gzf_ == 0) |
442 | throw CoinError ("Could not open file for writing!" , |
443 | "CoinGzipFileOutput" , |
444 | "CoinGzipFileOutput" ); |
445 | } |
446 | |
447 | virtual ~CoinGzipFileOutput () |
448 | { |
449 | if (gzf_ != 0) |
450 | gzclose (gzf_); |
451 | } |
452 | |
453 | virtual int write (const void * buffer, int size) |
454 | { |
455 | return gzwrite (gzf_, const_cast<void *> (buffer), size); |
456 | } |
457 | |
458 | // as zlib's gzputs is no more clever than our own, there's |
459 | // no need to replace the default. |
460 | |
461 | private: |
462 | gzFile gzf_; |
463 | }; |
464 | |
465 | #endif // COIN_HAS_ZLIB |
466 | |
467 | |
468 | // ------- CoinBzip2FileOutput ------- |
469 | |
470 | #ifdef COIN_HAS_BZLIB |
471 | |
472 | // no need to include the header, as this was done for the input class |
473 | |
474 | // Output to bzip2 compressed file |
475 | class CoinBzip2FileOutput: public CoinFileOutput |
476 | { |
477 | public: |
478 | CoinBzip2FileOutput (const std::string &fileName): |
479 | CoinFileOutput (fileName), f_ (0), bzf_ (0) |
480 | { |
481 | int bzError = BZ_OK; |
482 | |
483 | f_ = fopen (fileName.c_str (), "w" ); |
484 | |
485 | if (f_ != 0) |
486 | bzf_ = BZ2_bzWriteOpen (&bzError, f_, |
487 | 9, /* Number of 100k blocks used for compression. |
488 | Must be between 1 and 9 inclusive. As 9 |
489 | gives best compression and I guess we can |
490 | spend some memory, we use it. */ |
491 | 0, /* verbosity */ |
492 | 30 /* suggested by bzlib manual */ ); |
493 | |
494 | if (f_ == 0 || bzError != BZ_OK || bzf_ == 0) |
495 | throw CoinError ("Could not open file for writing!" , |
496 | "CoinBzip2FileOutput" , |
497 | "CoinBzip2FileOutput" ); |
498 | } |
499 | |
500 | virtual ~CoinBzip2FileOutput () |
501 | { |
502 | int bzError = BZ_OK; |
503 | if (bzf_ != 0) |
504 | BZ2_bzWriteClose (&bzError, bzf_, 0, 0, 0); |
505 | |
506 | if (f_ != 0) |
507 | fclose (f_); |
508 | } |
509 | |
510 | virtual int write (const void *buffer, int size) |
511 | { |
512 | int bzError = BZ_OK; |
513 | BZ2_bzWrite (&bzError, bzf_, const_cast<void *> (buffer), size); |
514 | return (bzError == BZ_OK) ? size : 0; |
515 | } |
516 | |
517 | private: |
518 | FILE *f_; |
519 | BZFILE *bzf_; |
520 | }; |
521 | |
522 | #endif // COIN_HAS_BZLIB |
523 | |
524 | |
525 | // ------- implementation of CoinFileOutput's methods |
526 | |
527 | bool CoinFileOutput::compressionSupported (Compression compression) |
528 | { |
529 | switch (compression) |
530 | { |
531 | case COMPRESS_NONE: |
532 | return true; |
533 | |
534 | case COMPRESS_GZIP: |
535 | #ifdef COIN_HAS_ZLIB |
536 | return true; |
537 | #else |
538 | return false; |
539 | #endif |
540 | |
541 | case COMPRESS_BZIP2: |
542 | #ifdef COIN_HAS_BZLIB |
543 | return true; |
544 | #else |
545 | return false; |
546 | #endif |
547 | |
548 | default: |
549 | return false; |
550 | } |
551 | } |
552 | |
553 | CoinFileOutput *CoinFileOutput::create (const std::string &fileName, |
554 | Compression compression) |
555 | { |
556 | switch (compression) |
557 | { |
558 | case COMPRESS_NONE: |
559 | return new CoinPlainFileOutput (fileName); |
560 | |
561 | case COMPRESS_GZIP: |
562 | #ifdef COIN_HAS_ZLIB |
563 | return new CoinGzipFileOutput (fileName); |
564 | #endif |
565 | break; |
566 | |
567 | case COMPRESS_BZIP2: |
568 | #ifdef COIN_HAS_BZLIB |
569 | return new CoinBzip2FileOutput (fileName); |
570 | #endif |
571 | break; |
572 | |
573 | default: |
574 | break; |
575 | } |
576 | |
577 | throw CoinError ("Unsupported compression selected!" , |
578 | "create" , |
579 | "CoinFileOutput" ); |
580 | } |
581 | |
582 | CoinFileOutput::CoinFileOutput (const std::string &fileName): |
583 | CoinFileIOBase (fileName) |
584 | {} |
585 | |
586 | CoinFileOutput::~CoinFileOutput () |
587 | {} |
588 | |
589 | bool CoinFileOutput::puts (const char *s) |
590 | { |
591 | int len = static_cast<int>(strlen (s)); |
592 | if (len == 0) |
593 | return true; |
594 | |
595 | return write (s, len) == len; |
596 | } |
597 | |
598 | /* |
599 | Tests if the given string looks like an absolute path to a file. |
600 | - unix: string begins with `/' |
601 | - windows: string begins with `\' or `drv:', where drv is a drive |
602 | designator. |
603 | */ |
604 | bool fileAbsPath (const std::string &path) |
605 | { |
606 | const char dirsep = CoinFindDirSeparator() ; |
607 | |
608 | // If the first two chars are drive designators then treat it as absolute |
609 | // path (noone in their right mind would create a file named 'Z:' on unix, |
610 | // right?...) |
611 | const size_t len = path.length(); |
612 | if (len >= 2 && path[1] == ':') { |
613 | const char ch = path[0]; |
614 | if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) { |
615 | return true; |
616 | } |
617 | } |
618 | |
619 | return path[0] == dirsep; |
620 | } |
621 | |
622 | |
623 | /* |
624 | Tests if file readable and may change name to add |
625 | compression extension. Here to get ZLIB etc in one place |
626 | |
627 | stdin goes by unmolested by all the fussing with file names. We shouldn't |
628 | close it, either. |
629 | */ |
630 | bool (std::string & fileName, const std::string &dfltPrefix) |
631 | { |
632 | if (fileName != "stdin" ) |
633 | { const char dirsep = CoinFindDirSeparator(); |
634 | std::string directory ; |
635 | if (dfltPrefix == "" ) |
636 | { directory = (dirsep == '/' ? "./" : ".\\" ) ; } |
637 | else |
638 | { directory = dfltPrefix ; |
639 | if (directory[directory.length()-1] != dirsep) |
640 | { directory += dirsep ; } } |
641 | |
642 | bool absolutePath = fileAbsPath(fileName) ; |
643 | std::string field = fileName; |
644 | |
645 | if (absolutePath) { |
646 | // nothing to do |
647 | } else if (field[0]=='~') { |
648 | char * home_dir = getenv("HOME" ); |
649 | if (home_dir) { |
650 | std::string home(home_dir); |
651 | field=field.erase(0,1); |
652 | fileName = home+field; |
653 | } else { |
654 | fileName=field; |
655 | } |
656 | } else { |
657 | fileName = directory+field; |
658 | } |
659 | } |
660 | // I am opening it to make sure not odd |
661 | FILE *fp; |
662 | if (strcmp(fileName.c_str(),"stdin" )) { |
663 | fp = fopen ( fileName.c_str(), "r" ); |
664 | } else { |
665 | fp = stdin; |
666 | } |
667 | #ifdef COIN_HAS_ZLIB |
668 | if (!fp) { |
669 | std::string fname = fileName; |
670 | fname += ".gz" ; |
671 | fp = fopen ( fname.c_str(), "r" ); |
672 | if (fp) |
673 | fileName=fname; |
674 | } |
675 | #endif |
676 | #ifdef COIN_HAS_BZLIB |
677 | if (!fp) { |
678 | std::string fname = fileName; |
679 | fname += ".bz2" ; |
680 | fp = fopen ( fname.c_str(), "r" ); |
681 | if (fp) |
682 | fileName=fname; |
683 | } |
684 | #endif |
685 | if (!fp) { |
686 | return false; |
687 | } else { |
688 | if (fp != stdin) { |
689 | fclose(fp); |
690 | } |
691 | return true; |
692 | } |
693 | } |
694 | |