1 | #include "mupdf/fitz.h" |
2 | #include "fitz-imp.h" |
3 | |
4 | #include <string.h> |
5 | |
/* Fallback definition for toolchains whose headers do not provide INT32_MAX. */
#if !defined (INT32_MAX)
#define INT32_MAX 2147483647L
#endif
9 | |
/* Short type names for the two structures declared below. */
typedef struct tar_entry_s tar_entry;
typedef struct fz_tar_archive_s fz_tar_archive;
12 | |
/* One archive member recorded while scanning the tar headers. */
struct tar_entry_s
{
	char *name;	/* heap-allocated copy of the member name */
	int64_t offset;	/* stream position of the member's 512-byte header */
	int64_t size;	/* length of the member's data in bytes */
};
18 | |
/* Tar archive handle: the generic archive object plus the table of members. */
struct fz_tar_archive_s
{
	/* Base object. Functions in this file cast fz_archive pointers to
	   fz_tar_archive pointers, so this has to remain the first member. */
	fz_archive super;

	int count;		/* number of valid elements in entries */
	tar_entry *entries;	/* member table, grown while the archive is scanned */
};
26 | |
/* Return non-zero if c is an ASCII octal digit ('0' through '7'). */
static inline int isoctdigit(char c)
{
	return c >= '0' && c <= '7';
}

/*
	Parse the octal number at the start of a NUL-terminated string.

	Leading spaces are skipped first: POSIX asks for zero-padded
	numeric header fields, but some historical tar writers pad with
	spaces instead, and without the skip such a field would be read
	as 0.

	Parsing stops at the first character that is not an octal digit
	(this includes the terminating NUL). Returns 0 when no digit is
	found. The result is not checked for overflow; callers are
	expected to pass short, fixed-width header fields.
*/
static inline int64_t otoi(const char *s)
{
	int64_t value = 0;

	while (*s == ' ')
		s++;

	while (isoctdigit(*s))
	{
		value = value * 8 + (*s - '0');
		s++;
	}

	return value;
}
45 | |
46 | static void drop_tar_archive(fz_context *ctx, fz_archive *arch) |
47 | { |
48 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
49 | int i; |
50 | for (i = 0; i < tar->count; ++i) |
51 | fz_free(ctx, tar->entries[i].name); |
52 | fz_free(ctx, tar->entries); |
53 | } |
54 | |
55 | static void ensure_tar_entries(fz_context *ctx, fz_tar_archive *tar) |
56 | { |
57 | fz_stream *file = tar->super.file; |
58 | char name[100]; |
59 | char octsize[12]; |
60 | char typeflag; |
61 | int64_t offset, blocks, size; |
62 | size_t n; |
63 | |
64 | tar->count = 0; |
65 | |
66 | fz_seek(ctx, file, 0, SEEK_SET); |
67 | |
68 | while (1) |
69 | { |
70 | offset = fz_tell(ctx, file); |
71 | n = fz_read(ctx, file, (unsigned char *) name, nelem(name)); |
72 | if (n < nelem(name)) |
73 | fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in tar entry name" ); |
74 | name[nelem(name) - 1] = '\0'; |
75 | |
76 | if (strlen(name) == 0) |
77 | break; |
78 | |
79 | fz_seek(ctx, file, 24, 1); |
80 | n = fz_read(ctx, file, (unsigned char *) octsize, nelem(octsize)); |
81 | if (n < nelem(octsize)) |
82 | fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in tar entry size" ); |
83 | octsize[nelem(octsize) - 1] = '\0'; |
84 | size = otoi(octsize); |
85 | if (size > INT32_MAX) |
86 | fz_throw(ctx, FZ_ERROR_GENERIC, "tar archive entry larger than 2 GB" ); |
87 | |
88 | fz_seek(ctx, file, 20, 1); |
89 | typeflag = fz_read_byte(ctx, file); |
90 | |
91 | fz_seek(ctx, file, 355, 1); |
92 | blocks = (size + 511) / 512; |
93 | fz_seek(ctx, file, blocks * 512, 1); |
94 | |
95 | if (typeflag != '0' && typeflag != '\0') |
96 | continue; |
97 | |
98 | tar->entries = fz_realloc_array(ctx, tar->entries, tar->count + 1, tar_entry); |
99 | |
100 | tar->entries[tar->count].name = fz_strdup(ctx, name); |
101 | tar->entries[tar->count].offset = offset; |
102 | tar->entries[tar->count].size = size; |
103 | |
104 | tar->count++; |
105 | } |
106 | } |
107 | |
108 | static tar_entry *lookup_tar_entry(fz_context *ctx, fz_tar_archive *tar, const char *name) |
109 | { |
110 | int i; |
111 | for (i = 0; i < tar->count; i++) |
112 | if (!fz_strcasecmp(name, tar->entries[i].name)) |
113 | return &tar->entries[i]; |
114 | return NULL; |
115 | } |
116 | |
117 | static fz_stream *open_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) |
118 | { |
119 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
120 | fz_stream *file = tar->super.file; |
121 | tar_entry *ent; |
122 | |
123 | ent = lookup_tar_entry(ctx, tar, name); |
124 | if (!ent) |
125 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named tar archive entry" ); |
126 | |
127 | fz_seek(ctx, file, ent->offset + 512, 0); |
128 | return fz_open_null_filter(ctx, file, ent->size, fz_tell(ctx, file)); |
129 | } |
130 | |
131 | static fz_buffer *read_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) |
132 | { |
133 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
134 | fz_stream *file = tar->super.file; |
135 | fz_buffer *ubuf; |
136 | tar_entry *ent; |
137 | |
138 | ent = lookup_tar_entry(ctx, tar, name); |
139 | if (!ent) |
140 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named tar archive entry" ); |
141 | |
142 | ubuf = fz_new_buffer(ctx, ent->size); |
143 | |
144 | fz_try(ctx) |
145 | { |
146 | fz_seek(ctx, file, ent->offset + 512, 0); |
147 | ubuf->len = fz_read(ctx, file, ubuf->data, ent->size); |
148 | if (ubuf->len != ent->size) |
149 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot read entire archive entry" ); |
150 | } |
151 | fz_catch(ctx) |
152 | { |
153 | fz_drop_buffer(ctx, ubuf); |
154 | fz_rethrow(ctx); |
155 | } |
156 | |
157 | return ubuf; |
158 | } |
159 | |
160 | static int has_tar_entry(fz_context *ctx, fz_archive *arch, const char *name) |
161 | { |
162 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
163 | tar_entry *ent = lookup_tar_entry(ctx, tar, name); |
164 | return ent != NULL; |
165 | } |
166 | |
167 | static const char *list_tar_entry(fz_context *ctx, fz_archive *arch, int idx) |
168 | { |
169 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
170 | if (idx < 0 || idx >= tar->count) |
171 | return NULL; |
172 | return tar->entries[idx].name; |
173 | } |
174 | |
175 | static int count_tar_entries(fz_context *ctx, fz_archive *arch) |
176 | { |
177 | fz_tar_archive *tar = (fz_tar_archive *) arch; |
178 | return tar->count; |
179 | } |
180 | |
/*
	Detect if stream object is a tar archive.

	Assumes that the stream object is seekable.
*/
186 | int |
187 | fz_is_tar_archive(fz_context *ctx, fz_stream *file) |
188 | { |
189 | const unsigned char gnusignature[6] = { 'u', 's', 't', 'a', 'r', ' ' }; |
190 | const unsigned char paxsignature[6] = { 'u', 's', 't', 'a', 'r', '\0' }; |
191 | const unsigned char v7signature[6] = { '\0', '\0', '\0', '\0', '\0', '\0' }; |
192 | unsigned char data[6]; |
193 | size_t n; |
194 | |
195 | fz_seek(ctx, file, 257, 0); |
196 | n = fz_read(ctx, file, data, nelem(data)); |
197 | if (n != nelem(data)) |
198 | return 0; |
199 | if (!memcmp(data, gnusignature, nelem(gnusignature))) |
200 | return 1; |
201 | if (!memcmp(data, paxsignature, nelem(paxsignature))) |
202 | return 1; |
203 | if (!memcmp(data, v7signature, nelem(v7signature))) |
204 | return 1; |
205 | |
206 | return 0; |
207 | } |
208 | |
/*
	Open a tar archive stream.

	Open an archive using a seekable stream object rather than
	opening a file or directory on disk.

	An exception is thrown if the stream is not a tar archive as
	indicated by the presence of a tar signature.
*/
219 | fz_archive * |
220 | fz_open_tar_archive_with_stream(fz_context *ctx, fz_stream *file) |
221 | { |
222 | fz_tar_archive *tar; |
223 | |
224 | if (!fz_is_tar_archive(ctx, file)) |
225 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize tar archive" ); |
226 | |
227 | tar = fz_new_derived_archive(ctx, file, fz_tar_archive); |
228 | tar->super.format = "tar" ; |
229 | tar->super.count_entries = count_tar_entries; |
230 | tar->super.list_entry = list_tar_entry; |
231 | tar->super.has_entry = has_tar_entry; |
232 | tar->super.read_entry = read_tar_entry; |
233 | tar->super.open_entry = open_tar_entry; |
234 | tar->super.drop_archive = drop_tar_archive; |
235 | |
236 | fz_try(ctx) |
237 | { |
238 | ensure_tar_entries(ctx, tar); |
239 | } |
240 | fz_catch(ctx) |
241 | { |
242 | fz_drop_archive(ctx, &tar->super); |
243 | fz_rethrow(ctx); |
244 | } |
245 | |
246 | return &tar->super; |
247 | } |
248 | |
/*
	Open a tar archive file.

	An exception is thrown if the file is not a tar archive as
	indicated by the presence of a tar signature.

	filename: a path to a tar archive file as it would be given to
	open(2).
*/
258 | fz_archive * |
259 | fz_open_tar_archive(fz_context *ctx, const char *filename) |
260 | { |
261 | fz_archive *tar = NULL; |
262 | fz_stream *file; |
263 | |
264 | file = fz_open_file(ctx, filename); |
265 | |
266 | fz_try(ctx) |
267 | tar = fz_open_tar_archive_with_stream(ctx, file); |
268 | fz_always(ctx) |
269 | fz_drop_stream(ctx, file); |
270 | fz_catch(ctx) |
271 | fz_rethrow(ctx); |
272 | |
273 | return tar; |
274 | } |
275 | |