1 | #include "mupdf/fitz.h" |
2 | |
3 | #include <string.h> |
4 | #include <errno.h> |
5 | #include <math.h> |
6 | #include <float.h> |
7 | #include <stdlib.h> |
8 | |
9 | static inline int |
10 | fz_tolower(int c) |
11 | { |
12 | if (c >= 'A' && c <= 'Z') |
13 | return c + 32; |
14 | return c; |
15 | } |
16 | |
17 | /* |
18 | Return strlen(s), if that is less than maxlen, or maxlen if |
19 | there is no null byte ('\0') among the first maxlen bytes. |
20 | */ |
21 | size_t |
22 | fz_strnlen(const char *s, size_t n) |
23 | { |
24 | const char *p = memchr(s, 0, n); |
25 | return p ? p - s : n; |
26 | } |
27 | |
28 | int |
29 | fz_strncasecmp(const char *a, const char *b, int n) |
30 | { |
31 | if (!n--) |
32 | return 0; |
33 | for (; *a && *b && n && (*a == *b || fz_tolower(*a) == fz_tolower(*b)); a++, b++, n--) |
34 | ; |
35 | return fz_tolower(*a) - fz_tolower(*b); |
36 | } |
37 | |
38 | /* |
39 | Case insensitive (ASCII only) string comparison. |
40 | */ |
41 | int |
42 | fz_strcasecmp(const char *a, const char *b) |
43 | { |
44 | while (fz_tolower(*a) == fz_tolower(*b)) |
45 | { |
46 | if (*a++ == 0) |
47 | return 0; |
48 | b++; |
49 | } |
50 | return fz_tolower(*a) - fz_tolower(*b); |
51 | } |
52 | |
53 | /* |
54 | Given a pointer to a C string (or a pointer to NULL) break |
55 | it at the first occurrence of a delimiter char (from a given set). |
56 | |
57 | stringp: Pointer to a C string pointer (or NULL). Updated on exit to |
58 | point to the first char of the string after the delimiter that was |
59 | found. The string pointed to by stringp will be corrupted by this |
60 | call (as the found delimiter will be overwritten by 0). |
61 | |
62 | delim: A C string of acceptable delimiter characters. |
63 | |
64 | Returns a pointer to a C string containing the chars of stringp up |
65 | to the first delimiter char (or the end of the string), or NULL. |
66 | */ |
67 | char * |
68 | fz_strsep(char **stringp, const char *delim) |
69 | { |
70 | char *ret = *stringp; |
71 | if (!ret) return NULL; |
72 | if ((*stringp = strpbrk(*stringp, delim)) != NULL) |
73 | *((*stringp)++) = '\0'; |
74 | return ret; |
75 | } |
76 | |
77 | /* |
78 | Copy at most n-1 chars of a string into a destination |
79 | buffer with null termination, returning the real length of the |
80 | initial string (excluding terminator). |
81 | |
82 | dst: Destination buffer, at least n bytes long. |
83 | |
84 | src: C string (non-NULL). |
85 | |
86 | n: Size of dst buffer in bytes. |
87 | |
88 | Returns the length (excluding terminator) of src. |
89 | */ |
90 | size_t |
91 | fz_strlcpy(char *dst, const char *src, size_t siz) |
92 | { |
93 | register char *d = dst; |
94 | register const char *s = src; |
95 | register size_t n = siz; |
96 | |
97 | /* Copy as many bytes as will fit */ |
98 | if (n != 0 && --n != 0) { |
99 | do { |
100 | if ((*d++ = *s++) == 0) |
101 | break; |
102 | } while (--n != 0); |
103 | } |
104 | |
105 | /* Not enough room in dst, add NUL and traverse rest of src */ |
106 | if (n == 0) { |
107 | if (siz != 0) |
108 | *d = '\0'; /* NUL-terminate dst */ |
109 | while (*s++) |
110 | ; |
111 | } |
112 | |
113 | return(s - src - 1); /* count does not include NUL */ |
114 | } |
115 | |
116 | /* |
117 | Concatenate 2 strings, with a maximum length. |
118 | |
119 | dst: pointer to first string in a buffer of n bytes. |
120 | |
121 | src: pointer to string to concatenate. |
122 | |
123 | n: Size (in bytes) of buffer that dst is in. |
124 | |
125 | Returns the real length that a concatenated dst + src would have been |
126 | (not including terminator). |
127 | */ |
128 | size_t |
129 | fz_strlcat(char *dst, const char *src, size_t siz) |
130 | { |
131 | register char *d = dst; |
132 | register const char *s = src; |
133 | register size_t n = siz; |
134 | size_t dlen; |
135 | |
136 | /* Find the end of dst and adjust bytes left but don't go past end */ |
137 | while (*d != '\0' && n-- != 0) |
138 | d++; |
139 | dlen = d - dst; |
140 | n = siz - dlen; |
141 | |
142 | if (n == 0) |
143 | return dlen + strlen(s); |
144 | while (*s != '\0') { |
145 | if (n != 1) { |
146 | *d++ = *s; |
147 | n--; |
148 | } |
149 | s++; |
150 | } |
151 | *d = '\0'; |
152 | |
153 | return dlen + (s - src); /* count does not include NUL */ |
154 | } |
155 | |
156 | /* |
157 | extract the directory component from a path. |
158 | */ |
159 | void |
160 | fz_dirname(char *dir, const char *path, size_t n) |
161 | { |
162 | size_t i; |
163 | |
164 | if (!path || !path[0]) |
165 | { |
166 | fz_strlcpy(dir, "." , n); |
167 | return; |
168 | } |
169 | |
170 | fz_strlcpy(dir, path, n); |
171 | |
172 | i = strlen(dir); |
173 | for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/" , n); return; } |
174 | for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, "." , n); return; } |
175 | for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/" , n); return; } |
176 | dir[i+1] = 0; |
177 | } |
178 | |
179 | static inline int ishex(int a) |
180 | { |
181 | return (a >= 'A' && a <= 'F') || |
182 | (a >= 'a' && a <= 'f') || |
183 | (a >= '0' && a <= '9'); |
184 | } |
185 | |
186 | static inline int tohex(int c) |
187 | { |
188 | if (c >= '0' && c <= '9') return c - '0'; |
189 | if (c >= 'a' && c <= 'f') return c - 'a' + 0xA; |
190 | if (c >= 'A' && c <= 'F') return c - 'A' + 0xA; |
191 | return 0; |
192 | } |
193 | |
194 | /* |
195 | decode url escapes. |
196 | */ |
197 | char * |
198 | fz_urldecode(char *url) |
199 | { |
200 | char *s = url; |
201 | char *p = url; |
202 | while (*s) |
203 | { |
204 | int c = (unsigned char) *s++; |
205 | if (c == '%' && ishex(s[0]) && ishex(s[1])) |
206 | { |
207 | int a = tohex(*s++); |
208 | int b = tohex(*s++); |
209 | *p++ = a << 4 | b; |
210 | } |
211 | else |
212 | { |
213 | *p++ = c; |
214 | } |
215 | } |
216 | *p = 0; |
217 | return url; |
218 | } |
219 | |
220 | /* |
221 | create output file name using a template. |
222 | |
223 | If the path contains %[0-9]*d, the first such pattern will be replaced |
224 | with the page number. If the template does not contain such a pattern, the page |
225 | number will be inserted before the filename extension. If the template does not have |
226 | a filename extension, the page number will be added to the end. |
227 | */ |
228 | void |
229 | fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page) |
230 | { |
231 | const char *s, *p; |
232 | char num[40]; |
233 | int i, n; |
234 | int z = 0; |
235 | |
236 | for (i = 0; page; page /= 10) |
237 | num[i++] = '0' + page % 10; |
238 | num[i] = 0; |
239 | |
240 | s = p = strchr(fmt, '%'); |
241 | if (p) |
242 | { |
243 | ++p; |
244 | while (*p >= '0' && *p <= '9') |
245 | z = z * 10 + (*p++ - '0'); |
246 | } |
247 | if (p && *p == 'd') |
248 | { |
249 | ++p; |
250 | } |
251 | else |
252 | { |
253 | s = p = strrchr(fmt, '.'); |
254 | if (!p) |
255 | s = p = fmt + strlen(fmt); |
256 | } |
257 | |
258 | if (z < 1) |
259 | z = 1; |
260 | while (i < z && i < sizeof num) |
261 | num[i++] = '0'; |
262 | n = s - fmt; |
263 | if (n + i + strlen(p) >= size) |
264 | fz_throw(ctx, FZ_ERROR_GENERIC, "path name buffer overflow" ); |
265 | memcpy(path, fmt, n); |
266 | while (i > 0) |
267 | path[n++] = num[--i]; |
268 | fz_strlcpy(path + n, p, size - n); |
269 | } |
270 | |
271 | #define SEP(x) ((x)=='/' || (x) == 0) |
272 | |
273 | /* |
274 | rewrite path to the shortest string that names the same path. |
275 | |
276 | Eliminates multiple and trailing slashes, interprets "." and "..". |
277 | Overwrites the string in place. |
278 | */ |
279 | char * |
280 | fz_cleanname(char *name) |
281 | { |
282 | char *p, *q, *dotdot; |
283 | int rooted; |
284 | |
285 | rooted = name[0] == '/'; |
286 | |
287 | /* |
288 | * invariants: |
289 | * p points at beginning of path element we're considering. |
290 | * q points just past the last path element we wrote (no slash). |
291 | * dotdot points just past the point where .. cannot backtrack |
292 | * any further (no slash). |
293 | */ |
294 | p = q = dotdot = name + rooted; |
295 | while (*p) |
296 | { |
297 | if(p[0] == '/') /* null element */ |
298 | p++; |
299 | else if (p[0] == '.' && SEP(p[1])) |
300 | p += 1; /* don't count the separator in case it is nul */ |
301 | else if (p[0] == '.' && p[1] == '.' && SEP(p[2])) |
302 | { |
303 | p += 2; |
304 | if (q > dotdot) /* can backtrack */ |
305 | { |
306 | while(--q > dotdot && *q != '/') |
307 | ; |
308 | } |
309 | else if (!rooted) /* /.. is / but ./../ is .. */ |
310 | { |
311 | if (q != name) |
312 | *q++ = '/'; |
313 | *q++ = '.'; |
314 | *q++ = '.'; |
315 | dotdot = q; |
316 | } |
317 | } |
318 | else /* real path element */ |
319 | { |
320 | if (q != name+rooted) |
321 | *q++ = '/'; |
322 | while ((*q = *p) != '/' && *q != 0) |
323 | p++, q++; |
324 | } |
325 | } |
326 | |
327 | if (q == name) /* empty string is really "." */ |
328 | *q++ = '.'; |
329 | *q = '\0'; |
330 | return name; |
331 | } |
332 | |
333 | enum |
334 | { |
335 | UTFmax = 4, /* maximum bytes per rune */ |
336 | Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ |
337 | Runeself = 0x80, /* rune and UTF sequences are the same (<) */ |
338 | Runeerror = 0xFFFD, /* decoding error in UTF */ |
339 | Runemax = 0x10FFFF, /* maximum rune value */ |
340 | }; |
341 | |
342 | enum |
343 | { |
344 | Bit1 = 7, |
345 | Bitx = 6, |
346 | Bit2 = 5, |
347 | Bit3 = 4, |
348 | Bit4 = 3, |
349 | Bit5 = 2, |
350 | |
351 | T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ |
352 | Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ |
353 | T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ |
354 | T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ |
355 | T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ |
356 | T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ |
357 | |
358 | Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ |
359 | Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ |
360 | Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ |
361 | Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */ |
362 | |
363 | Maskx = (1<<Bitx)-1, /* 0011 1111 */ |
364 | Testx = Maskx ^ 0xFF, /* 1100 0000 */ |
365 | |
366 | Bad = Runeerror, |
367 | }; |
368 | |
369 | /* |
370 | UTF8 decode a single rune from a sequence of chars. |
371 | |
372 | rune: Pointer to an int to assign the decoded 'rune' to. |
373 | |
374 | str: Pointer to a UTF8 encoded string. |
375 | |
376 | Returns the number of bytes consumed. |
377 | */ |
378 | int |
379 | fz_chartorune(int *rune, const char *str) |
380 | { |
381 | int c, c1, c2, c3; |
382 | int l; |
383 | |
384 | /* |
385 | * one character sequence |
386 | * 00000-0007F => T1 |
387 | */ |
388 | c = *(const unsigned char*)str; |
389 | if(c < Tx) { |
390 | *rune = c; |
391 | return 1; |
392 | } |
393 | |
394 | /* |
395 | * two character sequence |
396 | * 0080-07FF => T2 Tx |
397 | */ |
398 | c1 = *(const unsigned char*)(str+1) ^ Tx; |
399 | if(c1 & Testx) |
400 | goto bad; |
401 | if(c < T3) { |
402 | if(c < T2) |
403 | goto bad; |
404 | l = ((c << Bitx) | c1) & Rune2; |
405 | if(l <= Rune1) |
406 | goto bad; |
407 | *rune = l; |
408 | return 2; |
409 | } |
410 | |
411 | /* |
412 | * three character sequence |
413 | * 0800-FFFF => T3 Tx Tx |
414 | */ |
415 | c2 = *(const unsigned char*)(str+2) ^ Tx; |
416 | if(c2 & Testx) |
417 | goto bad; |
418 | if(c < T4) { |
419 | l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; |
420 | if(l <= Rune2) |
421 | goto bad; |
422 | *rune = l; |
423 | return 3; |
424 | } |
425 | |
426 | /* |
427 | * four character sequence (21-bit value) |
428 | * 10000-1FFFFF => T4 Tx Tx Tx |
429 | */ |
430 | c3 = *(const unsigned char*)(str+3) ^ Tx; |
431 | if (c3 & Testx) |
432 | goto bad; |
433 | if (c < T5) { |
434 | l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; |
435 | if (l <= Rune3) |
436 | goto bad; |
437 | *rune = l; |
438 | return 4; |
439 | } |
440 | /* |
441 | * Support for 5-byte or longer UTF-8 would go here, but |
442 | * since we don't have that, we'll just fall through to bad. |
443 | */ |
444 | |
445 | /* |
446 | * bad decoding |
447 | */ |
448 | bad: |
449 | *rune = Bad; |
450 | return 1; |
451 | } |
452 | |
453 | /* |
454 | UTF8 encode a rune to a sequence of chars. |
455 | |
456 | str: Pointer to a place to put the UTF8 encoded character. |
457 | |
458 | rune: Pointer to a 'rune'. |
459 | |
460 | Returns the number of bytes the rune took to output. |
461 | */ |
462 | int |
463 | fz_runetochar(char *str, int rune) |
464 | { |
465 | /* Runes are signed, so convert to unsigned for range check. */ |
466 | unsigned int c = (unsigned int)rune; |
467 | |
468 | /* |
469 | * one character sequence |
470 | * 00000-0007F => 00-7F |
471 | */ |
472 | if(c <= Rune1) { |
473 | str[0] = c; |
474 | return 1; |
475 | } |
476 | |
477 | /* |
478 | * two character sequence |
479 | * 0080-07FF => T2 Tx |
480 | */ |
481 | if(c <= Rune2) { |
482 | str[0] = T2 | (c >> 1*Bitx); |
483 | str[1] = Tx | (c & Maskx); |
484 | return 2; |
485 | } |
486 | |
487 | /* |
488 | * If the Rune is out of range, convert it to the error rune. |
489 | * Do this test here because the error rune encodes to three bytes. |
490 | * Doing it earlier would duplicate work, since an out of range |
491 | * Rune wouldn't have fit in one or two bytes. |
492 | */ |
493 | if (c > Runemax) |
494 | c = Runeerror; |
495 | |
496 | /* |
497 | * three character sequence |
498 | * 0800-FFFF => T3 Tx Tx |
499 | */ |
500 | if (c <= Rune3) { |
501 | str[0] = T3 | (c >> 2*Bitx); |
502 | str[1] = Tx | ((c >> 1*Bitx) & Maskx); |
503 | str[2] = Tx | (c & Maskx); |
504 | return 3; |
505 | } |
506 | |
507 | /* |
508 | * four character sequence (21-bit value) |
509 | * 10000-1FFFFF => T4 Tx Tx Tx |
510 | */ |
511 | str[0] = T4 | (c >> 3*Bitx); |
512 | str[1] = Tx | ((c >> 2*Bitx) & Maskx); |
513 | str[2] = Tx | ((c >> 1*Bitx) & Maskx); |
514 | str[3] = Tx | (c & Maskx); |
515 | return 4; |
516 | } |
517 | |
518 | /* |
519 | Count how many chars are required to represent a rune. |
520 | |
521 | rune: The rune to encode. |
522 | |
523 | Returns the number of bytes required to represent this run in UTF8. |
524 | */ |
525 | int |
526 | fz_runelen(int c) |
527 | { |
528 | char str[10]; |
529 | return fz_runetochar(str, c); |
530 | } |
531 | |
532 | /* |
533 | Count how many runes the UTF-8 encoded string |
534 | consists of. |
535 | |
536 | s: The UTF-8 encoded, NUL-terminated text string. |
537 | |
538 | Returns the number of runes in the string. |
539 | */ |
540 | int |
541 | fz_utflen(const char *s) |
542 | { |
543 | int c, n, rune; |
544 | n = 0; |
545 | for(;;) { |
546 | c = *(const unsigned char*)s; |
547 | if(c < Runeself) { |
548 | if(c == 0) |
549 | return n; |
550 | s++; |
551 | } else |
552 | s += fz_chartorune(&rune, s); |
553 | n++; |
554 | } |
555 | return 0; |
556 | } |
557 | |
558 | /* |
559 | Range checking atof |
560 | */ |
561 | float fz_atof(const char *s) |
562 | { |
563 | float result; |
564 | |
565 | if (s == NULL) |
566 | return 0; |
567 | |
568 | errno = 0; |
569 | result = fz_strtof(s, NULL); |
570 | if ((errno == ERANGE && result == 0) || isnan(result)) |
571 | /* Return 1.0 on underflow, as it's a small known value that won't cause a divide by 0. */ |
572 | return 1; |
573 | result = fz_clamp(result, -FLT_MAX, FLT_MAX); |
574 | return result; |
575 | } |
576 | |
577 | /* |
578 | atoi that copes with NULL |
579 | */ |
580 | int fz_atoi(const char *s) |
581 | { |
582 | if (s == NULL) |
583 | return 0; |
584 | return atoi(s); |
585 | } |
586 | |
587 | int64_t fz_atoi64(const char *s) |
588 | { |
589 | if (s == NULL) |
590 | return 0; |
591 | return atoll(s); |
592 | } |
593 | |
594 | /* |
595 | Check and parse string into page ranges: |
596 | ( ','? ([0-9]+|'N') ( '-' ([0-9]+|N) )? )+ |
597 | */ |
598 | int fz_is_page_range(fz_context *ctx, const char *s) |
599 | { |
600 | /* TODO: check the actual syntax... */ |
601 | while (*s) |
602 | { |
603 | if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',') |
604 | return 0; |
605 | s++; |
606 | } |
607 | return 1; |
608 | } |
609 | |
610 | const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n) |
611 | { |
612 | if (!s || !s[0]) |
613 | return NULL; |
614 | |
615 | if (s[0] == ',') |
616 | s += 1; |
617 | |
618 | if (s[0] == 'N') |
619 | { |
620 | *a = n; |
621 | s += 1; |
622 | } |
623 | else |
624 | *a = strtol(s, (char**)&s, 10); |
625 | |
626 | if (s[0] == '-') |
627 | { |
628 | if (s[1] == 'N') |
629 | { |
630 | *b = n; |
631 | s += 2; |
632 | } |
633 | else |
634 | *b = strtol(s+1, (char**)&s, 10); |
635 | } |
636 | else |
637 | *b = *a; |
638 | |
639 | *a = fz_clampi(*a, 1, n); |
640 | *b = fz_clampi(*b, 1, n); |
641 | |
642 | return s; |
643 | } |
644 | |
645 | /* memmem from musl */ |
646 | |
647 | #define MAX(a,b) ((a)>(b)?(a):(b)) |
648 | |
649 | #define BITOP(a,b,op) \ |
650 | ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a)))) |
651 | |
652 | static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) |
653 | { |
654 | uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1]; |
655 | for (h++, k--; k; k--, hw = hw<<8 | *++h) |
656 | if (hw == nw) return (char *)h-1; |
657 | return 0; |
658 | } |
659 | |
660 | static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) |
661 | { |
662 | uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; |
663 | uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; |
664 | for (h+=2, k-=2; k; k--, hw = (hw|*++h)<<8) |
665 | if (hw == nw) return (char *)h-2; |
666 | return 0; |
667 | } |
668 | |
669 | static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) |
670 | { |
671 | uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; |
672 | uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; |
673 | for (h+=3, k-=3; k; k--, hw = hw<<8 | *++h) |
674 | if (hw == nw) return (char *)h-3; |
675 | return 0; |
676 | } |
677 | |
678 | static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l) |
679 | { |
680 | size_t i, ip, jp, k, p, ms, p0, mem, mem0; |
681 | size_t byteset[32 / sizeof(size_t)] = { 0 }; |
682 | size_t shift[256]; |
683 | |
684 | /* Computing length of needle and fill shift table */ |
685 | for (i=0; i<l; i++) |
686 | BITOP(byteset, n[i], |=), shift[n[i]] = i+1; |
687 | |
688 | /* Compute maximal suffix */ |
689 | ip = -1; jp = 0; k = p = 1; |
690 | while (jp+k<l) { |
691 | if (n[ip+k] == n[jp+k]) { |
692 | if (k == p) { |
693 | jp += p; |
694 | k = 1; |
695 | } else k++; |
696 | } else if (n[ip+k] > n[jp+k]) { |
697 | jp += k; |
698 | k = 1; |
699 | p = jp - ip; |
700 | } else { |
701 | ip = jp++; |
702 | k = p = 1; |
703 | } |
704 | } |
705 | ms = ip; |
706 | p0 = p; |
707 | |
708 | /* And with the opposite comparison */ |
709 | ip = -1; jp = 0; k = p = 1; |
710 | while (jp+k<l) { |
711 | if (n[ip+k] == n[jp+k]) { |
712 | if (k == p) { |
713 | jp += p; |
714 | k = 1; |
715 | } else k++; |
716 | } else if (n[ip+k] < n[jp+k]) { |
717 | jp += k; |
718 | k = 1; |
719 | p = jp - ip; |
720 | } else { |
721 | ip = jp++; |
722 | k = p = 1; |
723 | } |
724 | } |
725 | if (ip+1 > ms+1) ms = ip; |
726 | else p = p0; |
727 | |
728 | /* Periodic needle? */ |
729 | if (memcmp(n, n+p, ms+1)) { |
730 | mem0 = 0; |
731 | p = MAX(ms, l-ms-1) + 1; |
732 | } else mem0 = l-p; |
733 | mem = 0; |
734 | |
735 | /* Search loop */ |
736 | for (;;) { |
737 | /* If remainder of haystack is shorter than needle, done */ |
738 | if (z-h < l) return 0; |
739 | |
740 | /* Check last byte first; advance by shift on mismatch */ |
741 | if (BITOP(byteset, h[l-1], &)) { |
742 | k = l-shift[h[l-1]]; |
743 | if (k) { |
744 | if (mem0 && mem && k < p) k = l-p; |
745 | h += k; |
746 | mem = 0; |
747 | continue; |
748 | } |
749 | } else { |
750 | h += l; |
751 | mem = 0; |
752 | continue; |
753 | } |
754 | |
755 | /* Compare right half */ |
756 | for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++); |
757 | if (k < l) { |
758 | h += k-ms; |
759 | mem = 0; |
760 | continue; |
761 | } |
762 | /* Compare left half */ |
763 | for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--); |
764 | if (k <= mem) return (char *)h; |
765 | h += p; |
766 | mem = mem0; |
767 | } |
768 | } |
769 | |
770 | /* |
771 | Find the start of the first occurrence of the substring needle in haystack. |
772 | */ |
773 | void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l) |
774 | { |
775 | const unsigned char *h = h0, *n = n0; |
776 | |
777 | /* Return immediately on empty needle */ |
778 | if (!l) return (void *)h; |
779 | |
780 | /* Return immediately when needle is longer than haystack */ |
781 | if (k<l) return 0; |
782 | |
783 | /* Use faster algorithms for short needles */ |
784 | h = memchr(h0, *n, k); |
785 | if (!h || l==1) return (void *)h; |
786 | k -= h - (const unsigned char *)h0; |
787 | if (k<l) return 0; |
788 | if (l==2) return twobyte_memmem(h, k, n); |
789 | if (l==3) return threebyte_memmem(h, k, n); |
790 | if (l==4) return fourbyte_memmem(h, k, n); |
791 | |
792 | return twoway_memmem(h, h+k, n, l); |
793 | } |
794 | |