1 | #include "mupdf/fitz.h" |
2 | |
3 | #include <string.h> |
4 | #include <limits.h> |
5 | #include <assert.h> |
6 | |
7 | /* Enumerate marked selection */ |
8 | |
9 | static float dist2(float a, float b) |
10 | { |
11 | return a * a + b * b; |
12 | } |
13 | |
14 | static float hdist(fz_point *dir, fz_point *a, fz_point *b) |
15 | { |
16 | float dx = b->x - a->x; |
17 | float dy = b->y - a->y; |
18 | return fz_abs(dx * dir->x + dy * dir->y); |
19 | } |
20 | |
21 | static float vdist(fz_point *dir, fz_point *a, fz_point *b) |
22 | { |
23 | float dx = b->x - a->x; |
24 | float dy = b->y - a->y; |
25 | return fz_abs(dx * dir->y + dy * dir->x); |
26 | } |
27 | |
28 | static int line_length(fz_stext_line *line) |
29 | { |
30 | fz_stext_char *ch; |
31 | int n = 0; |
32 | for (ch = line->first_char; ch; ch = ch->next) |
33 | ++n; |
34 | return n; |
35 | } |
36 | |
37 | static int find_closest_in_line(fz_stext_line *line, int idx, fz_point p) |
38 | { |
39 | fz_stext_char *ch; |
40 | float closest_dist = 1e30f; |
41 | int closest_idx = idx; |
42 | |
43 | if (line->dir.x > line->dir.y) |
44 | { |
45 | if (p.y < line->bbox.y0) |
46 | return idx; |
47 | if (p.y > line->bbox.y1) |
48 | return idx + line_length(line); |
49 | } |
50 | else |
51 | { |
52 | if (p.x < line->bbox.x0) |
53 | return idx + line_length(line); |
54 | if (p.x > line->bbox.x1) |
55 | return idx; |
56 | } |
57 | |
58 | for (ch = line->first_char; ch; ch = ch->next) |
59 | { |
60 | float mid_x = (ch->quad.ul.x + ch->quad.ur.x + ch->quad.ll.x + ch->quad.lr.x) / 4; |
61 | float mid_y = (ch->quad.ul.y + ch->quad.ur.y + ch->quad.ll.y + ch->quad.lr.y) / 4; |
62 | float this_dist = dist2(p.x - mid_x, p.y - mid_y); |
63 | if (this_dist < closest_dist) |
64 | { |
65 | closest_dist = this_dist; |
66 | if (line->dir.x > line->dir.y) |
67 | closest_idx = (p.x < mid_x) ? idx : idx+1; |
68 | else |
69 | closest_idx = (p.y < mid_y) ? idx : idx+1; |
70 | } |
71 | ++idx; |
72 | } |
73 | return closest_idx; |
74 | } |
75 | |
76 | static int find_closest_in_page(fz_stext_page *page, fz_point p) |
77 | { |
78 | fz_stext_block *block; |
79 | fz_stext_line *line; |
80 | fz_stext_line *closest_line = NULL; |
81 | int closest_idx = 0; |
82 | float closest_dist = 1e30f; |
83 | float this_dist; |
84 | int idx = 0; |
85 | |
86 | for (block = page->first_block; block; block = block->next) |
87 | { |
88 | if (block->type != FZ_STEXT_BLOCK_TEXT) |
89 | continue; |
90 | for (line = block->u.t.first_line; line; line = line->next) |
91 | { |
92 | fz_rect box = line->bbox; |
93 | if (p.x >= box.x0 && p.x <= box.x1) |
94 | { |
95 | if (p.y < box.y0) |
96 | this_dist = dist2(box.y0 - p.y, 0); |
97 | else if (p.y > box.y1) |
98 | this_dist = dist2(p.y - box.y1, 0); |
99 | else |
100 | this_dist = 0; |
101 | } |
102 | else if (p.y >= box.y0 && p.y <= box.y1) |
103 | { |
104 | if (p.x < box.x0) |
105 | this_dist = dist2(box.x0 - p.x, 0); |
106 | else if (p.x > box.x1) |
107 | this_dist = dist2(p.x - box.x1, 0); |
108 | else |
109 | this_dist = 0; |
110 | } |
111 | else |
112 | { |
113 | float dul = dist2(p.x - box.x0, p.y - box.y0); |
114 | float dur = dist2(p.x - box.x1, p.y - box.y0); |
115 | float dll = dist2(p.x - box.x0, p.y - box.y1); |
116 | float dlr = dist2(p.x - box.x1, p.y - box.y1); |
117 | this_dist = fz_min(fz_min(dul, dur), fz_min(dll, dlr)); |
118 | } |
119 | if (this_dist < closest_dist) |
120 | { |
121 | closest_dist = this_dist; |
122 | closest_line = line; |
123 | closest_idx = idx; |
124 | } |
125 | idx += line_length(line); |
126 | } |
127 | } |
128 | |
129 | if (closest_line) |
130 | return find_closest_in_line(closest_line, closest_idx, p); |
131 | return 0; |
132 | } |
133 | |
134 | struct callbacks |
135 | { |
136 | void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch); |
137 | void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln); |
138 | void *arg; |
139 | }; |
140 | |
141 | static void |
142 | fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb) |
143 | { |
144 | fz_stext_block *block; |
145 | fz_stext_line *line; |
146 | fz_stext_char *ch; |
147 | int idx, start, end; |
148 | int inside; |
149 | |
150 | start = find_closest_in_page(page, a); |
151 | end = find_closest_in_page(page, b); |
152 | |
153 | if (start > end) |
154 | idx = start, start = end, end = idx; |
155 | |
156 | if (start == end) |
157 | return; |
158 | |
159 | inside = 0; |
160 | idx = 0; |
161 | for (block = page->first_block; block; block = block->next) |
162 | { |
163 | if (block->type != FZ_STEXT_BLOCK_TEXT) |
164 | continue; |
165 | for (line = block->u.t.first_line; line; line = line->next) |
166 | { |
167 | for (ch = line->first_char; ch; ch = ch->next) |
168 | { |
169 | if (!inside) |
170 | if (idx == start) |
171 | inside = 1; |
172 | if (inside) |
173 | cb->on_char(ctx, cb->arg, line, ch); |
174 | if (++idx == end) |
175 | return; |
176 | } |
177 | if (inside) |
178 | cb->on_line(ctx, cb->arg, line); |
179 | } |
180 | } |
181 | } |
182 | |
183 | fz_quad |
184 | fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode) |
185 | { |
186 | fz_stext_block *block; |
187 | fz_stext_line *line; |
188 | fz_stext_char *ch; |
189 | fz_quad handles; |
190 | int idx, start, end; |
191 | int pc; |
192 | |
193 | start = find_closest_in_page(page, *a); |
194 | end = find_closest_in_page(page, *b); |
195 | |
196 | if (start > end) |
197 | idx = start, start = end, end = idx; |
198 | |
199 | handles.ll = handles.ul = *a; |
200 | handles.lr = handles.ur = *b; |
201 | |
202 | idx = 0; |
203 | for (block = page->first_block; block; block = block->next) |
204 | { |
205 | if (block->type != FZ_STEXT_BLOCK_TEXT) |
206 | continue; |
207 | for (line = block->u.t.first_line; line; line = line->next) |
208 | { |
209 | pc = '\n'; |
210 | for (ch = line->first_char; ch; ch = ch->next) |
211 | { |
212 | if (idx <= start) |
213 | { |
214 | if (mode == FZ_SELECT_CHARS |
215 | || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n')) |
216 | || (mode == FZ_SELECT_LINES && (pc == '\n'))) |
217 | { |
218 | handles.ll = ch->quad.ll; |
219 | handles.ul = ch->quad.ul; |
220 | *a = ch->origin; |
221 | } |
222 | } |
223 | if (idx >= end) |
224 | { |
225 | if (mode == FZ_SELECT_CHARS |
226 | || (mode == FZ_SELECT_WORDS && (ch->c == ' '))) |
227 | { |
228 | handles.lr = ch->quad.ll; |
229 | handles.ur = ch->quad.ul; |
230 | *b = ch->origin; |
231 | return handles; |
232 | } |
233 | if (!ch->next) |
234 | { |
235 | handles.lr = ch->quad.lr; |
236 | handles.ur = ch->quad.ur; |
237 | *b = ch->quad.lr; |
238 | return handles; |
239 | } |
240 | } |
241 | pc = ch->c; |
242 | ++idx; |
243 | } |
244 | } |
245 | } |
246 | |
247 | return handles; |
248 | } |
249 | |
250 | /* Highlight selection */ |
251 | |
252 | struct highlight |
253 | { |
254 | int len, cap; |
255 | fz_quad *box; |
256 | float hfuzz, vfuzz; |
257 | }; |
258 | |
259 | static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) |
260 | { |
261 | struct highlight *hits = arg; |
262 | float vfuzz = ch->size * hits->vfuzz; |
263 | float hfuzz = ch->size * hits->hfuzz; |
264 | |
265 | if (hits->len > 0) |
266 | { |
267 | fz_quad *end = &hits->box[hits->len-1]; |
268 | if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz |
269 | && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz |
270 | && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz |
271 | && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz) |
272 | { |
273 | end->ur = ch->quad.ur; |
274 | end->lr = ch->quad.lr; |
275 | return; |
276 | } |
277 | } |
278 | |
279 | if (hits->len < hits->cap) |
280 | hits->box[hits->len++] = ch->quad; |
281 | } |
282 | |
283 | static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line) |
284 | { |
285 | } |
286 | |
287 | /* |
288 | Return a list of quads to highlight lines inside the selection points. |
289 | */ |
290 | int |
291 | fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads) |
292 | { |
293 | struct callbacks cb; |
294 | struct highlight hits; |
295 | |
296 | hits.len = 0; |
297 | hits.cap = max_quads; |
298 | hits.box = quads; |
299 | hits.hfuzz = 0.5f; /* merge large gaps */ |
300 | hits.vfuzz = 0.1f; |
301 | |
302 | cb.on_char = on_highlight_char; |
303 | cb.on_line = on_highlight_line; |
304 | cb.arg = &hits; |
305 | |
306 | fz_enumerate_selection(ctx, page, a, b, &cb); |
307 | |
308 | return hits.len; |
309 | } |
310 | |
311 | /* Copy selection */ |
312 | |
313 | static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) |
314 | { |
315 | fz_buffer *buffer = arg; |
316 | int c = ch->c; |
317 | if (c < 32) |
318 | c = FZ_REPLACEMENT_CHARACTER; |
319 | fz_append_rune(ctx, buffer, c); |
320 | } |
321 | |
322 | static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line) |
323 | { |
324 | fz_buffer *buffer = arg; |
325 | fz_append_byte(ctx, buffer, '\r'); |
326 | fz_append_byte(ctx, buffer, '\n'); |
327 | } |
328 | |
329 | static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line) |
330 | { |
331 | fz_buffer *buffer = arg; |
332 | fz_append_byte(ctx, buffer, '\n'); |
333 | } |
334 | |
335 | /* |
336 | Return a newly allocated UTF-8 string with the text for a given selection. |
337 | |
338 | crlf: If true, write "\r\n" style line endings (otherwise "\n" only). |
339 | */ |
340 | char * |
341 | fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf) |
342 | { |
343 | struct callbacks cb; |
344 | fz_buffer *buffer; |
345 | unsigned char *s; |
346 | |
347 | buffer = fz_new_buffer(ctx, 1024); |
348 | |
349 | cb.on_char = on_copy_char; |
350 | cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf; |
351 | cb.arg = buffer; |
352 | |
353 | fz_enumerate_selection(ctx, page, a, b, &cb); |
354 | |
355 | fz_terminate_buffer(ctx, buffer); |
356 | fz_buffer_extract(ctx, buffer, &s); /* take over the data */ |
357 | fz_drop_buffer(ctx, buffer); |
358 | return (char*)s; |
359 | } |
360 | |
361 | /* String search */ |
362 | |
363 | static inline int canon(int c) |
364 | { |
365 | /* TODO: proper unicode case folding */ |
366 | /* TODO: character equivalence (a matches รค, etc) */ |
367 | if (c == 0xA0 || c == 0x2028 || c == 0x2029) |
368 | return ' '; |
369 | if (c == '\r' || c == '\n' || c == '\t') |
370 | return ' '; |
371 | if (c >= 'A' && c <= 'Z') |
372 | return c - 'A' + 'a'; |
373 | return c; |
374 | } |
375 | |
376 | static inline int chartocanon(int *c, const char *s) |
377 | { |
378 | int n = fz_chartorune(c, s); |
379 | *c = canon(*c); |
380 | return n; |
381 | } |
382 | |
383 | static const char *match_string(const char *h, const char *n) |
384 | { |
385 | int hc, nc; |
386 | const char *e = h; |
387 | h += chartocanon(&hc, h); |
388 | n += chartocanon(&nc, n); |
389 | while (hc == nc) |
390 | { |
391 | e = h; |
392 | if (hc == ' ') |
393 | do |
394 | h += chartocanon(&hc, h); |
395 | while (hc == ' '); |
396 | else |
397 | h += chartocanon(&hc, h); |
398 | if (nc == ' ') |
399 | do |
400 | n += chartocanon(&nc, n); |
401 | while (nc == ' '); |
402 | else |
403 | n += chartocanon(&nc, n); |
404 | } |
405 | return nc == 0 ? e : NULL; |
406 | } |
407 | |
408 | static const char *find_string(const char *s, const char *needle, const char **endp) |
409 | { |
410 | const char *end; |
411 | while (*s) |
412 | { |
413 | end = match_string(s, needle); |
414 | if (end) |
415 | return *endp = end, s; |
416 | ++s; |
417 | } |
418 | return *endp = NULL, NULL; |
419 | } |
420 | |
421 | /* |
422 | Search for occurrence of 'needle' in text page. |
423 | |
424 | Return the number of hits and store hit quads in the passed in array. |
425 | |
426 | NOTE: This is an experimental interface and subject to change without notice. |
427 | */ |
428 | int |
429 | fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, fz_quad *quads, int max_quads) |
430 | { |
431 | struct highlight hits; |
432 | fz_stext_block *block; |
433 | fz_stext_line *line; |
434 | fz_stext_char *ch; |
435 | fz_buffer *buffer; |
436 | const char *haystack, *begin, *end; |
437 | int c, inside; |
438 | |
439 | if (strlen(needle) == 0) |
440 | return 0; |
441 | |
442 | hits.len = 0; |
443 | hits.cap = max_quads; |
444 | hits.box = quads; |
445 | hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ |
446 | hits.vfuzz = 0.1f; |
447 | |
448 | buffer = fz_new_buffer_from_stext_page(ctx, page); |
449 | fz_try(ctx) |
450 | { |
451 | haystack = fz_string_from_buffer(ctx, buffer); |
452 | begin = find_string(haystack, needle, &end); |
453 | if (!begin) |
454 | goto no_more_matches; |
455 | |
456 | inside = 0; |
457 | for (block = page->first_block; block; block = block->next) |
458 | { |
459 | if (block->type != FZ_STEXT_BLOCK_TEXT) |
460 | continue; |
461 | for (line = block->u.t.first_line; line; line = line->next) |
462 | { |
463 | for (ch = line->first_char; ch; ch = ch->next) |
464 | { |
465 | try_new_match: |
466 | if (!inside) |
467 | { |
468 | if (haystack >= begin) |
469 | inside = 1; |
470 | } |
471 | if (inside) |
472 | { |
473 | if (haystack < end) |
474 | on_highlight_char(ctx, &hits, line, ch); |
475 | else |
476 | { |
477 | inside = 0; |
478 | begin = find_string(haystack, needle, &end); |
479 | if (!begin) |
480 | goto no_more_matches; |
481 | else |
482 | goto try_new_match; |
483 | } |
484 | } |
485 | haystack += fz_chartorune(&c, haystack); |
486 | } |
487 | assert(*haystack == '\n'); |
488 | ++haystack; |
489 | } |
490 | assert(*haystack == '\n'); |
491 | ++haystack; |
492 | } |
493 | no_more_matches:; |
494 | } |
495 | fz_always(ctx) |
496 | fz_drop_buffer(ctx, buffer); |
497 | fz_catch(ctx) |
498 | fz_rethrow(ctx); |
499 | |
500 | return hits.len; |
501 | } |
502 | |