1#include "mupdf/fitz.h"
2
3#include <string.h>
4#include <limits.h>
5#include <assert.h>
6
7/* Enumerate marked selection */
8
9static float dist2(float a, float b)
10{
11 return a * a + b * b;
12}
13
14static float hdist(fz_point *dir, fz_point *a, fz_point *b)
15{
16 float dx = b->x - a->x;
17 float dy = b->y - a->y;
18 return fz_abs(dx * dir->x + dy * dir->y);
19}
20
21static float vdist(fz_point *dir, fz_point *a, fz_point *b)
22{
23 float dx = b->x - a->x;
24 float dy = b->y - a->y;
25 return fz_abs(dx * dir->y + dy * dir->x);
26}
27
28static int line_length(fz_stext_line *line)
29{
30 fz_stext_char *ch;
31 int n = 0;
32 for (ch = line->first_char; ch; ch = ch->next)
33 ++n;
34 return n;
35}
36
37static int find_closest_in_line(fz_stext_line *line, int idx, fz_point p)
38{
39 fz_stext_char *ch;
40 float closest_dist = 1e30f;
41 int closest_idx = idx;
42
43 if (line->dir.x > line->dir.y)
44 {
45 if (p.y < line->bbox.y0)
46 return idx;
47 if (p.y > line->bbox.y1)
48 return idx + line_length(line);
49 }
50 else
51 {
52 if (p.x < line->bbox.x0)
53 return idx + line_length(line);
54 if (p.x > line->bbox.x1)
55 return idx;
56 }
57
58 for (ch = line->first_char; ch; ch = ch->next)
59 {
60 float mid_x = (ch->quad.ul.x + ch->quad.ur.x + ch->quad.ll.x + ch->quad.lr.x) / 4;
61 float mid_y = (ch->quad.ul.y + ch->quad.ur.y + ch->quad.ll.y + ch->quad.lr.y) / 4;
62 float this_dist = dist2(p.x - mid_x, p.y - mid_y);
63 if (this_dist < closest_dist)
64 {
65 closest_dist = this_dist;
66 if (line->dir.x > line->dir.y)
67 closest_idx = (p.x < mid_x) ? idx : idx+1;
68 else
69 closest_idx = (p.y < mid_y) ? idx : idx+1;
70 }
71 ++idx;
72 }
73 return closest_idx;
74}
75
76static int find_closest_in_page(fz_stext_page *page, fz_point p)
77{
78 fz_stext_block *block;
79 fz_stext_line *line;
80 fz_stext_line *closest_line = NULL;
81 int closest_idx = 0;
82 float closest_dist = 1e30f;
83 float this_dist;
84 int idx = 0;
85
86 for (block = page->first_block; block; block = block->next)
87 {
88 if (block->type != FZ_STEXT_BLOCK_TEXT)
89 continue;
90 for (line = block->u.t.first_line; line; line = line->next)
91 {
92 fz_rect box = line->bbox;
93 if (p.x >= box.x0 && p.x <= box.x1)
94 {
95 if (p.y < box.y0)
96 this_dist = dist2(box.y0 - p.y, 0);
97 else if (p.y > box.y1)
98 this_dist = dist2(p.y - box.y1, 0);
99 else
100 this_dist = 0;
101 }
102 else if (p.y >= box.y0 && p.y <= box.y1)
103 {
104 if (p.x < box.x0)
105 this_dist = dist2(box.x0 - p.x, 0);
106 else if (p.x > box.x1)
107 this_dist = dist2(p.x - box.x1, 0);
108 else
109 this_dist = 0;
110 }
111 else
112 {
113 float dul = dist2(p.x - box.x0, p.y - box.y0);
114 float dur = dist2(p.x - box.x1, p.y - box.y0);
115 float dll = dist2(p.x - box.x0, p.y - box.y1);
116 float dlr = dist2(p.x - box.x1, p.y - box.y1);
117 this_dist = fz_min(fz_min(dul, dur), fz_min(dll, dlr));
118 }
119 if (this_dist < closest_dist)
120 {
121 closest_dist = this_dist;
122 closest_line = line;
123 closest_idx = idx;
124 }
125 idx += line_length(line);
126 }
127 }
128
129 if (closest_line)
130 return find_closest_in_line(closest_line, closest_idx, p);
131 return 0;
132}
133
134struct callbacks
135{
136 void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch);
137 void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln);
138 void *arg;
139};
140
141static void
142fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb)
143{
144 fz_stext_block *block;
145 fz_stext_line *line;
146 fz_stext_char *ch;
147 int idx, start, end;
148 int inside;
149
150 start = find_closest_in_page(page, a);
151 end = find_closest_in_page(page, b);
152
153 if (start > end)
154 idx = start, start = end, end = idx;
155
156 if (start == end)
157 return;
158
159 inside = 0;
160 idx = 0;
161 for (block = page->first_block; block; block = block->next)
162 {
163 if (block->type != FZ_STEXT_BLOCK_TEXT)
164 continue;
165 for (line = block->u.t.first_line; line; line = line->next)
166 {
167 for (ch = line->first_char; ch; ch = ch->next)
168 {
169 if (!inside)
170 if (idx == start)
171 inside = 1;
172 if (inside)
173 cb->on_char(ctx, cb->arg, line, ch);
174 if (++idx == end)
175 return;
176 }
177 if (inside)
178 cb->on_line(ctx, cb->arg, line);
179 }
180 }
181}
182
183fz_quad
184fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode)
185{
186 fz_stext_block *block;
187 fz_stext_line *line;
188 fz_stext_char *ch;
189 fz_quad handles;
190 int idx, start, end;
191 int pc;
192
193 start = find_closest_in_page(page, *a);
194 end = find_closest_in_page(page, *b);
195
196 if (start > end)
197 idx = start, start = end, end = idx;
198
199 handles.ll = handles.ul = *a;
200 handles.lr = handles.ur = *b;
201
202 idx = 0;
203 for (block = page->first_block; block; block = block->next)
204 {
205 if (block->type != FZ_STEXT_BLOCK_TEXT)
206 continue;
207 for (line = block->u.t.first_line; line; line = line->next)
208 {
209 pc = '\n';
210 for (ch = line->first_char; ch; ch = ch->next)
211 {
212 if (idx <= start)
213 {
214 if (mode == FZ_SELECT_CHARS
215 || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n'))
216 || (mode == FZ_SELECT_LINES && (pc == '\n')))
217 {
218 handles.ll = ch->quad.ll;
219 handles.ul = ch->quad.ul;
220 *a = ch->origin;
221 }
222 }
223 if (idx >= end)
224 {
225 if (mode == FZ_SELECT_CHARS
226 || (mode == FZ_SELECT_WORDS && (ch->c == ' ')))
227 {
228 handles.lr = ch->quad.ll;
229 handles.ur = ch->quad.ul;
230 *b = ch->origin;
231 return handles;
232 }
233 if (!ch->next)
234 {
235 handles.lr = ch->quad.lr;
236 handles.ur = ch->quad.ur;
237 *b = ch->quad.lr;
238 return handles;
239 }
240 }
241 pc = ch->c;
242 ++idx;
243 }
244 }
245 }
246
247 return handles;
248}
249
250/* Highlight selection */
251
252struct highlight
253{
254 int len, cap;
255 fz_quad *box;
256 float hfuzz, vfuzz;
257};
258
259static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
260{
261 struct highlight *hits = arg;
262 float vfuzz = ch->size * hits->vfuzz;
263 float hfuzz = ch->size * hits->hfuzz;
264
265 if (hits->len > 0)
266 {
267 fz_quad *end = &hits->box[hits->len-1];
268 if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz
269 && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz
270 && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz
271 && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz)
272 {
273 end->ur = ch->quad.ur;
274 end->lr = ch->quad.lr;
275 return;
276 }
277 }
278
279 if (hits->len < hits->cap)
280 hits->box[hits->len++] = ch->quad;
281}
282
283static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line)
284{
285}
286
287/*
288 Return a list of quads to highlight lines inside the selection points.
289*/
290int
291fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads)
292{
293 struct callbacks cb;
294 struct highlight hits;
295
296 hits.len = 0;
297 hits.cap = max_quads;
298 hits.box = quads;
299 hits.hfuzz = 0.5f; /* merge large gaps */
300 hits.vfuzz = 0.1f;
301
302 cb.on_char = on_highlight_char;
303 cb.on_line = on_highlight_line;
304 cb.arg = &hits;
305
306 fz_enumerate_selection(ctx, page, a, b, &cb);
307
308 return hits.len;
309}
310
311/* Copy selection */
312
313static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
314{
315 fz_buffer *buffer = arg;
316 int c = ch->c;
317 if (c < 32)
318 c = FZ_REPLACEMENT_CHARACTER;
319 fz_append_rune(ctx, buffer, c);
320}
321
322static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line)
323{
324 fz_buffer *buffer = arg;
325 fz_append_byte(ctx, buffer, '\r');
326 fz_append_byte(ctx, buffer, '\n');
327}
328
329static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line)
330{
331 fz_buffer *buffer = arg;
332 fz_append_byte(ctx, buffer, '\n');
333}
334
335/*
336 Return a newly allocated UTF-8 string with the text for a given selection.
337
338 crlf: If true, write "\r\n" style line endings (otherwise "\n" only).
339*/
340char *
341fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf)
342{
343 struct callbacks cb;
344 fz_buffer *buffer;
345 unsigned char *s;
346
347 buffer = fz_new_buffer(ctx, 1024);
348
349 cb.on_char = on_copy_char;
350 cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf;
351 cb.arg = buffer;
352
353 fz_enumerate_selection(ctx, page, a, b, &cb);
354
355 fz_terminate_buffer(ctx, buffer);
356 fz_buffer_extract(ctx, buffer, &s); /* take over the data */
357 fz_drop_buffer(ctx, buffer);
358 return (char*)s;
359}
360
361/* String search */
362
363static inline int canon(int c)
364{
365 /* TODO: proper unicode case folding */
366 /* TODO: character equivalence (a matches รค, etc) */
367 if (c == 0xA0 || c == 0x2028 || c == 0x2029)
368 return ' ';
369 if (c == '\r' || c == '\n' || c == '\t')
370 return ' ';
371 if (c >= 'A' && c <= 'Z')
372 return c - 'A' + 'a';
373 return c;
374}
375
376static inline int chartocanon(int *c, const char *s)
377{
378 int n = fz_chartorune(c, s);
379 *c = canon(*c);
380 return n;
381}
382
383static const char *match_string(const char *h, const char *n)
384{
385 int hc, nc;
386 const char *e = h;
387 h += chartocanon(&hc, h);
388 n += chartocanon(&nc, n);
389 while (hc == nc)
390 {
391 e = h;
392 if (hc == ' ')
393 do
394 h += chartocanon(&hc, h);
395 while (hc == ' ');
396 else
397 h += chartocanon(&hc, h);
398 if (nc == ' ')
399 do
400 n += chartocanon(&nc, n);
401 while (nc == ' ');
402 else
403 n += chartocanon(&nc, n);
404 }
405 return nc == 0 ? e : NULL;
406}
407
408static const char *find_string(const char *s, const char *needle, const char **endp)
409{
410 const char *end;
411 while (*s)
412 {
413 end = match_string(s, needle);
414 if (end)
415 return *endp = end, s;
416 ++s;
417 }
418 return *endp = NULL, NULL;
419}
420
421/*
422 Search for occurrence of 'needle' in text page.
423
424 Return the number of hits and store hit quads in the passed in array.
425
426 NOTE: This is an experimental interface and subject to change without notice.
427*/
428int
429fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, fz_quad *quads, int max_quads)
430{
431 struct highlight hits;
432 fz_stext_block *block;
433 fz_stext_line *line;
434 fz_stext_char *ch;
435 fz_buffer *buffer;
436 const char *haystack, *begin, *end;
437 int c, inside;
438
439 if (strlen(needle) == 0)
440 return 0;
441
442 hits.len = 0;
443 hits.cap = max_quads;
444 hits.box = quads;
445 hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
446 hits.vfuzz = 0.1f;
447
448 buffer = fz_new_buffer_from_stext_page(ctx, page);
449 fz_try(ctx)
450 {
451 haystack = fz_string_from_buffer(ctx, buffer);
452 begin = find_string(haystack, needle, &end);
453 if (!begin)
454 goto no_more_matches;
455
456 inside = 0;
457 for (block = page->first_block; block; block = block->next)
458 {
459 if (block->type != FZ_STEXT_BLOCK_TEXT)
460 continue;
461 for (line = block->u.t.first_line; line; line = line->next)
462 {
463 for (ch = line->first_char; ch; ch = ch->next)
464 {
465try_new_match:
466 if (!inside)
467 {
468 if (haystack >= begin)
469 inside = 1;
470 }
471 if (inside)
472 {
473 if (haystack < end)
474 on_highlight_char(ctx, &hits, line, ch);
475 else
476 {
477 inside = 0;
478 begin = find_string(haystack, needle, &end);
479 if (!begin)
480 goto no_more_matches;
481 else
482 goto try_new_match;
483 }
484 }
485 haystack += fz_chartorune(&c, haystack);
486 }
487 assert(*haystack == '\n');
488 ++haystack;
489 }
490 assert(*haystack == '\n');
491 ++haystack;
492 }
493no_more_matches:;
494 }
495 fz_always(ctx)
496 fz_drop_buffer(ctx, buffer);
497 fz_catch(ctx)
498 fz_rethrow(ctx);
499
500 return hits.len;
501}
502