1 | #include "mupdf/fitz.h" |
2 | #include "fitz-imp.h" |
3 | |
4 | #include <string.h> |
5 | |
6 | /* |
7 | Create a new empty fz_text object. |
8 | |
9 | Throws exception on failure to allocate. |
10 | */ |
11 | fz_text * |
12 | fz_new_text(fz_context *ctx) |
13 | { |
14 | fz_text *text = fz_malloc_struct(ctx, fz_text); |
15 | text->refs = 1; |
16 | return text; |
17 | } |
18 | |
19 | fz_text * |
20 | fz_keep_text(fz_context *ctx, const fz_text *textc) |
21 | { |
22 | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
23 | |
24 | return fz_keep_imp(ctx, text, &text->refs); |
25 | } |
26 | |
27 | void |
28 | fz_drop_text(fz_context *ctx, const fz_text *textc) |
29 | { |
30 | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
31 | |
32 | if (fz_drop_imp(ctx, text, &text->refs)) |
33 | { |
34 | fz_text_span *span = text->head; |
35 | while (span) |
36 | { |
37 | fz_text_span *next = span->next; |
38 | fz_drop_font(ctx, span->font); |
39 | fz_free(ctx, span->items); |
40 | fz_free(ctx, span); |
41 | span = next; |
42 | } |
43 | fz_free(ctx, text); |
44 | } |
45 | } |
46 | |
47 | static fz_text_span * |
48 | fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
49 | { |
50 | fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); |
51 | span->font = fz_keep_font(ctx, font); |
52 | span->wmode = wmode; |
53 | span->bidi_level = bidi_level; |
54 | span->markup_dir = markup_dir; |
55 | span->language = language; |
56 | span->trm = trm; |
57 | span->trm.e = 0; |
58 | span->trm.f = 0; |
59 | return span; |
60 | } |
61 | |
62 | static fz_text_span * |
63 | fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
64 | { |
65 | if (!text->tail) |
66 | { |
67 | text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
68 | } |
69 | else if (text->tail->font != font || |
70 | text->tail->wmode != wmode || |
71 | text->tail->bidi_level != bidi_level || |
72 | text->tail->markup_dir != markup_dir || |
73 | text->tail->language != language || |
74 | text->tail->trm.a != trm.a || |
75 | text->tail->trm.b != trm.b || |
76 | text->tail->trm.c != trm.c || |
77 | text->tail->trm.d != trm.d) |
78 | { |
79 | text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
80 | } |
81 | return text->tail; |
82 | } |
83 | |
84 | static void |
85 | fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) |
86 | { |
87 | int new_cap = span->cap; |
88 | if (span->len + n < new_cap) |
89 | return; |
90 | while (span->len + n > new_cap) |
91 | new_cap = new_cap + 36; |
92 | span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item); |
93 | span->cap = new_cap; |
94 | } |
95 | |
96 | /* |
97 | Add a glyph/unicode value to a text object. |
98 | |
99 | text: Text object to add to. |
100 | |
101 | font: The font the glyph should be added in. |
102 | |
103 | trm: The transform to use for the glyph. |
104 | |
105 | glyph: The glyph id to add. |
106 | |
107 | unicode: The unicode character for the glyph. |
108 | |
109 | wmode: 1 for vertical mode, 0 for horizontal. |
110 | |
111 | bidi_level: The bidirectional level for this glyph. |
112 | |
113 | markup_dir: The direction of the text as specified in the |
114 | markup. |
115 | |
116 | language: The language in use (if known, 0 otherwise) |
117 | (e.g. FZ_LANG_zh_Hans). |
118 | |
119 | Throws exception on failure to allocate. |
120 | */ |
121 | void |
122 | fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) |
123 | { |
124 | fz_text_span *span; |
125 | |
126 | if (text->refs != 1) |
127 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects" ); |
128 | |
129 | span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); |
130 | |
131 | fz_grow_text_span(ctx, span, 1); |
132 | |
133 | span->items[span->len].ucs = ucs; |
134 | span->items[span->len].gid = gid; |
135 | span->items[span->len].x = trm.e; |
136 | span->items[span->len].y = trm.f; |
137 | span->len++; |
138 | } |
139 | |
140 | /* |
141 | Add a UTF8 string to a text object. |
142 | |
143 | text: Text object to add to. |
144 | |
145 | font: The font the string should be added in. |
146 | |
147 | trm: The transform to use. |
148 | |
149 | s: The utf-8 string to add. |
150 | |
151 | wmode: 1 for vertical mode, 0 for horizontal. |
152 | |
153 | bidi_level: The bidirectional level for this glyph. |
154 | |
155 | markup_dir: The direction of the text as specified in the |
156 | markup. |
157 | |
158 | language: The language in use (if known, 0 otherwise) |
159 | (e.g. FZ_LANG_zh_Hans). |
160 | |
161 | Returns the transform updated with the advance width of the string. |
162 | */ |
163 | fz_matrix |
164 | fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) |
165 | { |
166 | fz_font *font; |
167 | int gid, ucs; |
168 | float adv; |
169 | |
170 | while (*s) |
171 | { |
172 | s += fz_chartorune(&ucs, s); |
173 | gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); |
174 | fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); |
175 | adv = fz_advance_glyph(ctx, font, gid, wmode); |
176 | if (wmode == 0) |
177 | trm = fz_pre_translate(trm, adv, 0); |
178 | else |
179 | trm = fz_pre_translate(trm, 0, -adv); |
180 | } |
181 | |
182 | return trm; |
183 | } |
184 | |
185 | /* |
186 | Find the bounds of a given text object. |
187 | |
188 | text: The text object to find the bounds of. |
189 | |
190 | stroke: Pointer to the stroke attributes (for stroked |
191 | text), or NULL (for filled text). |
192 | |
193 | ctm: The matrix in use. |
194 | |
195 | r: pointer to storage for the bounds. |
196 | |
197 | Returns a pointer to r, which is updated to contain the |
198 | bounding box for the text object. |
199 | */ |
200 | fz_rect |
201 | fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm) |
202 | { |
203 | fz_text_span *span; |
204 | fz_matrix tm, trm; |
205 | fz_rect gbox; |
206 | fz_rect bbox; |
207 | int i; |
208 | |
209 | bbox = fz_empty_rect; |
210 | |
211 | for (span = text->head; span; span = span->next) |
212 | { |
213 | if (span->len > 0) |
214 | { |
215 | tm = span->trm; |
216 | for (i = 0; i < span->len; i++) |
217 | { |
218 | if (span->items[i].gid >= 0) |
219 | { |
220 | tm.e = span->items[i].x; |
221 | tm.f = span->items[i].y; |
222 | trm = fz_concat(tm, ctm); |
223 | gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm); |
224 | bbox = fz_union_rect(bbox, gbox); |
225 | } |
226 | } |
227 | } |
228 | } |
229 | |
230 | if (!fz_is_empty_rect(bbox)) |
231 | { |
232 | if (stroke) |
233 | bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm); |
234 | |
235 | /* Compensate for the glyph cache limited positioning precision */ |
236 | bbox.x0 -= 1; |
237 | bbox.y0 -= 1; |
238 | bbox.x1 += 1; |
239 | bbox.y1 += 1; |
240 | } |
241 | |
242 | return bbox; |
243 | } |
244 | |
245 | /* |
246 | Convert ISO 639 (639-{1,2,3,5}) language specification |
247 | strings losslessly to a 15 bit fz_text_language code. |
248 | |
249 | No validation is carried out. Obviously invalid (out |
250 | of spec) codes will be mapped to FZ_LANG_UNSET, but |
251 | well-formed (but undefined) codes will be blithely |
252 | accepted. |
253 | */ |
254 | fz_text_language fz_text_language_from_string(const char *str) |
255 | { |
256 | fz_text_language lang; |
257 | |
258 | if (str == NULL) |
259 | return FZ_LANG_UNSET; |
260 | |
261 | if (!strcmp(str, "zh-Hant" ) || |
262 | !strcmp(str, "zh-HK" ) || |
263 | !strcmp(str, "zh-MO" ) || |
264 | !strcmp(str, "zh-SG" ) || |
265 | !strcmp(str, "zh-TW" )) |
266 | return FZ_LANG_zh_Hant; |
267 | if (!strcmp(str, "zh-Hans" ) || |
268 | !strcmp(str, "zh-CN" )) |
269 | return FZ_LANG_zh_Hans; |
270 | |
271 | /* 1st char */ |
272 | if (str[0] >= 'a' && str[0] <= 'z') |
273 | lang = str[0] - 'a' + 1; |
274 | else if (str[0] >= 'A' && str[0] <= 'Z') |
275 | lang = str[0] - 'A' + 1; |
276 | else |
277 | return 0; |
278 | |
279 | /* 2nd char */ |
280 | if (str[1] >= 'a' && str[1] <= 'z') |
281 | lang += 27*(str[1] - 'a' + 1); |
282 | else if (str[1] >= 'A' && str[1] <= 'Z') |
283 | lang += 27*(str[1] - 'A' + 1); |
284 | else |
285 | return 0; /* There are no valid 1 char language codes */ |
286 | |
287 | /* 3nd char */ |
288 | if (str[2] >= 'a' && str[2] <= 'z') |
289 | lang += 27*27*(str[2] - 'a' + 1); |
290 | else if (str[2] >= 'A' && str[2] <= 'Z') |
291 | lang += 27*27*(str[2] - 'A' + 1); |
292 | |
293 | /* We don't support iso 639-6 4 char codes, cos the standard |
294 | * has been withdrawn, and no one uses them. */ |
295 | return lang; |
296 | } |
297 | |
298 | /* |
299 | Recover ISO 639 (639-{1,2,3,5}) language specification |
300 | strings losslessly from a 15 bit fz_text_language code. |
301 | |
302 | No validation is carried out. See note above. |
303 | */ |
304 | char *fz_string_from_text_language(char str[8], fz_text_language lang) |
305 | { |
306 | int c; |
307 | |
308 | /* str is supposed to be at least 8 chars in size */ |
309 | if (str == NULL) |
310 | return NULL; |
311 | |
312 | if (lang == FZ_LANG_zh_Hant) |
313 | fz_strlcpy(str, "zh-Hant" , 8); |
314 | else if (lang == FZ_LANG_zh_Hans) |
315 | fz_strlcpy(str, "zh-Hans" , 8); |
316 | else |
317 | { |
318 | c = lang % 27; |
319 | lang = lang / 27; |
320 | str[0] = c == 0 ? 0 : c - 1 + 'a'; |
321 | c = lang % 27; |
322 | lang = lang / 27; |
323 | str[1] = c == 0 ? 0 : c - 1 + 'a'; |
324 | c = lang % 27; |
325 | str[2] = c == 0 ? 0 : c - 1 + 'a'; |
326 | str[3] = 0; |
327 | } |
328 | |
329 | return str; |
330 | } |
331 | |