| 1 | #include "mupdf/fitz.h" |
| 2 | #include "fitz-imp.h" |
| 3 | |
| 4 | #include <string.h> |
| 5 | |
| 6 | /* |
| 7 | Create a new empty fz_text object. |
| 8 | |
| 9 | Throws exception on failure to allocate. |
| 10 | */ |
| 11 | fz_text * |
| 12 | fz_new_text(fz_context *ctx) |
| 13 | { |
| 14 | fz_text *text = fz_malloc_struct(ctx, fz_text); |
| 15 | text->refs = 1; |
| 16 | return text; |
| 17 | } |
| 18 | |
| 19 | fz_text * |
| 20 | fz_keep_text(fz_context *ctx, const fz_text *textc) |
| 21 | { |
| 22 | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
| 23 | |
| 24 | return fz_keep_imp(ctx, text, &text->refs); |
| 25 | } |
| 26 | |
| 27 | void |
| 28 | fz_drop_text(fz_context *ctx, const fz_text *textc) |
| 29 | { |
| 30 | fz_text *text = (fz_text *)textc; /* Explicit cast away of const */ |
| 31 | |
| 32 | if (fz_drop_imp(ctx, text, &text->refs)) |
| 33 | { |
| 34 | fz_text_span *span = text->head; |
| 35 | while (span) |
| 36 | { |
| 37 | fz_text_span *next = span->next; |
| 38 | fz_drop_font(ctx, span->font); |
| 39 | fz_free(ctx, span->items); |
| 40 | fz_free(ctx, span); |
| 41 | span = next; |
| 42 | } |
| 43 | fz_free(ctx, text); |
| 44 | } |
| 45 | } |
| 46 | |
| 47 | static fz_text_span * |
| 48 | fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
| 49 | { |
| 50 | fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); |
| 51 | span->font = fz_keep_font(ctx, font); |
| 52 | span->wmode = wmode; |
| 53 | span->bidi_level = bidi_level; |
| 54 | span->markup_dir = markup_dir; |
| 55 | span->language = language; |
| 56 | span->trm = trm; |
| 57 | span->trm.e = 0; |
| 58 | span->trm.f = 0; |
| 59 | return span; |
| 60 | } |
| 61 | |
| 62 | static fz_text_span * |
| 63 | fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm) |
| 64 | { |
| 65 | if (!text->tail) |
| 66 | { |
| 67 | text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
| 68 | } |
| 69 | else if (text->tail->font != font || |
| 70 | text->tail->wmode != wmode || |
| 71 | text->tail->bidi_level != bidi_level || |
| 72 | text->tail->markup_dir != markup_dir || |
| 73 | text->tail->language != language || |
| 74 | text->tail->trm.a != trm.a || |
| 75 | text->tail->trm.b != trm.b || |
| 76 | text->tail->trm.c != trm.c || |
| 77 | text->tail->trm.d != trm.d) |
| 78 | { |
| 79 | text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); |
| 80 | } |
| 81 | return text->tail; |
| 82 | } |
| 83 | |
| 84 | static void |
| 85 | fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) |
| 86 | { |
| 87 | int new_cap = span->cap; |
| 88 | if (span->len + n < new_cap) |
| 89 | return; |
| 90 | while (span->len + n > new_cap) |
| 91 | new_cap = new_cap + 36; |
| 92 | span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item); |
| 93 | span->cap = new_cap; |
| 94 | } |
| 95 | |
| 96 | /* |
| 97 | Add a glyph/unicode value to a text object. |
| 98 | |
| 99 | text: Text object to add to. |
| 100 | |
| 101 | font: The font the glyph should be added in. |
| 102 | |
| 103 | trm: The transform to use for the glyph. |
| 104 | |
| 105 | glyph: The glyph id to add. |
| 106 | |
| 107 | unicode: The unicode character for the glyph. |
| 108 | |
| 109 | wmode: 1 for vertical mode, 0 for horizontal. |
| 110 | |
| 111 | bidi_level: The bidirectional level for this glyph. |
| 112 | |
| 113 | markup_dir: The direction of the text as specified in the |
| 114 | markup. |
| 115 | |
| 116 | language: The language in use (if known, 0 otherwise) |
| 117 | (e.g. FZ_LANG_zh_Hans). |
| 118 | |
| 119 | Throws exception on failure to allocate. |
| 120 | */ |
| 121 | void |
| 122 | fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) |
| 123 | { |
| 124 | fz_text_span *span; |
| 125 | |
| 126 | if (text->refs != 1) |
| 127 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects" ); |
| 128 | |
| 129 | span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); |
| 130 | |
| 131 | fz_grow_text_span(ctx, span, 1); |
| 132 | |
| 133 | span->items[span->len].ucs = ucs; |
| 134 | span->items[span->len].gid = gid; |
| 135 | span->items[span->len].x = trm.e; |
| 136 | span->items[span->len].y = trm.f; |
| 137 | span->len++; |
| 138 | } |
| 139 | |
| 140 | /* |
| 141 | Add a UTF8 string to a text object. |
| 142 | |
| 143 | text: Text object to add to. |
| 144 | |
| 145 | font: The font the string should be added in. |
| 146 | |
| 147 | trm: The transform to use. |
| 148 | |
| 149 | s: The utf-8 string to add. |
| 150 | |
| 151 | wmode: 1 for vertical mode, 0 for horizontal. |
| 152 | |
| 153 | bidi_level: The bidirectional level for this glyph. |
| 154 | |
| 155 | markup_dir: The direction of the text as specified in the |
| 156 | markup. |
| 157 | |
| 158 | language: The language in use (if known, 0 otherwise) |
| 159 | (e.g. FZ_LANG_zh_Hans). |
| 160 | |
| 161 | Returns the transform updated with the advance width of the string. |
| 162 | */ |
| 163 | fz_matrix |
| 164 | fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language) |
| 165 | { |
| 166 | fz_font *font; |
| 167 | int gid, ucs; |
| 168 | float adv; |
| 169 | |
| 170 | while (*s) |
| 171 | { |
| 172 | s += fz_chartorune(&ucs, s); |
| 173 | gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); |
| 174 | fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); |
| 175 | adv = fz_advance_glyph(ctx, font, gid, wmode); |
| 176 | if (wmode == 0) |
| 177 | trm = fz_pre_translate(trm, adv, 0); |
| 178 | else |
| 179 | trm = fz_pre_translate(trm, 0, -adv); |
| 180 | } |
| 181 | |
| 182 | return trm; |
| 183 | } |
| 184 | |
| 185 | /* |
| 186 | Find the bounds of a given text object. |
| 187 | |
| 188 | text: The text object to find the bounds of. |
| 189 | |
| 190 | stroke: Pointer to the stroke attributes (for stroked |
| 191 | text), or NULL (for filled text). |
| 192 | |
| 193 | ctm: The matrix in use. |
| 194 | |
| 195 | r: pointer to storage for the bounds. |
| 196 | |
| 197 | Returns a pointer to r, which is updated to contain the |
| 198 | bounding box for the text object. |
| 199 | */ |
| 200 | fz_rect |
| 201 | fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm) |
| 202 | { |
| 203 | fz_text_span *span; |
| 204 | fz_matrix tm, trm; |
| 205 | fz_rect gbox; |
| 206 | fz_rect bbox; |
| 207 | int i; |
| 208 | |
| 209 | bbox = fz_empty_rect; |
| 210 | |
| 211 | for (span = text->head; span; span = span->next) |
| 212 | { |
| 213 | if (span->len > 0) |
| 214 | { |
| 215 | tm = span->trm; |
| 216 | for (i = 0; i < span->len; i++) |
| 217 | { |
| 218 | if (span->items[i].gid >= 0) |
| 219 | { |
| 220 | tm.e = span->items[i].x; |
| 221 | tm.f = span->items[i].y; |
| 222 | trm = fz_concat(tm, ctm); |
| 223 | gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm); |
| 224 | bbox = fz_union_rect(bbox, gbox); |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | if (!fz_is_empty_rect(bbox)) |
| 231 | { |
| 232 | if (stroke) |
| 233 | bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm); |
| 234 | |
| 235 | /* Compensate for the glyph cache limited positioning precision */ |
| 236 | bbox.x0 -= 1; |
| 237 | bbox.y0 -= 1; |
| 238 | bbox.x1 += 1; |
| 239 | bbox.y1 += 1; |
| 240 | } |
| 241 | |
| 242 | return bbox; |
| 243 | } |
| 244 | |
| 245 | /* |
| 246 | Convert ISO 639 (639-{1,2,3,5}) language specification |
| 247 | strings losslessly to a 15 bit fz_text_language code. |
| 248 | |
| 249 | No validation is carried out. Obviously invalid (out |
| 250 | of spec) codes will be mapped to FZ_LANG_UNSET, but |
| 251 | well-formed (but undefined) codes will be blithely |
| 252 | accepted. |
| 253 | */ |
| 254 | fz_text_language fz_text_language_from_string(const char *str) |
| 255 | { |
| 256 | fz_text_language lang; |
| 257 | |
| 258 | if (str == NULL) |
| 259 | return FZ_LANG_UNSET; |
| 260 | |
| 261 | if (!strcmp(str, "zh-Hant" ) || |
| 262 | !strcmp(str, "zh-HK" ) || |
| 263 | !strcmp(str, "zh-MO" ) || |
| 264 | !strcmp(str, "zh-SG" ) || |
| 265 | !strcmp(str, "zh-TW" )) |
| 266 | return FZ_LANG_zh_Hant; |
| 267 | if (!strcmp(str, "zh-Hans" ) || |
| 268 | !strcmp(str, "zh-CN" )) |
| 269 | return FZ_LANG_zh_Hans; |
| 270 | |
| 271 | /* 1st char */ |
| 272 | if (str[0] >= 'a' && str[0] <= 'z') |
| 273 | lang = str[0] - 'a' + 1; |
| 274 | else if (str[0] >= 'A' && str[0] <= 'Z') |
| 275 | lang = str[0] - 'A' + 1; |
| 276 | else |
| 277 | return 0; |
| 278 | |
| 279 | /* 2nd char */ |
| 280 | if (str[1] >= 'a' && str[1] <= 'z') |
| 281 | lang += 27*(str[1] - 'a' + 1); |
| 282 | else if (str[1] >= 'A' && str[1] <= 'Z') |
| 283 | lang += 27*(str[1] - 'A' + 1); |
| 284 | else |
| 285 | return 0; /* There are no valid 1 char language codes */ |
| 286 | |
| 287 | /* 3nd char */ |
| 288 | if (str[2] >= 'a' && str[2] <= 'z') |
| 289 | lang += 27*27*(str[2] - 'a' + 1); |
| 290 | else if (str[2] >= 'A' && str[2] <= 'Z') |
| 291 | lang += 27*27*(str[2] - 'A' + 1); |
| 292 | |
| 293 | /* We don't support iso 639-6 4 char codes, cos the standard |
| 294 | * has been withdrawn, and no one uses them. */ |
| 295 | return lang; |
| 296 | } |
| 297 | |
| 298 | /* |
| 299 | Recover ISO 639 (639-{1,2,3,5}) language specification |
| 300 | strings losslessly from a 15 bit fz_text_language code. |
| 301 | |
| 302 | No validation is carried out. See note above. |
| 303 | */ |
| 304 | char *fz_string_from_text_language(char str[8], fz_text_language lang) |
| 305 | { |
| 306 | int c; |
| 307 | |
| 308 | /* str is supposed to be at least 8 chars in size */ |
| 309 | if (str == NULL) |
| 310 | return NULL; |
| 311 | |
| 312 | if (lang == FZ_LANG_zh_Hant) |
| 313 | fz_strlcpy(str, "zh-Hant" , 8); |
| 314 | else if (lang == FZ_LANG_zh_Hans) |
| 315 | fz_strlcpy(str, "zh-Hans" , 8); |
| 316 | else |
| 317 | { |
| 318 | c = lang % 27; |
| 319 | lang = lang / 27; |
| 320 | str[0] = c == 0 ? 0 : c - 1 + 'a'; |
| 321 | c = lang % 27; |
| 322 | lang = lang / 27; |
| 323 | str[1] = c == 0 ? 0 : c - 1 + 'a'; |
| 324 | c = lang % 27; |
| 325 | str[2] = c == 0 ? 0 : c - 1 + 'a'; |
| 326 | str[3] = 0; |
| 327 | } |
| 328 | |
| 329 | return str; |
| 330 | } |
| 331 | |