| 1 | #include "mupdf/fitz.h" |
| 2 | #include "mupdf/pdf.h" |
| 3 | |
| 4 | #include <string.h> |
| 5 | |
| 6 | size_t |
| 7 | pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap) |
| 8 | { |
| 9 | if (cmap == NULL) |
| 10 | return 0; |
| 11 | if (cmap->storable.refs < 0) |
| 12 | return 0; |
| 13 | |
| 14 | return pdf_cmap_size(ctx, cmap->usecmap) + |
| 15 | cmap->rcap * sizeof *cmap->ranges + |
| 16 | cmap->xcap * sizeof *cmap->xranges + |
| 17 | cmap->mcap * sizeof *cmap->mranges; |
| 18 | } |
| 19 | |
| 20 | /* |
| 21 | * Load CMap stream in PDF file |
| 22 | */ |
| 23 | pdf_cmap * |
| 24 | pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj) |
| 25 | { |
| 26 | fz_stream *file = NULL; |
| 27 | pdf_cmap *cmap = NULL; |
| 28 | pdf_cmap *usecmap = NULL; |
| 29 | pdf_obj *obj; |
| 30 | |
| 31 | fz_var(file); |
| 32 | fz_var(cmap); |
| 33 | fz_var(usecmap); |
| 34 | |
| 35 | if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL) |
| 36 | return cmap; |
| 37 | |
| 38 | fz_try(ctx) |
| 39 | { |
| 40 | file = pdf_open_stream(ctx, stmobj); |
| 41 | cmap = pdf_load_cmap(ctx, file); |
| 42 | |
| 43 | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode)); |
| 44 | if (pdf_is_int(ctx, obj)) |
| 45 | pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj)); |
| 46 | |
| 47 | obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap)); |
| 48 | if (pdf_is_name(ctx, obj)) |
| 49 | { |
| 50 | usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj)); |
| 51 | pdf_set_usecmap(ctx, cmap, usecmap); |
| 52 | } |
| 53 | else if (pdf_is_indirect(ctx, obj)) |
| 54 | { |
| 55 | if (pdf_mark_obj(ctx, obj)) |
| 56 | fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap" ); |
| 57 | fz_try(ctx) |
| 58 | usecmap = pdf_load_embedded_cmap(ctx, doc, obj); |
| 59 | fz_always(ctx) |
| 60 | pdf_unmark_obj(ctx, obj); |
| 61 | fz_catch(ctx) |
| 62 | fz_rethrow(ctx); |
| 63 | pdf_set_usecmap(ctx, cmap, usecmap); |
| 64 | } |
| 65 | |
| 66 | pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); |
| 67 | } |
| 68 | fz_always(ctx) |
| 69 | { |
| 70 | fz_drop_stream(ctx, file); |
| 71 | pdf_drop_cmap(ctx, usecmap); |
| 72 | } |
| 73 | fz_catch(ctx) |
| 74 | { |
| 75 | pdf_drop_cmap(ctx, cmap); |
| 76 | fz_rethrow(ctx); |
| 77 | } |
| 78 | |
| 79 | return cmap; |
| 80 | } |
| 81 | |
| 82 | /* |
| 83 | * Create an Identity-* CMap (for both 1 and 2-byte encodings) |
| 84 | */ |
| 85 | pdf_cmap * |
| 86 | pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) |
| 87 | { |
| 88 | pdf_cmap *cmap = pdf_new_cmap(ctx); |
| 89 | fz_try(ctx) |
| 90 | { |
| 91 | unsigned int high = (1 << (bytes * 8)) - 1; |
| 92 | if (wmode) |
| 93 | fz_strlcpy(cmap->cmap_name, "Identity-V" , sizeof cmap->cmap_name); |
| 94 | else |
| 95 | fz_strlcpy(cmap->cmap_name, "Identity-H" , sizeof cmap->cmap_name); |
| 96 | pdf_add_codespace(ctx, cmap, 0, high, bytes); |
| 97 | pdf_map_range_to_range(ctx, cmap, 0, high, 0); |
| 98 | pdf_sort_cmap(ctx, cmap); |
| 99 | pdf_set_cmap_wmode(ctx, cmap, wmode); |
| 100 | } |
| 101 | fz_catch(ctx) |
| 102 | { |
| 103 | pdf_drop_cmap(ctx, cmap); |
| 104 | fz_rethrow(ctx); |
| 105 | } |
| 106 | return cmap; |
| 107 | } |
| 108 | |
| 109 | /* |
| 110 | * Load built-in CMap resource. |
| 111 | */ |
| 112 | |
| 113 | #ifdef NO_CJK |
| 114 | |
| 115 | pdf_cmap * |
| 116 | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
| 117 | { |
| 118 | if (!strcmp(name, "Identity-H" )) return pdf_new_identity_cmap(ctx, 0, 2); |
| 119 | if (!strcmp(name, "Identity-V" )) return pdf_new_identity_cmap(ctx, 1, 2); |
| 120 | return NULL; |
| 121 | } |
| 122 | |
| 123 | #else |
| 124 | |
| 125 | /* To regenerate this list: :r !bash scripts/runcmapdump.sh */ |
| 126 | |
| 127 | #include "cmaps/83pv-RKSJ-H.h" |
| 128 | #include "cmaps/90ms-RKSJ-H.h" |
| 129 | #include "cmaps/90ms-RKSJ-V.h" |
| 130 | #include "cmaps/90msp-RKSJ-H.h" |
| 131 | #include "cmaps/90msp-RKSJ-V.h" |
| 132 | #include "cmaps/90pv-RKSJ-H.h" |
| 133 | #include "cmaps/Add-RKSJ-H.h" |
| 134 | #include "cmaps/Add-RKSJ-V.h" |
| 135 | #include "cmaps/Adobe-CNS1-UCS2.h" |
| 136 | #include "cmaps/Adobe-GB1-UCS2.h" |
| 137 | #include "cmaps/Adobe-Japan1-UCS2.h" |
| 138 | #include "cmaps/Adobe-Korea1-UCS2.h" |
| 139 | #include "cmaps/B5pc-H.h" |
| 140 | #include "cmaps/B5pc-V.h" |
| 141 | #include "cmaps/CNS-EUC-H.h" |
| 142 | #include "cmaps/CNS-EUC-V.h" |
| 143 | #include "cmaps/ETen-B5-H.h" |
| 144 | #include "cmaps/ETen-B5-V.h" |
| 145 | #include "cmaps/ETenms-B5-H.h" |
| 146 | #include "cmaps/ETenms-B5-V.h" |
| 147 | #include "cmaps/EUC-H.h" |
| 148 | #include "cmaps/EUC-V.h" |
| 149 | #include "cmaps/Ext-RKSJ-H.h" |
| 150 | #include "cmaps/Ext-RKSJ-V.h" |
| 151 | #include "cmaps/GB-EUC-H.h" |
| 152 | #include "cmaps/GB-EUC-V.h" |
| 153 | #include "cmaps/GBK-EUC-H.h" |
| 154 | #include "cmaps/GBK-EUC-V.h" |
| 155 | #include "cmaps/GBK-X.h" |
| 156 | #include "cmaps/GBK2K-H.h" |
| 157 | #include "cmaps/GBK2K-V.h" |
| 158 | #include "cmaps/GBKp-EUC-H.h" |
| 159 | #include "cmaps/GBKp-EUC-V.h" |
| 160 | #include "cmaps/GBpc-EUC-H.h" |
| 161 | #include "cmaps/GBpc-EUC-V.h" |
| 162 | #include "cmaps/H.h" |
| 163 | #include "cmaps/HKscs-B5-H.h" |
| 164 | #include "cmaps/HKscs-B5-V.h" |
| 165 | #include "cmaps/Identity-H.h" |
| 166 | #include "cmaps/Identity-V.h" |
| 167 | #include "cmaps/KSC-EUC-H.h" |
| 168 | #include "cmaps/KSC-EUC-V.h" |
| 169 | #include "cmaps/KSCms-UHC-H.h" |
| 170 | #include "cmaps/KSCms-UHC-HW-H.h" |
| 171 | #include "cmaps/KSCms-UHC-HW-V.h" |
| 172 | #include "cmaps/KSCms-UHC-V.h" |
| 173 | #include "cmaps/KSCpc-EUC-H.h" |
| 174 | #include "cmaps/UniCNS-UCS2-H.h" |
| 175 | #include "cmaps/UniCNS-UCS2-V.h" |
| 176 | #include "cmaps/UniCNS-UTF16-H.h" |
| 177 | #include "cmaps/UniCNS-UTF16-V.h" |
| 178 | #include "cmaps/UniCNS-X.h" |
| 179 | #include "cmaps/UniGB-UCS2-H.h" |
| 180 | #include "cmaps/UniGB-UCS2-V.h" |
| 181 | #include "cmaps/UniGB-UTF16-H.h" |
| 182 | #include "cmaps/UniGB-UTF16-V.h" |
| 183 | #include "cmaps/UniGB-X.h" |
| 184 | #include "cmaps/UniJIS-UCS2-H.h" |
| 185 | #include "cmaps/UniJIS-UCS2-HW-H.h" |
| 186 | #include "cmaps/UniJIS-UCS2-HW-V.h" |
| 187 | #include "cmaps/UniJIS-UCS2-V.h" |
| 188 | #include "cmaps/UniJIS-UTF16-H.h" |
| 189 | #include "cmaps/UniJIS-UTF16-V.h" |
| 190 | #include "cmaps/UniJIS-X.h" |
| 191 | #include "cmaps/UniKS-UCS2-H.h" |
| 192 | #include "cmaps/UniKS-UCS2-V.h" |
| 193 | #include "cmaps/UniKS-UTF16-H.h" |
| 194 | #include "cmaps/UniKS-UTF16-V.h" |
| 195 | #include "cmaps/UniKS-X.h" |
| 196 | #include "cmaps/V.h" |
| 197 | |
| 198 | static pdf_cmap *table[] = { |
| 199 | &cmap_83pv_RKSJ_H, |
| 200 | &cmap_90ms_RKSJ_H, |
| 201 | &cmap_90ms_RKSJ_V, |
| 202 | &cmap_90msp_RKSJ_H, |
| 203 | &cmap_90msp_RKSJ_V, |
| 204 | &cmap_90pv_RKSJ_H, |
| 205 | &cmap_Add_RKSJ_H, |
| 206 | &cmap_Add_RKSJ_V, |
| 207 | &cmap_Adobe_CNS1_UCS2, |
| 208 | &cmap_Adobe_GB1_UCS2, |
| 209 | &cmap_Adobe_Japan1_UCS2, |
| 210 | &cmap_Adobe_Korea1_UCS2, |
| 211 | &cmap_B5pc_H, |
| 212 | &cmap_B5pc_V, |
| 213 | &cmap_CNS_EUC_H, |
| 214 | &cmap_CNS_EUC_V, |
| 215 | &cmap_ETen_B5_H, |
| 216 | &cmap_ETen_B5_V, |
| 217 | &cmap_ETenms_B5_H, |
| 218 | &cmap_ETenms_B5_V, |
| 219 | &cmap_EUC_H, |
| 220 | &cmap_EUC_V, |
| 221 | &cmap_Ext_RKSJ_H, |
| 222 | &cmap_Ext_RKSJ_V, |
| 223 | &cmap_GB_EUC_H, |
| 224 | &cmap_GB_EUC_V, |
| 225 | &cmap_GBK_EUC_H, |
| 226 | &cmap_GBK_EUC_V, |
| 227 | &cmap_GBK_X, |
| 228 | &cmap_GBK2K_H, |
| 229 | &cmap_GBK2K_V, |
| 230 | &cmap_GBKp_EUC_H, |
| 231 | &cmap_GBKp_EUC_V, |
| 232 | &cmap_GBpc_EUC_H, |
| 233 | &cmap_GBpc_EUC_V, |
| 234 | &cmap_H, |
| 235 | &cmap_HKscs_B5_H, |
| 236 | &cmap_HKscs_B5_V, |
| 237 | &cmap_Identity_H, |
| 238 | &cmap_Identity_V, |
| 239 | &cmap_KSC_EUC_H, |
| 240 | &cmap_KSC_EUC_V, |
| 241 | &cmap_KSCms_UHC_H, |
| 242 | &cmap_KSCms_UHC_HW_H, |
| 243 | &cmap_KSCms_UHC_HW_V, |
| 244 | &cmap_KSCms_UHC_V, |
| 245 | &cmap_KSCpc_EUC_H, |
| 246 | &cmap_UniCNS_UCS2_H, |
| 247 | &cmap_UniCNS_UCS2_V, |
| 248 | &cmap_UniCNS_UTF16_H, |
| 249 | &cmap_UniCNS_UTF16_V, |
| 250 | &cmap_UniCNS_X, |
| 251 | &cmap_UniGB_UCS2_H, |
| 252 | &cmap_UniGB_UCS2_V, |
| 253 | &cmap_UniGB_UTF16_H, |
| 254 | &cmap_UniGB_UTF16_V, |
| 255 | &cmap_UniGB_X, |
| 256 | &cmap_UniJIS_UCS2_H, |
| 257 | &cmap_UniJIS_UCS2_HW_H, |
| 258 | &cmap_UniJIS_UCS2_HW_V, |
| 259 | &cmap_UniJIS_UCS2_V, |
| 260 | &cmap_UniJIS_UTF16_H, |
| 261 | &cmap_UniJIS_UTF16_V, |
| 262 | &cmap_UniJIS_X, |
| 263 | &cmap_UniKS_UCS2_H, |
| 264 | &cmap_UniKS_UCS2_V, |
| 265 | &cmap_UniKS_UTF16_H, |
| 266 | &cmap_UniKS_UTF16_V, |
| 267 | &cmap_UniKS_X, |
| 268 | &cmap_V, |
| 269 | }; |
| 270 | |
| 271 | pdf_cmap * |
| 272 | pdf_load_builtin_cmap(fz_context *ctx, const char *name) |
| 273 | { |
| 274 | int r = nelem(table)-1; |
| 275 | int l = 0; |
| 276 | while (l <= r) |
| 277 | { |
| 278 | int m = (l + r) >> 1; |
| 279 | int c = strcmp(name, table[m]->cmap_name); |
| 280 | if (c < 0) |
| 281 | r = m - 1; |
| 282 | else if (c > 0) |
| 283 | l = m + 1; |
| 284 | else |
| 285 | return table[m]; |
| 286 | } |
| 287 | return NULL; |
| 288 | } |
| 289 | |
| 290 | #endif |
| 291 | |
| 292 | /* |
| 293 | * Load predefined CMap from system. |
| 294 | */ |
| 295 | pdf_cmap * |
| 296 | pdf_load_system_cmap(fz_context *ctx, const char *cmap_name) |
| 297 | { |
| 298 | pdf_cmap *usecmap; |
| 299 | pdf_cmap *cmap; |
| 300 | |
| 301 | cmap = pdf_load_builtin_cmap(ctx, cmap_name); |
| 302 | if (!cmap) |
| 303 | fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s" , cmap_name); |
| 304 | |
| 305 | if (cmap->usecmap_name[0] && !cmap->usecmap) |
| 306 | { |
| 307 | usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); |
| 308 | if (!usecmap) |
| 309 | fz_throw(ctx, FZ_ERROR_GENERIC, "no builtin cmap file: %s" , cmap->usecmap_name); |
| 310 | pdf_set_usecmap(ctx, cmap, usecmap); |
| 311 | } |
| 312 | |
| 313 | return cmap; |
| 314 | } |
| 315 | |