text.c source code [MuPDF/source/fitz/text.c]

1	#include "mupdf/fitz.h"
2	#include "fitz-imp.h"
3
4	#include <string.h>
5
6	/*
7	Create a new empty fz_text object.
8
9	Throws exception on failure to allocate.
10	*/
11	fz_text *
12	fz_new_text(fz_context *ctx)
13	{
14	fz_text *text = fz_malloc_struct(ctx, fz_text);
15	text->refs = `1`;
16	return text;
17	}
18
19	fz_text *
20	fz_keep_text(fz_context ctx, const* fz_text *textc)
21	{
22	fz_text text = (fz_text )textc; / Explicit cast away of const /
23
24	return fz_keep_imp(ctx, text, &text->refs);
25	}
26
27	void
28	fz_drop_text(fz_context ctx, const* fz_text *textc)
29	{
30	fz_text text = (fz_text )textc; / Explicit cast away of const /
31
32	if (fz_drop_imp(ctx, text, &text->refs))
33	{
34	fz_text_span *span = text->head;
35	while (span)
36	{
37	fz_text_span *next = span->next;
38	fz_drop_font(ctx, span->font);
39	fz_free(ctx, span->items);
40	fz_free(ctx, span);
41	span = next;
42	}
43	fz_free(ctx, text);
44	}
45	}
46
47	static fz_text_span *
48	fz_new_text_span(fz_context ctx, fz_font font, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
49	{
50	fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
51	span->font = fz_keep_font(ctx, font);
52	span->wmode = wmode;
53	span->bidi_level = bidi_level;
54	span->markup_dir = markup_dir;
55	span->language = language;
56	span->trm = trm;
57	span->trm.e = `0`;
58	span->trm.f = `0`;
59	return span;
60	}
61
62	static fz_text_span *
63	fz_add_text_span(fz_context ctx, fz_text text, fz_font font, int* wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language, fz_matrix trm)
64	{
65	if (!text->tail)
66	{
67	text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
68	}
69	else if (text->tail->font != font \|\|
70	text->tail->wmode != wmode \|\|
71	text->tail->bidi_level != bidi_level \|\|
72	text->tail->markup_dir != markup_dir \|\|
73	text->tail->language != language \|\|
74	text->tail->trm.a != trm.a \|\|
75	text->tail->trm.b != trm.b \|\|
76	text->tail->trm.c != trm.c \|\|
77	text->tail->trm.d != trm.d)
78	{
79	text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
80	}
81	return text->tail;
82	}
83
84	static void
85	fz_grow_text_span(fz_context ctx, fz_text_span span, int n)
86	{
87	int new_cap = span->cap;
88	if (span->len + n < new_cap)
89	return;
90	while (span->len + n > new_cap)
91	new_cap = new_cap + `36`;
92	span->items = fz_realloc_array(ctx, span->items, new_cap, fz_text_item);
93	span->cap = new_cap;
94	}
95
96	/*
97	Add a glyph/unicode value to a text object.
98
99	text: Text object to add to.
100
101	font: The font the glyph should be added in.
102
103	trm: The transform to use for the glyph.
104
105	glyph: The glyph id to add.
106
107	unicode: The unicode character for the glyph.
108
109	wmode: 1 for vertical mode, 0 for horizontal.
110
111	bidi_level: The bidirectional level for this glyph.
112
113	markup_dir: The direction of the text as specified in the
114	markup.
115
116	language: The language in use (if known, 0 otherwise)
117	(e.g. FZ_LANG_zh_Hans).
118
119	Throws exception on failure to allocate.
120	*/
121	void
122	fz_show_glyph(fz_context ctx, fz_text text, fz_font font, fz_matrix trm, int* gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang)
123	{
124	fz_text_span *span;
125
126	if (text->refs != `1`)
127	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects");
128
129	span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
130
131	fz_grow_text_span(ctx, span, `1`);
132
133	span->items[span->len].ucs = ucs;
134	span->items[span->len].gid = gid;
135	span->items[span->len].x = trm.e;
136	span->items[span->len].y = trm.f;
137	span->len++;
138	}
139
140	/*
141	Add a UTF8 string to a text object.
142
143	text: Text object to add to.
144
145	font: The font the string should be added in.
146
147	trm: The transform to use.
148
149	s: The utf-8 string to add.
150
151	wmode: 1 for vertical mode, 0 for horizontal.
152
153	bidi_level: The bidirectional level for this glyph.
154
155	markup_dir: The direction of the text as specified in the
156	markup.
157
158	language: The language in use (if known, 0 otherwise)
159	(e.g. FZ_LANG_zh_Hans).
160
161	Returns the transform updated with the advance width of the string.
162	*/
163	fz_matrix
164	fz_show_string(fz_context ctx, fz_text text, fz_font user_font, fz_matrix trm, const* char s, int* wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language)
165	{
166	fz_font *font;
167	int gid, ucs;
168	float adv;
169
170	while (*s)
171	{
172	s += fz_chartorune(&ucs, s);
173	gid = fz_encode_character_with_fallback(ctx, user_font, ucs, `0`, language, &font);
174	fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language);
175	adv = fz_advance_glyph(ctx, font, gid, wmode);
176	if (wmode == `0`)
177	trm = fz_pre_translate(trm, adv, `0`);
178	else
179	trm = fz_pre_translate(trm, `0`, -adv);
180	}
181
182	return trm;
183	}
184
185	/*
186	Find the bounds of a given text object.
187
188	text: The text object to find the bounds of.
189
190	stroke: Pointer to the stroke attributes (for stroked
191	text), or NULL (for filled text).
192
193	ctm: The matrix in use.
194
195	r: pointer to storage for the bounds.
196
197	Returns a pointer to r, which is updated to contain the
198	bounding box for the text object.
199	*/
200	fz_rect
201	fz_bound_text(fz_context ctx, const* fz_text text, const* fz_stroke_state *stroke, fz_matrix ctm)
202	{
203	fz_text_span *span;
204	fz_matrix tm, trm;
205	fz_rect gbox;
206	fz_rect bbox;
207	int i;
208
209	bbox = fz_empty_rect;
210
211	for (span = text->head; span; span = span->next)
212	{
213	if (span->len > `0`)
214	{
215	tm = span->trm;
216	for (i = `0`; i < span->len; i++)
217	{
218	if (span->items[i].gid >= `0`)
219	{
220	tm.e = span->items[i].x;
221	tm.f = span->items[i].y;
222	trm = fz_concat(tm, ctm);
223	gbox = fz_bound_glyph(ctx, span->font, span->items[i].gid, trm);
224	bbox = fz_union_rect(bbox, gbox);
225	}
226	}
227	}
228	}
229
230	if (!fz_is_empty_rect(bbox))
231	{
232	if (stroke)
233	bbox = fz_adjust_rect_for_stroke(ctx, bbox, stroke, ctm);
234
235	/ Compensate for the glyph cache limited positioning precision /
236	bbox.x0 -= `1`;
237	bbox.y0 -= `1`;
238	bbox.x1 += `1`;
239	bbox.y1 += `1`;
240	}
241
242	return bbox;
243	}
244
245	/*
246	Convert ISO 639 (639-{1,2,3,5}) language specification
247	strings losslessly to a 15 bit fz_text_language code.
248
249	No validation is carried out. Obviously invalid (out
250	of spec) codes will be mapped to FZ_LANG_UNSET, but
251	well-formed (but undefined) codes will be blithely
252	accepted.
253	*/
254	fz_text_language fz_text_language_from_string(const char *str)
255	{
256	fz_text_language lang;
257
258	if (str == NULL)
259	return FZ_LANG_UNSET;
260
261	if (!strcmp(str, "zh-Hant") \|\|
262	!strcmp(str, "zh-HK") \|\|
263	!strcmp(str, "zh-MO") \|\|
264	!strcmp(str, "zh-SG") \|\|
265	!strcmp(str, "zh-TW"))
266	return FZ_LANG_zh_Hant;
267	if (!strcmp(str, "zh-Hans") \|\|
268	!strcmp(str, "zh-CN"))
269	return FZ_LANG_zh_Hans;
270
271	/ 1st char /
272	if (str[`0`] >= `'a'` && str[`0`] <= `'z'`)
273	lang = str[`0`] - `'a'` + `1`;
274	else if (str[`0`] >= `'A'` && str[`0`] <= `'Z'`)
275	lang = str[`0`] - `'A'` + `1`;
276	else
277	return `0`;
278
279	/ 2nd char /
280	if (str[`1`] >= `'a'` && str[`1`] <= `'z'`)
281	lang += `27`*(str[`1`] - `'a'` + `1`);
282	else if (str[`1`] >= `'A'` && str[`1`] <= `'Z'`)
283	lang += `27`*(str[`1`] - `'A'` + `1`);
284	else
285	return `0`; / There are no valid 1 char language codes /
286
287	/ 3nd char /
288	if (str[`2`] >= `'a'` && str[`2`] <= `'z'`)
289	lang += `27``27`(str[`2`] - `'a'` + `1`);
290	else if (str[`2`] >= `'A'` && str[`2`] <= `'Z'`)
291	lang += `27``27`(str[`2`] - `'A'` + `1`);
292
293	/ We don't support iso 639-6 4 char codes, cos the standard*
294	* has been withdrawn, and no one uses them. */
295	return lang;
296	}
297
298	/*
299	Recover ISO 639 (639-{1,2,3,5}) language specification
300	strings losslessly from a 15 bit fz_text_language code.
301
302	No validation is carried out. See note above.
303	*/
304	char fz_string_from_text_language(char* str[`8`], fz_text_language lang)
305	{
306	int c;
307
308	/ str is supposed to be at least 8 chars in size /
309	if (str == NULL)
310	return NULL;
311
312	if (lang == FZ_LANG_zh_Hant)
313	fz_strlcpy(str, "zh-Hant", `8`);
314	else if (lang == FZ_LANG_zh_Hans)
315	fz_strlcpy(str, "zh-Hans", `8`);
316	else
317	{
318	c = lang % `27`;
319	lang = lang / `27`;
320	str[`0`] = c == `0` ? `0` : c - `1` + `'a'`;
321	c = lang % `27`;
322	lang = lang / `27`;
323	str[`1`] = c == `0` ? `0` : c - `1` + `'a'`;
324	c = lang % `27`;
325	str[`2`] = c == `0` ? `0` : c - `1` + `'a'`;
326	str[`3`] = `0`;
327	}
328
329	return str;
330	}
331

Browse the source code of MuPDF/source/fitz/text.c