1 | /* Pango |
2 | * pango-break.h: |
3 | * |
4 | * Copyright (C) 1999 Red Hat Software |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | */ |
21 | |
22 | #ifndef __PANGO_BREAK_H__ |
23 | #define __PANGO_BREAK_H__ |
24 | |
25 | #include <glib.h> |
26 | |
27 | G_BEGIN_DECLS |
28 | |
29 | #include <pango/pango-item.h> |
30 | |
31 | /* Logical attributes of a character. |
32 | */ |
33 | /** |
34 | * PangoLogAttr: |
35 | * @is_line_break: if set, can break line in front of character |
36 | * @is_mandatory_break: if set, must break line in front of character |
37 | * @is_char_break: if set, can break here when doing character wrapping |
38 | * @is_white: is whitespace character |
39 | * @is_cursor_position: if set, cursor can appear in front of character. |
40 | * i.e. this is a grapheme boundary, or the first character |
41 | * in the text. |
42 | * This flag implements Unicode's |
43 | * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme |
44 | * Cluster Boundaries</ulink> semantics. |
45 | * @is_word_start: is first character in a word |
46 | * @is_word_end: is first non-word char after a word |
47 | * Note that in degenerate cases, you could have both @is_word_start |
48 | * and @is_word_end set for some character. |
49 | * @is_sentence_boundary: is a sentence boundary. |
50 | * There are two ways to divide sentences. The first assigns all |
51 | * inter-sentence whitespace/control/format chars to some sentence, |
52 | * so all chars are in some sentence; @is_sentence_boundary denotes |
53 | * the boundaries there. The second way doesn't assign |
54 | * between-sentence spaces, etc. to any sentence, so |
55 | * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences. |
56 | * @is_sentence_start: is first character in a sentence |
57 | * @is_sentence_end: is first char after a sentence. |
58 | * Note that in degenerate cases, you could have both @is_sentence_start |
59 | * and @is_sentence_end set for some character. (e.g. no space after a |
60 | * period, so the next sentence starts right away) |
61 | * @backspace_deletes_character: if set, backspace deletes one character |
62 | * rather than the entire grapheme cluster. This |
63 | * field is only meaningful on grapheme |
64 | * boundaries (where @is_cursor_position is |
65 | * set). In some languages, the full grapheme |
66 | * (e.g. letter + diacritics) is considered a |
67 | * unit, while in others, each decomposed |
68 | * character in the grapheme is a unit. In the |
69 | * default implementation of pango_break(), this |
70 | * bit is set on all grapheme boundaries except |
71 | * those following Latin, Cyrillic or Greek base characters. |
72 | * @is_expandable_space: is a whitespace character that can possibly be |
73 | * expanded for justification purposes. (Since: 1.18) |
74 | * @is_word_boundary: is a word boundary. |
75 | * More specifically, means that this is not a position in the middle |
76 | * of a word. For example, both sides of a punctuation mark are |
77 | * considered word boundaries. This flag is particularly useful when |
78 | * selecting text word-by-word. |
79 | * This flag implements Unicode's |
80 | * <ulink url="http://www.unicode.org/reports/tr29/">Word |
81 | * Boundaries</ulink> semantics. (Since: 1.22) |
82 | * |
83 | * The #PangoLogAttr structure stores information |
84 | * about the attributes of a single character. |
85 | */ |
86 | struct _PangoLogAttr |
87 | { |
88 | guint is_line_break : 1; /* Can break line in front of character */ |
89 | |
90 | guint is_mandatory_break : 1; /* Must break line in front of character */ |
91 | |
92 | guint is_char_break : 1; /* Can break here when doing char wrap */ |
93 | |
94 | guint is_white : 1; /* Whitespace character */ |
95 | |
96 | /* Cursor can appear in front of character (i.e. this is a grapheme |
97 | * boundary, or the first character in the text). |
98 | */ |
99 | guint is_cursor_position : 1; |
100 | |
101 | /* Note that in degenerate cases, you could have both start/end set on |
102 | * some text, most likely for sentences (e.g. no space after a period, so |
103 | * the next sentence starts right away). |
104 | */ |
105 | |
106 | guint is_word_start : 1; /* first character in a word */ |
107 | guint is_word_end : 1; /* is first non-word char after a word */ |
108 | |
109 | /* There are two ways to divide sentences. The first assigns all |
110 | * intersentence whitespace/control/format chars to some sentence, |
111 | * so all chars are in some sentence; is_sentence_boundary denotes |
112 | * the boundaries there. The second way doesn't assign |
113 | * between-sentence spaces, etc. to any sentence, so |
114 | * is_sentence_start/is_sentence_end mark the boundaries of those |
115 | * sentences. |
116 | */ |
117 | guint is_sentence_boundary : 1; |
118 | guint is_sentence_start : 1; /* first character in a sentence */ |
119 | guint is_sentence_end : 1; /* first non-sentence char after a sentence */ |
120 | |
121 | /* If set, backspace deletes one character rather than |
122 | * the entire grapheme cluster. |
123 | */ |
124 | guint backspace_deletes_character : 1; |
125 | |
126 | /* Only few space variants (U+0020 and U+00A0) have variable |
127 | * width during justification. |
128 | */ |
129 | guint is_expandable_space : 1; |
130 | |
131 | /* Word boundary as defined by UAX#29 */ |
132 | guint is_word_boundary : 1; /* is NOT in the middle of a word */ |
133 | }; |
134 | |
135 | /* Determine information about cluster/word/line breaks in a string |
136 | * of Unicode text. |
137 | */ |
138 | PANGO_AVAILABLE_IN_ALL |
139 | void pango_break (const gchar *text, |
140 | int length, |
141 | PangoAnalysis *analysis, |
142 | PangoLogAttr *attrs, |
143 | int attrs_len); |
144 | |
145 | PANGO_AVAILABLE_IN_ALL |
146 | void pango_find_paragraph_boundary (const gchar *text, |
147 | gint length, |
148 | gint *paragraph_delimiter_index, |
149 | gint *next_paragraph_start); |
150 | |
151 | PANGO_AVAILABLE_IN_ALL |
152 | void pango_get_log_attrs (const char *text, |
153 | int length, |
154 | int level, |
155 | PangoLanguage *language, |
156 | PangoLogAttr *log_attrs, |
157 | int attrs_len); |
158 | |
159 | #ifdef PANGO_ENABLE_ENGINE |
160 | |
161 | /* This is the default break algorithm, used if no language |
162 | * engine overrides it. Normally you should use pango_break() |
163 | * instead; this function is mostly useful for chaining up |
164 | * from a language engine override. |
165 | */ |
166 | PANGO_AVAILABLE_IN_ALL |
167 | void pango_default_break (const gchar *text, |
168 | int length, |
169 | PangoAnalysis *analysis, |
170 | PangoLogAttr *attrs, |
171 | int attrs_len); |
172 | |
173 | #endif /* PANGO_ENABLE_ENGINE */ |
174 | |
175 | G_END_DECLS |
176 | |
177 | #endif /* __PANGO_BREAK_H__ */ |
178 | |