/* Pango
 * pango-break.h:
 *
 * Copyright (C) 1999 Red Hat Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
21
22#ifndef __PANGO_BREAK_H__
23#define __PANGO_BREAK_H__
24
25#include <glib.h>
26
27G_BEGIN_DECLS
28
29#include <pango/pango-item.h>
30
31/* Logical attributes of a character.
32 */
33/**
34 * PangoLogAttr:
35 * @is_line_break: if set, can break line in front of character
36 * @is_mandatory_break: if set, must break line in front of character
37 * @is_char_break: if set, can break here when doing character wrapping
38 * @is_white: is whitespace character
39 * @is_cursor_position: if set, cursor can appear in front of character.
40 * i.e. this is a grapheme boundary, or the first character
41 * in the text.
42 * This flag implements Unicode's
43 * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
44 * Cluster Boundaries</ulink> semantics.
45 * @is_word_start: is first character in a word
46 * @is_word_end: is first non-word char after a word
47 * Note that in degenerate cases, you could have both @is_word_start
48 * and @is_word_end set for some character.
49 * @is_sentence_boundary: is a sentence boundary.
50 * There are two ways to divide sentences. The first assigns all
51 * inter-sentence whitespace/control/format chars to some sentence,
52 * so all chars are in some sentence; @is_sentence_boundary denotes
53 * the boundaries there. The second way doesn't assign
54 * between-sentence spaces, etc. to any sentence, so
55 * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
56 * @is_sentence_start: is first character in a sentence
57 * @is_sentence_end: is first char after a sentence.
58 * Note that in degenerate cases, you could have both @is_sentence_start
59 * and @is_sentence_end set for some character. (e.g. no space after a
60 * period, so the next sentence starts right away)
61 * @backspace_deletes_character: if set, backspace deletes one character
62 * rather than the entire grapheme cluster. This
63 * field is only meaningful on grapheme
64 * boundaries (where @is_cursor_position is
65 * set). In some languages, the full grapheme
66 * (e.g. letter + diacritics) is considered a
67 * unit, while in others, each decomposed
68 * character in the grapheme is a unit. In the
69 * default implementation of pango_break(), this
70 * bit is set on all grapheme boundaries except
71 * those following Latin, Cyrillic or Greek base characters.
72 * @is_expandable_space: is a whitespace character that can possibly be
73 * expanded for justification purposes. (Since: 1.18)
74 * @is_word_boundary: is a word boundary.
75 * More specifically, means that this is not a position in the middle
76 * of a word. For example, both sides of a punctuation mark are
77 * considered word boundaries. This flag is particularly useful when
78 * selecting text word-by-word.
79 * This flag implements Unicode's
80 * <ulink url="http://www.unicode.org/reports/tr29/">Word
81 * Boundaries</ulink> semantics. (Since: 1.22)
82 *
83 * The #PangoLogAttr structure stores information
84 * about the attributes of a single character.
85 */
86struct _PangoLogAttr
87{
88 guint is_line_break : 1; /* Can break line in front of character */
89
90 guint is_mandatory_break : 1; /* Must break line in front of character */
91
92 guint is_char_break : 1; /* Can break here when doing char wrap */
93
94 guint is_white : 1; /* Whitespace character */
95
96 /* Cursor can appear in front of character (i.e. this is a grapheme
97 * boundary, or the first character in the text).
98 */
99 guint is_cursor_position : 1;
100
101 /* Note that in degenerate cases, you could have both start/end set on
102 * some text, most likely for sentences (e.g. no space after a period, so
103 * the next sentence starts right away).
104 */
105
106 guint is_word_start : 1; /* first character in a word */
107 guint is_word_end : 1; /* is first non-word char after a word */
108
109 /* There are two ways to divide sentences. The first assigns all
110 * intersentence whitespace/control/format chars to some sentence,
111 * so all chars are in some sentence; is_sentence_boundary denotes
112 * the boundaries there. The second way doesn't assign
113 * between-sentence spaces, etc. to any sentence, so
114 * is_sentence_start/is_sentence_end mark the boundaries of those
115 * sentences.
116 */
117 guint is_sentence_boundary : 1;
118 guint is_sentence_start : 1; /* first character in a sentence */
119 guint is_sentence_end : 1; /* first non-sentence char after a sentence */
120
121 /* If set, backspace deletes one character rather than
122 * the entire grapheme cluster.
123 */
124 guint backspace_deletes_character : 1;
125
126 /* Only few space variants (U+0020 and U+00A0) have variable
127 * width during justification.
128 */
129 guint is_expandable_space : 1;
130
131 /* Word boundary as defined by UAX#29 */
132 guint is_word_boundary : 1; /* is NOT in the middle of a word */
133};
134
135/* Determine information about cluster/word/line breaks in a string
136 * of Unicode text.
137 */
138PANGO_AVAILABLE_IN_ALL
139void pango_break (const gchar *text,
140 int length,
141 PangoAnalysis *analysis,
142 PangoLogAttr *attrs,
143 int attrs_len);
144
145PANGO_AVAILABLE_IN_ALL
146void pango_find_paragraph_boundary (const gchar *text,
147 gint length,
148 gint *paragraph_delimiter_index,
149 gint *next_paragraph_start);
150
151PANGO_AVAILABLE_IN_ALL
152void pango_get_log_attrs (const char *text,
153 int length,
154 int level,
155 PangoLanguage *language,
156 PangoLogAttr *log_attrs,
157 int attrs_len);
158
#ifdef PANGO_ENABLE_ENGINE

/* This is the default break algorithm, used if no language
 * engine overrides it. Normally you should use pango_break()
 * instead; this function is mostly useful for chaining up
 * from a language engine override.
 *
 * The declaration is only visible when PANGO_ENABLE_ENGINE is defined
 * before this header is included.  The parameter list is identical to
 * that of pango_break().
 */
PANGO_AVAILABLE_IN_ALL
void pango_default_break (const gchar   *text,
                          int            length,
                          PangoAnalysis *analysis,
                          PangoLogAttr  *attrs,
                          int            attrs_len);

#endif /* PANGO_ENABLE_ENGINE */
174
175G_END_DECLS
176
177#endif /* __PANGO_BREAK_H__ */
178