| 1 | /* Pango |
| 2 | * pango-break.h: |
| 3 | * |
| 4 | * Copyright (C) 1999 Red Hat Software |
| 5 | * |
| 6 | * This library is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Library General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * This library is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Library General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Library General Public |
| 17 | * License along with this library; if not, write to the |
| 18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 19 | * Boston, MA 02111-1307, USA. |
| 20 | */ |
| 21 | |
| 22 | #ifndef __PANGO_BREAK_H__ |
| 23 | #define __PANGO_BREAK_H__ |
| 24 | |
| 25 | #include <glib.h> |
| 26 | |
| 27 | G_BEGIN_DECLS |
| 28 | |
| 29 | #include <pango/pango-item.h> |
| 30 | |
| 31 | /* Logical attributes of a character. |
| 32 | */ |
| 33 | /** |
| 34 | * PangoLogAttr: |
| 35 | * @is_line_break: if set, can break line in front of character |
| 36 | * @is_mandatory_break: if set, must break line in front of character |
| 37 | * @is_char_break: if set, can break here when doing character wrapping |
| 38 | * @is_white: is whitespace character |
| 39 | * @is_cursor_position: if set, cursor can appear in front of character, |
| 40 | * i.e. this is a grapheme boundary, or the first character |
| 41 | * in the text. |
| 42 | * This flag implements Unicode's |
| 43 | * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme |
| 44 | * Cluster Boundaries</ulink> semantics. |
| 45 | * @is_word_start: is first character in a word |
| 46 | * @is_word_end: is first non-word char after a word |
| 47 | * Note that in degenerate cases, you could have both @is_word_start |
| 48 | * and @is_word_end set for some character. |
| 49 | * @is_sentence_boundary: is a sentence boundary. |
| 50 | * There are two ways to divide sentences. The first assigns all |
| 51 | * inter-sentence whitespace/control/format chars to some sentence, |
| 52 | * so all chars are in some sentence; @is_sentence_boundary denotes |
| 53 | * the boundaries there. The second way doesn't assign |
| 54 | * between-sentence spaces, etc. to any sentence, so |
| 55 | * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences. |
| 56 | * @is_sentence_start: is first character in a sentence |
| 57 | * @is_sentence_end: is first char after a sentence. |
| 58 | * Note that in degenerate cases, you could have both @is_sentence_start |
| 59 | * and @is_sentence_end set for some character. (e.g. no space after a |
| 60 | * period, so the next sentence starts right away) |
| 61 | * @backspace_deletes_character: if set, backspace deletes one character |
| 62 | * rather than the entire grapheme cluster. This |
| 63 | * field is only meaningful on grapheme |
| 64 | * boundaries (where @is_cursor_position is |
| 65 | * set). In some languages, the full grapheme |
| 66 | * (e.g. letter + diacritics) is considered a |
| 67 | * unit, while in others, each decomposed |
| 68 | * character in the grapheme is a unit. In the |
| 69 | * default implementation of pango_break(), this |
| 70 | * bit is set on all grapheme boundaries except |
| 71 | * those following Latin, Cyrillic or Greek base characters. |
| 72 | * @is_expandable_space: is a whitespace character that can possibly be |
| 73 | * expanded for justification purposes. (Since: 1.18) |
| 74 | * @is_word_boundary: is a word boundary. |
| 75 | * More specifically, means that this is not a position in the middle |
| 76 | * of a word. For example, both sides of a punctuation mark are |
| 77 | * considered word boundaries. This flag is particularly useful when |
| 78 | * selecting text word-by-word. |
| 79 | * This flag implements Unicode's |
| 80 | * <ulink url="http://www.unicode.org/reports/tr29/">Word |
| 81 | * Boundaries</ulink> semantics. (Since: 1.22) |
| 82 | * |
| 83 | * The #PangoLogAttr structure stores information |
| 84 | * about the attributes of a single character. |
| 85 | */ |
| 86 | struct _PangoLogAttr |
| 87 | { /* Per-character flags: all 13 members below are 1-bit guint bitfields. */ |
| 88 | guint is_line_break : 1; /* Can break line in front of character */ |
| 89 | |
| 90 | guint is_mandatory_break : 1; /* Must break line in front of character */ |
| 91 | |
| 92 | guint is_char_break : 1; /* Can break here when doing char wrap */ |
| 93 | |
| 94 | guint is_white : 1; /* Whitespace character */ |
| 95 | |
| 96 | /* Cursor can appear in front of character (i.e. this is a grapheme |
| 97 | * boundary, or the first character in the text). |
| 98 | */ |
| 99 | guint is_cursor_position : 1; |
| 100 | |
| 101 | /* Note that in degenerate cases, the start and end flags can both be set |
| 102 | * on the same character, most likely for sentences (e.g. no space after a |
| 103 | * period, so the next sentence starts right away). |
| 104 | */ |
| 105 | |
| 106 | guint is_word_start : 1; /* first character in a word */ |
| 107 | guint is_word_end : 1; /* first non-word char after a word */ |
| 108 | |
| 109 | /* There are two ways to divide sentences. The first assigns all |
| 110 | * inter-sentence whitespace/control/format chars to some sentence, |
| 111 | * so all chars are in some sentence; is_sentence_boundary denotes |
| 112 | * the boundaries there. The second way doesn't assign |
| 113 | * between-sentence spaces, etc. to any sentence, so |
| 114 | * is_sentence_start/is_sentence_end mark the boundaries of those |
| 115 | * sentences. |
| 116 | */ |
| 117 | guint is_sentence_boundary : 1; /* boundary under the first scheme described above */ |
| 118 | guint is_sentence_start : 1; /* first character in a sentence */ |
| 119 | guint is_sentence_end : 1; /* first non-sentence char after a sentence */ |
| 120 | |
| 121 | /* If set, backspace deletes one character rather than |
| 122 | * the entire grapheme cluster. Only meaningful on grapheme |
| 123 | * boundaries, i.e. where is_cursor_position is set. */ |
| 124 | guint backspace_deletes_character : 1; |
| 125 | |
| 126 | /* Only a few space variants (U+0020 and U+00A0) have variable |
| 127 | * width during justification. |
| 128 | */ |
| 129 | guint is_expandable_space : 1; |
| 130 | |
| 131 | /* Word boundary as defined by UAX#29 */ |
| 132 | guint is_word_boundary : 1; /* is NOT in the middle of a word */ |
| 133 | }; |
| 134 | |
| 135 | /* Determine information about cluster/word/line breaks in a string |
| 136 | * of Unicode text. Returns nothing; the results are presumably written |
| 137 | * into @attrs (TODO confirm against the definition). */ |
| 138 | PANGO_AVAILABLE_IN_ALL |
| 139 | void pango_break (const gchar *text, /* text to analyze; read-only (const) */ |
| 140 | int length, /* length of @text; units not visible here, TODO confirm */ |
| 141 | PangoAnalysis *analysis, /* NOTE(review): type presumably declared in pango-item.h */ |
| 142 | PangoLogAttr *attrs, /* PangoLogAttr array, presumably filled in by the call */ |
| 143 | int attrs_len); /* presumably the number of entries in @attrs; confirm */ |
| 144 | /* Find a paragraph boundary in @text. Returns void, so the result is presumably reported through the two gint* arguments (TODO confirm). */ |
| 145 | PANGO_AVAILABLE_IN_ALL |
| 146 | void pango_find_paragraph_boundary (const gchar *text, /* text to scan; read-only (const) */ |
| 147 | gint length, /* length of @text; units not visible here, TODO confirm */ |
| 148 | gint *paragraph_delimiter_index, /* presumably out: position of the paragraph delimiter */ |
| 149 | gint *next_paragraph_start); /* presumably out: position where the next paragraph begins */ |
| 150 | /* Compute log attrs for @text. Unlike pango_break(), this takes @level and @language rather than a PangoAnalysis. */ |
| 151 | PANGO_AVAILABLE_IN_ALL |
| 152 | void pango_get_log_attrs (const char *text, /* text to analyze; read-only (const) */ |
| 153 | int length, /* length of @text; units not visible here, TODO confirm */ |
| 154 | int level, /* NOTE(review): meaning not visible here; confirm against the definition */ |
| 155 | PangoLanguage *language, /* language of @text, presumably; confirm */ |
| 156 | PangoLogAttr *log_attrs, /* PangoLogAttr array, presumably filled in by the call */ |
| 157 | int attrs_len); /* presumably the number of entries in @log_attrs; confirm */ |
| 158 | |
| 159 | #ifdef PANGO_ENABLE_ENGINE |
| 160 | |
| 161 | /* This is the default break algorithm, used if no language |
| 162 | * engine overrides it. Normally you should use pango_break() |
| 163 | * instead; this function is mostly useful for chaining up |
| 164 | * from a language engine override. It takes the same parameter list as |
| 165 | * pango_break() and is only declared when PANGO_ENABLE_ENGINE is defined. */ |
| 166 | PANGO_AVAILABLE_IN_ALL |
| 167 | void pango_default_break (const gchar *text, |
| 168 | int length, |
| 169 | PangoAnalysis *analysis, |
| 170 | PangoLogAttr *attrs, |
| 171 | int attrs_len); |
| 172 | |
| 173 | #endif /* PANGO_ENABLE_ENGINE */ |
| 174 | |
| 175 | G_END_DECLS |
| 176 | |
| 177 | #endif /* __PANGO_BREAK_H__ */ |
| 178 | |