1 | /* |
2 | libdrawtext - a simple library for fast text rendering in OpenGL |
3 | Copyright (C) 2011 John Tsiombikas <nuclear@member.fsf.org> |
4 | |
5 | This program is free software: you can redistribute it and/or modify |
6 | it under the terms of the GNU Lesser General Public License as published by |
7 | the Free Software Foundation, either version 3 of the License, or |
8 | (at your option) any later version. |
9 | |
10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public License |
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | */ |
18 | #include "drawtext.h" |
19 | |
/* Non-zero when x is not a continuation byte (10xxxxxx): either the top bit
 * is clear (0xxxxxxx) or the two top bits are both set (11xxxxxx).
 */
#define U8_IS_FIRST(x)		(((((x) >> 7) & 1) == 0) || ((((x) >> 6) & 3) == 3))

/* payload bits carried by the first byte, indexed by sequence length */
static const char first_mask[] = {
	0,
	0x7f,	/* single byte, 7 bits valid */
	0x1f,	/* two-bytes, 5 bits valid */
	0xf,	/* three-bytes, 4 bits valid */
	0x7		/* four-bytes, 3 bits valid */
};
/* number of payload bits in the first byte, same indexing as first_mask */
static const char first_shift[] = { 0, 7, 5, 4, 3 };

/* continuation bytes look like 10xxxxxx and carry 6 payload bits each */
#define CONT_PREFIX	0x80
#define CONT_MASK	0x3f
#define CONT_SHIFT	6

/* last charcodes for 1, 2, 3 or 4-byte utf8 chars.
 * A sequence of n bytes holds first_shift[n] + 6 * (n - 1) payload bits,
 * i.e. 7, 11, 16 and 21 bits respectively.
 */
static const int utf8_lastcode[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff };

/* prefix(x) yields the marker bits of the first byte of an x-byte sequence
 * (0x00, 0xc0, 0xe0, 0xf0 for x = 1..4). The complement is taken in unsigned
 * arithmetic because left-shifting a negative int is undefined.
 */
#define prefix_mask(x)	(~(unsigned int)first_mask[x])
#define prefix(x)		((prefix_mask(x) << 1) & 0xff)
40 | |
41 | |
/* Returns a pointer to the character following the one at str.
 *
 * The pointer advances by the length reported by dtx_utf8_nbytes, but stops
 * early at a zero byte. A multi-byte sequence cut short by the end of the
 * string therefore leaves the result on the terminator instead of past it,
 * which keeps `while(*str)` style loops inside the string. The first byte is
 * always consumed.
 */
char *dtx_utf8_next_char(char *str)
{
	int i, nbytes = dtx_utf8_nbytes(str);

	for(i=1; i<nbytes && str[i]; i++);
	return str + i;
}
46 | |
47 | int dtx_utf8_char_code(const char *str) |
48 | { |
49 | int i, nbytes, shift, code = 0; |
50 | int mask; |
51 | |
52 | if(!U8_IS_FIRST(*str)) { |
53 | return -1; |
54 | } |
55 | |
56 | nbytes = dtx_utf8_nbytes(str); |
57 | mask = first_mask[nbytes]; |
58 | shift = 0; |
59 | |
60 | for(i=0; i<nbytes; i++) { |
61 | if(!*str) { |
62 | break; |
63 | } |
64 | |
65 | code = (code << shift) | (*str++ & mask); |
66 | mask = 0x3f; |
67 | shift = 6; |
68 | } |
69 | return code; |
70 | } |
71 | |
/* Returns the number of bytes to step over to leave the character at str.
 *
 * When str points at the first byte of a character, this is the number of
 * leading 1 bits of that byte (at most 4 are counted), or 1 if the top bit is
 * clear. When str points at a continuation byte, it is the number of bytes up
 * to the next byte that is not a continuation byte.
 */
int dtx_utf8_nbytes(const char *str)
{
	int lead = *str;
	int ones = 0;

	if(!U8_IS_FIRST(lead)) {
		/* in the middle of a character: measure the distance to the next start */
		int skip = 0;

		do {
			skip++;
		} while(!U8_IS_FIRST(str[skip]));
		return skip;
	}

	/* count the leading 1s, from the top bit downwards */
	while(ones < 4 && ((lead >> (7 - ones)) & 1)) {
		ones++;
	}
	return ones ? ones : 1;
}
95 | |
/* Returns the number of characters in the zero-terminated string str, found
 * by stepping through it with dtx_utf8_next_char until a zero byte is reached.
 */
int dtx_utf8_char_count(const char *str)
{
	int count;

	for(count = 0; *str; count++) {
		/* dtx_utf8_next_char takes a non-const pointer; str is not modified */
		str = dtx_utf8_next_char((char*)str);
	}
	return count;
}
106 | |
107 | size_t dtx_utf8_from_char_code(int code, char *buf) |
108 | { |
109 | size_t nbytes = 0; |
110 | int i; |
111 | |
112 | for(i=0; i<4; i++) { |
113 | if(code <= utf8_lastcode[i]) { |
114 | nbytes = i + 1; |
115 | break; |
116 | } |
117 | } |
118 | |
119 | if(!nbytes && buf) { |
120 | for(i=0; i<(int)nbytes; i++) { |
121 | int idx = nbytes - i - 1; |
122 | int mask, shift, prefix; |
123 | |
124 | if(idx > 0) { |
125 | mask = CONT_MASK; |
126 | shift = CONT_SHIFT; |
127 | prefix = CONT_PREFIX; |
128 | } else { |
129 | mask = first_mask[nbytes]; |
130 | shift = first_shift[nbytes]; |
131 | prefix = prefix(nbytes); |
132 | } |
133 | |
134 | buf[idx] = (code & mask) | (prefix & ~mask); |
135 | code >>= shift; |
136 | } |
137 | } |
138 | return nbytes; |
139 | } |
140 | |
/* Converts the zero-terminated wide string str to utf8, one character at a
 * time through dtx_utf8_from_char_code.
 *
 * If buf is non-null each encoded character is written right after the
 * previous one; this function appends no terminator. If buf is null, null is
 * passed on for every character.
 *
 * Returns the sum of the lengths reported by dtx_utf8_from_char_code.
 */
size_t dtx_utf8_from_string(const wchar_t *str, char *buf)
{
	size_t total = 0;
	const wchar_t *src;
	char *dest = buf;

	for(src = str; *src; src++) {
		int len = dtx_utf8_from_char_code(*src, dest);

		total += len;
		if(dest) {
			dest += len;
		}
	}
	return total;
}
155 | |