1/*
2 * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <stdio.h>
11#include "internal/cryptlib.h"
12#include <openssl/asn1.h>
13
14/* UTF8 utilities */
15
/*-
 * Decode a single UTF8 encoded character from the start of 'str'.
 *
 * 'len' is the number of bytes available in 'str'. On success the decoded
 * value is stored in '*val' and the number of bytes consumed (1 to 6) is
 * returned. If 'len' is zero or negative, 0 is returned and '*val' is left
 * untouched. On failure '*val' is left untouched and one of the following
 * negative codes is returned:
 * -1 = fewer than the required number of bytes are available
 * -2 = the first byte is not a legal lead byte
 * -3 = a trailing byte is not of the form 10xxxxxx
 * -4 = the value was not encoded in the shortest possible form.
 *
 * Apart from the shortest-form check, no range checks are made on the
 * decoded value.
 */

int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
    /* One entry per encoded length; entry k describes (k + 1)-byte forms. */
    static const struct {
        unsigned char tag_mask;     /* lead byte bits that hold the tag */
        unsigned char tag;          /* tag value for this length */
        unsigned char payload_mask; /* lead byte bits that hold data */
        unsigned long min_code;     /* smallest value needing this length */
    } forms[] = {
        { 0x80, 0x00, 0x7f, 0x0 },
        { 0xe0, 0xc0, 0x1f, 0x80 },
        { 0xf0, 0xe0, 0x0f, 0x800 },
        { 0xf8, 0xf0, 0x07, 0x10000 },
        { 0xfc, 0xf8, 0x03, 0x200000 },
        { 0xfe, 0xfc, 0x01, 0x4000000 }
    };
    const int nforms = (int)(sizeof(forms) / sizeof(forms[0]));
    unsigned long code;
    unsigned char lead;
    int nbytes, i;

    if (len <= 0)
        return 0;

    /* Classify the lead byte to learn how long the sequence claims to be. */
    lead = str[0];
    for (nbytes = 1; nbytes <= nforms; nbytes++) {
        if ((lead & forms[nbytes - 1].tag_mask) == forms[nbytes - 1].tag)
            break;
    }
    if (nbytes > nforms)
        return -2;

    if (len < nbytes)
        return -1;

    /* Fold in six payload bits from each trailing byte, in order. */
    code = (unsigned long)(lead & forms[nbytes - 1].payload_mask);
    for (i = 1; i < nbytes; i++) {
        if ((str[i] & 0xc0) != 0x80)
            return -3;
        code = (code << 6) | (unsigned long)(str[i] & 0x3f);
    }

    /* A value that would have fitted in a shorter form is rejected. */
    if (code < forms[nbytes - 1].min_code)
        return -4;

    *val = code;
    return nbytes;
}
115
/*
 * Encode the character 'value' as UTF8 into 'str', a buffer with room for
 * 'len' bytes. Returns the number of bytes written (1 to 6), or -1 if 'len'
 * is too small to hold the encoding; nothing is written in that case.
 *
 * 'str' may be NULL, in which case 'len' is ignored, nothing is written and
 * the number of bytes the encoding would occupy is returned.
 *
 * Only the low 31 bits of 'value' can be represented; any higher bits are
 * dropped from a 6 byte encoding.
 */

int UTF8_putc(unsigned char *str, int len, unsigned long value)
{
    /* Values below limit[k] fit in (k + 1) bytes; all others take 6. */
    static const unsigned long limit[] = {
        0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };
    /* Tag and payload bits of the lead byte, indexed by (length - 1). */
    static const unsigned char lead_tag[] = {
        0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };
    static const unsigned char lead_bits[] = {
        0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01
    };
    int nbytes = 1;
    int i;

    while (nbytes < 6 && value >= limit[nbytes - 1])
        nbytes++;

    /* Sizing query only: report the length without touching memory. */
    if (str == NULL)
        return nbytes;

    /* nbytes is at least 1, so this also rejects zero or negative 'len'. */
    if (len < nbytes)
        return -1;

    /* Emit trailing bytes last-to-first, peeling six bits off each time. */
    for (i = nbytes - 1; i > 0; i--) {
        str[i] = (unsigned char)((value & 0x3f) | 0x80);
        value >>= 6;
    }
    str[0] = (unsigned char)((value & lead_bits[nbytes - 1])
                             | lead_tag[nbytes - 1]);
    return nbytes;
}
189