1/*-------------------------------------------------------------------------
2 * saslprep.c
3 * SASLprep normalization, for SCRAM authentication
4 *
5 * The SASLprep algorithm is used to process a user-supplied password into
6 * canonical form. For more details, see:
7 *
8 * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9 * http://www.ietf.org/rfc/rfc3454.txt
10 *
11 * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12 * http://www.ietf.org/rfc/rfc4013.txt
13 *
14 *
15 * Portions Copyright (c) 2017-2019, PostgreSQL Global Development Group
16 *
17 * IDENTIFICATION
18 * src/common/saslprep.c
19 *
20 *-------------------------------------------------------------------------
21 */
22#ifndef FRONTEND
23#include "postgres.h"
24#else
25#include "postgres_fe.h"
26#endif
27
28#include "common/saslprep.h"
29#include "common/unicode_norm.h"
30
31/*
32 * Note: The functions in this file depend on functions from
33 * src/backend/utils/mb/wchar.c, so in order to use this in frontend
34 * code, you will need to link that in, too.
35 */
36#include "mb/pg_wchar.h"
37
38/*
 * Limit on how large a password we will try to process.  A password
 * longer than this is rejected up front: the frontend reports it the same
 * way as out-of-memory, and the backend raises a "password too long" error.
41 */
42#define MAX_PASSWORD_LENGTH 1024
43
/*
 * Memory-management shims.
 *
 * Frontend code allocates with the plain C library (malloc/strdup/free),
 * and pg_saslprep() reports a failed allocation as SASLPREP_OOM.  Backend
 * code goes through palloc/pstrdup/pfree instead.
 */
#ifdef FRONTEND
#define STRDUP(str)		strdup(str)
#define ALLOC(nbytes)	malloc(nbytes)
#define FREE(ptr)		free(ptr)
#else
#define STRDUP(str)		pstrdup(str)
#define ALLOC(nbytes)	palloc(nbytes)
#define FREE(ptr)		pfree(ptr)
#endif
57
58/* Prototypes for local functions */
59static int codepoint_range_cmp(const void *a, const void *b);
60static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
61static int pg_utf8_string_len(const char *source);
62static bool pg_is_ascii_string(const char *p);
63
64/*
65 * Stringprep Mapping Tables.
66 *
67 * The stringprep specification includes a number of tables of Unicode
68 * codepoints, used in different parts of the algorithm. They are below,
 * as arrays of codepoint ranges.  Each range is a pair of codepoints: the
 * first and the last codepoint included in the range (both inclusive!).
71 */
72
73/*
74 * C.1.2 Non-ASCII space characters
75 *
 * These are all mapped to the ASCII space character (U+0020).
77 */
78static const pg_wchar non_ascii_space_ranges[] =
79{
80 0x00A0, 0x00A0,
81 0x1680, 0x1680,
82 0x2000, 0x200B,
83 0x202F, 0x202F,
84 0x205F, 0x205F,
85 0x3000, 0x3000
86};
87
88/*
89 * B.1 Commonly mapped to nothing
90 *
91 * If any of these appear in the input, they are removed.
92 */
93static const pg_wchar commonly_mapped_to_nothing_ranges[] =
94{
95 0x00AD, 0x00AD,
96 0x034F, 0x034F,
97 0x1806, 0x1806,
98 0x180B, 0x180D,
99 0x200B, 0x200D,
100 0x2060, 0x2060,
101 0xFE00, 0xFE0F,
102 0xFEFF, 0xFEFF
103};
104
105/*
106 * prohibited_output_ranges is a union of all the characters from
107 * the following tables:
108 *
109 * C.1.2 Non-ASCII space characters
110 * C.2.1 ASCII control characters
111 * C.2.2 Non-ASCII control characters
112 * C.3 Private Use characters
113 * C.4 Non-character code points
114 * C.5 Surrogate code points
115 * C.6 Inappropriate for plain text characters
 * C.7 Inappropriate for canonical representation characters
 * C.8 Change display properties or deprecated characters
 * C.9 Tagging characters
119 *
120 * These are the tables that are listed as "prohibited output"
121 * characters in the SASLprep profile.
122 *
123 * The comment after each code range indicates which source table
124 * the code came from. Note that there is some overlap in the source
125 * tables, so one code might originate from multiple source tables.
126 * Adjacent ranges have also been merged together, to save space.
127 */
128static const pg_wchar prohibited_output_ranges[] =
129{
130 0x0000, 0x001F, /* C.2.1 */
131 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
132 0x0340, 0x0341, /* C.8 */
133 0x06DD, 0x06DD, /* C.2.2 */
134 0x070F, 0x070F, /* C.2.2 */
135 0x1680, 0x1680, /* C.1.2 */
136 0x180E, 0x180E, /* C.2.2 */
137 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
138 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
139 0x205F, 0x2063, /* C.1.2, C.2.2 */
140 0x206A, 0x206F, /* C.2.2, C.8 */
141 0x2FF0, 0x2FFB, /* C.7 */
142 0x3000, 0x3000, /* C.1.2 */
143 0xD800, 0xF8FF, /* C.3, C.5 */
144 0xFDD0, 0xFDEF, /* C.4 */
145 0xFEFF, 0xFEFF, /* C.2.2 */
146 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
147 0x1D173, 0x1D17A, /* C.2.2 */
148 0x1FFFE, 0x1FFFF, /* C.4 */
149 0x2FFFE, 0x2FFFF, /* C.4 */
150 0x3FFFE, 0x3FFFF, /* C.4 */
151 0x4FFFE, 0x4FFFF, /* C.4 */
152 0x5FFFE, 0x5FFFF, /* C.4 */
153 0x6FFFE, 0x6FFFF, /* C.4 */
154 0x7FFFE, 0x7FFFF, /* C.4 */
155 0x8FFFE, 0x8FFFF, /* C.4 */
156 0x9FFFE, 0x9FFFF, /* C.4 */
157 0xAFFFE, 0xAFFFF, /* C.4 */
158 0xBFFFE, 0xBFFFF, /* C.4 */
159 0xCFFFE, 0xCFFFF, /* C.4 */
160 0xDFFFE, 0xDFFFF, /* C.4 */
161 0xE0001, 0xE0001, /* C.9 */
162 0xE0020, 0xE007F, /* C.9 */
163 0xEFFFE, 0xEFFFF, /* C.4 */
164 0xF0000, 0xFFFFF, /* C.3, C.4 */
165 0x100000, 0x10FFFF /* C.3, C.4 */
166};
167
168/* A.1 Unassigned code points in Unicode 3.2 */
169static const pg_wchar unassigned_codepoint_ranges[] =
170{
171 0x0221, 0x0221,
172 0x0234, 0x024F,
173 0x02AE, 0x02AF,
174 0x02EF, 0x02FF,
175 0x0350, 0x035F,
176 0x0370, 0x0373,
177 0x0376, 0x0379,
178 0x037B, 0x037D,
179 0x037F, 0x0383,
180 0x038B, 0x038B,
181 0x038D, 0x038D,
182 0x03A2, 0x03A2,
183 0x03CF, 0x03CF,
184 0x03F7, 0x03FF,
185 0x0487, 0x0487,
186 0x04CF, 0x04CF,
187 0x04F6, 0x04F7,
188 0x04FA, 0x04FF,
189 0x0510, 0x0530,
190 0x0557, 0x0558,
191 0x0560, 0x0560,
192 0x0588, 0x0588,
193 0x058B, 0x0590,
194 0x05A2, 0x05A2,
195 0x05BA, 0x05BA,
196 0x05C5, 0x05CF,
197 0x05EB, 0x05EF,
198 0x05F5, 0x060B,
199 0x060D, 0x061A,
200 0x061C, 0x061E,
201 0x0620, 0x0620,
202 0x063B, 0x063F,
203 0x0656, 0x065F,
204 0x06EE, 0x06EF,
205 0x06FF, 0x06FF,
206 0x070E, 0x070E,
207 0x072D, 0x072F,
208 0x074B, 0x077F,
209 0x07B2, 0x0900,
210 0x0904, 0x0904,
211 0x093A, 0x093B,
212 0x094E, 0x094F,
213 0x0955, 0x0957,
214 0x0971, 0x0980,
215 0x0984, 0x0984,
216 0x098D, 0x098E,
217 0x0991, 0x0992,
218 0x09A9, 0x09A9,
219 0x09B1, 0x09B1,
220 0x09B3, 0x09B5,
221 0x09BA, 0x09BB,
222 0x09BD, 0x09BD,
223 0x09C5, 0x09C6,
224 0x09C9, 0x09CA,
225 0x09CE, 0x09D6,
226 0x09D8, 0x09DB,
227 0x09DE, 0x09DE,
228 0x09E4, 0x09E5,
229 0x09FB, 0x0A01,
230 0x0A03, 0x0A04,
231 0x0A0B, 0x0A0E,
232 0x0A11, 0x0A12,
233 0x0A29, 0x0A29,
234 0x0A31, 0x0A31,
235 0x0A34, 0x0A34,
236 0x0A37, 0x0A37,
237 0x0A3A, 0x0A3B,
238 0x0A3D, 0x0A3D,
239 0x0A43, 0x0A46,
240 0x0A49, 0x0A4A,
241 0x0A4E, 0x0A58,
242 0x0A5D, 0x0A5D,
243 0x0A5F, 0x0A65,
244 0x0A75, 0x0A80,
245 0x0A84, 0x0A84,
246 0x0A8C, 0x0A8C,
247 0x0A8E, 0x0A8E,
248 0x0A92, 0x0A92,
249 0x0AA9, 0x0AA9,
250 0x0AB1, 0x0AB1,
251 0x0AB4, 0x0AB4,
252 0x0ABA, 0x0ABB,
253 0x0AC6, 0x0AC6,
254 0x0ACA, 0x0ACA,
255 0x0ACE, 0x0ACF,
256 0x0AD1, 0x0ADF,
257 0x0AE1, 0x0AE5,
258 0x0AF0, 0x0B00,
259 0x0B04, 0x0B04,
260 0x0B0D, 0x0B0E,
261 0x0B11, 0x0B12,
262 0x0B29, 0x0B29,
263 0x0B31, 0x0B31,
264 0x0B34, 0x0B35,
265 0x0B3A, 0x0B3B,
266 0x0B44, 0x0B46,
267 0x0B49, 0x0B4A,
268 0x0B4E, 0x0B55,
269 0x0B58, 0x0B5B,
270 0x0B5E, 0x0B5E,
271 0x0B62, 0x0B65,
272 0x0B71, 0x0B81,
273 0x0B84, 0x0B84,
274 0x0B8B, 0x0B8D,
275 0x0B91, 0x0B91,
276 0x0B96, 0x0B98,
277 0x0B9B, 0x0B9B,
278 0x0B9D, 0x0B9D,
279 0x0BA0, 0x0BA2,
280 0x0BA5, 0x0BA7,
281 0x0BAB, 0x0BAD,
282 0x0BB6, 0x0BB6,
283 0x0BBA, 0x0BBD,
284 0x0BC3, 0x0BC5,
285 0x0BC9, 0x0BC9,
286 0x0BCE, 0x0BD6,
287 0x0BD8, 0x0BE6,
288 0x0BF3, 0x0C00,
289 0x0C04, 0x0C04,
290 0x0C0D, 0x0C0D,
291 0x0C11, 0x0C11,
292 0x0C29, 0x0C29,
293 0x0C34, 0x0C34,
294 0x0C3A, 0x0C3D,
295 0x0C45, 0x0C45,
296 0x0C49, 0x0C49,
297 0x0C4E, 0x0C54,
298 0x0C57, 0x0C5F,
299 0x0C62, 0x0C65,
300 0x0C70, 0x0C81,
301 0x0C84, 0x0C84,
302 0x0C8D, 0x0C8D,
303 0x0C91, 0x0C91,
304 0x0CA9, 0x0CA9,
305 0x0CB4, 0x0CB4,
306 0x0CBA, 0x0CBD,
307 0x0CC5, 0x0CC5,
308 0x0CC9, 0x0CC9,
309 0x0CCE, 0x0CD4,
310 0x0CD7, 0x0CDD,
311 0x0CDF, 0x0CDF,
312 0x0CE2, 0x0CE5,
313 0x0CF0, 0x0D01,
314 0x0D04, 0x0D04,
315 0x0D0D, 0x0D0D,
316 0x0D11, 0x0D11,
317 0x0D29, 0x0D29,
318 0x0D3A, 0x0D3D,
319 0x0D44, 0x0D45,
320 0x0D49, 0x0D49,
321 0x0D4E, 0x0D56,
322 0x0D58, 0x0D5F,
323 0x0D62, 0x0D65,
324 0x0D70, 0x0D81,
325 0x0D84, 0x0D84,
326 0x0D97, 0x0D99,
327 0x0DB2, 0x0DB2,
328 0x0DBC, 0x0DBC,
329 0x0DBE, 0x0DBF,
330 0x0DC7, 0x0DC9,
331 0x0DCB, 0x0DCE,
332 0x0DD5, 0x0DD5,
333 0x0DD7, 0x0DD7,
334 0x0DE0, 0x0DF1,
335 0x0DF5, 0x0E00,
336 0x0E3B, 0x0E3E,
337 0x0E5C, 0x0E80,
338 0x0E83, 0x0E83,
339 0x0E85, 0x0E86,
340 0x0E89, 0x0E89,
341 0x0E8B, 0x0E8C,
342 0x0E8E, 0x0E93,
343 0x0E98, 0x0E98,
344 0x0EA0, 0x0EA0,
345 0x0EA4, 0x0EA4,
346 0x0EA6, 0x0EA6,
347 0x0EA8, 0x0EA9,
348 0x0EAC, 0x0EAC,
349 0x0EBA, 0x0EBA,
350 0x0EBE, 0x0EBF,
351 0x0EC5, 0x0EC5,
352 0x0EC7, 0x0EC7,
353 0x0ECE, 0x0ECF,
354 0x0EDA, 0x0EDB,
355 0x0EDE, 0x0EFF,
356 0x0F48, 0x0F48,
357 0x0F6B, 0x0F70,
358 0x0F8C, 0x0F8F,
359 0x0F98, 0x0F98,
360 0x0FBD, 0x0FBD,
361 0x0FCD, 0x0FCE,
362 0x0FD0, 0x0FFF,
363 0x1022, 0x1022,
364 0x1028, 0x1028,
365 0x102B, 0x102B,
366 0x1033, 0x1035,
367 0x103A, 0x103F,
368 0x105A, 0x109F,
369 0x10C6, 0x10CF,
370 0x10F9, 0x10FA,
371 0x10FC, 0x10FF,
372 0x115A, 0x115E,
373 0x11A3, 0x11A7,
374 0x11FA, 0x11FF,
375 0x1207, 0x1207,
376 0x1247, 0x1247,
377 0x1249, 0x1249,
378 0x124E, 0x124F,
379 0x1257, 0x1257,
380 0x1259, 0x1259,
381 0x125E, 0x125F,
382 0x1287, 0x1287,
383 0x1289, 0x1289,
384 0x128E, 0x128F,
385 0x12AF, 0x12AF,
386 0x12B1, 0x12B1,
387 0x12B6, 0x12B7,
388 0x12BF, 0x12BF,
389 0x12C1, 0x12C1,
390 0x12C6, 0x12C7,
391 0x12CF, 0x12CF,
392 0x12D7, 0x12D7,
393 0x12EF, 0x12EF,
394 0x130F, 0x130F,
395 0x1311, 0x1311,
396 0x1316, 0x1317,
397 0x131F, 0x131F,
398 0x1347, 0x1347,
399 0x135B, 0x1360,
400 0x137D, 0x139F,
401 0x13F5, 0x1400,
402 0x1677, 0x167F,
403 0x169D, 0x169F,
404 0x16F1, 0x16FF,
405 0x170D, 0x170D,
406 0x1715, 0x171F,
407 0x1737, 0x173F,
408 0x1754, 0x175F,
409 0x176D, 0x176D,
410 0x1771, 0x1771,
411 0x1774, 0x177F,
412 0x17DD, 0x17DF,
413 0x17EA, 0x17FF,
414 0x180F, 0x180F,
415 0x181A, 0x181F,
416 0x1878, 0x187F,
417 0x18AA, 0x1DFF,
418 0x1E9C, 0x1E9F,
419 0x1EFA, 0x1EFF,
420 0x1F16, 0x1F17,
421 0x1F1E, 0x1F1F,
422 0x1F46, 0x1F47,
423 0x1F4E, 0x1F4F,
424 0x1F58, 0x1F58,
425 0x1F5A, 0x1F5A,
426 0x1F5C, 0x1F5C,
427 0x1F5E, 0x1F5E,
428 0x1F7E, 0x1F7F,
429 0x1FB5, 0x1FB5,
430 0x1FC5, 0x1FC5,
431 0x1FD4, 0x1FD5,
432 0x1FDC, 0x1FDC,
433 0x1FF0, 0x1FF1,
434 0x1FF5, 0x1FF5,
435 0x1FFF, 0x1FFF,
436 0x2053, 0x2056,
437 0x2058, 0x205E,
438 0x2064, 0x2069,
439 0x2072, 0x2073,
440 0x208F, 0x209F,
441 0x20B2, 0x20CF,
442 0x20EB, 0x20FF,
443 0x213B, 0x213C,
444 0x214C, 0x2152,
445 0x2184, 0x218F,
446 0x23CF, 0x23FF,
447 0x2427, 0x243F,
448 0x244B, 0x245F,
449 0x24FF, 0x24FF,
450 0x2614, 0x2615,
451 0x2618, 0x2618,
452 0x267E, 0x267F,
453 0x268A, 0x2700,
454 0x2705, 0x2705,
455 0x270A, 0x270B,
456 0x2728, 0x2728,
457 0x274C, 0x274C,
458 0x274E, 0x274E,
459 0x2753, 0x2755,
460 0x2757, 0x2757,
461 0x275F, 0x2760,
462 0x2795, 0x2797,
463 0x27B0, 0x27B0,
464 0x27BF, 0x27CF,
465 0x27EC, 0x27EF,
466 0x2B00, 0x2E7F,
467 0x2E9A, 0x2E9A,
468 0x2EF4, 0x2EFF,
469 0x2FD6, 0x2FEF,
470 0x2FFC, 0x2FFF,
471 0x3040, 0x3040,
472 0x3097, 0x3098,
473 0x3100, 0x3104,
474 0x312D, 0x3130,
475 0x318F, 0x318F,
476 0x31B8, 0x31EF,
477 0x321D, 0x321F,
478 0x3244, 0x3250,
479 0x327C, 0x327E,
480 0x32CC, 0x32CF,
481 0x32FF, 0x32FF,
482 0x3377, 0x337A,
483 0x33DE, 0x33DF,
484 0x33FF, 0x33FF,
485 0x4DB6, 0x4DFF,
486 0x9FA6, 0x9FFF,
487 0xA48D, 0xA48F,
488 0xA4C7, 0xABFF,
489 0xD7A4, 0xD7FF,
490 0xFA2E, 0xFA2F,
491 0xFA6B, 0xFAFF,
492 0xFB07, 0xFB12,
493 0xFB18, 0xFB1C,
494 0xFB37, 0xFB37,
495 0xFB3D, 0xFB3D,
496 0xFB3F, 0xFB3F,
497 0xFB42, 0xFB42,
498 0xFB45, 0xFB45,
499 0xFBB2, 0xFBD2,
500 0xFD40, 0xFD4F,
501 0xFD90, 0xFD91,
502 0xFDC8, 0xFDCF,
503 0xFDFD, 0xFDFF,
504 0xFE10, 0xFE1F,
505 0xFE24, 0xFE2F,
506 0xFE47, 0xFE48,
507 0xFE53, 0xFE53,
508 0xFE67, 0xFE67,
509 0xFE6C, 0xFE6F,
510 0xFE75, 0xFE75,
511 0xFEFD, 0xFEFE,
512 0xFF00, 0xFF00,
513 0xFFBF, 0xFFC1,
514 0xFFC8, 0xFFC9,
515 0xFFD0, 0xFFD1,
516 0xFFD8, 0xFFD9,
517 0xFFDD, 0xFFDF,
518 0xFFE7, 0xFFE7,
519 0xFFEF, 0xFFF8,
520 0x10000, 0x102FF,
521 0x1031F, 0x1031F,
522 0x10324, 0x1032F,
523 0x1034B, 0x103FF,
524 0x10426, 0x10427,
525 0x1044E, 0x1CFFF,
526 0x1D0F6, 0x1D0FF,
527 0x1D127, 0x1D129,
528 0x1D1DE, 0x1D3FF,
529 0x1D455, 0x1D455,
530 0x1D49D, 0x1D49D,
531 0x1D4A0, 0x1D4A1,
532 0x1D4A3, 0x1D4A4,
533 0x1D4A7, 0x1D4A8,
534 0x1D4AD, 0x1D4AD,
535 0x1D4BA, 0x1D4BA,
536 0x1D4BC, 0x1D4BC,
537 0x1D4C1, 0x1D4C1,
538 0x1D4C4, 0x1D4C4,
539 0x1D506, 0x1D506,
540 0x1D50B, 0x1D50C,
541 0x1D515, 0x1D515,
542 0x1D51D, 0x1D51D,
543 0x1D53A, 0x1D53A,
544 0x1D53F, 0x1D53F,
545 0x1D545, 0x1D545,
546 0x1D547, 0x1D549,
547 0x1D551, 0x1D551,
548 0x1D6A4, 0x1D6A7,
549 0x1D7CA, 0x1D7CD,
550 0x1D800, 0x1FFFD,
551 0x2A6D7, 0x2F7FF,
552 0x2FA1E, 0x2FFFD,
553 0x30000, 0x3FFFD,
554 0x40000, 0x4FFFD,
555 0x50000, 0x5FFFD,
556 0x60000, 0x6FFFD,
557 0x70000, 0x7FFFD,
558 0x80000, 0x8FFFD,
559 0x90000, 0x9FFFD,
560 0xA0000, 0xAFFFD,
561 0xB0000, 0xBFFFD,
562 0xC0000, 0xCFFFD,
563 0xD0000, 0xDFFFD,
564 0xE0000, 0xE0000,
565 0xE0002, 0xE001F,
566 0xE0080, 0xEFFFD
567};
568
569/* D.1 Characters with bidirectional property "R" or "AL" */
570static const pg_wchar RandALCat_codepoint_ranges[] =
571{
572 0x05BE, 0x05BE,
573 0x05C0, 0x05C0,
574 0x05C3, 0x05C3,
575 0x05D0, 0x05EA,
576 0x05F0, 0x05F4,
577 0x061B, 0x061B,
578 0x061F, 0x061F,
579 0x0621, 0x063A,
580 0x0640, 0x064A,
581 0x066D, 0x066F,
582 0x0671, 0x06D5,
583 0x06DD, 0x06DD,
584 0x06E5, 0x06E6,
585 0x06FA, 0x06FE,
586 0x0700, 0x070D,
587 0x0710, 0x0710,
588 0x0712, 0x072C,
589 0x0780, 0x07A5,
590 0x07B1, 0x07B1,
591 0x200F, 0x200F,
592 0xFB1D, 0xFB1D,
593 0xFB1F, 0xFB28,
594 0xFB2A, 0xFB36,
595 0xFB38, 0xFB3C,
596 0xFB3E, 0xFB3E,
597 0xFB40, 0xFB41,
598 0xFB43, 0xFB44,
599 0xFB46, 0xFBB1,
600 0xFBD3, 0xFD3D,
601 0xFD50, 0xFD8F,
602 0xFD92, 0xFDC7,
603 0xFDF0, 0xFDFC,
604 0xFE70, 0xFE74,
605 0xFE76, 0xFEFC
606};
607
608/* D.2 Characters with bidirectional property "L" */
609static const pg_wchar LCat_codepoint_ranges[] =
610{
611 0x0041, 0x005A,
612 0x0061, 0x007A,
613 0x00AA, 0x00AA,
614 0x00B5, 0x00B5,
615 0x00BA, 0x00BA,
616 0x00C0, 0x00D6,
617 0x00D8, 0x00F6,
618 0x00F8, 0x0220,
619 0x0222, 0x0233,
620 0x0250, 0x02AD,
621 0x02B0, 0x02B8,
622 0x02BB, 0x02C1,
623 0x02D0, 0x02D1,
624 0x02E0, 0x02E4,
625 0x02EE, 0x02EE,
626 0x037A, 0x037A,
627 0x0386, 0x0386,
628 0x0388, 0x038A,
629 0x038C, 0x038C,
630 0x038E, 0x03A1,
631 0x03A3, 0x03CE,
632 0x03D0, 0x03F5,
633 0x0400, 0x0482,
634 0x048A, 0x04CE,
635 0x04D0, 0x04F5,
636 0x04F8, 0x04F9,
637 0x0500, 0x050F,
638 0x0531, 0x0556,
639 0x0559, 0x055F,
640 0x0561, 0x0587,
641 0x0589, 0x0589,
642 0x0903, 0x0903,
643 0x0905, 0x0939,
644 0x093D, 0x0940,
645 0x0949, 0x094C,
646 0x0950, 0x0950,
647 0x0958, 0x0961,
648 0x0964, 0x0970,
649 0x0982, 0x0983,
650 0x0985, 0x098C,
651 0x098F, 0x0990,
652 0x0993, 0x09A8,
653 0x09AA, 0x09B0,
654 0x09B2, 0x09B2,
655 0x09B6, 0x09B9,
656 0x09BE, 0x09C0,
657 0x09C7, 0x09C8,
658 0x09CB, 0x09CC,
659 0x09D7, 0x09D7,
660 0x09DC, 0x09DD,
661 0x09DF, 0x09E1,
662 0x09E6, 0x09F1,
663 0x09F4, 0x09FA,
664 0x0A05, 0x0A0A,
665 0x0A0F, 0x0A10,
666 0x0A13, 0x0A28,
667 0x0A2A, 0x0A30,
668 0x0A32, 0x0A33,
669 0x0A35, 0x0A36,
670 0x0A38, 0x0A39,
671 0x0A3E, 0x0A40,
672 0x0A59, 0x0A5C,
673 0x0A5E, 0x0A5E,
674 0x0A66, 0x0A6F,
675 0x0A72, 0x0A74,
676 0x0A83, 0x0A83,
677 0x0A85, 0x0A8B,
678 0x0A8D, 0x0A8D,
679 0x0A8F, 0x0A91,
680 0x0A93, 0x0AA8,
681 0x0AAA, 0x0AB0,
682 0x0AB2, 0x0AB3,
683 0x0AB5, 0x0AB9,
684 0x0ABD, 0x0AC0,
685 0x0AC9, 0x0AC9,
686 0x0ACB, 0x0ACC,
687 0x0AD0, 0x0AD0,
688 0x0AE0, 0x0AE0,
689 0x0AE6, 0x0AEF,
690 0x0B02, 0x0B03,
691 0x0B05, 0x0B0C,
692 0x0B0F, 0x0B10,
693 0x0B13, 0x0B28,
694 0x0B2A, 0x0B30,
695 0x0B32, 0x0B33,
696 0x0B36, 0x0B39,
697 0x0B3D, 0x0B3E,
698 0x0B40, 0x0B40,
699 0x0B47, 0x0B48,
700 0x0B4B, 0x0B4C,
701 0x0B57, 0x0B57,
702 0x0B5C, 0x0B5D,
703 0x0B5F, 0x0B61,
704 0x0B66, 0x0B70,
705 0x0B83, 0x0B83,
706 0x0B85, 0x0B8A,
707 0x0B8E, 0x0B90,
708 0x0B92, 0x0B95,
709 0x0B99, 0x0B9A,
710 0x0B9C, 0x0B9C,
711 0x0B9E, 0x0B9F,
712 0x0BA3, 0x0BA4,
713 0x0BA8, 0x0BAA,
714 0x0BAE, 0x0BB5,
715 0x0BB7, 0x0BB9,
716 0x0BBE, 0x0BBF,
717 0x0BC1, 0x0BC2,
718 0x0BC6, 0x0BC8,
719 0x0BCA, 0x0BCC,
720 0x0BD7, 0x0BD7,
721 0x0BE7, 0x0BF2,
722 0x0C01, 0x0C03,
723 0x0C05, 0x0C0C,
724 0x0C0E, 0x0C10,
725 0x0C12, 0x0C28,
726 0x0C2A, 0x0C33,
727 0x0C35, 0x0C39,
728 0x0C41, 0x0C44,
729 0x0C60, 0x0C61,
730 0x0C66, 0x0C6F,
731 0x0C82, 0x0C83,
732 0x0C85, 0x0C8C,
733 0x0C8E, 0x0C90,
734 0x0C92, 0x0CA8,
735 0x0CAA, 0x0CB3,
736 0x0CB5, 0x0CB9,
737 0x0CBE, 0x0CBE,
738 0x0CC0, 0x0CC4,
739 0x0CC7, 0x0CC8,
740 0x0CCA, 0x0CCB,
741 0x0CD5, 0x0CD6,
742 0x0CDE, 0x0CDE,
743 0x0CE0, 0x0CE1,
744 0x0CE6, 0x0CEF,
745 0x0D02, 0x0D03,
746 0x0D05, 0x0D0C,
747 0x0D0E, 0x0D10,
748 0x0D12, 0x0D28,
749 0x0D2A, 0x0D39,
750 0x0D3E, 0x0D40,
751 0x0D46, 0x0D48,
752 0x0D4A, 0x0D4C,
753 0x0D57, 0x0D57,
754 0x0D60, 0x0D61,
755 0x0D66, 0x0D6F,
756 0x0D82, 0x0D83,
757 0x0D85, 0x0D96,
758 0x0D9A, 0x0DB1,
759 0x0DB3, 0x0DBB,
760 0x0DBD, 0x0DBD,
761 0x0DC0, 0x0DC6,
762 0x0DCF, 0x0DD1,
763 0x0DD8, 0x0DDF,
764 0x0DF2, 0x0DF4,
765 0x0E01, 0x0E30,
766 0x0E32, 0x0E33,
767 0x0E40, 0x0E46,
768 0x0E4F, 0x0E5B,
769 0x0E81, 0x0E82,
770 0x0E84, 0x0E84,
771 0x0E87, 0x0E88,
772 0x0E8A, 0x0E8A,
773 0x0E8D, 0x0E8D,
774 0x0E94, 0x0E97,
775 0x0E99, 0x0E9F,
776 0x0EA1, 0x0EA3,
777 0x0EA5, 0x0EA5,
778 0x0EA7, 0x0EA7,
779 0x0EAA, 0x0EAB,
780 0x0EAD, 0x0EB0,
781 0x0EB2, 0x0EB3,
782 0x0EBD, 0x0EBD,
783 0x0EC0, 0x0EC4,
784 0x0EC6, 0x0EC6,
785 0x0ED0, 0x0ED9,
786 0x0EDC, 0x0EDD,
787 0x0F00, 0x0F17,
788 0x0F1A, 0x0F34,
789 0x0F36, 0x0F36,
790 0x0F38, 0x0F38,
791 0x0F3E, 0x0F47,
792 0x0F49, 0x0F6A,
793 0x0F7F, 0x0F7F,
794 0x0F85, 0x0F85,
795 0x0F88, 0x0F8B,
796 0x0FBE, 0x0FC5,
797 0x0FC7, 0x0FCC,
798 0x0FCF, 0x0FCF,
799 0x1000, 0x1021,
800 0x1023, 0x1027,
801 0x1029, 0x102A,
802 0x102C, 0x102C,
803 0x1031, 0x1031,
804 0x1038, 0x1038,
805 0x1040, 0x1057,
806 0x10A0, 0x10C5,
807 0x10D0, 0x10F8,
808 0x10FB, 0x10FB,
809 0x1100, 0x1159,
810 0x115F, 0x11A2,
811 0x11A8, 0x11F9,
812 0x1200, 0x1206,
813 0x1208, 0x1246,
814 0x1248, 0x1248,
815 0x124A, 0x124D,
816 0x1250, 0x1256,
817 0x1258, 0x1258,
818 0x125A, 0x125D,
819 0x1260, 0x1286,
820 0x1288, 0x1288,
821 0x128A, 0x128D,
822 0x1290, 0x12AE,
823 0x12B0, 0x12B0,
824 0x12B2, 0x12B5,
825 0x12B8, 0x12BE,
826 0x12C0, 0x12C0,
827 0x12C2, 0x12C5,
828 0x12C8, 0x12CE,
829 0x12D0, 0x12D6,
830 0x12D8, 0x12EE,
831 0x12F0, 0x130E,
832 0x1310, 0x1310,
833 0x1312, 0x1315,
834 0x1318, 0x131E,
835 0x1320, 0x1346,
836 0x1348, 0x135A,
837 0x1361, 0x137C,
838 0x13A0, 0x13F4,
839 0x1401, 0x1676,
840 0x1681, 0x169A,
841 0x16A0, 0x16F0,
842 0x1700, 0x170C,
843 0x170E, 0x1711,
844 0x1720, 0x1731,
845 0x1735, 0x1736,
846 0x1740, 0x1751,
847 0x1760, 0x176C,
848 0x176E, 0x1770,
849 0x1780, 0x17B6,
850 0x17BE, 0x17C5,
851 0x17C7, 0x17C8,
852 0x17D4, 0x17DA,
853 0x17DC, 0x17DC,
854 0x17E0, 0x17E9,
855 0x1810, 0x1819,
856 0x1820, 0x1877,
857 0x1880, 0x18A8,
858 0x1E00, 0x1E9B,
859 0x1EA0, 0x1EF9,
860 0x1F00, 0x1F15,
861 0x1F18, 0x1F1D,
862 0x1F20, 0x1F45,
863 0x1F48, 0x1F4D,
864 0x1F50, 0x1F57,
865 0x1F59, 0x1F59,
866 0x1F5B, 0x1F5B,
867 0x1F5D, 0x1F5D,
868 0x1F5F, 0x1F7D,
869 0x1F80, 0x1FB4,
870 0x1FB6, 0x1FBC,
871 0x1FBE, 0x1FBE,
872 0x1FC2, 0x1FC4,
873 0x1FC6, 0x1FCC,
874 0x1FD0, 0x1FD3,
875 0x1FD6, 0x1FDB,
876 0x1FE0, 0x1FEC,
877 0x1FF2, 0x1FF4,
878 0x1FF6, 0x1FFC,
879 0x200E, 0x200E,
880 0x2071, 0x2071,
881 0x207F, 0x207F,
882 0x2102, 0x2102,
883 0x2107, 0x2107,
884 0x210A, 0x2113,
885 0x2115, 0x2115,
886 0x2119, 0x211D,
887 0x2124, 0x2124,
888 0x2126, 0x2126,
889 0x2128, 0x2128,
890 0x212A, 0x212D,
891 0x212F, 0x2131,
892 0x2133, 0x2139,
893 0x213D, 0x213F,
894 0x2145, 0x2149,
895 0x2160, 0x2183,
896 0x2336, 0x237A,
897 0x2395, 0x2395,
898 0x249C, 0x24E9,
899 0x3005, 0x3007,
900 0x3021, 0x3029,
901 0x3031, 0x3035,
902 0x3038, 0x303C,
903 0x3041, 0x3096,
904 0x309D, 0x309F,
905 0x30A1, 0x30FA,
906 0x30FC, 0x30FF,
907 0x3105, 0x312C,
908 0x3131, 0x318E,
909 0x3190, 0x31B7,
910 0x31F0, 0x321C,
911 0x3220, 0x3243,
912 0x3260, 0x327B,
913 0x327F, 0x32B0,
914 0x32C0, 0x32CB,
915 0x32D0, 0x32FE,
916 0x3300, 0x3376,
917 0x337B, 0x33DD,
918 0x33E0, 0x33FE,
919 0x3400, 0x4DB5,
920 0x4E00, 0x9FA5,
921 0xA000, 0xA48C,
922 0xAC00, 0xD7A3,
923 0xD800, 0xFA2D,
924 0xFA30, 0xFA6A,
925 0xFB00, 0xFB06,
926 0xFB13, 0xFB17,
927 0xFF21, 0xFF3A,
928 0xFF41, 0xFF5A,
929 0xFF66, 0xFFBE,
930 0xFFC2, 0xFFC7,
931 0xFFCA, 0xFFCF,
932 0xFFD2, 0xFFD7,
933 0xFFDA, 0xFFDC,
934 0x10300, 0x1031E,
935 0x10320, 0x10323,
936 0x10330, 0x1034A,
937 0x10400, 0x10425,
938 0x10428, 0x1044D,
939 0x1D000, 0x1D0F5,
940 0x1D100, 0x1D126,
941 0x1D12A, 0x1D166,
942 0x1D16A, 0x1D172,
943 0x1D183, 0x1D184,
944 0x1D18C, 0x1D1A9,
945 0x1D1AE, 0x1D1DD,
946 0x1D400, 0x1D454,
947 0x1D456, 0x1D49C,
948 0x1D49E, 0x1D49F,
949 0x1D4A2, 0x1D4A2,
950 0x1D4A5, 0x1D4A6,
951 0x1D4A9, 0x1D4AC,
952 0x1D4AE, 0x1D4B9,
953 0x1D4BB, 0x1D4BB,
954 0x1D4BD, 0x1D4C0,
955 0x1D4C2, 0x1D4C3,
956 0x1D4C5, 0x1D505,
957 0x1D507, 0x1D50A,
958 0x1D50D, 0x1D514,
959 0x1D516, 0x1D51C,
960 0x1D51E, 0x1D539,
961 0x1D53B, 0x1D53E,
962 0x1D540, 0x1D544,
963 0x1D546, 0x1D546,
964 0x1D54A, 0x1D550,
965 0x1D552, 0x1D6A3,
966 0x1D6A8, 0x1D7C9,
967 0x20000, 0x2A6D6,
968 0x2F800, 0x2FA1D,
969 0xF0000, 0xFFFFD,
970 0x100000, 0x10FFFD
971};
972
973/* End of stringprep tables */
974
975
/*
 * Is the given Unicode codepoint covered by the given table of ranges?
 *
 * The table length is derived with lengthof(map), so pass the range array
 * itself (presumably lengthof() is sizeof-based and would not work on a
 * plain pointer -- confirm against its definition).
 */
#define IS_CODE_IN_TABLE(code, map) \
	is_code_in_table(code, map, lengthof(map))
978
979static int
980codepoint_range_cmp(const void *a, const void *b)
981{
982 const pg_wchar *key = (const pg_wchar *) a;
983 const pg_wchar *range = (const pg_wchar *) b;
984
985 if (*key < range[0])
986 return -1; /* less than lower bound */
987 if (*key > range[1])
988 return 1; /* greater than upper bound */
989
990 return 0; /* within range */
991}
992
993static bool
994is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
995{
996 Assert(mapsize % 2 == 0);
997
998 if (code < map[0] || code > map[mapsize - 1])
999 return false;
1000
1001 if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
1002 codepoint_range_cmp))
1003 return true;
1004 else
1005 return false;
1006}
1007
/*
 * Calculate the length in characters of a null-terminated UTF-8 string.
 *
 * Returns -1 if the input is not valid UTF-8.  That includes a string whose
 * final character is cut short, i.e. whose lead byte announces more bytes
 * than are left before the terminating NUL.
 */
static int
pg_utf8_string_len(const char *source)
{
	const unsigned char *p = (const unsigned char *) source;
	size_t		remaining = strlen(source);
	int			num_chars = 0;

	while (remaining > 0)
	{
		int			l = pg_utf_mblen(p);

		/*
		 * pg_utf8_islegal() is told to examine "l" bytes, so make sure that
		 * many bytes really exist before the terminator.  Without this
		 * check, a truncated multibyte sequence at the end of the string
		 * would have the validator look at memory past the NUL.
		 */
		if ((size_t) l > remaining || !pg_utf8_islegal(p, l))
			return -1;

		p += l;
		remaining -= l;
		num_chars++;
	}

	return num_chars;
}
1033
/*
 * Returns true if no byte of the null-terminated string has its high bit
 * set, i.e. the string is pure ASCII.  An empty string counts as ASCII.
 */
static bool
pg_is_ascii_string(const char *p)
{
	const char *c;

	for (c = p; *c != '\0'; c++)
	{
		if (IS_HIGHBIT_SET(*c))
			return false;
	}

	return true;
}
1048
1049
1050/*
1051 * pg_saslprep - Normalize a password with SASLprep.
1052 *
1053 * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1054 * supports many encodings, so we don't blindly assume that. pg_saslprep
1055 * will check if the input looks like valid UTF-8, and returns
1056 * SASLPREP_INVALID_UTF8 if not.
1057 *
1058 * If the string contains prohibited characters (or more precisely, if the
1059 * output string would contain prohibited characters after normalization),
1060 * returns SASLPREP_PROHIBITED.
1061 *
1062 * On success, returns SASLPREP_SUCCESS, and the normalized string in
1063 * *output.
1064 *
1065 * In frontend, the normalized string is malloc'd, and the caller is
1066 * responsible for freeing it. If an allocation fails, returns
1067 * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
1068 * and a failed allocation leads to ereport(ERROR).
1069 */
1070pg_saslprep_rc
1071pg_saslprep(const char *input, char **output)
1072{
1073 pg_wchar *input_chars = NULL;
1074 pg_wchar *output_chars = NULL;
1075 int input_size;
1076 char *result;
1077 int result_size;
1078 int count;
1079 int i;
1080 bool contains_RandALCat;
1081 unsigned char *p;
1082 pg_wchar *wp;
1083
1084 /* Ensure we return *output as NULL on failure */
1085 *output = NULL;
1086
1087 /* Check that the password isn't stupendously long */
1088 if (strlen(input) > MAX_PASSWORD_LENGTH)
1089 {
1090#ifndef FRONTEND
1091 ereport(ERROR,
1092 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1093 errmsg("password too long")));
1094#else
1095 return SASLPREP_OOM;
1096#endif
1097 }
1098
1099 /*
1100 * Quick check if the input is pure ASCII. An ASCII string requires no
1101 * further processing.
1102 */
1103 if (pg_is_ascii_string(input))
1104 {
1105 *output = STRDUP(input);
1106 if (!(*output))
1107 goto oom;
1108 return SASLPREP_SUCCESS;
1109 }
1110
1111 /*
1112 * Convert the input from UTF-8 to an array of Unicode codepoints.
1113 *
1114 * This also checks that the input is a legal UTF-8 string.
1115 */
1116 input_size = pg_utf8_string_len(input);
1117 if (input_size < 0)
1118 return SASLPREP_INVALID_UTF8;
1119
1120 input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1121 if (!input_chars)
1122 goto oom;
1123
1124 p = (unsigned char *) input;
1125 for (i = 0; i < input_size; i++)
1126 {
1127 input_chars[i] = utf8_to_unicode(p);
1128 p += pg_utf_mblen(p);
1129 }
1130 input_chars[i] = (pg_wchar) '\0';
1131
1132 /*
1133 * The steps below correspond to the steps listed in [RFC3454], Section
1134 * "2. Preparation Overview"
1135 */
1136
1137 /*
1138 * 1) Map -- For each character in the input, check if it has a mapping
1139 * and, if so, replace it with its mapping.
1140 */
1141 count = 0;
1142 for (i = 0; i < input_size; i++)
1143 {
1144 pg_wchar code = input_chars[i];
1145
1146 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1147 input_chars[count++] = 0x0020;
1148 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1149 {
1150 /* map to nothing */
1151 }
1152 else
1153 input_chars[count++] = code;
1154 }
1155 input_chars[count] = (pg_wchar) '\0';
1156 input_size = count;
1157
1158 if (input_size == 0)
1159 goto prohibited; /* don't allow empty password */
1160
1161 /*
1162 * 2) Normalize -- Normalize the result of step 1 using Unicode
1163 * normalization.
1164 */
1165 output_chars = unicode_normalize_kc(input_chars);
1166 if (!output_chars)
1167 goto oom;
1168
1169 /*
1170 * 3) Prohibit -- Check for any characters that are not allowed in the
1171 * output. If any are found, return an error.
1172 */
1173 for (i = 0; i < input_size; i++)
1174 {
1175 pg_wchar code = input_chars[i];
1176
1177 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1178 goto prohibited;
1179 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1180 goto prohibited;
1181 }
1182
1183 /*
1184 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1185 * any are found, make sure that the whole string satisfies the
1186 * requirements for bidirectional strings. If the string does not satisfy
1187 * the requirements for bidirectional strings, return an error.
1188 *
1189 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1190 * detail what that means:
1191 *
1192 * "In any profile that specifies bidirectional character handling, all
1193 * three of the following requirements MUST be met:
1194 *
1195 * 1) The characters in section 5.8 MUST be prohibited.
1196 *
1197 * 2) If a string contains any RandALCat character, the string MUST NOT
1198 * contain any LCat character.
1199 *
1200 * 3) If a string contains any RandALCat character, a RandALCat character
1201 * MUST be the first character of the string, and a RandALCat character
1202 * MUST be the last character of the string."
1203 */
1204 contains_RandALCat = false;
1205 for (i = 0; i < input_size; i++)
1206 {
1207 pg_wchar code = input_chars[i];
1208
1209 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1210 {
1211 contains_RandALCat = true;
1212 break;
1213 }
1214 }
1215
1216 if (contains_RandALCat)
1217 {
1218 pg_wchar first = input_chars[0];
1219 pg_wchar last = input_chars[input_size - 1];
1220
1221 for (i = 0; i < input_size; i++)
1222 {
1223 pg_wchar code = input_chars[i];
1224
1225 if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1226 goto prohibited;
1227 }
1228
1229 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1230 !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1231 goto prohibited;
1232 }
1233
1234 /*
1235 * Finally, convert the result back to UTF-8.
1236 */
1237 result_size = 0;
1238 for (wp = output_chars; *wp; wp++)
1239 {
1240 unsigned char buf[4];
1241
1242 unicode_to_utf8(*wp, buf);
1243 result_size += pg_utf_mblen(buf);
1244 }
1245
1246 result = ALLOC(result_size + 1);
1247 if (!result)
1248 goto oom;
1249
1250 /*
1251 * There are no error exits below here, so the error exit paths don't need
1252 * to worry about possibly freeing "result".
1253 */
1254 p = (unsigned char *) result;
1255 for (wp = output_chars; *wp; wp++)
1256 {
1257 unicode_to_utf8(*wp, p);
1258 p += pg_utf_mblen(p);
1259 }
1260 Assert((char *) p == result + result_size);
1261 *p = '\0';
1262
1263 FREE(input_chars);
1264 FREE(output_chars);
1265
1266 *output = result;
1267 return SASLPREP_SUCCESS;
1268
1269prohibited:
1270 if (input_chars)
1271 FREE(input_chars);
1272 if (output_chars)
1273 FREE(output_chars);
1274
1275 return SASLPREP_PROHIBITED;
1276
1277oom:
1278 if (input_chars)
1279 FREE(input_chars);
1280 if (output_chars)
1281 FREE(output_chars);
1282
1283 return SASLPREP_OOM;
1284}
1285