1/*-------------------------------------------------------------------------
2 *
3 * pgstrcasecmp.c
4 * Portable SQL-like case-independent comparisons and conversions.
5 *
6 * SQL99 specifies Unicode-aware case normalization, which we don't yet
7 * have the infrastructure for. Instead we use tolower() to provide a
8 * locale-aware translation. However, there are some locales where this
9 * is not right either (eg, Turkish may do strange things with 'i' and
10 * 'I'). Our current compromise is to use tolower() for characters with
11 * the high bit set, and use an ASCII-only downcasing for 7-bit
12 * characters.
13 *
14 * NB: this code should match downcase_truncate_identifier() in scansup.c.
15 *
16 * We also provide strict ASCII-only case conversion functions, which can
17 * be used to implement C/POSIX case folding semantics no matter what the
18 * C library thinks the locale is.
19 *
20 *
21 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
22 *
23 * src/port/pgstrcasecmp.c
24 *
25 *-------------------------------------------------------------------------
26 */
27#include "c.h"
28
29#include <ctype.h>
30
31
/*
 * pg_strcasecmp
 *
 * Compare two NUL-terminated strings byte by byte, ignoring case.
 *
 * Bytes are case-folded only when they differ: 'A'..'Z' are folded with
 * plain ASCII arithmetic, while bytes with the high bit set are folded
 * through the C library's isupper()/tolower().
 *
 * Returns 0 when the strings match up to and including the terminator,
 * otherwise the difference between the first pair of folded bytes that
 * disagree (each taken as an unsigned char value).
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		unsigned char lhs = (unsigned char) *s1++;
		unsigned char rhs = (unsigned char) *s2++;

		/* Fast path: identical bytes need no folding at all. */
		if (lhs == rhs)
		{
			if (lhs == 0)
				return 0;
			continue;
		}

		/* Fold the left-hand byte to lower case. */
		if (lhs >= 'A' && lhs <= 'Z')
			lhs = (unsigned char) (lhs + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
			lhs = (unsigned char) tolower(lhs);

		/* Fold the right-hand byte the same way. */
		if (rhs >= 'A' && rhs <= 'Z')
			rhs = (unsigned char) (rhs + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
			rhs = (unsigned char) tolower(rhs);

		if (lhs != rhs)
			return (int) lhs - (int) rhs;

		/* Bytes agree after folding; stop if that agreed byte is NUL. */
		if (lhs == 0)
			return 0;
	}
}
63
/*
 * pg_strncasecmp
 *
 * Length-limited variant of the case-independent comparison: the inputs
 * need not be NUL-terminated, and no more than n bytes are read from
 * either of them.
 *
 * Bytes are case-folded only when they differ: 'A'..'Z' are folded with
 * plain ASCII arithmetic, while bytes with the high bit set are folded
 * through the C library's isupper()/tolower().
 *
 * Returns 0 when the first n bytes match (or both strings end sooner),
 * otherwise the difference between the first pair of folded bytes that
 * disagree (each taken as an unsigned char value).
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	size_t		examined;

	for (examined = 0; examined < n; examined++)
	{
		unsigned char lhs = (unsigned char) *s1++;
		unsigned char rhs = (unsigned char) *s2++;

		/* Fast path: identical bytes need no folding at all. */
		if (lhs == rhs)
		{
			if (lhs == 0)
				return 0;
			continue;
		}

		/* Fold the left-hand byte to lower case. */
		if (lhs >= 'A' && lhs <= 'Z')
			lhs = (unsigned char) (lhs + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
			lhs = (unsigned char) tolower(lhs);

		/* Fold the right-hand byte the same way. */
		if (rhs >= 'A' && rhs <= 'Z')
			rhs = (unsigned char) (rhs + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
			rhs = (unsigned char) tolower(rhs);

		if (lhs != rhs)
			return (int) lhs - (int) rhs;

		/* Bytes agree after folding; stop if that agreed byte is NUL. */
		if (lhs == 0)
			return 0;
	}

	/* Ran out of bytes to examine without finding a difference. */
	return 0;
}
96
/*
 * pg_toupper
 *
 * Return the upper-case form of a single byte.
 *
 * 'a'..'z' are converted with ASCII arithmetic; a byte with the high bit
 * set is handed to the C library's toupper(), but only after islower()
 * confirms it is a lower-case letter.  Every other byte is returned
 * unchanged, so the function can be applied to arbitrary input, unlike
 * some toupper() implementations.  This byte-at-a-time approach is not
 * really meaningful for multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	/* 7-bit letters: locale-independent conversion. */
	if (ch >= 'a' && ch <= 'z')
		return (unsigned char) (ch + ('A' - 'a'));

	/* High-bit bytes: defer to the current locale. */
	if (IS_HIGHBIT_SET(ch) && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
113
/*
 * pg_tolower
 *
 * Return the lower-case form of a single byte.
 *
 * 'A'..'Z' are converted with ASCII arithmetic; a byte with the high bit
 * set is handed to the C library's tolower(), but only after isupper()
 * confirms it is an upper-case letter.  Every other byte is returned
 * unchanged, so the function can be applied to arbitrary input, unlike
 * some tolower() implementations.  This byte-at-a-time approach is not
 * really meaningful for multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	/* 7-bit letters: locale-independent conversion. */
	if (ch >= 'A' && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	/* High-bit bytes: defer to the current locale. */
	if (IS_HIGHBIT_SET(ch) && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
130
/*
 * pg_ascii_toupper
 *
 * Upper-case a single byte using C/POSIX locale rules: only 'a'..'z' are
 * converted, and every other byte value is returned unchanged.  The C
 * library's locale is never consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	const int	is_ascii_lower = (ch >= 'a' && ch <= 'z');

	return is_ascii_lower ? (unsigned char) (ch + ('A' - 'a')) : ch;
}
141
/*
 * pg_ascii_tolower
 *
 * Lower-case a single byte using C/POSIX locale rules: only 'A'..'Z' are
 * converted, and every other byte value is returned unchanged.  The C
 * library's locale is never consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	const int	is_ascii_upper = (ch >= 'A' && ch <= 'Z');

	return is_ascii_upper ? (unsigned char) (ch + ('a' - 'A')) : ch;
}
152