1/* Copyright (c) 2000, 2003-2007 MySQL AB, 2009 Sun Microsystems, Inc.
2 Copyright (c) 2009-2011, Monty Program Ab
3 Use is subject to license terms.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
17
18#include "strings_def.h"
19#include <m_ctype.h>
20
21static const uchar ctype_latin1[] = {
22 0,
23 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
24 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
25 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
26 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
27 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
28 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
29 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
31 16, 0, 16, 2, 16, 16, 16, 16, 16, 16, 1, 16, 1, 0, 1, 0,
32 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 2, 0, 2, 1,
33 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
34 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 2,
37 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
38 2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
39};
40
41static const uchar to_lower_latin1[] = {
42 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
43 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
44 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
45 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
46 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
47 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
48 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
49 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
50 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
51 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
52 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
53 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
54 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
55 240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
56 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
57 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
58};
59
60static const uchar to_upper_latin1[] = {
61 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
62 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
63 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
64 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
65 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
66 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
67 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
68 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
69 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
70 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
71 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
72 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
73 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
74 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
75 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
76 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
77};
78
79static const uchar sort_order_latin1[] = {
80 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
81 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
82 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
83 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
84 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
85 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
86 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
87 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
88 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
89 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
90 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
91 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
92 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
93 68, 78, 79, 79, 79, 79, 93,215,216, 85, 85, 85, 89, 89,222,223,
94 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
95 68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255
96};
97
98/*
99 WL#1494 notes:
100
101 We'll use cp1252 instead of iso-8859-1.
102 cp1252 contains printable characters in the range 0x80-0x9F.
103 In ISO 8859-1, these code points have no associated printable
104 characters. Therefore, by converting from CP1252 to ISO 8859-1,
105 one would lose the euro (for instance). Since most people are
106 unaware of the difference, and since we don't really want a
107 "Windows ANSI" to differ from a "Unix ANSI", we will:
108
109 - continue to pretend the latin1 character set is ISO 8859-1
110 - actually allow the storage of euro etc. so it's actually cp1252
111
  Also we'll map these five undefined cp1252 characters:
     0x81, 0x8D, 0x8F, 0x90, 0x9D
  to the corresponding control characters:
     U+0081, U+008D, U+008F, U+0090, U+009D,
  as ISO-8859-1 does. Otherwise, loading "mysqldump"
  output wouldn't reproduce these undefined characters.
118*/
119
/*
  latin1 (cp1252) byte -> Unicode code point, indexed by byte value.

  Bytes 0x00..0x7F and 0xA0..0xFF map to the code point with the same
  numeric value.  Bytes 0x80..0x9F carry the cp1252 assignments, with
  0x81, 0x8D, 0x8F, 0x90 and 0x9D mapped to U+0081, U+008D, U+008F,
  U+0090 and U+009D.
*/
static const unsigned short cs_to_uni[256]={
  /* 0x00 */ 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
  /* 0x08 */ 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
  /* 0x10 */ 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
  /* 0x18 */ 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
  /* 0x20 */ 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
  /* 0x28 */ 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
  /* 0x30 */ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
  /* 0x38 */ 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
  /* 0x40 */ 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
  /* 0x48 */ 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
  /* 0x50 */ 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
  /* 0x58 */ 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
  /* 0x60 */ 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
  /* 0x68 */ 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
  /* 0x70 */ 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
  /* 0x78 */ 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
  /* 0x80 */ 0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,
  /* 0x88 */ 0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,
  /* 0x90 */ 0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,
  /* 0x98 */ 0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178,
  /* 0xA0 */ 0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
  /* 0xA8 */ 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
  /* 0xB0 */ 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
  /* 0xB8 */ 0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
  /* 0xC0 */ 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
  /* 0xC8 */ 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
  /* 0xD0 */ 0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
  /* 0xD8 */ 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
  /* 0xE0 */ 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
  /* 0xE8 */ 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
  /* 0xF0 */ 0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
  /* 0xF8 */ 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
};
154static const uchar pl00[256]={
1550x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1560x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1570x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1580x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1590x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1600x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1610x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1620x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1630x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
1640x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
1650x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
1660x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
1670x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1680x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1690x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1700x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1710x00,0x81,0x00,0x00,0x00,0x00,0x00,0x00,
1720x00,0x00,0x00,0x00,0x00,0x8D,0x00,0x8F,
1730x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1740x00,0x00,0x00,0x00,0x00,0x9D,0x00,0x00,
1750xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1760xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1770xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1780xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1790xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1800xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1810xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1820xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1830xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1840xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1850xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1860xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
187};
188static const uchar pl01[256]={
1890x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1900x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1910x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1920x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1930x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1940x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1950x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1960x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1970x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1980x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
1990x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00,
2000x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2010x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00,
2020x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2030x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2040x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00,
2050x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2060x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2070x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00,
2080x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2090x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2100x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2110x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2120x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2130x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2140x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2150x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2160x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2170x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2180x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2190x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2200x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
221};
222static const uchar pl02[256]={
2230x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2240x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2250x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2260x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2270x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2280x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2290x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2300x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2310x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2320x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2330x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2340x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2350x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2360x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2370x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2380x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2390x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2400x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2410x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2420x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2430x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2440x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2450x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2460x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2470x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00,
2480x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2490x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2500x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00,
2510x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2520x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2530x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2540x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
255};
256static const uchar pl20[256]={
2570x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2580x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2590x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00,
2600x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00,
2610x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00,
2620x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2630x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2640x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00,
2650x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2660x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2670x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2680x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2690x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2700x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2710x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2720x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2730x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2740x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2750x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2760x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2770x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2780x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
2790x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2800x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2810x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2820x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2830x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2840x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2850x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2860x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2870x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2880x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
289};
290static const uchar pl21[256]={
2910x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2920x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2930x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2940x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2950x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00,
2960x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2970x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2980x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2990x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3000x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3010x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3020x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3030x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3040x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3050x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3060x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3070x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3080x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3090x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3100x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3110x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3120x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3130x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3140x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3150x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3160x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3170x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3180x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3190x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3200x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3210x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
3220x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
323};
324static const uchar *const uni_to_cs[256]={
325pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL,
326NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
327NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
328NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
329pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL,
330NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
331NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
332NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
333NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
334NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
335NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
336NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
337NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
338NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
339NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
340NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
341NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
342NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
343NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
344NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
345NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
346NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
347NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
348NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
349NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
350NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
351NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
352NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
353NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
354NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
355NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
356NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
357};
358
359static
360int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)),
361 my_wc_t *wc,
362 const uchar *str,
363 const uchar *end __attribute__((unused)))
364{
365 if (str >= end)
366 return MY_CS_TOOSMALL;
367 /*
368 There are no unassigned characters in latin1.
369 Every code point in latin1 is mapped to some Unicode code point.
370 We can always return 1, no needs to check the value of cs_to_uni[*str].
371 */
372 *wc= cs_to_uni[*str];
373 DBUG_ASSERT(wc[0] || !str[0]);
374 return 1;
375}
376
377static
378int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
379 my_wc_t wc,
380 uchar *str,
381 uchar *end __attribute__((unused)))
382{
383 const uchar *pl;
384
385 if (str >= end)
386 return MY_CS_TOOSMALL;
387
388 if (wc > 0xFFFF)
389 return MY_CS_ILUNI;
390
391 pl= uni_to_cs[wc >> 8];
392 str[0]= pl ? pl[wc & 0xFF] : '\0';
393 return (!str[0] && wc) ? MY_CS_ILUNI : 1;
394}
395
396static MY_CHARSET_HANDLER my_charset_handler=
397{
398 NULL, /* init */
399 my_numchars_8bit,
400 my_charpos_8bit,
401 my_lengthsp_8bit,
402 my_numcells_8bit,
403 my_mb_wc_latin1,
404 my_wc_mb_latin1,
405 my_mb_ctype_8bit,
406 my_caseup_str_8bit,
407 my_casedn_str_8bit,
408 my_caseup_8bit,
409 my_casedn_8bit,
410 my_snprintf_8bit,
411 my_long10_to_str_8bit,
412 my_longlong10_to_str_8bit,
413 my_fill_8bit,
414 my_strntol_8bit,
415 my_strntoul_8bit,
416 my_strntoll_8bit,
417 my_strntoull_8bit,
418 my_strntod_8bit,
419 my_strtoll10_8bit,
420 my_strntoull10rnd_8bit,
421 my_scan_8bit,
422 my_charlen_8bit,
423 my_well_formed_char_length_8bit,
424 my_copy_8bit,
425 my_wc_mb_bin, /* native_to_mb */
426};
427
428
429struct charset_info_st my_charset_latin1=
430{
431 8,0,0, /* number */
432 MY_CS_COMPILED | MY_CS_PRIMARY, /* state */
433 "latin1", /* cs name */
434 "latin1_swedish_ci", /* name */
435 "", /* comment */
436 NULL, /* tailoring */
437 ctype_latin1,
438 to_lower_latin1,
439 to_upper_latin1,
440 sort_order_latin1,
441 NULL, /* uca */
442 cs_to_uni, /* tab_to_uni */
443 NULL, /* tab_from_uni */
444 &my_unicase_default,/* caseinfo */
445 NULL, /* state_map */
446 NULL, /* ident_map */
447 1, /* strxfrm_multiply */
448 1, /* caseup_multiply */
449 1, /* casedn_multiply */
450 1, /* mbminlen */
451 1, /* mbmaxlen */
452 0, /* min_sort_char */
453 255, /* max_sort_char */
454 ' ', /* pad char */
455 0, /* escape_with_backslash_is_dangerous */
456 1, /* levels_for_order */
457 &my_charset_handler,
458 &my_collation_8bit_simple_ci_handler
459};
460
461
462struct charset_info_st my_charset_latin1_nopad=
463{
464 MY_NOPAD_ID(8),0,0, /* number */
465 MY_CS_COMPILED | MY_CS_NOPAD, /* state */
466 "latin1", /* cs name */
467 "latin1_swedish_nopad_ci", /* name */
468 "", /* comment */
469 NULL, /* tailoring */
470 ctype_latin1,
471 to_lower_latin1,
472 to_upper_latin1,
473 sort_order_latin1,
474 NULL, /* uca */
475 cs_to_uni, /* tab_to_uni */
476 NULL, /* tab_from_uni */
477 &my_unicase_default, /* caseinfo */
478 NULL, /* state_map */
479 NULL, /* ident_map */
480 1, /* strxfrm_multiply */
481 1, /* caseup_multiply */
482 1, /* casedn_multiply */
483 1, /* mbminlen */
484 1, /* mbmaxlen */
485 0, /* min_sort_char */
486 255, /* max_sort_char */
487 ' ', /* pad char */
488 0, /* escape_with_backslash_is_dangerous */
489 1, /* levels_for_order */
490 &my_charset_handler,
491 &my_collation_8bit_simple_nopad_ci_handler
492};
493
494
495
496
497/*
498 * This file is the latin1 character set with German sorting
499 *
500 * The modern sort order is used, where:
501 *
502 * 'ä' -> "ae"
503 * 'ö' -> "oe"
504 * 'ü' -> "ue"
505 * 'ß' -> "ss"
506 */
507
508
509/*
510 * This is a simple latin1 mapping table, which maps all accented
511 * characters to their non-accented equivalents. Note: in this
512 * table, 'ä' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
513 * accented characters except the following are treated the same way.
514 * Ü, ü, Ö, ö, Ä, ä
515 */
516
517static const uchar sort_order_latin1_de[] = {
518 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
519 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
520 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
521 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
522 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
523 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
524 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
525 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
526 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
527 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
528 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
529 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
530 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
531 68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
532 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
533 68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
534};
535
536
/*
  Same as sort_order_latin1_de, but maps ALL accented characters to
  unaccented ones (the umlauts and sharp s included).
*/
540
541static const uchar combo1map[]={
542 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
543 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
544 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
545 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
546 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
547 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
548 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
549 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
550 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
551 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
552 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
553 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
554 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
555 68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
556 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
557 68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
558};
559
560static const uchar combo2map[]={
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
569 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
571};
572
573
/*
  Some notes about the following comparison rules:
  By definition, my_strnncoll_latin1_de must work exactly as if we had
  called my_strnxfrm_latin1_de() on both strings and compared the
  resulting strings.

  This means that:
  Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will
  convert both to AE.

  The other option would be to not do any accent removal in
  sort_order_latin1_de[] at all.
*/
586
587
588static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
589 const uchar *a, size_t a_length,
590 const uchar *b, size_t b_length,
591 my_bool b_is_prefix)
592{
593 const uchar *a_end= a + a_length;
594 const uchar *b_end= b + b_length;
595 uchar a_char, a_extend= 0, b_char, b_extend= 0;
596
597 while ((a < a_end || a_extend) && (b < b_end || b_extend))
598 {
599 if (a_extend)
600 {
601 a_char=a_extend; a_extend=0;
602 }
603 else
604 {
605 a_extend=combo2map[*a];
606 a_char=combo1map[*a++];
607 }
608 if (b_extend)
609 {
610 b_char=b_extend; b_extend=0;
611 }
612 else
613 {
614 b_extend=combo2map[*b];
615 b_char=combo1map[*b++];
616 }
617 if (a_char != b_char)
618 return (int) a_char - (int) b_char;
619 }
620 /*
621 A simple test of string lengths won't work -- we test to see
622 which string ran out first
623 */
624 return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
625 (b < b_end || b_extend) ? -1 : 0);
626}
627
628
629static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
630 const uchar *a, size_t a_length,
631 const uchar *b, size_t b_length)
632{
633 const uchar *a_end= a + a_length, *b_end= b + b_length;
634 uchar a_char, a_extend= 0, b_char, b_extend= 0;
635
636 while ((a < a_end || a_extend) && (b < b_end || b_extend))
637 {
638 if (a_extend)
639 {
640 a_char=a_extend;
641 a_extend= 0;
642 }
643 else
644 {
645 a_extend= combo2map[*a];
646 a_char= combo1map[*a++];
647 }
648 if (b_extend)
649 {
650 b_char= b_extend;
651 b_extend= 0;
652 }
653 else
654 {
655 b_extend= combo2map[*b];
656 b_char= combo1map[*b++];
657 }
658 if (a_char != b_char)
659 return (int) a_char - (int) b_char;
660 }
661 /* Check if double character last */
662 if (a_extend)
663 return 1;
664 if (b_extend)
665 return -1;
666
667 if (a < a_end)
668 return my_strnncollsp_padspace_bin(a, a_end - a);
669 if (b < b_end)
670 return -my_strnncollsp_padspace_bin(b, b_end - b);
671 return 0;
672}
673
674
675static size_t
676my_strnxfrm_latin1_de(CHARSET_INFO *cs,
677 uchar *dst, size_t dstlen, uint nweights,
678 const uchar* src, size_t srclen, uint flags)
679{
680 uchar *de= dst + dstlen;
681 const uchar *se= src + srclen;
682 uchar *d0= dst;
683 for ( ; src < se && dst < de && nweights; src++, nweights--)
684 {
685 uchar chr= combo1map[*src];
686 *dst++= chr;
687 if ((chr= combo2map[*src]) && dst < de && nweights > 1)
688 {
689 *dst++= chr;
690 nweights--;
691 }
692 }
693 return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
694}
695
696
697void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
698 const uchar *key, size_t len,
699 ulong *nr1, ulong *nr2)
700{
701 const uchar *end;
702 register ulong m1= *nr1, m2= *nr2;
703
704 /*
705 Remove end space. We have to do this to be able to compare
706 'AE' and 'Ä' as identical
707 */
708 end= skip_trailing_space(key, len);
709
710 for (; key < end ; key++)
711 {
712 uint X= (uint) combo1map[(uint) *key];
713 MY_HASH_ADD(m1, m2, X);
714 if ((X= combo2map[*key]))
715 {
716 MY_HASH_ADD(m1, m2, X);
717 }
718 }
719 *nr1= m1;
720 *nr2= m2;
721}
722
723
724static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
725{
726 NULL, /* init */
727 my_strnncoll_latin1_de,
728 my_strnncollsp_latin1_de,
729 my_strnxfrm_latin1_de,
730 my_strnxfrmlen_simple,
731 my_like_range_simple,
732 my_wildcmp_8bit,
733 my_strcasecmp_8bit,
734 my_instr_simple,
735 my_hash_sort_latin1_de,
736 my_propagate_complex
737};
738
739
740struct charset_info_st my_charset_latin1_german2_ci=
741{
742 31,0,0, /* number */
743 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_NON1TO1, /* state */
744 "latin1", /* cs name */
745 "latin1_german2_ci", /* name */
746 "", /* comment */
747 NULL, /* tailoring */
748 ctype_latin1,
749 to_lower_latin1,
750 to_upper_latin1,
751 sort_order_latin1_de,
752 NULL, /* uca */
753 cs_to_uni, /* tab_to_uni */
754 NULL, /* tab_from_uni */
755 &my_unicase_default, /* caseinfo */
756 NULL, /* state_map */
757 NULL, /* ident_map */
758 2, /* strxfrm_multiply */
759 1, /* caseup_multiply */
760 1, /* casedn_multiply */
761 1, /* mbminlen */
762 1, /* mbmaxlen */
763 0, /* min_sort_char */
764 247, /* max_sort_char */
765 ' ', /* pad char */
766 0, /* escape_with_backslash_is_dangerous */
767 1, /* levels_for_order */
768 &my_charset_handler,
769 &my_collation_german2_ci_handler
770};
771
772
773struct charset_info_st my_charset_latin1_bin=
774{
775 47,0,0, /* number */
776 MY_CS_COMPILED|MY_CS_BINSORT, /* state */
777 "latin1", /* cs name */
778 "latin1_bin", /* name */
779 "", /* comment */
780 NULL, /* tailoring */
781 ctype_latin1,
782 to_lower_latin1,
783 to_upper_latin1,
784 NULL, /* sort_order */
785 NULL, /* uca */
786 cs_to_uni, /* tab_to_uni */
787 NULL, /* tab_from_uni */
788 &my_unicase_default, /* caseinfo */
789 NULL, /* state_map */
790 NULL, /* ident_map */
791 1, /* strxfrm_multiply */
792 1, /* caseup_multiply */
793 1, /* casedn_multiply */
794 1, /* mbminlen */
795 1, /* mbmaxlen */
796 0, /* min_sort_char */
797 255, /* max_sort_char */
798 ' ', /* pad char */
799 0, /* escape_with_backslash_is_dangerous */
800 1, /* levels_for_order */
801 &my_charset_handler,
802 &my_collation_8bit_bin_handler
803};
804
805
806struct charset_info_st my_charset_latin1_nopad_bin=
807{
808 MY_NOPAD_ID(47),0,0, /* number */
809 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NOPAD,/* state */
810 "latin1", /* cs name */
811 "latin1_nopad_bin", /* name */
812 "", /* comment */
813 NULL, /* tailoring */
814 ctype_latin1,
815 to_lower_latin1,
816 to_upper_latin1,
817 NULL, /* sort_order */
818 NULL, /* uca */
819 cs_to_uni, /* tab_to_uni */
820 NULL, /* tab_from_uni */
821 &my_unicase_default, /* caseinfo */
822 NULL, /* state_map */
823 NULL, /* ident_map */
824 1, /* strxfrm_multiply */
825 1, /* caseup_multiply */
826 1, /* casedn_multiply */
827 1, /* mbminlen */
828 1, /* mbmaxlen */
829 0, /* min_sort_char */
830 255, /* max_sort_char */
831 ' ', /* pad char */
832 0, /* escape_with_backslash_is_dangerous */
833 1, /* levels_for_order */
834 &my_charset_handler,
835 &my_collation_8bit_nopad_bin_handler
836};
837
838