1/* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB
2 Use is subject to license terms
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
16
17/****************************************************************
18* SOUNDEX ALGORITHM in C *
19* *
20* The basic Algorithm source is taken from EDN Nov. *
21* 14, 1985 pg. 36. *
22* *
*	As a test, those in Illinois will find that the	*
*	first group of numbers in their driver's license	*
*	number is the soundex number for their last name.	*
26* *
27* RHW PC-IBBS ID. #1230 *
28* *
29* As an extension if remove_garbage is set then all non- *
30* alpha characters are skipped *
31* *
32* Note, that this implementation corresponds to the *
33* original version of the algorithm, not to the more *
34* popular "enhanced" version, described by Knuth. *
35****************************************************************/
36
37#include "mysys_priv.h"
38#include <m_ctype.h>
39#include "my_static.h"
40
/* Forward declaration: get_scode() is defined below soundex(), which calls it */
static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage);
42
43 /* outputed string is 4 byte long */
44 /* out_pntr can be == in_pntr */
45
46void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr,
47 pbool remove_garbage)
48{
49 char ch,last_ch;
50 reg3 char * end;
51 register const uchar *map=cs->to_upper;
52
53 if (remove_garbage)
54 {
55 while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */
56 in_pntr++;
57 }
58 *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */
59 last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */
60 /* for the first 'double-letter */
61 /* check. */
62 end=out_pntr+3; /* Loop on input letters until */
63 /* end of input (null) or output */
64 /* letter code count = 3 */
65
66 in_pntr++;
67 while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0)
68 {
69 in_pntr++;
70 if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */
71 {
72 *out_pntr++ = ch; /* letter, copy to output */
73 } /* for next double-letter check */
74 last_ch = ch; /* save code of last input letter */
75 }
76 while (out_pntr < end)
77 *out_pntr++ = '0';
78 *out_pntr=0; /* end string */
79 return;
80} /* soundex */
81
82
83 /*
84 If alpha, map input letter to soundex code.
85 If not alpha and remove_garbage is set then skip to next char
86 else return 0
87 */
88
89static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage)
90{
91 uchar ch;
92
93 if (remove_garbage)
94 {
95 while (**ptr && !my_isalpha(cs,**ptr))
96 (*ptr)++;
97 }
98 ch=my_toupper(cs,**ptr);
99 if (ch < 'A' || ch > 'Z')
100 {
101 if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */
102 return '0'; /* threat as vokal */
103 return 0; /* Can't map */
104 }
105 return(soundex_map[ch-'A']);
106} /* get_scode */
107