1 | /* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB |
2 | Use is subject to license terms |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ |
16 | |
17 | /**************************************************************** |
18 | * SOUNDEX ALGORITHM in C * |
19 | * * |
20 | * The basic Algorithm source is taken from EDN Nov. * |
21 | * 14, 1985 pg. 36. * |
22 | * * |
23 | * As a test Those in Illinois will find that the * |
24 | * first group of numbers in their drivers license * |
25 | * number is the soundex number for their last name. * |
26 | * * |
27 | * RHW PC-IBBS ID. #1230 * |
28 | * * |
29 | * As an extension if remove_garbage is set then all non- * |
30 | * alpha characters are skipped * |
31 | * * |
32 | * Note, that this implementation corresponds to the * |
33 | * original version of the algorithm, not to the more * |
34 | * popular "enhanced" version, described by Knuth. * |
35 | ****************************************************************/ |
36 | |
37 | #include "mysys_priv.h" |
38 | #include <m_ctype.h> |
39 | #include "my_static.h" |
40 | |
41 | static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage); |
42 | |
43 | /* outputed string is 4 byte long */ |
44 | /* out_pntr can be == in_pntr */ |
45 | |
46 | void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr, |
47 | pbool remove_garbage) |
48 | { |
49 | char ch,last_ch; |
50 | reg3 char * end; |
51 | register const uchar *map=cs->to_upper; |
52 | |
53 | if (remove_garbage) |
54 | { |
55 | while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */ |
56 | in_pntr++; |
57 | } |
58 | *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */ |
59 | last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */ |
60 | /* for the first 'double-letter */ |
61 | /* check. */ |
62 | end=out_pntr+3; /* Loop on input letters until */ |
63 | /* end of input (null) or output */ |
64 | /* letter code count = 3 */ |
65 | |
66 | in_pntr++; |
67 | while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0) |
68 | { |
69 | in_pntr++; |
70 | if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */ |
71 | { |
72 | *out_pntr++ = ch; /* letter, copy to output */ |
73 | } /* for next double-letter check */ |
74 | last_ch = ch; /* save code of last input letter */ |
75 | } |
76 | while (out_pntr < end) |
77 | *out_pntr++ = '0'; |
78 | *out_pntr=0; /* end string */ |
79 | return; |
80 | } /* soundex */ |
81 | |
82 | |
83 | /* |
84 | If alpha, map input letter to soundex code. |
85 | If not alpha and remove_garbage is set then skip to next char |
86 | else return 0 |
87 | */ |
88 | |
89 | static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage) |
90 | { |
91 | uchar ch; |
92 | |
93 | if (remove_garbage) |
94 | { |
95 | while (**ptr && !my_isalpha(cs,**ptr)) |
96 | (*ptr)++; |
97 | } |
98 | ch=my_toupper(cs,**ptr); |
99 | if (ch < 'A' || ch > 'Z') |
100 | { |
101 | if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */ |
102 | return '0'; /* threat as vokal */ |
103 | return 0; /* Can't map */ |
104 | } |
105 | return(soundex_map[ch-'A']); |
106 | } /* get_scode */ |
107 | |