| 1 | /* Copyright (c) 2000, 2002, 2004, 2007 MySQL AB |
| 2 | Use is subject to license terms |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ |
| 16 | |
| 17 | /**************************************************************** |
| 18 | * SOUNDEX ALGORITHM in C * |
| 19 | * * |
| 20 | * The basic Algorithm source is taken from EDN Nov. * |
| 21 | * 14, 1985 pg. 36. * |
| 22 | * * |
| 23 | * As a test Those in Illinois will find that the * |
| 24 | * first group of numbers in their drivers license * |
| 25 | * number is the soundex number for their last name. * |
| 26 | * * |
| 27 | * RHW PC-IBBS ID. #1230 * |
| 28 | * * |
| 29 | * As an extension if remove_garbage is set then all non- * |
| 30 | * alpha characters are skipped * |
| 31 | * * |
| 32 | * Note, that this implementation corresponds to the * |
| 33 | * original version of the algorithm, not to the more * |
| 34 | * popular "enhanced" version, described by Knuth. * |
| 35 | ****************************************************************/ |
| 36 | |
| 37 | #include "mysys_priv.h" |
| 38 | #include <m_ctype.h> |
| 39 | #include "my_static.h" |
| 40 | |
| 41 | static char get_scode(CHARSET_INFO * cs, char **ptr,pbool remove_garbage); |
| 42 | |
| 43 | /* outputed string is 4 byte long */ |
| 44 | /* out_pntr can be == in_pntr */ |
| 45 | |
| 46 | void soundex(CHARSET_INFO * cs,register char * out_pntr, char * in_pntr, |
| 47 | pbool remove_garbage) |
| 48 | { |
| 49 | char ch,last_ch; |
| 50 | reg3 char * end; |
| 51 | register const uchar *map=cs->to_upper; |
| 52 | |
| 53 | if (remove_garbage) |
| 54 | { |
| 55 | while (*in_pntr && !my_isalpha(cs,*in_pntr)) /* Skip pre-space */ |
| 56 | in_pntr++; |
| 57 | } |
| 58 | *out_pntr++ = map[(uchar)*in_pntr]; /* Copy first letter */ |
| 59 | last_ch = get_scode(cs,&in_pntr,0); /* code of the first letter */ |
| 60 | /* for the first 'double-letter */ |
| 61 | /* check. */ |
| 62 | end=out_pntr+3; /* Loop on input letters until */ |
| 63 | /* end of input (null) or output */ |
| 64 | /* letter code count = 3 */ |
| 65 | |
| 66 | in_pntr++; |
| 67 | while (out_pntr < end && (ch = get_scode(cs,&in_pntr,remove_garbage)) != 0) |
| 68 | { |
| 69 | in_pntr++; |
| 70 | if ((ch != '0') && (ch != last_ch)) /* if not skipped or double */ |
| 71 | { |
| 72 | *out_pntr++ = ch; /* letter, copy to output */ |
| 73 | } /* for next double-letter check */ |
| 74 | last_ch = ch; /* save code of last input letter */ |
| 75 | } |
| 76 | while (out_pntr < end) |
| 77 | *out_pntr++ = '0'; |
| 78 | *out_pntr=0; /* end string */ |
| 79 | return; |
| 80 | } /* soundex */ |
| 81 | |
| 82 | |
| 83 | /* |
| 84 | If alpha, map input letter to soundex code. |
| 85 | If not alpha and remove_garbage is set then skip to next char |
| 86 | else return 0 |
| 87 | */ |
| 88 | |
| 89 | static char get_scode(CHARSET_INFO * cs,char **ptr, pbool remove_garbage) |
| 90 | { |
| 91 | uchar ch; |
| 92 | |
| 93 | if (remove_garbage) |
| 94 | { |
| 95 | while (**ptr && !my_isalpha(cs,**ptr)) |
| 96 | (*ptr)++; |
| 97 | } |
| 98 | ch=my_toupper(cs,**ptr); |
| 99 | if (ch < 'A' || ch > 'Z') |
| 100 | { |
| 101 | if (my_isalpha(cs,ch)) /* If extended alfa (country spec) */ |
| 102 | return '0'; /* threat as vokal */ |
| 103 | return 0; /* Can't map */ |
| 104 | } |
| 105 | return(soundex_map[ch-'A']); |
| 106 | } /* get_scode */ |
| 107 | |