1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com> |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | This library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with this library; if not, write to the Free Software |
17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
18 | */ |
19 | |
20 | #include "mrn_field_normalizer.hpp" |
21 | #include "mrn_encoding.hpp" |
22 | |
23 | // for debug |
24 | #define MRN_CLASS_NAME "mrn::FieldNormalizer" |
25 | |
26 | namespace mrn { |
27 | FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field) |
28 | : ctx_(ctx), |
29 | thread_(thread), |
30 | field_(field) { |
31 | } |
32 | |
33 | FieldNormalizer::~FieldNormalizer() { |
34 | } |
35 | |
36 | bool FieldNormalizer::should_normalize() { |
37 | MRN_DBUG_ENTER_METHOD(); |
38 | |
39 | DBUG_PRINT("info" , |
40 | ("mroonga: result_type = %u" , field_->result_type())); |
41 | DBUG_PRINT("info" , |
42 | ("mroonga: charset->name = %s" , field_->charset()->name)); |
43 | DBUG_PRINT("info" , |
44 | ("mroonga: charset->csname = %s" , field_->charset()->csname)); |
45 | DBUG_PRINT("info" , |
46 | ("mroonga: charset->state = %u" , field_->charset()->state)); |
47 | bool need_normalize_p; |
48 | if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) { |
49 | need_normalize_p = false; |
50 | DBUG_PRINT("info" , |
51 | ("mroonga: should_normalize: false: sort is required" )); |
52 | } else { |
53 | if (is_text_type()) { |
54 | need_normalize_p = true; |
55 | DBUG_PRINT("info" , ("mroonga: should_normalize: true: text type" )); |
56 | } else { |
57 | need_normalize_p = false; |
58 | DBUG_PRINT("info" , ("mroonga: should_normalize: false: no text type" )); |
59 | } |
60 | } |
61 | |
62 | DBUG_RETURN(need_normalize_p); |
63 | } |
64 | |
65 | bool FieldNormalizer::is_text_type() { |
66 | MRN_DBUG_ENTER_METHOD(); |
67 | bool text_type_p; |
68 | switch (field_->type()) { |
69 | case MYSQL_TYPE_VARCHAR: |
70 | case MYSQL_TYPE_BLOB: |
71 | case MYSQL_TYPE_VAR_STRING: |
72 | text_type_p = true; |
73 | break; |
74 | case MYSQL_TYPE_STRING: |
75 | switch (field_->real_type()) { |
76 | case MYSQL_TYPE_ENUM: |
77 | case MYSQL_TYPE_SET: |
78 | text_type_p = false; |
79 | break; |
80 | default: |
81 | text_type_p = true; |
82 | break; |
83 | } |
84 | break; |
85 | default: |
86 | text_type_p = false; |
87 | break; |
88 | } |
89 | DBUG_RETURN(text_type_p); |
90 | } |
91 | |
92 | grn_obj *FieldNormalizer::normalize(const char *string, |
93 | unsigned int string_length) { |
94 | MRN_DBUG_ENTER_METHOD(); |
95 | grn_obj *normalizer = find_grn_normalizer(); |
96 | int flags = 0; |
97 | grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_); |
98 | encoding::set_raw(ctx_, field_->charset()); |
99 | grn_obj *grn_string = grn_string_open(ctx_, string, string_length, |
100 | normalizer, flags); |
101 | GRN_CTX_SET_ENCODING(ctx_, original_encoding); |
102 | DBUG_RETURN(grn_string); |
103 | } |
104 | |
105 | grn_obj *FieldNormalizer::find_grn_normalizer() { |
106 | MRN_DBUG_ENTER_METHOD(); |
107 | |
108 | const CHARSET_INFO *charset_info = field_->charset(); |
109 | const char *normalizer_name = NULL; |
110 | const char *default_normalizer_name = "NormalizerAuto" ; |
111 | if ((strcmp(charset_info->name, "utf8_general_ci" ) == 0) || |
112 | (strcmp(charset_info->name, "utf8mb4_general_ci" ) == 0)) { |
113 | normalizer_name = "NormalizerMySQLGeneralCI" ; |
114 | } else if ((strcmp(charset_info->name, "utf8_unicode_ci" ) == 0) || |
115 | (strcmp(charset_info->name, "utf8mb4_unicode_ci" ) == 0)) { |
116 | normalizer_name = "NormalizerMySQLUnicodeCI" ; |
117 | } else if ((strcmp(charset_info->name, "utf8_unicode_520_ci" ) == 0) || |
118 | (strcmp(charset_info->name, "utf8mb4_unicode_520_ci" ) == 0)) { |
119 | normalizer_name = "NormalizerMySQLUnicode520CI" ; |
120 | } |
121 | |
122 | grn_obj *normalizer = NULL; |
123 | if (normalizer_name) { |
124 | normalizer = grn_ctx_get(ctx_, normalizer_name, -1); |
125 | if (!normalizer) { |
126 | char error_message[MRN_MESSAGE_BUFFER_SIZE]; |
127 | snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE, |
128 | "%s normalizer isn't found for %s. " |
129 | "Install groonga-normalizer-mysql normalizer. " |
130 | "%s is used as fallback." , |
131 | normalizer_name, |
132 | charset_info->name, |
133 | default_normalizer_name); |
134 | push_warning(thread_, MRN_SEVERITY_WARNING, |
135 | HA_ERR_UNSUPPORTED, error_message); |
136 | } |
137 | } |
138 | |
139 | if (!normalizer) { |
140 | normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1); |
141 | } |
142 | |
143 | DBUG_RETURN(normalizer); |
144 | } |
145 | } |
146 | |