| 1 | /* -*- c-basic-offset: 2 -*- */ |
| 2 | /* |
| 3 | Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com> |
| 4 | |
| 5 | This library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | This library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with this library; if not, write to the Free Software |
| 17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 18 | */ |
| 19 | |
| 20 | #include "mrn_field_normalizer.hpp" |
| 21 | #include "mrn_encoding.hpp" |
| 22 | |
| 23 | // for debug |
| 24 | #define MRN_CLASS_NAME "mrn::FieldNormalizer" |
| 25 | |
| 26 | namespace mrn { |
| 27 | FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field) |
| 28 | : ctx_(ctx), |
| 29 | thread_(thread), |
| 30 | field_(field) { |
| 31 | } |
| 32 | |
| 33 | FieldNormalizer::~FieldNormalizer() { |
| 34 | } |
| 35 | |
| 36 | bool FieldNormalizer::should_normalize() { |
| 37 | MRN_DBUG_ENTER_METHOD(); |
| 38 | |
| 39 | DBUG_PRINT("info" , |
| 40 | ("mroonga: result_type = %u" , field_->result_type())); |
| 41 | DBUG_PRINT("info" , |
| 42 | ("mroonga: charset->name = %s" , field_->charset()->name)); |
| 43 | DBUG_PRINT("info" , |
| 44 | ("mroonga: charset->csname = %s" , field_->charset()->csname)); |
| 45 | DBUG_PRINT("info" , |
| 46 | ("mroonga: charset->state = %u" , field_->charset()->state)); |
| 47 | bool need_normalize_p; |
| 48 | if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) { |
| 49 | need_normalize_p = false; |
| 50 | DBUG_PRINT("info" , |
| 51 | ("mroonga: should_normalize: false: sort is required" )); |
| 52 | } else { |
| 53 | if (is_text_type()) { |
| 54 | need_normalize_p = true; |
| 55 | DBUG_PRINT("info" , ("mroonga: should_normalize: true: text type" )); |
| 56 | } else { |
| 57 | need_normalize_p = false; |
| 58 | DBUG_PRINT("info" , ("mroonga: should_normalize: false: no text type" )); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | DBUG_RETURN(need_normalize_p); |
| 63 | } |
| 64 | |
| 65 | bool FieldNormalizer::is_text_type() { |
| 66 | MRN_DBUG_ENTER_METHOD(); |
| 67 | bool text_type_p; |
| 68 | switch (field_->type()) { |
| 69 | case MYSQL_TYPE_VARCHAR: |
| 70 | case MYSQL_TYPE_BLOB: |
| 71 | case MYSQL_TYPE_VAR_STRING: |
| 72 | text_type_p = true; |
| 73 | break; |
| 74 | case MYSQL_TYPE_STRING: |
| 75 | switch (field_->real_type()) { |
| 76 | case MYSQL_TYPE_ENUM: |
| 77 | case MYSQL_TYPE_SET: |
| 78 | text_type_p = false; |
| 79 | break; |
| 80 | default: |
| 81 | text_type_p = true; |
| 82 | break; |
| 83 | } |
| 84 | break; |
| 85 | default: |
| 86 | text_type_p = false; |
| 87 | break; |
| 88 | } |
| 89 | DBUG_RETURN(text_type_p); |
| 90 | } |
| 91 | |
| 92 | grn_obj *FieldNormalizer::normalize(const char *string, |
| 93 | unsigned int string_length) { |
| 94 | MRN_DBUG_ENTER_METHOD(); |
| 95 | grn_obj *normalizer = find_grn_normalizer(); |
| 96 | int flags = 0; |
| 97 | grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_); |
| 98 | encoding::set_raw(ctx_, field_->charset()); |
| 99 | grn_obj *grn_string = grn_string_open(ctx_, string, string_length, |
| 100 | normalizer, flags); |
| 101 | GRN_CTX_SET_ENCODING(ctx_, original_encoding); |
| 102 | DBUG_RETURN(grn_string); |
| 103 | } |
| 104 | |
| 105 | grn_obj *FieldNormalizer::find_grn_normalizer() { |
| 106 | MRN_DBUG_ENTER_METHOD(); |
| 107 | |
| 108 | const CHARSET_INFO *charset_info = field_->charset(); |
| 109 | const char *normalizer_name = NULL; |
| 110 | const char *default_normalizer_name = "NormalizerAuto" ; |
| 111 | if ((strcmp(charset_info->name, "utf8_general_ci" ) == 0) || |
| 112 | (strcmp(charset_info->name, "utf8mb4_general_ci" ) == 0)) { |
| 113 | normalizer_name = "NormalizerMySQLGeneralCI" ; |
| 114 | } else if ((strcmp(charset_info->name, "utf8_unicode_ci" ) == 0) || |
| 115 | (strcmp(charset_info->name, "utf8mb4_unicode_ci" ) == 0)) { |
| 116 | normalizer_name = "NormalizerMySQLUnicodeCI" ; |
| 117 | } else if ((strcmp(charset_info->name, "utf8_unicode_520_ci" ) == 0) || |
| 118 | (strcmp(charset_info->name, "utf8mb4_unicode_520_ci" ) == 0)) { |
| 119 | normalizer_name = "NormalizerMySQLUnicode520CI" ; |
| 120 | } |
| 121 | |
| 122 | grn_obj *normalizer = NULL; |
| 123 | if (normalizer_name) { |
| 124 | normalizer = grn_ctx_get(ctx_, normalizer_name, -1); |
| 125 | if (!normalizer) { |
| 126 | char error_message[MRN_MESSAGE_BUFFER_SIZE]; |
| 127 | snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE, |
| 128 | "%s normalizer isn't found for %s. " |
| 129 | "Install groonga-normalizer-mysql normalizer. " |
| 130 | "%s is used as fallback." , |
| 131 | normalizer_name, |
| 132 | charset_info->name, |
| 133 | default_normalizer_name); |
| 134 | push_warning(thread_, MRN_SEVERITY_WARNING, |
| 135 | HA_ERR_UNSUPPORTED, error_message); |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | if (!normalizer) { |
| 140 | normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1); |
| 141 | } |
| 142 | |
| 143 | DBUG_RETURN(normalizer); |
| 144 | } |
| 145 | } |
| 146 | |