1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18*/
19
20#include "mrn_field_normalizer.hpp"
21#include "mrn_encoding.hpp"
22
23// for debug
24#define MRN_CLASS_NAME "mrn::FieldNormalizer"
25
26namespace mrn {
27 FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field)
28 : ctx_(ctx),
29 thread_(thread),
30 field_(field) {
31 }
32
33 FieldNormalizer::~FieldNormalizer() {
34 }
35
36 bool FieldNormalizer::should_normalize() {
37 MRN_DBUG_ENTER_METHOD();
38
39 DBUG_PRINT("info",
40 ("mroonga: result_type = %u", field_->result_type()));
41 DBUG_PRINT("info",
42 ("mroonga: charset->name = %s", field_->charset()->name));
43 DBUG_PRINT("info",
44 ("mroonga: charset->csname = %s", field_->charset()->csname));
45 DBUG_PRINT("info",
46 ("mroonga: charset->state = %u", field_->charset()->state));
47 bool need_normalize_p;
48 if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
49 need_normalize_p = false;
50 DBUG_PRINT("info",
51 ("mroonga: should_normalize: false: sort is required"));
52 } else {
53 if (is_text_type()) {
54 need_normalize_p = true;
55 DBUG_PRINT("info", ("mroonga: should_normalize: true: text type"));
56 } else {
57 need_normalize_p = false;
58 DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type"));
59 }
60 }
61
62 DBUG_RETURN(need_normalize_p);
63 }
64
65 bool FieldNormalizer::is_text_type() {
66 MRN_DBUG_ENTER_METHOD();
67 bool text_type_p;
68 switch (field_->type()) {
69 case MYSQL_TYPE_VARCHAR:
70 case MYSQL_TYPE_BLOB:
71 case MYSQL_TYPE_VAR_STRING:
72 text_type_p = true;
73 break;
74 case MYSQL_TYPE_STRING:
75 switch (field_->real_type()) {
76 case MYSQL_TYPE_ENUM:
77 case MYSQL_TYPE_SET:
78 text_type_p = false;
79 break;
80 default:
81 text_type_p = true;
82 break;
83 }
84 break;
85 default:
86 text_type_p = false;
87 break;
88 }
89 DBUG_RETURN(text_type_p);
90 }
91
92 grn_obj *FieldNormalizer::normalize(const char *string,
93 unsigned int string_length) {
94 MRN_DBUG_ENTER_METHOD();
95 grn_obj *normalizer = find_grn_normalizer();
96 int flags = 0;
97 grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
98 encoding::set_raw(ctx_, field_->charset());
99 grn_obj *grn_string = grn_string_open(ctx_, string, string_length,
100 normalizer, flags);
101 GRN_CTX_SET_ENCODING(ctx_, original_encoding);
102 DBUG_RETURN(grn_string);
103 }
104
105 grn_obj *FieldNormalizer::find_grn_normalizer() {
106 MRN_DBUG_ENTER_METHOD();
107
108 const CHARSET_INFO *charset_info = field_->charset();
109 const char *normalizer_name = NULL;
110 const char *default_normalizer_name = "NormalizerAuto";
111 if ((strcmp(charset_info->name, "utf8_general_ci") == 0) ||
112 (strcmp(charset_info->name, "utf8mb4_general_ci") == 0)) {
113 normalizer_name = "NormalizerMySQLGeneralCI";
114 } else if ((strcmp(charset_info->name, "utf8_unicode_ci") == 0) ||
115 (strcmp(charset_info->name, "utf8mb4_unicode_ci") == 0)) {
116 normalizer_name = "NormalizerMySQLUnicodeCI";
117 } else if ((strcmp(charset_info->name, "utf8_unicode_520_ci") == 0) ||
118 (strcmp(charset_info->name, "utf8mb4_unicode_520_ci") == 0)) {
119 normalizer_name = "NormalizerMySQLUnicode520CI";
120 }
121
122 grn_obj *normalizer = NULL;
123 if (normalizer_name) {
124 normalizer = grn_ctx_get(ctx_, normalizer_name, -1);
125 if (!normalizer) {
126 char error_message[MRN_MESSAGE_BUFFER_SIZE];
127 snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
128 "%s normalizer isn't found for %s. "
129 "Install groonga-normalizer-mysql normalizer. "
130 "%s is used as fallback.",
131 normalizer_name,
132 charset_info->name,
133 default_normalizer_name);
134 push_warning(thread_, MRN_SEVERITY_WARNING,
135 HA_ERR_UNSUPPORTED, error_message);
136 }
137 }
138
139 if (!normalizer) {
140 normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1);
141 }
142
143 DBUG_RETURN(normalizer);
144 }
145}
146