1 | /* |
2 | * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | #include "precompiled.hpp" |
25 | #include "utilities/ostream.hpp" |
26 | #include "logging/log.hpp" |
27 | #include "logging/logSelection.hpp" |
28 | #include "logging/logTagSet.hpp" |
29 | #include "runtime/os.inline.hpp" |
30 | #include "utilities/globalDefinitions.hpp" |
31 | #include "utilities/ostream.hpp" |
32 | #include "utilities/quickSort.hpp" |
33 | |
34 | const LogSelection LogSelection::Invalid; |
35 | |
36 | LogSelection::LogSelection() : _ntags(0), _wildcard(false), _level(LogLevel::Invalid), _tag_sets_selected(0) { |
37 | } |
38 | |
39 | LogSelection::LogSelection(const LogTagType tags[LogTag::MaxTags], bool wildcard, LogLevelType level) |
40 | : _ntags(0), _wildcard(wildcard), _level(level), _tag_sets_selected(0) { |
41 | while (_ntags < LogTag::MaxTags && tags[_ntags] != LogTag::__NO_TAG) { |
42 | _tags[_ntags] = tags[_ntags]; |
43 | _ntags++; |
44 | } |
45 | |
46 | for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { |
47 | if (selects(*ts)) { |
48 | _tag_sets_selected++; |
49 | } |
50 | } |
51 | } |
52 | |
53 | bool LogSelection::operator==(const LogSelection& ref) const { |
54 | if (_ntags != ref._ntags || |
55 | _wildcard != ref._wildcard || |
56 | _level != ref._level || |
57 | _tag_sets_selected != ref._tag_sets_selected) { |
58 | return false; |
59 | } |
60 | for (size_t i = 0; i < _ntags; i++) { |
61 | if (_tags[i] != ref._tags[i]) { |
62 | return false; |
63 | } |
64 | } |
65 | return true; |
66 | } |
67 | |
68 | bool LogSelection::operator!=(const LogSelection& ref) const { |
69 | return !operator==(ref); |
70 | } |
71 | |
72 | static LogSelection parse_internal(char *str, outputStream* errstream) { |
73 | // Parse the level, if specified |
74 | LogLevelType level = LogLevel::Unspecified; |
75 | char* equals = strchr(str, '='); |
76 | if (equals != NULL) { |
77 | const char* levelstr = equals + 1; |
78 | level = LogLevel::from_string(levelstr); |
79 | if (level == LogLevel::Invalid) { |
80 | if (errstream != NULL) { |
81 | errstream->print("Invalid level '%s' in log selection." , levelstr); |
82 | LogLevelType match = LogLevel::fuzzy_match(levelstr); |
83 | if (match != LogLevel::Invalid) { |
84 | errstream->print(" Did you mean '%s'?" , LogLevel::name(match)); |
85 | } |
86 | errstream->cr(); |
87 | } |
88 | return LogSelection::Invalid; |
89 | } |
90 | *equals = '\0'; |
91 | } |
92 | |
93 | size_t ntags = 0; |
94 | LogTagType tags[LogTag::MaxTags] = { LogTag::__NO_TAG }; |
95 | |
96 | // Parse special tags such as 'all' |
97 | if (strcmp(str, "all" ) == 0) { |
98 | return LogSelection(tags, true, level); |
99 | } |
100 | |
101 | // Check for '*' suffix |
102 | bool wildcard = false; |
103 | char* asterisk_pos = strchr(str, '*'); |
104 | if (asterisk_pos != NULL && asterisk_pos[1] == '\0') { |
105 | wildcard = true; |
106 | *asterisk_pos = '\0'; |
107 | } |
108 | |
109 | // Parse the tag expression (t1+t2+...+tn) |
110 | char* plus_pos; |
111 | char* cur_tag = str; |
112 | do { |
113 | plus_pos = strchr(cur_tag, '+'); |
114 | if (plus_pos != NULL) { |
115 | *plus_pos = '\0'; |
116 | } |
117 | LogTagType tag = LogTag::from_string(cur_tag); |
118 | if (tag == LogTag::__NO_TAG) { |
119 | if (errstream != NULL) { |
120 | errstream->print("Invalid tag '%s' in log selection." , cur_tag); |
121 | LogTagType match = LogTag::fuzzy_match(cur_tag); |
122 | if (match != LogTag::__NO_TAG) { |
123 | errstream->print(" Did you mean '%s'?" , LogTag::name(match)); |
124 | } |
125 | errstream->cr(); |
126 | } |
127 | return LogSelection::Invalid; |
128 | } |
129 | if (ntags == LogTag::MaxTags) { |
130 | if (errstream != NULL) { |
131 | errstream->print_cr("Too many tags in log selection '%s' (can only have up to " SIZE_FORMAT " tags)." , |
132 | str, LogTag::MaxTags); |
133 | } |
134 | return LogSelection::Invalid; |
135 | } |
136 | tags[ntags++] = tag; |
137 | cur_tag = plus_pos + 1; |
138 | } while (plus_pos != NULL); |
139 | |
140 | for (size_t i = 0; i < ntags; i++) { |
141 | for (size_t j = 0; j < ntags; j++) { |
142 | if (i != j && tags[i] == tags[j]) { |
143 | if (errstream != NULL) { |
144 | errstream->print_cr("Log selection contains duplicates of tag %s." , LogTag::name(tags[i])); |
145 | } |
146 | return LogSelection::Invalid; |
147 | } |
148 | } |
149 | } |
150 | |
151 | return LogSelection(tags, wildcard, level); |
152 | } |
153 | |
154 | LogSelection LogSelection::parse(const char* str, outputStream* error_stream) { |
155 | char* copy = os::strdup_check_oom(str, mtLogging); |
156 | LogSelection s = parse_internal(copy, error_stream); |
157 | os::free(copy); |
158 | return s; |
159 | } |
160 | |
161 | bool LogSelection::selects(const LogTagSet& ts) const { |
162 | if (!_wildcard && _ntags != ts.ntags()) { |
163 | return false; |
164 | } |
165 | for (size_t i = 0; i < _ntags; i++) { |
166 | if (!ts.contains(_tags[i])) { |
167 | return false; |
168 | } |
169 | } |
170 | return true; |
171 | } |
172 | |
173 | static bool contains(LogTagType tag, const LogTagType tags[LogTag::MaxTags], size_t ntags) { |
174 | for (size_t i = 0; i < ntags; i++) { |
175 | if (tags[i] == tag) { |
176 | return true; |
177 | } |
178 | } |
179 | return false; |
180 | } |
181 | |
182 | bool LogSelection::consists_of(const LogTagType tags[LogTag::MaxTags]) const { |
183 | size_t i; |
184 | for (i = 0; tags[i] != LogTag::__NO_TAG; i++) { |
185 | if (!contains(tags[i], _tags, _ntags)) { |
186 | return false; |
187 | } |
188 | } |
189 | return i == _ntags; |
190 | } |
191 | |
192 | size_t LogSelection::ntags() const { |
193 | return _ntags; |
194 | } |
195 | |
196 | LogLevelType LogSelection::level() const { |
197 | return _level; |
198 | } |
199 | |
200 | size_t LogSelection::tag_sets_selected() const { |
201 | return _tag_sets_selected; |
202 | } |
203 | |
204 | int LogSelection::describe_tags(char* buf, size_t bufsize) const { |
205 | int tot_written = 0; |
206 | for (size_t i = 0; i < _ntags; i++) { |
207 | int written = jio_snprintf(buf + tot_written, bufsize - tot_written, |
208 | "%s%s" , (i == 0 ? "" : "+" ), LogTag::name(_tags[i])); |
209 | if (written == -1) { |
210 | return written; |
211 | } |
212 | tot_written += written; |
213 | } |
214 | |
215 | if (_wildcard) { |
216 | int written = jio_snprintf(buf + tot_written, bufsize - tot_written, "*" ); |
217 | if (written == -1) { |
218 | return written; |
219 | } |
220 | tot_written += written; |
221 | } |
222 | return tot_written; |
223 | } |
224 | |
225 | int LogSelection::describe(char* buf, size_t bufsize) const { |
226 | int tot_written = describe_tags(buf, bufsize); |
227 | |
228 | int written = jio_snprintf(buf + tot_written, bufsize - tot_written, "=%s" , LogLevel::name(_level)); |
229 | if (written == -1) { |
230 | return -1; |
231 | } |
232 | tot_written += written; |
233 | return tot_written; |
234 | } |
235 | |
236 | double LogSelection::similarity(const LogSelection& other) const { |
237 | // Compute Soerensen-Dice coefficient as the similarity measure |
238 | size_t intersecting = 0; |
239 | for (size_t i = 0; i < _ntags; i++) { |
240 | for (size_t j = 0; j < other._ntags; j++) { |
241 | if (_tags[i] == other._tags[j]) { |
242 | intersecting++; |
243 | break; |
244 | } |
245 | } |
246 | } |
247 | return 2.0 * intersecting / (_ntags + other._ntags); |
248 | } |
249 | |
250 | // Comparator used for sorting LogSelections based on their similarity to a specific LogSelection. |
251 | // A negative return value means that 'a' is more similar to 'ref' than 'b' is, while a positive |
252 | // return value means that 'b' is more similar. |
253 | // For the sake of giving short and effective suggestions, when two selections have an equal |
254 | // similarity score, the selection with the fewer tags (selecting the most tag sets) is considered |
255 | // more similar. |
256 | class SimilarityComparator { |
257 | const LogSelection& _ref; |
258 | public: |
259 | SimilarityComparator(const LogSelection& ref) : _ref(ref) { |
260 | } |
261 | int operator()(const LogSelection& a, const LogSelection& b) const { |
262 | const double epsilon = 1.0e-6; |
263 | |
264 | // Sort by similarity (descending) |
265 | double s = _ref.similarity(b) - _ref.similarity(a); |
266 | if (fabs(s) > epsilon) { |
267 | return s < 0 ? -1 : 1; |
268 | } |
269 | |
270 | // Then by number of tags (ascending) |
271 | int t = static_cast<int>(a.ntags() - (int)b.ntags()); |
272 | if (t != 0) { |
273 | return t; |
274 | } |
275 | |
276 | // Lastly by tag sets selected (descending) |
277 | return static_cast<int>(b.tag_sets_selected() - a.tag_sets_selected()); |
278 | } |
279 | }; |
280 | |
281 | static const size_t suggestion_cap = 5; |
282 | static const double similarity_requirement = 0.3; |
283 | void LogSelection::suggest_similar_matching(outputStream* out) const { |
284 | LogSelection suggestions[suggestion_cap]; |
285 | uint nsuggestions = 0; |
286 | |
287 | // See if simply adding a wildcard would make the selection match |
288 | if (!_wildcard) { |
289 | LogSelection sel(_tags, true, _level); |
290 | if (sel.tag_sets_selected() > 0) { |
291 | suggestions[nsuggestions++] = sel; |
292 | } |
293 | } |
294 | |
295 | // Check for matching tag sets with a single tag mismatching (a tag too many or short a tag) |
296 | for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { |
297 | LogTagType tags[LogTag::MaxTags] = { LogTag::__NO_TAG }; |
298 | for (size_t i = 0; i < ts->ntags(); i++) { |
299 | tags[i] = ts->tag(i); |
300 | } |
301 | |
302 | // Suggest wildcard selection unless the wildcard doesn't match anything extra |
303 | LogSelection sel(tags, true, _level); |
304 | if (sel.tag_sets_selected() == 1) { |
305 | sel = LogSelection(tags, false, _level); |
306 | } |
307 | |
308 | double score = similarity(sel); |
309 | |
310 | // Ignore suggestions with too low similarity |
311 | if (score < similarity_requirement) { |
312 | continue; |
313 | } |
314 | |
315 | // Cap not reached, simply add the new suggestion and continue searching |
316 | if (nsuggestions < suggestion_cap) { |
317 | suggestions[nsuggestions++] = sel; |
318 | continue; |
319 | } |
320 | |
321 | // Find the least matching suggestion already found, and if the new suggestion is a better match, replace it |
322 | double min = 1.0; |
323 | size_t pos = -1; |
324 | for (size_t i = 0; i < nsuggestions; i++) { |
325 | double score = similarity(suggestions[i]); |
326 | if (score < min) { |
327 | min = score; |
328 | pos = i; |
329 | } |
330 | } |
331 | if (score > min) { |
332 | suggestions[pos] = sel; |
333 | } |
334 | } |
335 | |
336 | if (nsuggestions == 0) { |
337 | // Found no similar enough selections to suggest. |
338 | return; |
339 | } |
340 | |
341 | // Sort found suggestions to suggest the best one first |
342 | SimilarityComparator sc(*this); |
343 | QuickSort::sort(suggestions, nsuggestions, sc, false); |
344 | |
345 | out->print("Did you mean any of the following?" ); |
346 | for (size_t i = 0; i < nsuggestions; i++) { |
347 | char buf[128]; |
348 | suggestions[i].describe_tags(buf, sizeof(buf)); |
349 | out->print(" %s" , buf); |
350 | } |
351 | } |
352 | |