| 1 | /* |
| 2 | * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef SHARE_OPTO_CALLGENERATOR_HPP |
| 26 | #define SHARE_OPTO_CALLGENERATOR_HPP |
| 27 | |
| 28 | #include "compiler/compileBroker.hpp" |
| 29 | #include "opto/callnode.hpp" |
| 30 | #include "opto/compile.hpp" |
| 31 | #include "opto/type.hpp" |
| 32 | #include "runtime/deoptimization.hpp" |
| 33 | |
| 34 | //---------------------------CallGenerator------------------------------------- |
| 35 | // The subclasses of this class handle generation of ideal nodes for |
| 36 | // call sites and method entry points. |
| 37 | |
| 38 | class CallGenerator : public ResourceObj { |
| 44 | private: |
| 45 | ciMethod* _method; // The method being called. |
| 46 | |
| 47 | protected: |
| 48 | CallGenerator(ciMethod* method) : _method(method) {} |
| 49 | |
| 50 | public: |
| 51 | // Accessors |
| 52 | ciMethod* method() const { return _method; } |
| 53 | |
| 54 | // is_inline: At least some code implementing the method is copied here. |
| 55 | virtual bool is_inline() const { return false; } |
| 56 | // is_intrinsic: There's a method-specific way of generating the inline code. |
| 57 | virtual bool is_intrinsic() const { return false; } |
| 58 | // is_parse: Bytecodes implementing the specific method are copied here. |
| 59 | virtual bool is_parse() const { return false; } |
| 60 | // is_virtual: The call uses the receiver type to select or check the method. |
| 61 | virtual bool is_virtual() const { return false; } |
| 62 | // is_deferred: The decision whether to inline or not is deferred. |
| 63 | virtual bool is_deferred() const { return false; } |
| 64 | // is_predicated: Uses an explicit check (predicate). |
| 65 | virtual bool is_predicated() const { return false; } |
| 66 | virtual int predicates_count() const { return 0; } |
| 67 | // is_trap: Does not return to the caller. (E.g., uncommon trap.) |
| 68 | virtual bool is_trap() const { return false; } |
| 69 | // does_virtual_dispatch: Should try inlining as normal method first. |
| 70 | virtual bool does_virtual_dispatch() const { return false; } |
| 71 | |
| 72 | // is_late_inline: supports conversion of call into an inline |
| 73 | virtual bool is_late_inline() const { return false; } |
| 74 | // same but for method handle calls |
| 75 | virtual bool is_mh_late_inline() const { return false; } |
  virtual bool is_string_late_inline() const { return false; }
| 77 | |
  // for method handle calls: have we tried inlining the call already?
| 79 | virtual bool already_attempted() const { ShouldNotReachHere(); return false; } |
| 80 | |
| 81 | // Replace the call with an inline version of the code |
| 82 | virtual void do_late_inline() { ShouldNotReachHere(); } |
| 83 | |
| 84 | virtual CallStaticJavaNode* call_node() const { ShouldNotReachHere(); return NULL; } |
| 85 | |
  virtual void set_unique_id(jlong id) { fatal("unique id only for late inlines"); }
  virtual jlong unique_id() const { fatal("unique id only for late inlines"); return 0; }
| 88 | |
| 89 | // Note: It is possible for a CG to be both inline and virtual. |
| 90 | // (The hashCode intrinsic does a vtable check and an inlined fast path.) |
| 91 | |
| 92 | // Utilities: |
| 93 | const TypeFunc* tf() const; |
| 94 | |
| 95 | // The given jvms has state and arguments for a call to my method. |
| 96 | // Edges after jvms->argoff() carry all (pre-popped) argument values. |
| 97 | // |
| 98 | // Update the map with state and return values (if any) and return it. |
| 99 | // The return values (0, 1, or 2) must be pushed on the map's stack, |
| 100 | // and the sp of the jvms incremented accordingly. |
| 101 | // |
| 102 | // The jvms is returned on success. Alternatively, a copy of the |
| 103 | // given jvms, suitably updated, may be returned, in which case the |
| 104 | // caller should discard the original jvms. |
| 105 | // |
| 106 | // The non-Parm edges of the returned map will contain updated global state, |
| 107 | // and one or two edges before jvms->sp() will carry any return values. |
| 108 | // Other map edges may contain locals or monitors, and should not |
| 109 | // be changed in meaning. |
| 110 | // |
| 111 | // If the call traps, the returned map must have a control edge of top. |
| 112 | // If the call can throw, the returned map must report has_exceptions(). |
| 113 | // |
| 114 | // If the result is NULL, it means that this CallGenerator was unable |
| 115 | // to handle the given call, and another CallGenerator should be consulted. |
| 116 | virtual JVMState* generate(JVMState* jvms) = 0; |
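
  // As a hedged illustration of this contract (a sketch, not actual HotSpot
  // code), a minimal out-of-line generator's generate() has roughly the shape
  // below; make_call_node is a hypothetical helper, while the GraphKit calls
  // mirror what DirectCallGenerator does in callGenerator.cpp:
  //
  //   virtual JVMState* generate(JVMState* jvms) {
  //     GraphKit kit(jvms);                         // wrap the incoming map/state
  //     CallJavaNode* call = make_call_node(kit);   // hypothetical: build the call
  //     kit.set_edges_for_java_call(call);          // wire control/memory/i_o
  //     Node* ret = kit.set_results_for_java_call(call);
  //     kit.push_node(method()->return_type()->basic_type(), ret);
  //     return kit.transfer_exceptions_into_jvms(); // reports has_exceptions()
  //   }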
| 117 | |
| 118 | // How to generate a call site that is inlined: |
| 119 | static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1); |
| 120 | // How to generate code for an on-stack replacement handler. |
| 121 | static CallGenerator* for_osr(ciMethod* m, int osr_bci); |
| 122 | |
| 123 | // How to generate vanilla out-of-line call sites: |
| 124 | static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false); // static, special |
| 125 | static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface |
| 126 | |
  static CallGenerator* for_method_handle_call(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool delayed_forbidden);
| 128 | static CallGenerator* for_method_handle_inline(JVMState* jvms, ciMethod* caller, ciMethod* callee, bool& input_not_const); |
| 129 | |
  // How to replace a direct call with an inline version:
| 131 | static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg); |
| 132 | static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const); |
| 133 | static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg); |
| 134 | static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg); |
| 135 | |
| 136 | // How to make a call but defer the decision whether to inline or not. |
| 137 | static CallGenerator* for_warm_call(WarmCallInfo* ci, |
| 138 | CallGenerator* if_cold, |
| 139 | CallGenerator* if_hot); |
| 140 | |
| 141 | // How to make a call that optimistically assumes a receiver type: |
| 142 | static CallGenerator* for_predicted_call(ciKlass* predicted_receiver, |
| 143 | CallGenerator* if_missed, |
| 144 | CallGenerator* if_hit, |
| 145 | float hit_prob); |
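
  // Conceptually (an illustrative shape only, not the actual node
  // construction), the predicted-call generator guards the receiver class
  // and dispatches to one of its two delegates:
  //
  //   if (receiver->klass() == predicted_receiver) {
  //     result = if_hit->generate(jvms);     // e.g., an inlined fast path
  //   } else {
  //     result = if_missed->generate(jvms);  // e.g., an out-of-line virtual call
  //   }
  //
  // with hit_prob feeding the expected frequency of the guard's taken branch.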
| 146 | |
| 147 | static CallGenerator* for_guarded_call(ciKlass* predicted_receiver, |
| 148 | CallGenerator* if_missed, |
| 149 | CallGenerator* if_hit); |
| 150 | |
| 151 | // How to make a call that optimistically assumes a MethodHandle target: |
| 152 | static CallGenerator* for_predicted_dynamic_call(ciMethodHandle* predicted_method_handle, |
| 153 | CallGenerator* if_missed, |
| 154 | CallGenerator* if_hit, |
| 155 | float hit_prob); |
| 156 | |
| 157 | // How to make a call that gives up and goes back to the interpreter: |
| 158 | static CallGenerator* for_uncommon_trap(ciMethod* m, |
| 159 | Deoptimization::DeoptReason reason, |
| 160 | Deoptimization::DeoptAction action); |
| 161 | |
| 162 | // Registry for intrinsics: |
| 163 | static CallGenerator* for_intrinsic(ciMethod* m); |
| 164 | static void register_intrinsic(ciMethod* m, CallGenerator* cg); |
| 165 | static CallGenerator* for_predicated_intrinsic(CallGenerator* intrinsic, |
| 166 | CallGenerator* cg); |
  virtual Node* generate_predicate(JVMState* jvms, int predicate) { return NULL; }
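
  // A predicated intrinsic expands to roughly the following control flow
  // (illustrative only): each generate_predicate(jvms, i) emits a runtime
  // check, and the intrinsic body is reached only when a check passes:
  //
  //   if (predicate_0 holds)      { ... intrinsic->generate(jvms) ... }
  //   else if (predicate_1 holds) { ... }
  //   else                        { ... cg->generate(jvms) ... }  // plain fallback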
| 168 | |
| 169 | virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); } |
| 170 | |
| 171 | static void print_inlining(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) { |
| 172 | if (C->print_inlining()) { |
| 173 | C->print_inlining(callee, inline_level, bci, msg); |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | static void print_inlining_failure(Compile* C, ciMethod* callee, int inline_level, int bci, const char* msg) { |
| 178 | print_inlining(C, callee, inline_level, bci, msg); |
| 179 | C->log_inline_failure(msg); |
| 180 | } |
| 181 | |
| 182 | static bool is_inlined_method_handle_intrinsic(JVMState* jvms, ciMethod* m); |
| 183 | static bool is_inlined_method_handle_intrinsic(ciMethod* caller, int bci, ciMethod* m); |
| 184 | static bool is_inlined_method_handle_intrinsic(ciMethod* symbolic_info, ciMethod* m); |
| 185 | }; |
| 186 | |
| 187 | |
| 188 | //------------------------InlineCallGenerator---------------------------------- |
| 189 | class InlineCallGenerator : public CallGenerator { |
| 190 | protected: |
| 191 | InlineCallGenerator(ciMethod* method) : CallGenerator(method) {} |
| 192 | |
| 193 | public: |
| 194 | virtual bool is_inline() const { return true; } |
| 195 | }; |
| 196 | |
| 197 | |
| 198 | //---------------------------WarmCallInfo-------------------------------------- |
| 199 | // A struct to collect information about a given call site. |
| 200 | // Helps sort call sites into "hot", "medium", and "cold". |
| 201 | // Participates in the queueing of "medium" call sites for possible inlining. |
| 202 | class WarmCallInfo : public ResourceObj { |
| 203 | private: |
| 204 | |
| 205 | CallNode* _call; // The CallNode which may be inlined. |
  CallGenerator* _hot_cg; // CG for expanding the call node
| 207 | |
| 208 | // These are the metrics we use to evaluate call sites: |
| 209 | |
| 210 | float _count; // How often do we expect to reach this site? |
| 211 | float _profit; // How much time do we expect to save by inlining? |
| 212 | float _work; // How long do we expect the average call to take? |
| 213 | float _size; // How big do we expect the inlined code to be? |
| 214 | |
| 215 | float _heat; // Combined score inducing total order on call sites. |
| 216 | WarmCallInfo* _next; // Next cooler call info in pending queue. |
| 217 | |
| 218 | // Count is the number of times this call site is expected to be executed. |
| 219 | // Large count is favorable for inlining, because the extra compilation |
| 220 | // work will be amortized more completely. |
| 221 | |
| 222 | // Profit is a rough measure of the amount of time we expect to save |
| 223 | // per execution of this site if we inline it. (1.0 == call overhead) |
| 224 | // Large profit favors inlining. Negative profit disables inlining. |
| 225 | |
| 226 | // Work is a rough measure of the amount of time a typical out-of-line |
| 227 | // call from this site is expected to take. (1.0 == call, no-op, return) |
| 228 | // Small work is somewhat favorable for inlining, since methods with |
| 229 | // short "hot" traces are more likely to inline smoothly. |
| 230 | |
| 231 | // Size is the number of graph nodes we expect this method to produce, |
| 232 | // not counting the inlining of any further warm calls it may include. |
| 233 | // Small size favors inlining, since small methods are more likely to |
| 234 | // inline smoothly. The size is estimated by examining the native code |
| 235 | // if available. The method bytecodes are also examined, assuming |
| 236 | // empirically observed node counts for each kind of bytecode. |
| 237 | |
| 238 | // Heat is the combined "goodness" of a site's inlining. If we were |
| 239 | // omniscient, it would be the difference of two sums of future execution |
| 240 | // times of code emitted for this site (amortized across multiple sites if |
| 241 | // sharing applies). The two sums are for versions of this call site with |
| 242 | // and without inlining. |
| 243 | |
| 244 | // We approximate this mythical quantity by playing with averages, |
| 245 | // rough estimates, and assumptions that history repeats itself. |
| 246 | // The basic formula count * profit is heuristically adjusted |
| 247 | // by looking at the expected compilation and execution times of |
  // the inlined call.
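
  // For example (illustrative numbers only): a site with count = 1000 and
  // profit = 0.5 starts from a base heat of 500, which the adjustments for
  // expected size and work then scale before the hot/cold cutoffs apply.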
| 249 | |
| 250 | // Note: Some of these metrics may not be present in the final product, |
| 251 | // but exist in development builds to experiment with inline policy tuning. |
| 252 | |
| 253 | // This heuristic framework does not model well the very significant |
| 254 | // effects of multiple-level inlining. It is possible to see no immediate |
| 255 | // profit from inlining X->Y, but to get great profit from a subsequent |
| 256 | // inlining X->Y->Z. |
| 257 | |
  // This framework also does not account well for the N**2 growth in code
  // size that can arise in a clique of mutually inlinable methods.
| 260 | |
| 261 | WarmCallInfo* next() const { return _next; } |
| 262 | void set_next(WarmCallInfo* n) { _next = n; } |
| 263 | |
| 264 | static WarmCallInfo _always_hot; |
| 265 | static WarmCallInfo _always_cold; |
| 266 | |
  // Constructor initialization of always_hot and always_cold
| 268 | WarmCallInfo(float c, float p, float w, float s) { |
| 269 | _call = NULL; |
| 270 | _hot_cg = NULL; |
| 271 | _next = NULL; |
| 272 | _count = c; |
| 273 | _profit = p; |
| 274 | _work = w; |
| 275 | _size = s; |
| 276 | _heat = 0; |
| 277 | } |
| 278 | |
| 279 | public: |
  // Because WarmCallInfo objects live over the entire lifetime of the
  // Compile object, they are allocated into the comp_arena, which
  // does not get resource marked or reset during the compile process.
  void* operator new(size_t x, Compile* C) throw() { return C->comp_arena()->Amalloc(x); }
  void operator delete(void*) { } // fast deallocation
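
  // Usage sketch: the placement operator new above makes arena allocation
  // look like ordinary allocation at call sites (C is the current Compile):
  //
  //   WarmCallInfo* wci = new (C) WarmCallInfo();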
| 285 | |
| 286 | static WarmCallInfo* always_hot(); |
| 287 | static WarmCallInfo* always_cold(); |
| 288 | |
| 289 | WarmCallInfo() { |
| 290 | _call = NULL; |
| 291 | _hot_cg = NULL; |
| 292 | _next = NULL; |
| 293 | _count = _profit = _work = _size = _heat = 0; |
| 294 | } |
| 295 | |
| 296 | CallNode* call() const { return _call; } |
| 297 | float count() const { return _count; } |
| 298 | float size() const { return _size; } |
| 299 | float work() const { return _work; } |
| 300 | float profit() const { return _profit; } |
| 301 | float heat() const { return _heat; } |
| 302 | |
| 303 | void set_count(float x) { _count = x; } |
| 304 | void set_size(float x) { _size = x; } |
| 305 | void set_work(float x) { _work = x; } |
| 306 | void set_profit(float x) { _profit = x; } |
| 307 | void set_heat(float x) { _heat = x; } |
| 308 | |
| 309 | // Load initial heuristics from profiles, etc. |
| 310 | // The heuristics can be tweaked further by the caller. |
| 311 | void init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor); |
| 312 | |
| 313 | static float MAX_VALUE() { return +1.0e10; } |
| 314 | static float MIN_VALUE() { return -1.0e10; } |
| 315 | |
| 316 | float compute_heat() const; |
| 317 | |
| 318 | void set_call(CallNode* call) { _call = call; } |
| 319 | void set_hot_cg(CallGenerator* cg) { _hot_cg = cg; } |
| 320 | |
| 321 | // Do not queue very hot or very cold calls. |
| 322 | // Make very cold ones out of line immediately. |
| 323 | // Inline very hot ones immediately. |
| 324 | // These queries apply various tunable limits |
| 325 | // to the above metrics in a systematic way. |
| 326 | // Test for coldness before testing for hotness. |
| 327 | bool is_cold() const; |
| 328 | bool is_hot() const; |
| 329 | |
| 330 | // Force a warm call to be hot. This worklists the call node for inlining. |
| 331 | void make_hot(); |
| 332 | |
| 333 | // Force a warm call to be cold. This worklists the call node for out-of-lining. |
| 334 | void make_cold(); |
| 335 | |
| 336 | // A reproducible total ordering, in which heat is the major key. |
| 337 | bool warmer_than(WarmCallInfo* that); |
| 338 | |
| 339 | // List management. These methods are called with the list head, |
| 340 | // and return the new list head, inserting or removing the receiver. |
| 341 | WarmCallInfo* insert_into(WarmCallInfo* head); |
| 342 | WarmCallInfo* remove_from(WarmCallInfo* head); |
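
  // Usage sketch (illustrative): callers thread the pending queue through
  // these methods and always reassign the head, e.g.:
  //
  //   head = wci->insert_into(head);       // keeps the queue ordered by heat
  //   ...
  //   head = hottest->remove_from(head);   // detach an entry, keep the rest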
| 343 | |
| 344 | #ifndef PRODUCT |
| 345 | void print() const; |
| 346 | void print_all() const; |
| 347 | int count_all() const; |
| 348 | #endif |
| 349 | }; |
| 350 | |
| 351 | #endif // SHARE_OPTO_CALLGENERATOR_HPP |
| 352 | |