1 | /* |
2 | * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "asm/macroAssembler.hpp" |
27 | #include "compiler/disassembler.hpp" |
28 | #include "interpreter/interp_masm.hpp" |
29 | #include "interpreter/interpreter.hpp" |
30 | #include "interpreter/interpreterRuntime.hpp" |
31 | #include "interpreter/templateInterpreterGenerator.hpp" |
32 | #include "runtime/arguments.hpp" |
33 | #include "runtime/sharedRuntime.hpp" |
34 | |
35 | #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)-> |
36 | |
37 | #ifdef _WIN64 |
38 | address TemplateInterpreterGenerator::generate_slow_signature_handler() { |
39 | address entry = __ pc(); |
40 | |
41 | // rbx: method |
42 | // r14: pointer to locals |
43 | // c_rarg3: first stack arg - wordSize |
44 | __ mov(c_rarg3, rsp); |
45 | // adjust rsp |
46 | __ subptr(rsp, 4 * wordSize); |
47 | __ call_VM(noreg, |
48 | CAST_FROM_FN_PTR(address, |
49 | InterpreterRuntime::slow_signature_handler), |
50 | rbx, r14, c_rarg3); |
51 | |
52 | // rax: result handler |
53 | |
54 | // Stack layout: |
55 | // rsp: 3 integer or float args (if static first is unused) |
56 | // 1 float/double identifiers |
57 | // return address |
58 | // stack args |
59 | // garbage |
60 | // expression stack bottom |
61 | // bcp (NULL) |
62 | // ... |
63 | |
64 | // Do FP first so we can use c_rarg3 as temp |
65 | __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers |
66 | |
67 | for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { |
68 | XMMRegister floatreg = as_XMMRegister(i+1); |
69 | Label isfloatordouble, isdouble, next; |
70 | |
71 | __ testl(c_rarg3, 1 << (i*2)); // Float or Double? |
72 | __ jcc(Assembler::notZero, isfloatordouble); |
73 | |
74 | // Do Int register here |
75 | switch ( i ) { |
76 | case 0: |
77 | __ movl(rscratch1, Address(rbx, Method::access_flags_offset())); |
78 | __ testl(rscratch1, JVM_ACC_STATIC); |
79 | __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); |
80 | break; |
81 | case 1: |
82 | __ movptr(c_rarg2, Address(rsp, wordSize)); |
83 | break; |
84 | case 2: |
85 | __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); |
86 | break; |
87 | default: |
88 | break; |
89 | } |
90 | |
91 | __ jmp (next); |
92 | |
93 | __ bind(isfloatordouble); |
94 | __ testl(c_rarg3, 1 << ((i*2)+1)); // Double? |
95 | __ jcc(Assembler::notZero, isdouble); |
96 | |
97 | // Do Float Here |
98 | __ movflt(floatreg, Address(rsp, i * wordSize)); |
99 | __ jmp(next); |
100 | |
101 | // Do Double here |
102 | __ bind(isdouble); |
103 | __ movdbl(floatreg, Address(rsp, i * wordSize)); |
104 | |
105 | __ bind(next); |
106 | } |
107 | |
108 | |
109 | // restore rsp |
110 | __ addptr(rsp, 4 * wordSize); |
111 | |
112 | __ ret(0); |
113 | |
114 | return entry; |
115 | } |
116 | #else |
117 | address TemplateInterpreterGenerator::generate_slow_signature_handler() { |
118 | address entry = __ pc(); |
119 | |
120 | // rbx: method |
121 | // r14: pointer to locals |
122 | // c_rarg3: first stack arg - wordSize |
123 | __ mov(c_rarg3, rsp); |
124 | // adjust rsp |
125 | __ subptr(rsp, 14 * wordSize); |
126 | __ call_VM(noreg, |
127 | CAST_FROM_FN_PTR(address, |
128 | InterpreterRuntime::slow_signature_handler), |
129 | rbx, r14, c_rarg3); |
130 | |
131 | // rax: result handler |
132 | |
133 | // Stack layout: |
134 | // rsp: 5 integer args (if static first is unused) |
135 | // 1 float/double identifiers |
136 | // 8 double args |
137 | // return address |
138 | // stack args |
139 | // garbage |
140 | // expression stack bottom |
141 | // bcp (NULL) |
142 | // ... |
143 | |
144 | // Do FP first so we can use c_rarg3 as temp |
145 | __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers |
146 | |
147 | for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { |
148 | const XMMRegister r = as_XMMRegister(i); |
149 | |
150 | Label d, done; |
151 | |
152 | __ testl(c_rarg3, 1 << i); |
153 | __ jcc(Assembler::notZero, d); |
154 | __ movflt(r, Address(rsp, (6 + i) * wordSize)); |
155 | __ jmp(done); |
156 | __ bind(d); |
157 | __ movdbl(r, Address(rsp, (6 + i) * wordSize)); |
158 | __ bind(done); |
159 | } |
160 | |
161 | // Now handle integrals. Only do c_rarg1 if not static. |
162 | __ movl(c_rarg3, Address(rbx, Method::access_flags_offset())); |
163 | __ testl(c_rarg3, JVM_ACC_STATIC); |
164 | __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); |
165 | |
166 | __ movptr(c_rarg2, Address(rsp, wordSize)); |
167 | __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); |
168 | __ movptr(c_rarg4, Address(rsp, 3 * wordSize)); |
169 | __ movptr(c_rarg5, Address(rsp, 4 * wordSize)); |
170 | |
171 | // restore rsp |
172 | __ addptr(rsp, 14 * wordSize); |
173 | |
174 | __ ret(0); |
175 | |
176 | return entry; |
177 | } |
#endif // _WIN64
179 | |
180 | /** |
181 | * Method entry for static native methods: |
182 | * int java.util.zip.CRC32.update(int crc, int b) |
183 | */ |
184 | address TemplateInterpreterGenerator::generate_CRC32_update_entry() { |
185 | if (UseCRC32Intrinsics) { |
186 | address entry = __ pc(); |
187 | |
188 | // rbx,: Method* |
189 | // r13: senderSP must preserved for slow path, set SP to it on fast path |
190 | // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) |
191 | // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) |
192 | |
193 | Label slow_path; |
194 | __ safepoint_poll(slow_path, r15_thread, rscratch1); |
195 | |
196 | // We don't generate local frame and don't align stack because |
197 | // we call stub code and there is no safepoint on this path. |
198 | |
199 | // Load parameters |
200 | const Register crc = rax; // crc |
201 | const Register val = c_rarg0; // source java byte value |
202 | const Register tbl = c_rarg1; // scratch |
203 | |
204 | // Arguments are reversed on java expression stack |
205 | __ movl(val, Address(rsp, wordSize)); // byte value |
206 | __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC |
207 | |
208 | __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); |
209 | __ notl(crc); // ~crc |
210 | __ update_byte_crc32(crc, val, tbl); |
211 | __ notl(crc); // ~crc |
212 | // result in rax |
213 | |
214 | // _areturn |
215 | __ pop(rdi); // get return address |
216 | __ mov(rsp, r13); // set sp to sender sp |
217 | __ jmp(rdi); |
218 | |
219 | // generate a vanilla native entry as the slow path |
220 | __ bind(slow_path); |
221 | __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); |
222 | return entry; |
223 | } |
224 | return NULL; |
225 | } |
226 | |
227 | /** |
228 | * Method entry for static native methods: |
229 | * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) |
230 | * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) |
231 | */ |
232 | address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { |
233 | if (UseCRC32Intrinsics) { |
234 | address entry = __ pc(); |
235 | |
236 | // rbx,: Method* |
237 | // r13: senderSP must preserved for slow path, set SP to it on fast path |
238 | |
239 | Label slow_path; |
240 | __ safepoint_poll(slow_path, r15_thread, rscratch1); |
241 | |
242 | // We don't generate local frame and don't align stack because |
243 | // we call stub code and there is no safepoint on this path. |
244 | |
245 | // Load parameters |
246 | const Register crc = c_rarg0; // crc |
247 | const Register buf = c_rarg1; // source java byte array address |
248 | const Register len = c_rarg2; // length |
249 | const Register off = len; // offset (never overlaps with 'len') |
250 | |
251 | // Arguments are reversed on java expression stack |
252 | // Calculate address of start element |
253 | if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { |
254 | __ movptr(buf, Address(rsp, 3*wordSize)); // long buf |
255 | __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset |
256 | __ addq(buf, off); // + offset |
257 | __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC |
258 | } else { |
259 | __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array |
260 | __ resolve(IS_NOT_NULL | ACCESS_READ, buf); |
261 | __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size |
262 | __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset |
263 | __ addq(buf, off); // + offset |
264 | __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC |
265 | } |
266 | // Can now load 'len' since we're finished with 'off' |
267 | __ movl(len, Address(rsp, wordSize)); // Length |
268 | |
269 | __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); |
270 | // result in rax |
271 | |
272 | // _areturn |
273 | __ pop(rdi); // get return address |
274 | __ mov(rsp, r13); // set sp to sender sp |
275 | __ jmp(rdi); |
276 | |
277 | // generate a vanilla native entry as the slow path |
278 | __ bind(slow_path); |
279 | __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); |
280 | return entry; |
281 | } |
282 | return NULL; |
283 | } |
284 | |
285 | /** |
286 | * Method entry for static (non-native) methods: |
287 | * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) |
288 | * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) |
289 | */ |
290 | address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { |
291 | if (UseCRC32CIntrinsics) { |
292 | address entry = __ pc(); |
293 | // Load parameters |
294 | const Register crc = c_rarg0; // crc |
295 | const Register buf = c_rarg1; // source java byte array address |
296 | const Register len = c_rarg2; |
297 | const Register off = c_rarg3; // offset |
298 | const Register end = len; |
299 | |
300 | // Arguments are reversed on java expression stack |
301 | // Calculate address of start element |
302 | if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { |
303 | __ movptr(buf, Address(rsp, 3 * wordSize)); // long address |
304 | __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset |
305 | __ addq(buf, off); // + offset |
306 | __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC |
307 | // Note on 5 * wordSize vs. 4 * wordSize: |
308 | // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) |
309 | // 4 2,3 1 0 |
310 | // end starts at SP + 8 |
311 | // The Java(R) Virtual Machine Specification Java SE 7 Edition |
312 | // 4.10.2.3. Values of Types long and double |
313 | // "When calculating operand stack length, values of type long and double have length two." |
314 | } else { |
315 | __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array |
316 | __ resolve(IS_NOT_NULL | ACCESS_READ, buf); |
317 | __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size |
318 | __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset |
319 | __ addq(buf, off); // + offset |
320 | __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC |
321 | } |
322 | __ movl(end, Address(rsp, wordSize)); // end |
323 | __ subl(end, off); // end - off |
324 | __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); |
325 | // result in rax |
326 | // _areturn |
327 | __ pop(rdi); // get return address |
328 | __ mov(rsp, r13); // set sp to sender sp |
329 | __ jmp(rdi); |
330 | |
331 | return entry; |
332 | } |
333 | |
334 | return NULL; |
335 | } |
336 | |
337 | // |
338 | // Various method entries |
339 | // |
340 | |
341 | address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { |
342 | |
343 | // rbx,: Method* |
344 | // rcx: scratrch |
345 | // r13: sender sp |
346 | |
347 | if (!InlineIntrinsics) return NULL; // Generate a vanilla entry |
348 | |
349 | address entry_point = __ pc(); |
350 | |
351 | // These don't need a safepoint check because they aren't virtually |
352 | // callable. We won't enter these intrinsics from compiled code. |
353 | // If in the future we added an intrinsic which was virtually callable |
354 | // we'd have to worry about how to safepoint so that this code is used. |
355 | |
356 | // mathematical functions inlined by compiler |
357 | // (interpreter must provide identical implementation |
358 | // in order to avoid monotonicity bugs when switching |
359 | // from interpreter to compiler in the middle of some |
360 | // computation) |
361 | // |
362 | // stack: [ ret adr ] <-- rsp |
363 | // [ lo(arg) ] |
364 | // [ hi(arg) ] |
365 | // |
366 | |
367 | if (kind == Interpreter::java_lang_math_fmaD) { |
368 | if (!UseFMA) { |
369 | return NULL; // Generate a vanilla entry |
370 | } |
371 | __ movdbl(xmm0, Address(rsp, wordSize)); |
372 | __ movdbl(xmm1, Address(rsp, 3 * wordSize)); |
373 | __ movdbl(xmm2, Address(rsp, 5 * wordSize)); |
374 | __ fmad(xmm0, xmm1, xmm2, xmm0); |
375 | } else if (kind == Interpreter::java_lang_math_fmaF) { |
376 | if (!UseFMA) { |
377 | return NULL; // Generate a vanilla entry |
378 | } |
379 | __ movflt(xmm0, Address(rsp, wordSize)); |
380 | __ movflt(xmm1, Address(rsp, 2 * wordSize)); |
381 | __ movflt(xmm2, Address(rsp, 3 * wordSize)); |
382 | __ fmaf(xmm0, xmm1, xmm2, xmm0); |
383 | } else if (kind == Interpreter::java_lang_math_sqrt) { |
384 | __ sqrtsd(xmm0, Address(rsp, wordSize)); |
385 | } else if (kind == Interpreter::java_lang_math_exp) { |
386 | __ movdbl(xmm0, Address(rsp, wordSize)); |
387 | if (StubRoutines::dexp() != NULL) { |
388 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); |
389 | } else { |
390 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); |
391 | } |
392 | } else if (kind == Interpreter::java_lang_math_log) { |
393 | __ movdbl(xmm0, Address(rsp, wordSize)); |
394 | if (StubRoutines::dlog() != NULL) { |
395 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); |
396 | } else { |
397 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); |
398 | } |
399 | } else if (kind == Interpreter::java_lang_math_log10) { |
400 | __ movdbl(xmm0, Address(rsp, wordSize)); |
401 | if (StubRoutines::dlog10() != NULL) { |
402 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); |
403 | } else { |
404 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); |
405 | } |
406 | } else if (kind == Interpreter::java_lang_math_sin) { |
407 | __ movdbl(xmm0, Address(rsp, wordSize)); |
408 | if (StubRoutines::dsin() != NULL) { |
409 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); |
410 | } else { |
411 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); |
412 | } |
413 | } else if (kind == Interpreter::java_lang_math_cos) { |
414 | __ movdbl(xmm0, Address(rsp, wordSize)); |
415 | if (StubRoutines::dcos() != NULL) { |
416 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); |
417 | } else { |
418 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); |
419 | } |
420 | } else if (kind == Interpreter::java_lang_math_pow) { |
421 | __ movdbl(xmm1, Address(rsp, wordSize)); |
422 | __ movdbl(xmm0, Address(rsp, 3 * wordSize)); |
423 | if (StubRoutines::dpow() != NULL) { |
424 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); |
425 | } else { |
426 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); |
427 | } |
428 | } else if (kind == Interpreter::java_lang_math_tan) { |
429 | __ movdbl(xmm0, Address(rsp, wordSize)); |
430 | if (StubRoutines::dtan() != NULL) { |
431 | __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); |
432 | } else { |
433 | __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); |
434 | } |
435 | } else { |
436 | __ fld_d(Address(rsp, wordSize)); |
437 | switch (kind) { |
438 | case Interpreter::java_lang_math_abs: |
439 | __ fabs(); |
440 | break; |
441 | default: |
442 | ShouldNotReachHere(); |
443 | } |
444 | |
445 | // return double result in xmm0 for interpreter and compilers. |
446 | __ subptr(rsp, 2*wordSize); |
447 | // Round to 64bit precision |
448 | __ fstp_d(Address(rsp, 0)); |
449 | __ movdbl(xmm0, Address(rsp, 0)); |
450 | __ addptr(rsp, 2*wordSize); |
451 | } |
452 | |
453 | |
454 | __ pop(rax); |
455 | __ mov(rsp, r13); |
456 | __ jmp(rax); |
457 | |
458 | return entry_point; |
459 | } |
460 | |
461 | |