1 | /* |
2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef CPU_X86_MACROASSEMBLER_X86_HPP |
26 | #define CPU_X86_MACROASSEMBLER_X86_HPP |
27 | |
28 | #include "asm/assembler.hpp" |
29 | #include "utilities/macros.hpp" |
30 | #include "runtime/rtmLocking.hpp" |
31 | |
32 | // MacroAssembler extends Assembler by frequently used macros. |
33 | // |
34 | // Instructions for which a 'better' code sequence exists depending |
35 | // on arguments should also go in here. |
36 | |
37 | class MacroAssembler: public Assembler { |
38 | friend class LIR_Assembler; |
39 | friend class Runtime1; // as_Address() |
40 | |
41 | public: |
42 | // Support for VM calls |
43 | // |
44 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter |
45 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
46 | // additional registers when doing a VM call). |
47 | |
48 | virtual void call_VM_leaf_base( |
49 | address entry_point, // the entry point |
50 | int number_of_arguments // the number of arguments to pop after the call |
51 | ); |
52 | |
53 | protected: |
54 | // This is the base routine called by the different versions of call_VM. The interpreter |
55 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
56 | // additional registers when doing a VM call). |
57 | // |
58 | // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base |
59 | // returns the register which contains the thread upon return. If a thread register has been |
60 | // specified, the return value will correspond to that register. If no last_java_sp is specified |
61 | // (noreg) then rsp will be used instead. |
62 | virtual void call_VM_base( // returns the register containing the thread upon return |
63 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise |
64 | Register java_thread, // the thread if computed before; use noreg otherwise |
65 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise |
66 | address entry_point, // the entry point |
67 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call |
68 | bool check_exceptions // whether to check for pending exceptions after return |
69 | ); |
70 | |
71 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); |
72 | |
73 | // helpers for FPU flag access |
74 | // tmp is a temporary register, if none is available use noreg |
75 | void save_rax (Register tmp); |
76 | void restore_rax(Register tmp); |
77 | |
78 | public: |
79 | MacroAssembler(CodeBuffer* code) : Assembler(code) {} |
80 | |
81 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. |
82 | // The implementation is only non-empty for the InterpreterMacroAssembler, |
83 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. |
84 | virtual void check_and_handle_popframe(Register java_thread); |
85 | virtual void check_and_handle_earlyret(Register java_thread); |
86 | |
87 | Address as_Address(AddressLiteral adr); |
88 | Address as_Address(ArrayAddress adr); |
89 | |
90 | // Support for NULL-checks |
91 | // |
92 | // Generates code that causes a NULL OS exception if the content of reg is NULL. |
93 | // If the accessed location is M[reg + offset] and the offset is known, provide the |
94 | // offset. No explicit code generation is needed if the offset is within a certain |
95 | // range (0 <= offset <= page_size). |
96 | |
97 | void null_check(Register reg, int offset = -1); |
98 | static bool needs_explicit_null_check(intptr_t offset); |
99 | static bool uses_implicit_null_check(void* address); |
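// Illustrative use (not part of this interface; '__' is the usual macro
// assembler shorthand and rcx is just an example register):
//
//   __ null_check(rcx, oopDesc::klass_offset_in_bytes()); // small known offset: no code needed,
//                                                         // the later access faults if rcx is NULL
//   __ null_check(rcx);                                   // unknown offset: an explicit test is emitted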
100 | |
101 | // Required platform-specific helpers for Label::patch_instructions. |
102 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. |
103 | void pd_patch_instruction(address branch, address target, const char* file, int line) { |
104 | unsigned char op = branch[0]; |
105 | assert(op == 0xE8 /* call */ || |
106 | op == 0xE9 /* jmp */ || |
107 | op == 0xEB /* short jmp */ || |
108 | (op & 0xF0) == 0x70 /* short jcc */ || |
109 | (op == 0x0F && (branch[1] & 0xF0) == 0x80) /* jcc */ || |
110 | (op == 0xC7 && branch[1] == 0xF8) /* xbegin */, |
111 | "Invalid opcode at patch point"); |
112 | |
113 | if (op == 0xEB || (op & 0xF0) == 0x70) { |
114 | // short offset operators (jmp and jcc) |
115 | char* disp = (char*) &branch[1]; |
116 | int imm8 = target - (address) &disp[1]; |
117 | guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d", file, line); |
118 | *disp = imm8; |
119 | } else { |
120 | int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1]; |
121 | int imm32 = target - (address) &disp[1]; |
122 | *disp = imm32; |
123 | } |
124 | } |
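// For reference, the patchable encodings handled above are (displacements are
// relative to the end of the instruction):
//   E8 rel32    call           E9 rel32     jmp
//   EB rel8     short jmp      7x rel8      short jcc
//   0F 8x rel32 jcc            C7 F8 rel32  xbegin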
125 | |
126 | // The following 4 methods return the offset of the appropriate move instruction |
127 | |
128 | // Support for fast byte/short loading with zero extension (depending on particular CPU) |
129 | int load_unsigned_byte(Register dst, Address src); |
130 | int load_unsigned_short(Register dst, Address src); |
131 | |
132 | // Support for fast byte/short loading with sign extension (depending on particular CPU) |
133 | int load_signed_byte(Register dst, Address src); |
134 | int load_signed_short(Register dst, Address src); |
135 | |
136 | // Support for sign-extension (hi:lo = extend_sign(lo)) |
137 | void extend_sign(Register hi, Register lo); |
138 | |
139 | // Load and store values by size and signed-ness |
140 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); |
141 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); |
142 | |
143 | // Support for inc/dec with optimal instruction selection depending on value |
144 | |
145 | void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } |
146 | void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } |
147 | |
148 | void decrementl(Address dst, int value = 1); |
149 | void decrementl(Register reg, int value = 1); |
150 | |
151 | void decrementq(Register reg, int value = 1); |
152 | void decrementq(Address dst, int value = 1); |
153 | |
154 | void incrementl(Address dst, int value = 1); |
155 | void incrementl(Register reg, int value = 1); |
156 | |
157 | void incrementq(Register reg, int value = 1); |
158 | void incrementq(Address dst, int value = 1); |
159 | |
160 | #ifdef COMPILER2 |
161 | // special instructions for EVEX |
162 | void setvectmask(Register dst, Register src); |
163 | void restorevectmask(); |
164 | #endif |
165 | |
166 | // Support optimal SSE move instructions. |
167 | void movflt(XMMRegister dst, XMMRegister src) { |
168 | if (dst->encoding() == src->encoding()) return; |
169 | if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } |
170 | else { movss (dst, src); return; } |
171 | } |
172 | void movflt(XMMRegister dst, Address src) { movss(dst, src); } |
173 | void movflt(XMMRegister dst, AddressLiteral src); |
174 | void movflt(Address dst, XMMRegister src) { movss(dst, src); } |
175 | |
176 | void movdbl(XMMRegister dst, XMMRegister src) { |
177 | if (dst->encoding() == src->encoding()) return; |
178 | if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } |
179 | else { movsd (dst, src); return; } |
180 | } |
181 | |
182 | void movdbl(XMMRegister dst, AddressLiteral src); |
183 | |
184 | void movdbl(XMMRegister dst, Address src) { |
185 | if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } |
186 | else { movlpd(dst, src); return; } |
187 | } |
188 | void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } |
189 | |
190 | void incrementl(AddressLiteral dst); |
191 | void incrementl(ArrayAddress dst); |
192 | |
193 | void incrementq(AddressLiteral dst); |
194 | |
195 | // Alignment |
196 | void align(int modulus); |
197 | void align(int modulus, int target); |
198 | |
199 | // A 5 byte nop that is safe for patching (see patch_verified_entry) |
200 | void fat_nop(); |
201 | |
202 | // Stack frame creation/removal |
203 | void enter(); |
204 | void leave(); |
205 | |
206 | // Support for getting the JavaThread pointer (i.e., a reference to thread-local information) |
207 | // The pointer will be loaded into the thread register. |
208 | void get_thread(Register thread); |
209 | |
210 | |
211 | // Support for VM calls |
212 | // |
213 | // It is imperative that all calls into the VM are handled via the call_VM macros. |
214 | // They make sure that the stack linkage is setup correctly. call_VM's correspond |
215 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. |
216 | |
217 | |
218 | void call_VM(Register oop_result, |
219 | address entry_point, |
220 | bool check_exceptions = true); |
221 | void call_VM(Register oop_result, |
222 | address entry_point, |
223 | Register arg_1, |
224 | bool check_exceptions = true); |
225 | void call_VM(Register oop_result, |
226 | address entry_point, |
227 | Register arg_1, Register arg_2, |
228 | bool check_exceptions = true); |
229 | void call_VM(Register oop_result, |
230 | address entry_point, |
231 | Register arg_1, Register arg_2, Register arg_3, |
232 | bool check_exceptions = true); |
233 | |
234 | // Overloadings with last_Java_sp |
235 | void call_VM(Register oop_result, |
236 | Register last_java_sp, |
237 | address entry_point, |
238 | int number_of_arguments = 0, |
239 | bool check_exceptions = true); |
240 | void call_VM(Register oop_result, |
241 | Register last_java_sp, |
242 | address entry_point, |
243 | Register arg_1, |
244 | bool check_exceptions = true); |
245 | void call_VM(Register oop_result, |
246 | Register last_java_sp, |
247 | address entry_point, |
248 | Register arg_1, Register arg_2, |
249 | bool check_exceptions = true); |
250 | void call_VM(Register oop_result, |
251 | Register last_java_sp, |
252 | address entry_point, |
253 | Register arg_1, Register arg_2, Register arg_3, |
254 | bool check_exceptions = true); |
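// Illustrative sketch (the entry point below is hypothetical; '__' is the
// usual macro assembler shorthand):
//
//   __ call_VM(rax,                                              // oop result, if any
//              CAST_FROM_FN_PTR(address, SomeRuntime::do_thing), // hypothetical VM entry point
//              rdx, rcx);                                        // up to three register arguments
//
// check_exceptions defaults to true, so a pending exception raised by the
// callee is checked for after the call returns.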
255 | |
256 | void get_vm_result (Register oop_result, Register thread); |
257 | void get_vm_result_2(Register metadata_result, Register thread); |
258 | |
259 | // These always tightly bind to MacroAssembler::call_VM_base |
260 | // bypassing the virtual implementation |
261 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); |
262 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); |
263 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); |
264 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); |
265 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); |
266 | |
267 | void call_VM_leaf0(address entry_point); |
268 | void call_VM_leaf(address entry_point, |
269 | int number_of_arguments = 0); |
270 | void call_VM_leaf(address entry_point, |
271 | Register arg_1); |
272 | void call_VM_leaf(address entry_point, |
273 | Register arg_1, Register arg_2); |
274 | void call_VM_leaf(address entry_point, |
275 | Register arg_1, Register arg_2, Register arg_3); |
276 | |
277 | // These always tightly bind to MacroAssembler::call_VM_leaf_base |
278 | // bypassing the virtual implementation |
279 | void super_call_VM_leaf(address entry_point); |
280 | void super_call_VM_leaf(address entry_point, Register arg_1); |
281 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); |
282 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); |
283 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); |
284 | |
285 | // last Java Frame (fills frame anchor) |
286 | void set_last_Java_frame(Register thread, |
287 | Register last_java_sp, |
288 | Register last_java_fp, |
289 | address last_java_pc); |
290 | |
291 | // thread in the default location (r15_thread on 64bit) |
292 | void set_last_Java_frame(Register last_java_sp, |
293 | Register last_java_fp, |
294 | address last_java_pc); |
295 | |
296 | void reset_last_Java_frame(Register thread, bool clear_fp); |
297 | |
298 | // thread in the default location (r15_thread on 64bit) |
299 | void reset_last_Java_frame(bool clear_fp); |
300 | |
301 | // jobjects |
302 | void clear_jweak_tag(Register possibly_jweak); |
303 | void resolve_jobject(Register value, Register thread, Register tmp); |
304 | |
305 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 |
306 | void c2bool(Register x); |
307 | |
308 | // C++ bool manipulation |
309 | |
310 | void movbool(Register dst, Address src); |
311 | void movbool(Address dst, bool boolconst); |
312 | void movbool(Address dst, Register src); |
313 | void testbool(Register dst); |
314 | |
315 | void resolve_oop_handle(Register result, Register tmp = rscratch2); |
316 | void resolve_weak_handle(Register result, Register tmp); |
317 | void load_mirror(Register mirror, Register method, Register tmp = rscratch2); |
318 | void load_method_holder_cld(Register rresult, Register rmethod); |
319 | |
320 | void load_method_holder(Register holder, Register method); |
321 | |
322 | // oop manipulations |
323 | void load_klass(Register dst, Register src); |
324 | void store_klass(Register dst, Register src); |
325 | |
326 | void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, |
327 | Register tmp1, Register thread_tmp); |
328 | void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, |
329 | Register tmp1, Register tmp2); |
330 | |
331 | // Resolves obj access. Result is placed in the same register. |
332 | // All other registers are preserved. |
333 | void resolve(DecoratorSet decorators, Register obj); |
334 | |
335 | void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, |
336 | Register thread_tmp = noreg, DecoratorSet decorators = 0); |
337 | void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, |
338 | Register thread_tmp = noreg, DecoratorSet decorators = 0); |
339 | void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, |
340 | Register tmp2 = noreg, DecoratorSet decorators = 0); |
341 | |
342 | // Used for storing NULL. All other oop constants should be |
343 | // stored using routines that take a jobject. |
344 | void store_heap_oop_null(Address dst); |
345 | |
346 | void load_prototype_header(Register dst, Register src); |
347 | |
348 | #ifdef _LP64 |
349 | void store_klass_gap(Register dst, Register src); |
350 | |
351 | // This dummy is to prevent a call to store_heap_oop from |
352 | // converting a zero (like NULL) into a Register by giving |
353 | // the compiler two choices it can't resolve |
354 | |
355 | void store_heap_oop(Address dst, void* dummy); |
356 | |
357 | void encode_heap_oop(Register r); |
358 | void decode_heap_oop(Register r); |
359 | void encode_heap_oop_not_null(Register r); |
360 | void decode_heap_oop_not_null(Register r); |
361 | void encode_heap_oop_not_null(Register dst, Register src); |
362 | void decode_heap_oop_not_null(Register dst, Register src); |
363 | |
364 | void set_narrow_oop(Register dst, jobject obj); |
365 | void set_narrow_oop(Address dst, jobject obj); |
366 | void cmp_narrow_oop(Register dst, jobject obj); |
367 | void cmp_narrow_oop(Address dst, jobject obj); |
368 | |
369 | void encode_klass_not_null(Register r); |
370 | void decode_klass_not_null(Register r); |
371 | void encode_klass_not_null(Register dst, Register src); |
372 | void decode_klass_not_null(Register dst, Register src); |
373 | void set_narrow_klass(Register dst, Klass* k); |
374 | void set_narrow_klass(Address dst, Klass* k); |
375 | void cmp_narrow_klass(Register dst, Klass* k); |
376 | void cmp_narrow_klass(Address dst, Klass* k); |
377 | |
378 | // Returns the byte size of the instructions generated by decode_klass_not_null() |
379 | // when compressed klass pointers are being used. |
380 | static int instr_size_for_decode_klass_not_null(); |
381 | |
382 | // if heap base register is used - reinit it with the correct value |
383 | void reinit_heapbase(); |
384 | |
385 | DEBUG_ONLY(void verify_heapbase(const char* msg);) |
386 | |
387 | #endif // _LP64 |
388 | |
389 | // Int division/remainder for Java |
390 | // (as idivl, but checks for special case as described in JVM spec.) |
391 | // returns idivl instruction offset for implicit exception handling |
392 | int corrected_idivl(Register reg); |
393 | |
394 | // Long division/remainder for Java |
395 | // (as idivq, but checks for special case as described in JVM spec.) |
396 | // returns idivq instruction offset for implicit exception handling |
397 | int corrected_idivq(Register reg); |
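// The special case mentioned above is min_jint / -1 (min_jlong / -1 for the
// quadword form): hardware idiv raises #DE on that input, while the JVM spec
// requires the quotient to be min_jint (min_jlong) with a remainder of 0, so
// the generated code has to test for it before dividing.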
398 | |
399 | void int3(); |
400 | |
401 | // Long operation macros for a 32bit cpu |
402 | // Long negation for Java |
403 | void lneg(Register hi, Register lo); |
404 | |
405 | // Long multiplication for Java |
406 | // (destroys contents of eax, ebx, ecx and edx) |
407 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y |
408 | |
409 | // Long shifts for Java |
410 | // (semantics as described in JVM spec.) |
411 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) |
412 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) |
413 | |
414 | // Long compare for Java |
415 | // (semantics as described in JVM spec.) |
416 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) |
417 | |
418 | |
419 | // misc |
420 | |
421 | // Sign extension |
422 | void sign_extend_short(Register reg); |
423 | void sign_extend_byte(Register reg); |
424 | |
425 | // Division by power of 2, rounding towards 0 |
426 | void division_with_shift(Register reg, int shift_value); |
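// A minimal sketch of the usual correction, assuming the divisor is the
// positive power of two (1 << shift_value): a plain arithmetic shift rounds
// toward negative infinity (-7 >> 2 == -2), so a correct sequence first biases
// negative dividends by (divisor - 1) so the shift rounds toward zero (-7 / 4 == -1).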
427 | |
428 | // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: |
429 | // |
430 | // CF (corresponds to C0) if x < y |
431 | // PF (corresponds to C2) if unordered |
432 | // ZF (corresponds to C3) if x = y |
433 | // |
434 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
435 | // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) |
436 | void fcmp(Register tmp); |
437 | // Variant of the above which allows y to be further down the stack |
438 | // and which only pops x and y if specified. If pop_right is |
439 | // specified then pop_left must also be specified. |
440 | void fcmp(Register tmp, int index, bool pop_left, bool pop_right); |
441 | |
442 | // Floating-point comparison for Java |
443 | // Compares the top-most stack entries on the FPU stack and stores the result in dst. |
444 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
445 | // (semantics as described in JVM spec.) |
446 | void fcmp2int(Register dst, bool unordered_is_less); |
447 | // Variant of the above which allows y to be further down the stack |
448 | // and which only pops x and y if specified. If pop_right is |
449 | // specified then pop_left must also be specified. |
450 | void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); |
451 | |
452 | // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) |
453 | // tmp is a temporary register, if none is available use noreg |
454 | void fremr(Register tmp); |
455 | |
456 | // dst = c = a * b + c |
457 | void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); |
458 | void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); |
459 | |
460 | void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); |
461 | void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); |
462 | void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); |
463 | void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); |
464 | |
465 | |
466 | // same as fcmp2int, but using SSE2 |
467 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
468 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
469 | |
470 | // branch to L if FPU flag C2 is set/not set |
471 | // tmp is a temporary register, if none is available use noreg |
472 | void jC2 (Register tmp, Label& L); |
473 | void jnC2(Register tmp, Label& L); |
474 | |
475 | // Pop ST (ffree & fincstp combined) |
476 | void fpop(); |
477 | |
478 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into |
479 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
480 | void load_float(Address src); |
481 | |
482 | // Store float value to 'address'. If UseSSE >= 1, the value is stored |
483 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
484 | void store_float(Address dst); |
485 | |
486 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into |
487 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
488 | void load_double(Address src); |
489 | |
490 | // Store double value to 'address'. If UseSSE >= 2, the value is stored |
491 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
492 | void store_double(Address dst); |
493 | |
494 | // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack |
495 | void push_fTOS(); |
496 | |
497 | // pops double TOS element from CPU stack and pushes on FPU stack |
498 | void pop_fTOS(); |
499 | |
500 | void empty_FPU_stack(); |
501 | |
502 | void push_IU_state(); |
503 | void pop_IU_state(); |
504 | |
505 | void push_FPU_state(); |
506 | void pop_FPU_state(); |
507 | |
508 | void push_CPU_state(); |
509 | void pop_CPU_state(); |
510 | |
511 | // Round up to a power of two |
512 | void round_to(Register reg, int modulus); |
513 | |
514 | // Callee saved registers handling |
515 | void push_callee_saved_registers(); |
516 | void pop_callee_saved_registers(); |
517 | |
518 | // allocation |
519 | void eden_allocate( |
520 | Register thread, // Current thread |
521 | Register obj, // result: pointer to object after successful allocation |
522 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
523 | int con_size_in_bytes, // object size in bytes if known at compile time |
524 | Register t1, // temp register |
525 | Label& slow_case // continuation point if fast allocation fails |
526 | ); |
527 | void tlab_allocate( |
528 | Register thread, // Current thread |
529 | Register obj, // result: pointer to object after successful allocation |
530 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
531 | int con_size_in_bytes, // object size in bytes if known at compile time |
532 | Register t1, // temp register |
533 | Register t2, // temp register |
534 | Label& slow_case // continuation point if fast allocation fails |
535 | ); |
536 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); |
537 | |
538 | // interface method calling |
539 | void lookup_interface_method(Register recv_klass, |
540 | Register intf_klass, |
541 | RegisterOrConstant itable_index, |
542 | Register method_result, |
543 | Register scan_temp, |
544 | Label& no_such_interface, |
545 | bool return_method = true); |
546 | |
547 | // virtual method calling |
548 | void lookup_virtual_method(Register recv_klass, |
549 | RegisterOrConstant vtable_index, |
550 | Register method_result); |
551 | |
552 | // Test sub_klass against super_klass, with fast and slow paths. |
553 | |
554 | // The fast path produces a tri-state answer: yes / no / maybe-slow. |
555 | // One of the three labels can be NULL, meaning take the fall-through. |
556 | // If super_check_offset is -1, the value is loaded up from super_klass. |
557 | // No registers are killed, except temp_reg. |
558 | void check_klass_subtype_fast_path(Register sub_klass, |
559 | Register super_klass, |
560 | Register temp_reg, |
561 | Label* L_success, |
562 | Label* L_failure, |
563 | Label* L_slow_path, |
564 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); |
565 | |
566 | // The rest of the type check; must be wired to a corresponding fast path. |
567 | // It does not repeat the fast path logic, so don't use it standalone. |
568 | // The temp_reg and temp2_reg can be noreg, if no temps are available. |
569 | // Updates the sub's secondary super cache as necessary. |
570 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. |
571 | void check_klass_subtype_slow_path(Register sub_klass, |
572 | Register super_klass, |
573 | Register temp_reg, |
574 | Register temp2_reg, |
575 | Label* L_success, |
576 | Label* L_failure, |
577 | bool set_cond_codes = false); |
578 | |
579 | // Simplified, combined version, good for typical uses. |
580 | // Falls through on failure. |
581 | void check_klass_subtype(Register sub_klass, |
582 | Register super_klass, |
583 | Register temp_reg, |
584 | Label& L_success); |
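// Illustrative wiring of the two-part check (labels and registers are only
// examples); passing NULL for L_slow_path makes the fast path fall through
// into the slow path when it cannot decide on its own:
//
//   Label L_ok, L_fail;
//   __ check_klass_subtype_fast_path(rsi, rax, rcx, &L_ok, &L_fail, NULL);
//   __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, &L_ok, &L_fail);
//   // execution continues at L_ok if rsi is a subtype of rax, at L_fail otherwise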
585 | |
586 | void clinit_barrier(Register klass, |
587 | Register thread, |
588 | Label* L_fast_path = NULL, |
589 | Label* L_slow_path = NULL); |
590 | |
591 | // method handles (JSR 292) |
592 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); |
593 | |
594 | //---- |
595 | void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 |
596 | |
597 | // Debugging |
598 | |
599 | // only if +VerifyOops |
600 | // TODO: Make these macros with file and line like sparc version! |
601 | void verify_oop(Register reg, const char* s = "broken oop"); |
602 | void verify_oop_addr(Address addr, const char* s = "broken oop addr"); |
603 | |
604 | // TODO: verify method and klass metadata (compare against vptr?) |
605 | void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} |
606 | void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} |
607 | |
608 | #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) |
609 | #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) |
610 | |
611 | // only if +VerifyFPU |
612 | void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); |
613 | |
614 | // Verify or restore cpu control state after JNI call |
615 | void restore_cpu_control_state_after_jni(); |
616 | |
617 | // prints msg, dumps registers and stops execution |
618 | void stop(const char* msg); |
619 | |
620 | // prints msg and continues |
621 | void warn(const char* msg); |
622 | |
623 | // dumps registers and other state |
624 | void print_state(); |
625 | |
626 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); |
627 | static void debug64(char* msg, int64_t pc, int64_t regs[]); |
628 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); |
629 | static void print_state64(int64_t pc, int64_t regs[]); |
630 | |
631 | void os_breakpoint(); |
632 | |
633 | void untested() { stop("untested"); } |
634 | |
635 | void unimplemented(const char* what = ""); |
636 | |
637 | void should_not_reach_here() { stop("should not reach here"); } |
638 | |
639 | void print_CPU_state(); |
640 | |
641 | // Stack overflow checking |
642 | void bang_stack_with_offset(int offset) { |
643 | // stack grows down, caller passes positive offset |
644 | assert(offset > 0, "must bang with negative offset"); |
645 | movl(Address(rsp, (-offset)), rax); |
646 | } |
647 | |
648 | // Writes to stack successive pages until offset reached to check for |
649 | // stack overflow + shadow pages. Also, clobbers tmp. |
650 | void bang_stack_size(Register size, Register tmp); |
651 | |
652 | // Check for reserved stack access in method being exited (for JIT) |
653 | void reserved_stack_check(); |
654 | |
655 | virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, |
656 | Register tmp, |
657 | int offset); |
658 | |
659 | // If thread_reg is != noreg the code assumes the register passed contains |
660 | // the thread (required on 64 bit). |
661 | void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg); |
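// Illustrative 64-bit use, where the thread lives in r15_thread (a sketch,
// not a required pattern):
//
//   Label slow;
//   __ safepoint_poll(slow, r15_thread, rscratch1);
//   // ... fast path ...
//   __ bind(slow);   // take the safepoint/VM slow path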
662 | |
663 | void verify_tlab(); |
664 | |
665 | // Biased locking support |
666 | // lock_reg and obj_reg must be loaded up with the appropriate values. |
667 | // swap_reg must be rax, and is killed. |
668 | // tmp_reg is optional. If it is supplied (i.e., != noreg) it will |
669 | // be killed; if not supplied, push/pop will be used internally to |
670 | // allocate a temporary (inefficient, avoid if possible). |
671 | // Optional slow case is for implementations (interpreter and C1) which branch to |
672 | // slow case directly. Leaves condition codes set for C2's Fast_Lock node. |
673 | // Returns offset of first potentially-faulting instruction for null |
674 | // check info (currently consumed only by C1). If |
675 | // swap_reg_contains_mark is true then returns -1 as it is assumed |
676 | // the calling code has already passed any potential faults. |
677 | int biased_locking_enter(Register lock_reg, Register obj_reg, |
678 | Register swap_reg, Register tmp_reg, |
679 | bool swap_reg_contains_mark, |
680 | Label& done, Label* slow_case = NULL, |
681 | BiasedLockingCounters* counters = NULL); |
682 | void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); |
683 | #ifdef COMPILER2 |
684 | // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. |
685 | // See full description in macroAssembler_x86.cpp. |
686 | void fast_lock(Register obj, Register box, Register tmp, |
687 | Register scr, Register cx1, Register cx2, |
688 | BiasedLockingCounters* counters, |
689 | RTMLockingCounters* rtm_counters, |
690 | RTMLockingCounters* stack_rtm_counters, |
691 | Metadata* method_data, |
692 | bool use_rtm, bool profile_rtm); |
693 | void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); |
694 | #if INCLUDE_RTM_OPT |
695 | void rtm_counters_update(Register abort_status, Register rtm_counters); |
696 | void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); |
697 | void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, |
698 | RTMLockingCounters* rtm_counters, |
699 | Metadata* method_data); |
700 | void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, |
701 | RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); |
702 | void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); |
703 | void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); |
704 | void rtm_stack_locking(Register obj, Register tmp, Register scr, |
705 | Register retry_on_abort_count, |
706 | RTMLockingCounters* stack_rtm_counters, |
707 | Metadata* method_data, bool profile_rtm, |
708 | Label& DONE_LABEL, Label& IsInflated); |
709 | void rtm_inflated_locking(Register obj, Register box, Register tmp, |
710 | Register scr, Register retry_on_busy_count, |
711 | Register retry_on_abort_count, |
712 | RTMLockingCounters* rtm_counters, |
713 | Metadata* method_data, bool profile_rtm, |
714 | Label& DONE_LABEL); |
715 | #endif |
716 | #endif |
717 | |
718 | Condition negate_condition(Condition cond); |
719 | |
720 | // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit |
721 | // operands. In general the names are modified to avoid hiding the instruction in Assembler |
722 | // so that we don't need to implement all the varieties in the Assembler with trivial wrappers |
723 | // here in MacroAssembler. The major exception to this rule is call. |
724 | |
725 | // Arithmetics |
726 | |
727 | |
728 | void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } |
729 | void addptr(Address dst, Register src); |
730 | |
731 | void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } |
732 | void addptr(Register dst, int32_t src); |
733 | void addptr(Register dst, Register src); |
734 | void addptr(Register dst, RegisterOrConstant src) { |
735 | if (src.is_constant()) addptr(dst, (int) src.as_constant()); |
736 | else addptr(dst, src.as_register()); |
737 | } |
738 | |
739 | void andptr(Register dst, int32_t src); |
740 | void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } |
741 | |
742 | void cmp8(AddressLiteral src1, int imm); |
743 | |
744 | // renamed to drag out the casting of address to int32_t/intptr_t |
745 | void cmp32(Register src1, int32_t imm); |
746 | |
747 | void cmp32(AddressLiteral src1, int32_t imm); |
748 | // compare reg - mem, or reg - &mem |
749 | void cmp32(Register src1, AddressLiteral src2); |
750 | |
751 | void cmp32(Register src1, Address src2); |
752 | |
753 | #ifndef _LP64 |
754 | void cmpklass(Address dst, Metadata* obj); |
755 | void cmpklass(Register dst, Metadata* obj); |
756 | void cmpoop(Address dst, jobject obj); |
757 | void cmpoop_raw(Address dst, jobject obj); |
758 | #endif // _LP64 |
759 | |
760 | void cmpoop(Register src1, Register src2); |
761 | void cmpoop(Register src1, Address src2); |
762 | void cmpoop(Register dst, jobject obj); |
763 | void cmpoop_raw(Register dst, jobject obj); |
764 | |
765 | // NOTE: src2 must be the lval. This is NOT a mem-mem compare |
766 | void cmpptr(Address src1, AddressLiteral src2); |
767 | |
768 | void cmpptr(Register src1, AddressLiteral src2); |
769 | |
770 | void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
771 | void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
772 | // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
773 | |
774 | void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
775 | void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
776 | |
777 | // cmp64 to avoid hiding cmpq |
778 | void cmp64(Register src1, AddressLiteral src); |
779 | |
780 | void cmpxchgptr(Register reg, Address adr); |
781 | |
782 | void locked_cmpxchgptr(Register reg, AddressLiteral adr); |
783 | |
784 | |
785 | void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } |
786 | void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); } |
787 | |
788 | |
789 | void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } |
790 | |
791 | void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } |
792 | |
793 | void shlptr(Register dst, int32_t shift); |
794 | void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } |
795 | |
796 | void shrptr(Register dst, int32_t shift); |
797 | void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } |
798 | |
799 | void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } |
800 | void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } |
801 | |
802 | void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } |
803 | |
804 | void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } |
805 | void subptr(Register dst, int32_t src); |
806 | // Force generation of a 4 byte immediate value even if it fits into 8bit |
807 | void subptr_imm32(Register dst, int32_t src); |
808 | void subptr(Register dst, Register src); |
809 | void subptr(Register dst, RegisterOrConstant src) { |
810 | if (src.is_constant()) subptr(dst, (int) src.as_constant()); |
811 | else subptr(dst, src.as_register()); |
812 | } |
813 | |
814 | void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } |
815 | void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } |
816 | |
817 | void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } |
818 | void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } |
819 | |
820 | void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } |
821 | |
822 | |
823 | |
824 | // Helper functions for statistics gathering. |
825 | // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. |
826 | void cond_inc32(Condition cond, AddressLiteral counter_addr); |
827 | // Unconditional atomic increment. |
828 | void atomic_incl(Address counter_addr); |
829 | void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1); |
830 | #ifdef _LP64 |
831 | void atomic_incq(Address counter_addr); |
832 | void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1); |
833 | #endif |
834 | void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; } |
835 | void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; } |
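// Illustrative sketch (the counter variable is hypothetical): bumping a global
// statistics counter through an AddressLiteral, with rscratch1 as the default
// scratch register:
//
//   __ atomic_incl(ExternalAddress((address)&some_counter));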
836 | |
837 | void lea(Register dst, AddressLiteral adr); |
838 | void lea(Address dst, AddressLiteral adr); |
839 | void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } |
840 | |
841 | void leal32(Register dst, Address src) { leal(dst, src); } |
842 | |
843 | // Import other testl() methods from the parent class or else |
844 | // they will be hidden by the following overriding declaration. |
845 | using Assembler::testl; |
846 | void testl(Register dst, AddressLiteral src); |
847 | |
848 | void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
849 | void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
850 | void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
851 | void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); } |
852 | |
853 | void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } |
854 | void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); } |
855 | void testptr(Register src1, Register src2); |
856 | |
857 | void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } |
858 | void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } |
859 | |
860 | // Calls |
861 | |
862 | void call(Label& L, relocInfo::relocType rtype); |
863 | void call(Register entry); |
864 | |
865 | // NOTE: this call transfers to the effective address of entry NOT |
866 | // the address contained by entry. This is because it is more natural |
867 | // for jumps/calls. |
868 | void call(AddressLiteral entry); |
869 | |
870 | // Emit the CompiledIC call idiom |
871 | void ic_call(address entry, jint method_index = 0); |
872 | |
873 | // Jumps |
874 | |
875 | // NOTE: these jumps transfer to the effective address of dst NOT |
876 | // the address contained by dst. This is because it is more natural |
877 | // for jumps/calls. |
878 | void jump(AddressLiteral dst); |
879 | void jump_cc(Condition cc, AddressLiteral dst); |
880 | |
881 | // 32bit can do a case table jump in one instruction but we no longer allow the base |
882 | // to be installed in the Address class. This jump transfers to the address |
883 | // contained in the location described by entry (not the address of entry). |
884 | void jump(ArrayAddress entry); |
885 | |
886 | // Floating |
887 | |
888 | void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } |
889 | void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
890 | void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } |
891 | |
892 | void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } |
893 | void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } |
894 | void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
895 | |
896 | void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } |
897 | void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } |
898 | void comiss(XMMRegister dst, AddressLiteral src); |
899 | |
900 | void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } |
901 | void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } |
902 | void comisd(XMMRegister dst, AddressLiteral src); |
903 | |
904 | void fadd_s(Address src) { Assembler::fadd_s(src); } |
905 | void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } |
906 | |
907 | void fldcw(Address src) { Assembler::fldcw(src); } |
908 | void fldcw(AddressLiteral src); |
909 | |
910 | void fld_s(int index) { Assembler::fld_s(index); } |
911 | void fld_s(Address src) { Assembler::fld_s(src); } |
912 | void fld_s(AddressLiteral src); |
913 | |
914 | void fld_d(Address src) { Assembler::fld_d(src); } |
915 | void fld_d(AddressLiteral src); |
916 | |
917 | void fld_x(Address src) { Assembler::fld_x(src); } |
918 | void fld_x(AddressLiteral src); |
919 | |
920 | void fmul_s(Address src) { Assembler::fmul_s(src); } |
921 | void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } |
922 | |
923 | void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } |
924 | void ldmxcsr(AddressLiteral src); |
925 | |
926 | #ifdef _LP64 |
927 | private: |
928 | void sha256_AVX2_one_round_compute( |
929 | Register reg_old_h, |
930 | Register reg_a, |
931 | Register reg_b, |
932 | Register reg_c, |
933 | Register reg_d, |
934 | Register reg_e, |
935 | Register reg_f, |
936 | Register reg_g, |
937 | Register reg_h, |
938 | int iter); |
939 | void sha256_AVX2_four_rounds_compute_first(int start); |
940 | void sha256_AVX2_four_rounds_compute_last(int start); |
941 | void sha256_AVX2_one_round_and_sched( |
942 | XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ |
943 | XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ |
944 | XMMRegister xmm_2, /* ymm6 */ |
945 | XMMRegister xmm_3, /* ymm7 */ |
946 | Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ |
947 | Register reg_b, /* ebx */ /* full cycle is 8 iterations */ |
948 | Register reg_c, /* edi */ |
949 | Register reg_d, /* esi */ |
950 | Register reg_e, /* r8d */ |
951 | Register reg_f, /* r9d */ |
952 | Register reg_g, /* r10d */ |
953 | Register reg_h, /* r11d */ |
954 | int iter); |
955 | |
956 | void addm(int disp, Register r1, Register r2); |
957 | void gfmul(XMMRegister tmp0, XMMRegister t); |
958 | void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0, |
959 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3); |
960 | void generateHtbl_one_block(Register htbl); |
961 | void generateHtbl_eight_blocks(Register htbl); |
962 | public: |
963 | void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
964 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
965 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
966 | bool multi_block, XMMRegister shuf_mask); |
967 | void avx_ghash(Register state, Register htbl, Register data, Register blocks); |
968 | #endif |
969 | |
970 | #ifdef _LP64 |
971 | private: |
972 | void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, |
973 | Register e, Register f, Register g, Register h, int iteration); |
974 | |
975 | void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
976 | Register a, Register b, Register c, Register d, Register e, Register f, |
977 | Register g, Register h, int iteration); |
978 | |
979 | void addmq(int disp, Register r1, Register r2); |
980 | public: |
981 | void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
982 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
983 | Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, |
984 | XMMRegister shuf_mask); |
985 | #endif |
986 | |
987 | void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, |
988 | XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, |
989 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
990 | bool multi_block); |
991 | |
992 | #ifdef _LP64 |
993 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
994 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
995 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
996 | bool multi_block, XMMRegister shuf_mask); |
997 | #else |
998 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
999 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
1000 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
1001 | bool multi_block); |
1002 | #endif |
1003 | |
1004 | void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1005 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1006 | Register rax, Register rcx, Register rdx, Register tmp); |
1007 | |
1008 | #ifdef _LP64 |
1009 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1010 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1011 | Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); |
1012 | |
1013 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1014 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1015 | Register rax, Register rcx, Register rdx, Register r11); |
1016 | |
1017 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
1018 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
1019 | Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); |
1020 | |
1021 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1022 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1023 | Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, |
1024 | Register tmp3, Register tmp4); |
1025 | |
1026 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1027 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1028 | Register rax, Register rcx, Register rdx, Register tmp1, |
1029 | Register tmp2, Register tmp3, Register tmp4); |
1030 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1031 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1032 | Register rax, Register rcx, Register rdx, Register tmp1, |
1033 | Register tmp2, Register tmp3, Register tmp4); |
1034 | #else |
1035 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1036 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1037 | Register rax, Register rcx, Register rdx, Register tmp1); |
1038 | |
1039 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1040 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1041 | Register rax, Register rcx, Register rdx, Register tmp); |
1042 | |
1043 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
1044 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
1045 | Register rdx, Register tmp); |
1046 | |
1047 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1048 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1049 | Register rax, Register rbx, Register rdx); |
1050 | |
1051 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1052 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1053 | Register rax, Register rcx, Register rdx, Register tmp); |
1054 | |
1055 | void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, |
1056 | Register edx, Register ebx, Register esi, Register edi, |
1057 | Register ebp, Register esp); |
1058 | |
1059 | void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, |
1060 | Register esi, Register edi, Register ebp, Register esp); |
1061 | |
1062 | void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, |
1063 | Register edx, Register ebx, Register esi, Register edi, |
1064 | Register ebp, Register esp); |
1065 | |
1066 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
1067 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
1068 | Register rax, Register rcx, Register rdx, Register tmp); |
1069 | #endif |
1070 | |
1071 | void increase_precision(); |
1072 | void restore_precision(); |
1073 | |
1074 | private: |
1075 | |
1076 | // these are private because users should be doing movflt/movdbl |
1077 | |
1078 | void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } |
1079 | void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } |
1080 | void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } |
1081 | void movss(XMMRegister dst, AddressLiteral src); |
1082 | |
1083 | void movlpd(XMMRegister dst, Address src) { Assembler::movlpd(dst, src); } |
1084 | void movlpd(XMMRegister dst, AddressLiteral src); |
1085 | |
1086 | public: |
1087 | |
1088 | void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } |
1089 | void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } |
1090 | void addsd(XMMRegister dst, AddressLiteral src); |
1091 | |
1092 | void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } |
1093 | void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } |
1094 | void addss(XMMRegister dst, AddressLiteral src); |
1095 | |
1096 | void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } |
1097 | void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } |
1098 | void addpd(XMMRegister dst, AddressLiteral src); |
1099 | |
1100 | void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } |
1101 | void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } |
1102 | void divsd(XMMRegister dst, AddressLiteral src); |
1103 | |
1104 | void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } |
1105 | void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } |
1106 | void divss(XMMRegister dst, AddressLiteral src); |
1107 | |
1108 | // Move Unaligned Double Quadword |
1109 | void movdqu(Address dst, XMMRegister src); |
1110 | void movdqu(XMMRegister dst, Address src); |
1111 | void movdqu(XMMRegister dst, XMMRegister src); |
1112 | void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); |
1113 | // AVX Unaligned forms |
1114 | void vmovdqu(Address dst, XMMRegister src); |
1115 | void vmovdqu(XMMRegister dst, Address src); |
1116 | void vmovdqu(XMMRegister dst, XMMRegister src); |
1117 | void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
1118 | void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
1119 | void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
1120 | void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
1121 | void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); |
1122 | |
1123 | // Move Aligned Double Quadword |
1124 | void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } |
1125 | void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } |
1126 | void movdqa(XMMRegister dst, AddressLiteral src); |
1127 | |
1128 | void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } |
1129 | void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } |
1130 | void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } |
1131 | void movsd(XMMRegister dst, AddressLiteral src); |
1132 | |
1133 | void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } |
1134 | void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } |
1135 | void mulpd(XMMRegister dst, AddressLiteral src); |
1136 | |
1137 | void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } |
1138 | void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } |
1139 | void mulsd(XMMRegister dst, AddressLiteral src); |
1140 | |
1141 | void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } |
1142 | void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } |
1143 | void mulss(XMMRegister dst, AddressLiteral src); |
1144 | |
1145 | // Carry-Less Multiplication Quadword |
1146 | void pclmulldq(XMMRegister dst, XMMRegister src) { |
1147 | // 0x00 - multiply lower 64 bits [0:63] |
1148 | Assembler::pclmulqdq(dst, src, 0x00); |
1149 | } |
1150 | void pclmulhdq(XMMRegister dst, XMMRegister src) { |
1151 | // 0x11 - multiply upper 64 bits [64:127] |
1152 | Assembler::pclmulqdq(dst, src, 0x11); |
1153 | } |
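// Note: these two selectors only cover the low*low and high*high halves; a
// full 128x128-bit carry-less product also needs the cross terms (imm8
// selectors 0x01 and 0x10), which callers combine separately when the full
// product is required.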
1154 | |
1155 | void pcmpeqb(XMMRegister dst, XMMRegister src); |
1156 | void pcmpeqw(XMMRegister dst, XMMRegister src); |
1157 | |
1158 | void pcmpestri(XMMRegister dst, Address src, int imm8); |
1159 | void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); |
1160 | |
1161 | void pmovzxbw(XMMRegister dst, XMMRegister src); |
1162 | void pmovzxbw(XMMRegister dst, Address src); |
1163 | |
1164 | void pmovmskb(Register dst, XMMRegister src); |
1165 | |
1166 | void ptest(XMMRegister dst, XMMRegister src); |
1167 | |
1168 | void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } |
1169 | void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } |
1170 | void sqrtsd(XMMRegister dst, AddressLiteral src); |
1171 | |
1172 | void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } |
1173 | void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } |
1174 | void sqrtss(XMMRegister dst, AddressLiteral src); |
1175 | |
1176 | void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } |
1177 | void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } |
1178 | void subsd(XMMRegister dst, AddressLiteral src); |
1179 | |
1180 | void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } |
1181 | void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } |
1182 | void subss(XMMRegister dst, AddressLiteral src); |
1183 | |
1184 | void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } |
1185 | void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } |
1186 | void ucomiss(XMMRegister dst, AddressLiteral src); |
1187 | |
1188 | void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } |
1189 | void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } |
1190 | void ucomisd(XMMRegister dst, AddressLiteral src); |
1191 | |
1192 | // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values |
1193 | void xorpd(XMMRegister dst, XMMRegister src); |
1194 | void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } |
1195 | void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
1196 | |
1197 | // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values |
1198 | void xorps(XMMRegister dst, XMMRegister src); |
1199 | void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } |
1200 | void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
1201 | |
1202 | // Shuffle Bytes |
1203 | void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } |
1204 | void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } |
1205 | void pshufb(XMMRegister dst, AddressLiteral src); |
  // AVX 3-operand instructions
1207 | |
1208 | void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } |
1209 | void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } |
1210 | void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1211 | |
1212 | void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } |
1213 | void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } |
1214 | void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1215 | |
1216 | void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); |
1217 | void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); |
1218 | |
1219 | void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1220 | void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1221 | |
1222 | void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1223 | void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1224 | |
1225 | void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } |
1226 | void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } |
1227 | void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1228 | |
1229 | void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); |
1230 | void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); } |
1231 | |
1232 | void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1233 | |
1234 | void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1235 | |
1236 | void vpmovzxbw(XMMRegister dst, Address src, int vector_len); |
1237 | void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); } |
1238 | |
1239 | void vpmovmskb(Register dst, XMMRegister src); |
1240 | |
1241 | void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1242 | void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1243 | |
1244 | void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1245 | void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1246 | |
1247 | void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1248 | void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
1249 | |
1250 | void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
1251 | void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
1252 | |
1253 | void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
1254 | void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
1255 | |
1256 | void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
1257 | void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
1258 | |
1259 | void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
1260 | void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
1261 | |
1262 | void vptest(XMMRegister dst, XMMRegister src); |
1263 | |
1264 | void punpcklbw(XMMRegister dst, XMMRegister src); |
1265 | void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } |
1266 | |
1267 | void pshufd(XMMRegister dst, Address src, int mode); |
1268 | void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); } |
1269 | |
1270 | void pshuflw(XMMRegister dst, XMMRegister src, int mode); |
1271 | void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); } |
1272 | |
1273 | void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
1274 | void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
1275 | void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1276 | |
1277 | void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
1278 | void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
1279 | void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1280 | |
1281 | void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } |
1282 | void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } |
1283 | void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1284 | |
1285 | void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } |
1286 | void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } |
1287 | void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1288 | |
1289 | void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } |
1290 | void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } |
1291 | void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1292 | |
1293 | void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } |
1294 | void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } |
1295 | void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1296 | |
1297 | void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } |
1298 | void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } |
1299 | void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1300 | |
1301 | void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } |
1302 | void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } |
1303 | void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1304 | |
1305 | void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1306 | void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
1307 | |
1308 | // AVX Vector instructions |
1309 | |
1310 | void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
1311 | void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
1312 | void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1313 | |
1314 | void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
1315 | void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
1316 | void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1317 | |
1318 | void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
1319 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
1320 | Assembler::vpxor(dst, nds, src, vector_len); |
1321 | else |
1322 | Assembler::vxorpd(dst, nds, src, vector_len); |
1323 | } |
1324 | void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
1325 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
1326 | Assembler::vpxor(dst, nds, src, vector_len); |
1327 | else |
1328 | Assembler::vxorpd(dst, nds, src, vector_len); |
1329 | } |
1330 | void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
1331 | |
1332 | // Simple version for AVX2 256bit vectors |
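  // (note: the 'true' argument below converts to vector_len == 1, i.e. Assembler::AVX_256bit)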
1333 | void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } |
1334 | void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } |
1335 | |
1336 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { |
1337 | if (UseAVX > 2) { |
1338 | Assembler::vinserti32x4(dst, dst, src, imm8); |
1339 | } else if (UseAVX > 1) { |
1340 | // vinserti128 is available only in AVX2 |
1341 | Assembler::vinserti128(dst, nds, src, imm8); |
1342 | } else { |
1343 | Assembler::vinsertf128(dst, nds, src, imm8); |
1344 | } |
1345 | } |
1346 | |
1347 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { |
1348 | if (UseAVX > 2) { |
1349 | Assembler::vinserti32x4(dst, dst, src, imm8); |
1350 | } else if (UseAVX > 1) { |
1351 | // vinserti128 is available only in AVX2 |
1352 | Assembler::vinserti128(dst, nds, src, imm8); |
1353 | } else { |
1354 | Assembler::vinsertf128(dst, nds, src, imm8); |
1355 | } |
1356 | } |
1357 | |
  void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1359 | if (UseAVX > 2) { |
1360 | Assembler::vextracti32x4(dst, src, imm8); |
1361 | } else if (UseAVX > 1) { |
1362 | // vextracti128 is available only in AVX2 |
1363 | Assembler::vextracti128(dst, src, imm8); |
1364 | } else { |
1365 | Assembler::vextractf128(dst, src, imm8); |
1366 | } |
1367 | } |
1368 | |
  void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
1370 | if (UseAVX > 2) { |
1371 | Assembler::vextracti32x4(dst, src, imm8); |
1372 | } else if (UseAVX > 1) { |
1373 | // vextracti128 is available only in AVX2 |
1374 | Assembler::vextracti128(dst, src, imm8); |
1375 | } else { |
1376 | Assembler::vextractf128(dst, src, imm8); |
1377 | } |
1378 | } |
1379 | |
1380 | // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers |
1381 | void vinserti128_high(XMMRegister dst, XMMRegister src) { |
1382 | vinserti128(dst, dst, src, 1); |
1383 | } |
1384 | void vinserti128_high(XMMRegister dst, Address src) { |
1385 | vinserti128(dst, dst, src, 1); |
1386 | } |
  void vextracti128_high(XMMRegister dst, XMMRegister src) {
1388 | vextracti128(dst, src, 1); |
1389 | } |
  void vextracti128_high(Address dst, XMMRegister src) {
1391 | vextracti128(dst, src, 1); |
1392 | } |
1393 | |
1394 | void vinsertf128_high(XMMRegister dst, XMMRegister src) { |
1395 | if (UseAVX > 2) { |
1396 | Assembler::vinsertf32x4(dst, dst, src, 1); |
1397 | } else { |
1398 | Assembler::vinsertf128(dst, dst, src, 1); |
1399 | } |
1400 | } |
1401 | |
1402 | void vinsertf128_high(XMMRegister dst, Address src) { |
1403 | if (UseAVX > 2) { |
1404 | Assembler::vinsertf32x4(dst, dst, src, 1); |
1405 | } else { |
1406 | Assembler::vinsertf128(dst, dst, src, 1); |
1407 | } |
1408 | } |
1409 | |
  void vextractf128_high(XMMRegister dst, XMMRegister src) {
1411 | if (UseAVX > 2) { |
1412 | Assembler::vextractf32x4(dst, src, 1); |
1413 | } else { |
1414 | Assembler::vextractf128(dst, src, 1); |
1415 | } |
1416 | } |
1417 | |
  void vextractf128_high(Address dst, XMMRegister src) {
1419 | if (UseAVX > 2) { |
1420 | Assembler::vextractf32x4(dst, src, 1); |
1421 | } else { |
1422 | Assembler::vextractf128(dst, src, 1); |
1423 | } |
1424 | } |
1425 | |
1426 | // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers |
1427 | void vinserti64x4_high(XMMRegister dst, XMMRegister src) { |
1428 | Assembler::vinserti64x4(dst, dst, src, 1); |
1429 | } |
1430 | void vinsertf64x4_high(XMMRegister dst, XMMRegister src) { |
1431 | Assembler::vinsertf64x4(dst, dst, src, 1); |
1432 | } |
  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
1434 | Assembler::vextracti64x4(dst, src, 1); |
1435 | } |
  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
1437 | Assembler::vextractf64x4(dst, src, 1); |
1438 | } |
  void vextractf64x4_high(Address dst, XMMRegister src) {
1440 | Assembler::vextractf64x4(dst, src, 1); |
1441 | } |
1442 | void vinsertf64x4_high(XMMRegister dst, Address src) { |
1443 | Assembler::vinsertf64x4(dst, dst, src, 1); |
1444 | } |
1445 | |
1446 | // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers |
1447 | void vinserti128_low(XMMRegister dst, XMMRegister src) { |
1448 | vinserti128(dst, dst, src, 0); |
1449 | } |
1450 | void vinserti128_low(XMMRegister dst, Address src) { |
1451 | vinserti128(dst, dst, src, 0); |
1452 | } |
  void vextracti128_low(XMMRegister dst, XMMRegister src) {
1454 | vextracti128(dst, src, 0); |
1455 | } |
  void vextracti128_low(Address dst, XMMRegister src) {
1457 | vextracti128(dst, src, 0); |
1458 | } |
1459 | |
1460 | void vinsertf128_low(XMMRegister dst, XMMRegister src) { |
1461 | if (UseAVX > 2) { |
1462 | Assembler::vinsertf32x4(dst, dst, src, 0); |
1463 | } else { |
1464 | Assembler::vinsertf128(dst, dst, src, 0); |
1465 | } |
1466 | } |
1467 | |
1468 | void vinsertf128_low(XMMRegister dst, Address src) { |
1469 | if (UseAVX > 2) { |
1470 | Assembler::vinsertf32x4(dst, dst, src, 0); |
1471 | } else { |
1472 | Assembler::vinsertf128(dst, dst, src, 0); |
1473 | } |
1474 | } |
1475 | |
  void vextractf128_low(XMMRegister dst, XMMRegister src) {
1477 | if (UseAVX > 2) { |
1478 | Assembler::vextractf32x4(dst, src, 0); |
1479 | } else { |
1480 | Assembler::vextractf128(dst, src, 0); |
1481 | } |
1482 | } |
1483 | |
  void vextractf128_low(Address dst, XMMRegister src) {
1485 | if (UseAVX > 2) { |
1486 | Assembler::vextractf32x4(dst, src, 0); |
1487 | } else { |
1488 | Assembler::vextractf128(dst, src, 0); |
1489 | } |
1490 | } |
1491 | |
1492 | // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers |
1493 | void vinserti64x4_low(XMMRegister dst, XMMRegister src) { |
1494 | Assembler::vinserti64x4(dst, dst, src, 0); |
1495 | } |
1496 | void vinsertf64x4_low(XMMRegister dst, XMMRegister src) { |
1497 | Assembler::vinsertf64x4(dst, dst, src, 0); |
1498 | } |
  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
1500 | Assembler::vextracti64x4(dst, src, 0); |
1501 | } |
  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1503 | Assembler::vextractf64x4(dst, src, 0); |
1504 | } |
  void vextractf64x4_low(Address dst, XMMRegister src) {
1506 | Assembler::vextractf64x4(dst, src, 0); |
1507 | } |
1508 | void vinsertf64x4_low(XMMRegister dst, Address src) { |
1509 | Assembler::vinsertf64x4(dst, dst, src, 0); |
1510 | } |
1511 | |
1512 | // Carry-Less Multiplication Quadword |
1513 | void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
1514 | // 0x00 - multiply lower 64 bits [0:63] |
1515 | Assembler::vpclmulqdq(dst, nds, src, 0x00); |
1516 | } |
1517 | void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
1518 | // 0x11 - multiply upper 64 bits [64:127] |
1519 | Assembler::vpclmulqdq(dst, nds, src, 0x11); |
1520 | } |
1521 | void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
1522 | // 0x10 - multiply nds[0:63] and src[64:127] |
1523 | Assembler::vpclmulqdq(dst, nds, src, 0x10); |
1524 | } |
1525 | void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
1526 | //0x01 - multiply nds[64:127] and src[0:63] |
1527 | Assembler::vpclmulqdq(dst, nds, src, 0x01); |
1528 | } |
1529 | |
1530 | void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
1531 | // 0x00 - multiply lower 64 bits [0:63] |
1532 | Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len); |
1533 | } |
1534 | void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
1535 | // 0x11 - multiply upper 64 bits [64:127] |
1536 | Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); |
1537 | } |
1538 | |
1539 | // Data |
1540 | |
1541 | void cmov32( Condition cc, Register dst, Address src); |
1542 | void cmov32( Condition cc, Register dst, Register src); |
1543 | |
1544 | void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } |
1545 | |
1546 | void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
1547 | void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
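  // LP64_ONLY(x) expands to x only on 64-bit builds and NOT_LP64(x) only on 32-bit
  // builds (see utilities/macros.hpp), so each cmovptr above emits exactly one form.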
1548 | |
1549 | void movoop(Register dst, jobject obj); |
1550 | void movoop(Address dst, jobject obj); |
1551 | |
1552 | void mov_metadata(Register dst, Metadata* obj); |
1553 | void mov_metadata(Address dst, Metadata* obj); |
1554 | |
1555 | void movptr(ArrayAddress dst, Register src); |
1556 | // can this do an lea? |
1557 | void movptr(Register dst, ArrayAddress src); |
1558 | |
1559 | void movptr(Register dst, Address src); |
1560 | |
1561 | #ifdef _LP64 |
1562 | void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); |
1563 | #else |
1564 | void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit |
1565 | #endif |
1566 | |
1567 | void movptr(Register dst, intptr_t src); |
1568 | void movptr(Register dst, Register src); |
1569 | void movptr(Address dst, intptr_t src); |
1570 | |
1571 | void movptr(Address dst, Register src); |
1572 | |
1573 | void movptr(Register dst, RegisterOrConstant src) { |
1574 | if (src.is_constant()) movptr(dst, src.as_constant()); |
1575 | else movptr(dst, src.as_register()); |
1576 | } |
1577 | |
1578 | #ifdef _LP64 |
  // Generally the next two are only used for moving NULL, although there are
  // situations in initializing the mark word where they could be used.
  // They are dangerous.
1582 | |
  // They only exist on LP64, where int32_t and intptr_t are distinct types;
  // on 32-bit the two overloads would be ambiguous.
1585 | |
1586 | void movptr(Address dst, int32_t imm32); |
1587 | void movptr(Register dst, int32_t imm32); |
1588 | #endif // _LP64 |
1589 | |
1590 | // to avoid hiding movl |
1591 | void mov32(AddressLiteral dst, Register src); |
1592 | void mov32(Register dst, AddressLiteral src); |
1593 | |
1594 | // to avoid hiding movb |
1595 | void movbyte(ArrayAddress dst, int src); |
1596 | |
1597 | // Import other mov() methods from the parent class or else |
1598 | // they will be hidden by the following overriding declaration. |
1599 | using Assembler::movdl; |
1600 | using Assembler::movq; |
1601 | void movdl(XMMRegister dst, AddressLiteral src); |
1602 | void movq(XMMRegister dst, AddressLiteral src); |
1603 | |
1604 | // Can push value or effective address |
1605 | void pushptr(AddressLiteral src); |
1606 | |
1607 | void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } |
1608 | void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } |
1609 | |
1610 | void pushoop(jobject obj); |
1611 | void pushklass(Metadata* obj); |
1612 | |
  // sign extend as needed from an 'l' (32-bit) element to a ptr-sized element
1614 | void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } |
1615 | void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } |
1616 | |
1617 | #ifdef COMPILER2 |
  // Generic instruction support for C2 code generation in .ad files
1619 | void vabsnegd(int opcode, XMMRegister dst, Register scr); |
1620 | void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); |
1621 | void vabsnegf(int opcode, XMMRegister dst, Register scr); |
1622 | void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); |
1623 | void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); |
1624 | void vextendbw(bool sign, XMMRegister dst, XMMRegister src); |
1625 | void vshiftd(int opcode, XMMRegister dst, XMMRegister src); |
1626 | void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1627 | void vshiftw(int opcode, XMMRegister dst, XMMRegister src); |
1628 | void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1629 | void vshiftq(int opcode, XMMRegister dst, XMMRegister src); |
1630 | void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
1631 | #endif |
1632 | |
1633 | // C2 compiled method's prolog code. |
1634 | void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); |
1635 | |
1636 | // clear memory of size 'cnt' qwords, starting at 'base'; |
  // if 'is_large' is set, do not try to produce a short loop
1638 | void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large); |
1639 | |
1640 | // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers |
1641 | void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp); |
1642 | |
1643 | #ifdef COMPILER2 |
1644 | void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
1645 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); |
1646 | |
1647 | // IndexOf strings. |
  // Small strings are loaded through the stack if they cross a page boundary.
1649 | void string_indexof(Register str1, Register str2, |
1650 | Register cnt1, Register cnt2, |
1651 | int int_cnt2, Register result, |
1652 | XMMRegister vec, Register tmp, |
1653 | int ae); |
1654 | |
1655 | // IndexOf for constant substrings with size >= 8 elements |
  // which don't need to be loaded through the stack.
1657 | void string_indexofC8(Register str1, Register str2, |
1658 | Register cnt1, Register cnt2, |
1659 | int int_cnt2, Register result, |
1660 | XMMRegister vec, Register tmp, |
1661 | int ae); |
1662 | |
1663 | // Smallest code: we don't need to load through stack, |
1664 | // check string tail. |
1665 | |
1666 | // helper function for string_compare |
1667 | void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, |
1668 | Address::ScaleFactor scale, Address::ScaleFactor scale1, |
1669 | Address::ScaleFactor scale2, Register index, int ae); |
1670 | // Compare strings. |
1671 | void string_compare(Register str1, Register str2, |
1672 | Register cnt1, Register cnt2, Register result, |
1673 | XMMRegister vec1, int ae); |
1674 | |
  // Search for a non-ASCII character (negative byte value) in a byte array;
  // return true if any is found, false otherwise.
1677 | void has_negatives(Register ary1, Register len, |
1678 | Register result, Register tmp1, |
1679 | XMMRegister vec1, XMMRegister vec2); |
1680 | |
1681 | // Compare char[] or byte[] arrays. |
1682 | void arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
1683 | Register limit, Register result, Register chr, |
1684 | XMMRegister vec1, XMMRegister vec2, bool is_char); |
1685 | |
1686 | #endif |
1687 | |
1688 | // Fill primitive arrays |
1689 | void generate_fill(BasicType t, bool aligned, |
1690 | Register to, Register value, Register count, |
1691 | Register rtmp, XMMRegister xtmp); |
1692 | |
1693 | void encode_iso_array(Register src, Register dst, Register len, |
1694 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, |
1695 | XMMRegister tmp4, Register tmp5, Register result); |
1696 | |
1697 | #ifdef _LP64 |
1698 | void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); |
1699 | void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, |
1700 | Register y, Register y_idx, Register z, |
1701 | Register carry, Register product, |
1702 | Register idx, Register kdx); |
1703 | void multiply_add_128_x_128(Register x_xstart, Register y, Register z, |
1704 | Register yz_idx, Register idx, |
1705 | Register carry, Register product, int offset); |
1706 | void multiply_128_x_128_bmi2_loop(Register y, Register z, |
1707 | Register carry, Register carry2, |
1708 | Register idx, Register jdx, |
1709 | Register yz_idx1, Register yz_idx2, |
1710 | Register tmp, Register tmp3, Register tmp4); |
1711 | void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, |
1712 | Register yz_idx, Register idx, Register jdx, |
1713 | Register carry, Register product, |
1714 | Register carry2); |
1715 | void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, |
1716 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); |
1717 | void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, |
1718 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg); |
1719 | void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, |
1720 | Register tmp2); |
1721 | void multiply_add_64(Register sum, Register op1, Register op2, Register carry, |
1722 | Register rdxReg, Register raxReg); |
1723 | void add_one_64(Register z, Register zlen, Register carry, Register tmp1); |
1724 | void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, |
1725 | Register tmp3, Register tmp4); |
1726 | void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, |
1727 | Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); |
1728 | |
1729 | void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, |
1730 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, |
1731 | Register raxReg); |
1732 | void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, |
1733 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, |
1734 | Register raxReg); |
1735 | void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, |
1736 | Register result, Register tmp1, Register tmp2, |
1737 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); |
1738 | #endif |
1739 | |
1740 | // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. |
1741 | void update_byte_crc32(Register crc, Register val, Register table); |
1742 | void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); |
1743 | // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic |
1744 | // Note on a naming convention: |
1745 | // Prefix w = register only used on a Westmere+ architecture |
1746 | // Prefix n = register only used on a Nehalem architecture |
1747 | #ifdef _LP64 |
1748 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
1749 | Register tmp1, Register tmp2, Register tmp3); |
1750 | #else |
1751 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
1752 | Register tmp1, Register tmp2, Register tmp3, |
1753 | XMMRegister xtmp1, XMMRegister xtmp2); |
1754 | #endif |
1755 | void crc32c_pclmulqdq(XMMRegister w_xtmp1, |
1756 | Register in_out, |
1757 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, |
1758 | XMMRegister w_xtmp2, |
1759 | Register tmp1, |
1760 | Register n_tmp2, Register n_tmp3); |
1761 | void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, |
1762 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
1763 | Register tmp1, Register tmp2, |
1764 | Register n_tmp3); |
1765 | void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, |
1766 | Register in_out1, Register in_out2, Register in_out3, |
1767 | Register tmp1, Register tmp2, Register tmp3, |
1768 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
1769 | Register tmp4, Register tmp5, |
1770 | Register n_tmp6); |
1771 | void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, |
1772 | Register tmp1, Register tmp2, Register tmp3, |
1773 | Register tmp4, Register tmp5, Register tmp6, |
1774 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
1775 | bool is_pclmulqdq_supported); |
1776 | // Fold 128-bit data chunk |
1777 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); |
1778 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); |
1779 | // Fold 8-bit data |
1780 | void fold_8bit_crc32(Register crc, Register table, Register tmp); |
1781 | void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); |
1782 | void fold_128bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); |
1783 | |
1784 | // Compress char[] array to byte[]. |
1785 | void char_array_compress(Register src, Register dst, Register len, |
1786 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, |
1787 | XMMRegister tmp4, Register tmp5, Register result); |
1788 | |
1789 | // Inflate byte[] array to char[]. |
1790 | void byte_array_inflate(Register src, Register dst, Register len, |
1791 | XMMRegister tmp1, Register tmp2); |
1792 | |
1793 | }; |
1794 | |
1795 | /** |
1796 | * class SkipIfEqual: |
1797 | * |
1798 | * Instantiating this class will result in assembly code being output that will |
 * jump around any code emitted between the creation of the instance and its
1800 | * automatic destruction at the end of a scope block, depending on the value of |
1801 | * the flag passed to the constructor, which will be checked at run-time. |
1802 | */ |
1803 | class SkipIfEqual { |
1804 | private: |
1805 | MacroAssembler* _masm; |
1806 | Label _label; |
1807 | |
1808 | public: |
1809 | SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); |
1810 | ~SkipIfEqual(); |
1811 | }; |
1812 | |
1813 | #endif // CPU_X86_MACROASSEMBLER_X86_HPP |
1814 | |