| 1 | /* |
| 2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef CPU_X86_MACROASSEMBLER_X86_HPP |
| 26 | #define CPU_X86_MACROASSEMBLER_X86_HPP |
| 27 | |
| 28 | #include "asm/assembler.hpp" |
| 29 | #include "utilities/macros.hpp" |
| 30 | #include "runtime/rtmLocking.hpp" |
| 31 | |
| 32 | // MacroAssembler extends Assembler by frequently used macros. |
| 33 | // |
| 34 | // Instructions for which a 'better' code sequence exists depending |
| 35 | // on arguments should also go in here. |
| 36 | |
| 37 | class MacroAssembler: public Assembler { |
| 38 | friend class LIR_Assembler; |
| 39 | friend class Runtime1; // as_Address() |
| 40 | |
| 41 | public: |
| 42 | // Support for VM calls |
| 43 | // |
| 44 | // This is the base routine called by the different versions of call_VM_leaf. The interpreter |
| 45 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
| 46 | // additional registers when doing a VM call). |
| 47 | |
| 48 | virtual void call_VM_leaf_base( |
| 49 | address entry_point, // the entry point |
| 50 | int number_of_arguments // the number of arguments to pop after the call |
| 51 | ); |
| 52 | |
| 53 | protected: |
| 54 | // This is the base routine called by the different versions of call_VM. The interpreter |
| 55 | // may customize this version by overriding it for its purposes (e.g., to save/restore |
| 56 | // additional registers when doing a VM call). |
| 57 | // |
| 58 | // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base |
| 59 | // returns the register which contains the thread upon return. If a thread register has been |
| 60 | // specified, the return value will correspond to that register. If no last_java_sp is specified |
| 61 | // (noreg) then rsp will be used instead. |
| 62 | virtual void call_VM_base( // returns the register containing the thread upon return |
| 63 | Register oop_result, // where an oop-result ends up if any; use noreg otherwise |
| 64 | Register java_thread, // the thread if computed before ; use noreg otherwise |
| 65 | Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise |
| 66 | address entry_point, // the entry point |
| 67 | int number_of_arguments, // the number of arguments (w/o thread) to pop after the call |
| 68 | bool check_exceptions // whether to check for pending exceptions after return |
| 69 | ); |
| 70 | |
| 71 | void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); |
| 72 | |
| 73 | // helpers for FPU flag access |
| 74 | // tmp is a temporary register; if none is available, use noreg |
| 75 | void save_rax (Register tmp); |
| 76 | void restore_rax(Register tmp); |
| 77 | |
| 78 | public: |
| 79 | MacroAssembler(CodeBuffer* code) : Assembler(code) {} |
| 80 | |
| 81 | // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. |
| 82 | // The implementation is only non-empty for the InterpreterMacroAssembler, |
| 83 | // as only the interpreter handles PopFrame and ForceEarlyReturn requests. |
| 84 | virtual void check_and_handle_popframe(Register java_thread); |
| 85 | virtual void check_and_handle_earlyret(Register java_thread); |
| 86 | |
| 87 | Address as_Address(AddressLiteral adr); |
| 88 | Address as_Address(ArrayAddress adr); |
| 89 | |
| 90 | // Support for NULL-checks |
| 91 | // |
| 92 | // Generates code that causes a NULL OS exception if the content of reg is NULL. |
| 93 | // If the accessed location is M[reg + offset] and the offset is known, provide the |
| 94 | // offset. No explicit code generation is needed if the offset is within a certain |
| 95 | // range (0 <= offset <= page_size). |
| 96 | |
| 97 | void null_check(Register reg, int offset = -1); |
| 98 | static bool needs_explicit_null_check(intptr_t offset); |
| 99 | static bool uses_implicit_null_check(void* address); |
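|  | |
|  | // Usage sketch (illustrative, not from this file; "__" abbreviates the usual |
|  | // masm-> prefix). A field access at a small known offset can rely on the |
|  | // implicit OS check, so passing the offset lets null_check emit no code: |
|  | // |
|  | //   __ null_check(receiver, oopDesc::klass_offset_in_bytes()); |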
| 100 | |
| 101 | // Required platform-specific helpers for Label::patch_instructions. |
| 102 | // They _shadow_ the declarations in AbstractAssembler, which are undefined. |
| 103 | void pd_patch_instruction(address branch, address target, const char* file, int line) { |
| 104 | unsigned char op = branch[0]; |
| 105 | assert(op == 0xE8 /* call */ || |
| 106 | op == 0xE9 /* jmp */ || |
| 107 | op == 0xEB /* short jmp */ || |
| 108 | (op & 0xF0) == 0x70 /* short jcc */ || |
| 109 | (op == 0x0F && (branch[1] & 0xF0) == 0x80) /* jcc */ || |
| 110 | (op == 0xC7 && branch[1] == 0xF8) /* xbegin */, |
| 111 | "Invalid opcode at patch point"); |
| 112 | |
| 113 | if (op == 0xEB || (op & 0xF0) == 0x70) { |
| 114 | // short offset operators (jmp and jcc) |
| 115 | char* disp = (char*) &branch[1]; |
| 116 | int imm8 = target - (address) &disp[1]; |
| 117 | guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d", file, line); |
| 118 | *disp = imm8; |
| 119 | } else { |
| 120 | int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1]; |
| 121 | int imm32 = target - (address) &disp[1]; |
| 122 | *disp = imm32; |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | // The following 4 methods return the offset of the appropriate move instruction |
| 127 | |
| 128 | // Support for fast byte/short loading with zero extension (depending on particular CPU) |
| 129 | int load_unsigned_byte(Register dst, Address src); |
| 130 | int load_unsigned_short(Register dst, Address src); |
| 131 | |
| 132 | // Support for fast byte/short loading with sign extension (depending on particular CPU) |
| 133 | int load_signed_byte(Register dst, Address src); |
| 134 | int load_signed_short(Register dst, Address src); |
| 135 | |
| 136 | // Support for sign-extension (hi:lo = extend_sign(lo)) |
| 137 | void extend_sign(Register hi, Register lo); |
| 138 | |
| 139 | // Load and store values by size and signed-ness |
| 140 | void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); |
| 141 | void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); |
| 142 | |
| 143 | // Support for inc/dec with optimal instruction selection depending on value |
| 144 | |
| 145 | void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } |
| 146 | void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } |
| 147 | |
| 148 | void decrementl(Address dst, int value = 1); |
| 149 | void decrementl(Register reg, int value = 1); |
| 150 | |
| 151 | void decrementq(Register reg, int value = 1); |
| 152 | void decrementq(Address dst, int value = 1); |
| 153 | |
| 154 | void incrementl(Address dst, int value = 1); |
| 155 | void incrementl(Register reg, int value = 1); |
| 156 | |
| 157 | void incrementq(Register reg, int value = 1); |
| 158 | void incrementq(Address dst, int value = 1); |
| 159 | |
| 160 | #ifdef COMPILER2 |
| 161 | // special instructions for EVEX |
| 162 | void setvectmask(Register dst, Register src); |
| 163 | void restorevectmask(); |
| 164 | #endif |
| 165 | |
| 166 | // Support optimal SSE move instructions. |
| 167 | void movflt(XMMRegister dst, XMMRegister src) { |
| 168 | if (dst->encoding() == src->encoding()) return; |
| 169 | if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } |
| 170 | else { movss (dst, src); return; } |
| 171 | } |
| 172 | void movflt(XMMRegister dst, Address src) { movss(dst, src); } |
| 173 | void movflt(XMMRegister dst, AddressLiteral src); |
| 174 | void movflt(Address dst, XMMRegister src) { movss(dst, src); } |
| 175 | |
| 176 | void movdbl(XMMRegister dst, XMMRegister src) { |
| 177 | if (dst->encoding() == src->encoding()) return; |
| 178 | if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } |
| 179 | else { movsd (dst, src); return; } |
| 180 | } |
| 181 | |
| 182 | void movdbl(XMMRegister dst, AddressLiteral src); |
| 183 | |
| 184 | void movdbl(XMMRegister dst, Address src) { |
| 185 | if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } |
| 186 | else { movlpd(dst, src); return; } |
| 187 | } |
| 188 | void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } |
| 189 | |
| 190 | void incrementl(AddressLiteral dst); |
| 191 | void incrementl(ArrayAddress dst); |
| 192 | |
| 193 | void incrementq(AddressLiteral dst); |
| 194 | |
| 195 | // Alignment |
| 196 | void align(int modulus); |
| 197 | void align(int modulus, int target); |
| 198 | |
| 199 | // A 5 byte nop that is safe for patching (see patch_verified_entry) |
| 200 | void fat_nop(); |
| 201 | |
| 202 | // Stack frame creation/removal |
| 203 | void enter(); |
| 204 | void leave(); |
| 205 | |
| 206 | // Support for getting the JavaThread pointer (i.e., a reference to thread-local information) |
| 207 | // The pointer will be loaded into the thread register. |
| 208 | void get_thread(Register thread); |
| 209 | |
| 210 | |
| 211 | // Support for VM calls |
| 212 | // |
| 213 | // It is imperative that all calls into the VM are handled via the call_VM macros. |
| 214 | // They make sure that the stack linkage is setup correctly. call_VM's correspond |
| 215 | // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. |
| 216 | |
| 217 | |
| 218 | void call_VM(Register oop_result, |
| 219 | address entry_point, |
| 220 | bool check_exceptions = true); |
| 221 | void call_VM(Register oop_result, |
| 222 | address entry_point, |
| 223 | Register arg_1, |
| 224 | bool check_exceptions = true); |
| 225 | void call_VM(Register oop_result, |
| 226 | address entry_point, |
| 227 | Register arg_1, Register arg_2, |
| 228 | bool check_exceptions = true); |
| 229 | void call_VM(Register oop_result, |
| 230 | address entry_point, |
| 231 | Register arg_1, Register arg_2, Register arg_3, |
| 232 | bool check_exceptions = true); |
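|  | |
|  | // Usage sketch (illustrative; the entry point and registers are hypothetical): |
|  | // call into the VM with one register argument, receiving an oop result in rax |
|  | // and checking for pending exceptions on return. |
|  | // |
|  | //   __ call_VM(rax, CAST_FROM_FN_PTR(address, SomeRuntime::some_entry), rdx); |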
| 233 | |
| 234 | // Overloadings with last_Java_sp |
| 235 | void call_VM(Register oop_result, |
| 236 | Register last_java_sp, |
| 237 | address entry_point, |
| 238 | int number_of_arguments = 0, |
| 239 | bool check_exceptions = true); |
| 240 | void call_VM(Register oop_result, |
| 241 | Register last_java_sp, |
| 242 | address entry_point, |
| 243 | Register arg_1, |
| 244 | bool check_exceptions = true); |
| 245 | void call_VM(Register oop_result, |
| 246 | Register last_java_sp, |
| 247 | address entry_point, |
| 248 | Register arg_1, Register arg_2, |
| 249 | bool check_exceptions = true); |
| 250 | void call_VM(Register oop_result, |
| 251 | Register last_java_sp, |
| 252 | address entry_point, |
| 253 | Register arg_1, Register arg_2, Register arg_3, |
| 254 | bool check_exceptions = true); |
| 255 | |
| 256 | void get_vm_result (Register oop_result, Register thread); |
| 257 | void get_vm_result_2(Register metadata_result, Register thread); |
| 258 | |
| 259 | // These always tightly bind to MacroAssembler::call_VM_base |
| 260 | // bypassing the virtual implementation |
| 261 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); |
| 262 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); |
| 263 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); |
| 264 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); |
| 265 | void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); |
| 266 | |
| 267 | void call_VM_leaf0(address entry_point); |
| 268 | void call_VM_leaf(address entry_point, |
| 269 | int number_of_arguments = 0); |
| 270 | void call_VM_leaf(address entry_point, |
| 271 | Register arg_1); |
| 272 | void call_VM_leaf(address entry_point, |
| 273 | Register arg_1, Register arg_2); |
| 274 | void call_VM_leaf(address entry_point, |
| 275 | Register arg_1, Register arg_2, Register arg_3); |
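|  | |
|  | // Usage sketch (illustrative; some_leaf_helper is hypothetical): a leaf call |
|  | // performs no thread-state transition and returns no oop result. |
|  | // |
|  | //   __ call_VM_leaf(CAST_FROM_FN_PTR(address, some_leaf_helper), c_rarg0); |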
| 276 | |
| 277 | // These always tightly bind to MacroAssembler::call_VM_leaf_base |
| 278 | // bypassing the virtual implementation |
| 279 | void super_call_VM_leaf(address entry_point); |
| 280 | void super_call_VM_leaf(address entry_point, Register arg_1); |
| 281 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); |
| 282 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); |
| 283 | void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); |
| 284 | |
| 285 | // last Java Frame (fills frame anchor) |
| 286 | void set_last_Java_frame(Register thread, |
| 287 | Register last_java_sp, |
| 288 | Register last_java_fp, |
| 289 | address last_java_pc); |
| 290 | |
| 291 | // thread in the default location (r15_thread on 64bit) |
| 292 | void set_last_Java_frame(Register last_java_sp, |
| 293 | Register last_java_fp, |
| 294 | address last_java_pc); |
| 295 | |
| 296 | void reset_last_Java_frame(Register thread, bool clear_fp); |
| 297 | |
| 298 | // thread in the default location (r15_thread on 64bit) |
| 299 | void reset_last_Java_frame(bool clear_fp); |
| 300 | |
| 301 | // jobjects |
| 302 | void clear_jweak_tag(Register possibly_jweak); |
| 303 | void resolve_jobject(Register value, Register thread, Register tmp); |
| 304 | |
| 305 | // C 'boolean' to Java boolean: x == 0 ? 0 : 1 |
| 306 | void c2bool(Register x); |
| 307 | |
| 308 | // C++ bool manipulation |
| 309 | |
| 310 | void movbool(Register dst, Address src); |
| 311 | void movbool(Address dst, bool boolconst); |
| 312 | void movbool(Address dst, Register src); |
| 313 | void testbool(Register dst); |
| 314 | |
| 315 | void resolve_oop_handle(Register result, Register tmp = rscratch2); |
| 316 | void resolve_weak_handle(Register result, Register tmp); |
| 317 | void load_mirror(Register mirror, Register method, Register tmp = rscratch2); |
| 318 | void load_method_holder_cld(Register rresult, Register rmethod); |
| 319 | |
| 320 | void load_method_holder(Register holder, Register method); |
| 321 | |
| 322 | // oop manipulations |
| 323 | void load_klass(Register dst, Register src); |
| 324 | void store_klass(Register dst, Register src); |
| 325 | |
| 326 | void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, |
| 327 | Register tmp1, Register thread_tmp); |
| 328 | void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, |
| 329 | Register tmp1, Register tmp2); |
| 330 | |
| 331 | // Resolves obj access. Result is placed in the same register. |
| 332 | // All other registers are preserved. |
| 333 | void resolve(DecoratorSet decorators, Register obj); |
| 334 | |
| 335 | void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, |
| 336 | Register thread_tmp = noreg, DecoratorSet decorators = 0); |
| 337 | void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, |
| 338 | Register thread_tmp = noreg, DecoratorSet decorators = 0); |
| 339 | void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, |
| 340 | Register tmp2 = noreg, DecoratorSet decorators = 0); |
| 341 | |
| 342 | // Used for storing NULL. All other oop constants should be |
| 343 | // stored using routines that take a jobject. |
| 344 | void store_heap_oop_null(Address dst); |
| 345 | |
| 346 | void load_prototype_header(Register dst, Register src); |
| 347 | |
| 348 | #ifdef _LP64 |
| 349 | void store_klass_gap(Register dst, Register src); |
| 350 | |
| 351 | // This dummy is to prevent a call to store_heap_oop from |
| 352 | // converting a zero (like NULL) into a Register by giving |
| 353 | // the compiler two choices it can't resolve. |
| 354 | |
| 355 | void store_heap_oop(Address dst, void* dummy); |
| 356 | |
| 357 | void encode_heap_oop(Register r); |
| 358 | void decode_heap_oop(Register r); |
| 359 | void encode_heap_oop_not_null(Register r); |
| 360 | void decode_heap_oop_not_null(Register r); |
| 361 | void encode_heap_oop_not_null(Register dst, Register src); |
| 362 | void decode_heap_oop_not_null(Register dst, Register src); |
| 363 | |
| 364 | void set_narrow_oop(Register dst, jobject obj); |
| 365 | void set_narrow_oop(Address dst, jobject obj); |
| 366 | void cmp_narrow_oop(Register dst, jobject obj); |
| 367 | void cmp_narrow_oop(Address dst, jobject obj); |
| 368 | |
| 369 | void encode_klass_not_null(Register r); |
| 370 | void decode_klass_not_null(Register r); |
| 371 | void encode_klass_not_null(Register dst, Register src); |
| 372 | void decode_klass_not_null(Register dst, Register src); |
| 373 | void set_narrow_klass(Register dst, Klass* k); |
| 374 | void set_narrow_klass(Address dst, Klass* k); |
| 375 | void cmp_narrow_klass(Register dst, Klass* k); |
| 376 | void cmp_narrow_klass(Address dst, Klass* k); |
| 377 | |
| 378 | // Returns the byte size of the instructions generated by decode_klass_not_null() |
| 379 | // when compressed klass pointers are being used. |
| 380 | static int instr_size_for_decode_klass_not_null(); |
| 381 | |
| 382 | // If the heap base register is used, reinitialize it with the correct value. |
| 383 | void reinit_heapbase(); |
| 384 | |
| 385 | DEBUG_ONLY(void verify_heapbase(const char* msg);) |
| 386 | |
| 387 | #endif // _LP64 |
| 388 | |
| 389 | // Int division/remainder for Java |
| 390 | // (as idivl, but checks for the special case described in the JVM spec.) |
| 391 | // returns idivl instruction offset for implicit exception handling |
| 392 | int corrected_idivl(Register reg); |
| 393 | |
| 394 | // Long division/remainder for Java |
| 395 | // (as idivq, but checks for the special case described in the JVM spec.) |
| 396 | // returns idivq instruction offset for implicit exception handling |
| 397 | int corrected_idivq(Register reg); |
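|  | |
|  | // The special case in both routines is the JVM-mandated overflow pair: |
|  | // min_int / -1 (min_long / -1 for idivq) must yield min_int (min_long) with |
|  | // remainder 0, whereas a raw idivl/idivq would raise a divide error (#DE), |
|  | // so the generated code branches around the divide for that operand pair. |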
| 398 | |
| 399 | void int3(); |
| 400 | |
| 401 | // Long operation macros for a 32bit cpu |
| 402 | // Long negation for Java |
| 403 | void lneg(Register hi, Register lo); |
| 404 | |
| 405 | // Long multiplication for Java |
| 406 | // (destroys contents of eax, ebx, ecx and edx) |
| 407 | void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y |
| 408 | |
| 409 | // Long shifts for Java |
| 410 | // (semantics as described in JVM spec.) |
| 411 | void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) |
| 412 | void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) |
| 413 | |
| 414 | // Long compare for Java |
| 415 | // (semantics as described in JVM spec.) |
| 416 | void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) |
| 417 | |
| 418 | |
| 419 | // misc |
| 420 | |
| 421 | // Sign extension |
| 422 | void sign_extend_short(Register reg); |
| 423 | void sign_extend_byte(Register reg); |
| 424 | |
| 425 | // Division by power of 2, rounding towards 0 |
| 426 | void division_with_shift(Register reg, int shift_value); |
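|  | |
|  | // An arithmetic shift alone would round toward negative infinity, so a bias |
|  | // of (2^shift_value - 1) is added to negative dividends first. For example, |
|  | // -7 / 4: (-7 + 3) >> 2 == -1, matching Java's truncation toward zero. |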
| 427 | |
| 428 | // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: |
| 429 | // |
| 430 | // CF (corresponds to C0) if x < y |
| 431 | // PF (corresponds to C2) if unordered |
| 432 | // ZF (corresponds to C3) if x = y |
| 433 | // |
| 434 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
| 435 | // tmp is a temporary register; if none is available, use noreg (only matters for non-P6 code) |
| 436 | void fcmp(Register tmp); |
| 437 | // Variant of the above which allows y to be further down the stack |
| 438 | // and which only pops x and y if specified. If pop_right is |
| 439 | // specified then pop_left must also be specified. |
| 440 | void fcmp(Register tmp, int index, bool pop_left, bool pop_right); |
| 441 | |
| 442 | // Floating-point comparison for Java |
| 443 | // Compares the top-most stack entries on the FPU stack and stores the result in dst. |
| 444 | // The arguments are in reversed order on the stack (i.e., top of stack is first argument). |
| 445 | // (semantics as described in JVM spec.) |
| 446 | void fcmp2int(Register dst, bool unordered_is_less); |
| 447 | // Variant of the above which allows y to be further down the stack |
| 448 | // and which only pops x and y if specified. If pop_right is |
| 449 | // specified then pop_left must also be specified. |
| 450 | void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); |
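|  | |
|  | // Mapping to the Java bytecodes: fcmpl treats an unordered (NaN) operand as |
|  | // less (unordered_is_less == true, result -1), while fcmpg treats it as |
|  | // greater (unordered_is_less == false, result +1). |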
| 451 | |
| 452 | // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) |
| 453 | // tmp is a temporary register; if none is available, use noreg |
| 454 | void fremr(Register tmp); |
| 455 | |
| 456 | // dst = c = a * b + c |
| 457 | void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); |
| 458 | void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); |
| 459 | |
| 460 | void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); |
| 461 | void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len); |
| 462 | void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); |
| 463 | void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len); |
| 464 | |
| 465 | |
| 466 | // same as fcmp2int, but using SSE2 |
| 467 | void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
| 468 | void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); |
| 469 | |
| 470 | // branch to L if FPU flag C2 is set/not set |
| 471 | // tmp is a temporary register; if none is available, use noreg |
| 472 | void jC2 (Register tmp, Label& L); |
| 473 | void jnC2(Register tmp, Label& L); |
| 474 | |
| 475 | // Pop ST (ffree & fincstp combined) |
| 476 | void fpop(); |
| 477 | |
| 478 | // Load float value from 'address'. If UseSSE >= 1, the value is loaded into |
| 479 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
| 480 | void load_float(Address src); |
| 481 | |
| 482 | // Store float value to 'address'. If UseSSE >= 1, the value is stored |
| 483 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
| 484 | void store_float(Address dst); |
| 485 | |
| 486 | // Load double value from 'address'. If UseSSE >= 2, the value is loaded into |
| 487 | // register xmm0. Otherwise, the value is loaded onto the FPU stack. |
| 488 | void load_double(Address src); |
| 489 | |
| 490 | // Store double value to 'address'. If UseSSE >= 2, the value is stored |
| 491 | // from register xmm0. Otherwise, the value is stored from the FPU stack. |
| 492 | void store_double(Address dst); |
| 493 | |
| 494 | // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack |
| 495 | void push_fTOS(); |
| 496 | |
| 497 | // pops double TOS element from CPU stack and pushes on FPU stack |
| 498 | void pop_fTOS(); |
| 499 | |
| 500 | void empty_FPU_stack(); |
| 501 | |
| 502 | void push_IU_state(); |
| 503 | void pop_IU_state(); |
| 504 | |
| 505 | void push_FPU_state(); |
| 506 | void pop_FPU_state(); |
| 507 | |
| 508 | void push_CPU_state(); |
| 509 | void pop_CPU_state(); |
| 510 | |
| 511 | // Round reg up to a multiple of modulus (which must be a power of two) |
| 512 | void round_to(Register reg, int modulus); |
| 513 | |
| 514 | // Callee saved registers handling |
| 515 | void push_callee_saved_registers(); |
| 516 | void pop_callee_saved_registers(); |
| 517 | |
| 518 | // allocation |
| 519 | void eden_allocate( |
| 520 | Register thread, // Current thread |
| 521 | Register obj, // result: pointer to object after successful allocation |
| 522 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
| 523 | int con_size_in_bytes, // object size in bytes if known at compile time |
| 524 | Register t1, // temp register |
| 525 | Label& slow_case // continuation point if fast allocation fails |
| 526 | ); |
| 527 | void tlab_allocate( |
| 528 | Register thread, // Current thread |
| 529 | Register obj, // result: pointer to object after successful allocation |
| 530 | Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise |
| 531 | int con_size_in_bytes, // object size in bytes if known at compile time |
| 532 | Register t1, // temp register |
| 533 | Register t2, // temp register |
| 534 | Label& slow_case // continuation point if fast allocation fails |
| 535 | ); |
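|  | |
|  | // Usage sketch (illustrative; register choices and instance_size are |
|  | // hypothetical): fast-path allocation of a fixed-size object, passing noreg |
|  | // for the variable-size register since the size is a compile-time constant. |
|  | // |
|  | //   Label slow; |
|  | //   __ tlab_allocate(r15_thread, rax, noreg, instance_size, rbx, rcx, slow); |
|  | //   // ... initialize the object now in rax; "slow" falls back to the runtime |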
| 536 | void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); |
| 537 | |
| 538 | // interface method calling |
| 539 | void lookup_interface_method(Register recv_klass, |
| 540 | Register intf_klass, |
| 541 | RegisterOrConstant itable_index, |
| 542 | Register method_result, |
| 543 | Register scan_temp, |
| 544 | Label& no_such_interface, |
| 545 | bool return_method = true); |
| 546 | |
| 547 | // virtual method calling |
| 548 | void lookup_virtual_method(Register recv_klass, |
| 549 | RegisterOrConstant vtable_index, |
| 550 | Register method_result); |
| 551 | |
| 552 | // Test sub_klass against super_klass, with fast and slow paths. |
| 553 | |
| 554 | // The fast path produces a tri-state answer: yes / no / maybe-slow. |
| 555 | // One of the three labels can be NULL, meaning take the fall-through. |
| 556 | // If super_check_offset is -1, the value is loaded up from super_klass. |
| 557 | // No registers are killed, except temp_reg. |
| 558 | void check_klass_subtype_fast_path(Register sub_klass, |
| 559 | Register super_klass, |
| 560 | Register temp_reg, |
| 561 | Label* L_success, |
| 562 | Label* L_failure, |
| 563 | Label* L_slow_path, |
| 564 | RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); |
| 565 | |
| 566 | // The rest of the type check; must be wired to a corresponding fast path. |
| 567 | // It does not repeat the fast path logic, so don't use it standalone. |
| 568 | // The temp_reg and temp2_reg can be noreg, if no temps are available. |
| 569 | // Updates the sub's secondary super cache as necessary. |
| 570 | // If set_cond_codes, condition codes will be Z on success, NZ on failure. |
| 571 | void check_klass_subtype_slow_path(Register sub_klass, |
| 572 | Register super_klass, |
| 573 | Register temp_reg, |
| 574 | Register temp2_reg, |
| 575 | Label* L_success, |
| 576 | Label* L_failure, |
| 577 | bool set_cond_codes = false); |
| 578 | |
| 579 | // Simplified, combined version, good for typical uses. |
| 580 | // Falls through on failure. |
| 581 | void check_klass_subtype(Register sub_klass, |
| 582 | Register super_klass, |
| 583 | Register temp_reg, |
| 584 | Label& L_success); |
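|  | |
|  | // Usage sketch (illustrative; registers are hypothetical): the combined |
|  | // helper wires the fast and slow paths together internally, branching to |
|  | // L_success on success and falling through on failure. |
|  | // |
|  | //   Label ok; |
|  | //   __ check_klass_subtype(rsi, rax, rcx, ok); |
|  | //   // fall-through here means "not a subtype"; handle failure, then: |
|  | //   __ bind(ok); |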
| 585 | |
| 586 | void clinit_barrier(Register klass, |
| 587 | Register thread, |
| 588 | Label* L_fast_path = NULL, |
| 589 | Label* L_slow_path = NULL); |
| 590 | |
| 591 | // method handles (JSR 292) |
| 592 | Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); |
| 593 | |
| 594 | //---- |
| 595 | void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 |
| 596 | |
| 597 | // Debugging |
| 598 | |
| 599 | // only if +VerifyOops |
| 600 | // TODO: Make these macros with file and line like sparc version! |
| 601 | void verify_oop(Register reg, const char* s = "broken oop"); |
| 602 | void verify_oop_addr(Address addr, const char* s = "broken oop addr"); |
| 603 | |
| 604 | // TODO: verify method and klass metadata (compare against vptr?) |
| 605 | void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} |
| 606 | void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} |
| 607 | |
| 608 | #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) |
| 609 | #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) |
| 610 | |
| 611 | // only if +VerifyFPU |
| 612 | void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); |
| 613 | |
| 614 | // Verify or restore cpu control state after JNI call |
| 615 | void restore_cpu_control_state_after_jni(); |
| 616 | |
| 617 | // prints msg, dumps registers and stops execution |
| 618 | void stop(const char* msg); |
| 619 | |
| 620 | // prints msg and continues |
| 621 | void warn(const char* msg); |
| 622 | |
| 623 | // dumps registers and other state |
| 624 | void print_state(); |
| 625 | |
| 626 | static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); |
| 627 | static void debug64(char* msg, int64_t pc, int64_t regs[]); |
| 628 | static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); |
| 629 | static void print_state64(int64_t pc, int64_t regs[]); |
| 630 | |
| 631 | void os_breakpoint(); |
| 632 | |
| 633 | void untested() { stop("untested"); } |
| 634 | |
| 635 | void unimplemented(const char* what = ""); |
| 636 | |
| 637 | void should_not_reach_here() { stop("should not reach here"); } |
| 638 | |
| 639 | void print_CPU_state(); |
| 640 | |
| 641 | // Stack overflow checking |
| 642 | void bang_stack_with_offset(int offset) { |
| 643 | // stack grows down, caller passes positive offset |
| 644 | assert(offset > 0, "must bang with negative offset"); |
| 645 | movl(Address(rsp, (-offset)), rax); |
| 646 | } |
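|  | |
|  | // Illustrative shadow-zone banging (page size and page count are symbolic): |
|  | // probing one word per page below rsp touches each guard page in turn. |
|  | // |
|  | //   for (int pages = 1; pages <= shadow_zone_pages; pages++) { |
|  | //     bang_stack_with_offset(pages * page_size); |
|  | //   } |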
| 647 | |
| 648 | // Writes to successive stack pages until the given offset is reached, to |
| 649 | // check for stack overflow plus the shadow pages. Also clobbers tmp. |
| 650 | void bang_stack_size(Register size, Register tmp); |
| 651 | |
| 652 | // Check for reserved stack access in method being exited (for JIT) |
| 653 | void reserved_stack_check(); |
| 654 | |
| 655 | virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, |
| 656 | Register tmp, |
| 657 | int offset); |
| 658 | |
| 659 | // If thread_reg != noreg, the code assumes the passed register contains |
| 660 | // the thread (required on 64-bit). |
| 661 | void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg); |
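|  | |
|  | // Usage sketch (illustrative): poll for a pending safepoint, e.g. on method |
|  | // return; on 64-bit the thread lives in r15_thread. |
|  | // |
|  | //   Label slow; |
|  | //   __ safepoint_poll(slow, r15_thread, rscratch1); |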
| 662 | |
| 663 | void verify_tlab(); |
| 664 | |
| 665 | // Biased locking support |
| 666 | // lock_reg and obj_reg must be loaded up with the appropriate values. |
| 667 | // swap_reg must be rax, and is killed. |
| 668 | // tmp_reg is optional. If it is supplied (i.e., != noreg) it will |
| 669 | // be killed; if not supplied, push/pop will be used internally to |
| 670 | // allocate a temporary (inefficient, avoid if possible). |
| 671 | // Optional slow case is for implementations (interpreter and C1) which branch to |
| 672 | // slow case directly. Leaves condition codes set for C2's Fast_Lock node. |
| 673 | // Returns offset of first potentially-faulting instruction for null |
| 674 | // check info (currently consumed only by C1). If |
| 675 | // swap_reg_contains_mark is true then returns -1 as it is assumed |
| 676 | // the calling code has already passed any potential faults. |
| 677 | int biased_locking_enter(Register lock_reg, Register obj_reg, |
| 678 | Register swap_reg, Register tmp_reg, |
| 679 | bool swap_reg_contains_mark, |
| 680 | Label& done, Label* slow_case = NULL, |
| 681 | BiasedLockingCounters* counters = NULL); |
| 682 | void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); |
| 683 | #ifdef COMPILER2 |
| 684 | // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. |
| 685 | // See the full description in macroAssembler_x86.cpp. |
| 686 | void fast_lock(Register obj, Register box, Register tmp, |
| 687 | Register scr, Register cx1, Register cx2, |
| 688 | BiasedLockingCounters* counters, |
| 689 | RTMLockingCounters* rtm_counters, |
| 690 | RTMLockingCounters* stack_rtm_counters, |
| 691 | Metadata* method_data, |
| 692 | bool use_rtm, bool profile_rtm); |
| 693 | void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); |
| 694 | #if INCLUDE_RTM_OPT |
| 695 | void rtm_counters_update(Register abort_status, Register rtm_counters); |
| 696 | void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); |
| 697 | void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, |
| 698 | RTMLockingCounters* rtm_counters, |
| 699 | Metadata* method_data); |
| 700 | void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, |
| 701 | RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); |
| 702 | void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); |
| 703 | void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); |
| 704 | void rtm_stack_locking(Register obj, Register tmp, Register scr, |
| 705 | Register retry_on_abort_count, |
| 706 | RTMLockingCounters* stack_rtm_counters, |
| 707 | Metadata* method_data, bool profile_rtm, |
| 708 | Label& DONE_LABEL, Label& IsInflated); |
| 709 | void rtm_inflated_locking(Register obj, Register box, Register tmp, |
| 710 | Register scr, Register retry_on_busy_count, |
| 711 | Register retry_on_abort_count, |
| 712 | RTMLockingCounters* rtm_counters, |
| 713 | Metadata* method_data, bool profile_rtm, |
| 714 | Label& DONE_LABEL); |
| 715 | #endif |
| 716 | #endif |
| 717 | |
| 718 | Condition negate_condition(Condition cond); |
| 719 | |
| 720 | // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit |
| 721 | // operands. In general the names are modified to avoid hiding the instruction in Assembler, |
| 722 | // so that we don't need to implement all the varieties in the Assembler with trivial wrappers |
| 723 | // here in MacroAssembler. The major exception to this rule is call. |
| 724 | |
| 725 | // Arithmetic |
| 726 | |
| 727 | |
| 728 | void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } |
| 729 | void addptr(Address dst, Register src); |
| 730 | |
| 731 | void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } |
| 732 | void addptr(Register dst, int32_t src); |
| 733 | void addptr(Register dst, Register src); |
| 734 | void addptr(Register dst, RegisterOrConstant src) { |
| 735 | if (src.is_constant()) addptr(dst, (int) src.as_constant()); |
| 736 | else addptr(dst, src.as_register()); |
| 737 | } |
| 738 | |
| 739 | void andptr(Register dst, int32_t src); |
| 740 | void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } |
| 741 | |
| 742 | void cmp8(AddressLiteral src1, int imm); |
| 743 | |
| 744 | // renamed to drag out the casting of address to int32_t/intptr_t |
| 745 | void cmp32(Register src1, int32_t imm); |
| 746 | |
| 747 | void cmp32(AddressLiteral src1, int32_t imm); |
| 748 | // compare reg - mem, or reg - &mem |
| 749 | void cmp32(Register src1, AddressLiteral src2); |
| 750 | |
| 751 | void cmp32(Register src1, Address src2); |
| 752 | |
| 753 | #ifndef _LP64 |
| 754 | void cmpklass(Address dst, Metadata* obj); |
| 755 | void cmpklass(Register dst, Metadata* obj); |
| 756 | void cmpoop(Address dst, jobject obj); |
| 757 | void cmpoop_raw(Address dst, jobject obj); |
| 758 | #endif // !_LP64 |
| 759 | |
| 760 | void cmpoop(Register src1, Register src2); |
| 761 | void cmpoop(Register src1, Address src2); |
| 762 | void cmpoop(Register dst, jobject obj); |
| 763 | void cmpoop_raw(Register dst, jobject obj); |
| 764 | |
| 765 | // NOTE: src2 must be the lval. This is NOT a mem-mem compare. |
| 766 | void cmpptr(Address src1, AddressLiteral src2); |
| 767 | |
| 768 | void cmpptr(Register src1, AddressLiteral src2); |
| 769 | |
| 770 | void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
| 771 | void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
| 772 | // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
| 773 | |
| 774 | void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
| 775 | void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } |
| 776 | |
| 777 | // cmp64 to avoid hiding cmpq |
| 778 | void cmp64(Register src1, AddressLiteral src); |
| 779 | |
| 780 | void cmpxchgptr(Register reg, Address adr); |
| 781 | |
| 782 | void locked_cmpxchgptr(Register reg, AddressLiteral adr); |
| 783 | |
| 784 | |
| 785 | void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } |
| 786 | void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); } |
| 787 | |
| 788 | |
| 789 | void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } |
| 790 | |
| 791 | void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } |
| 792 | |
| 793 | void shlptr(Register dst, int32_t shift); |
| 794 | void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } |
| 795 | |
| 796 | void shrptr(Register dst, int32_t shift); |
| 797 | void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } |
| 798 | |
| 799 | void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } |
| 800 | void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } |
| 801 | |
| 802 | void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } |
| 803 | |
| 804 | void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } |
| 805 | void subptr(Register dst, int32_t src); |
| 806 | // Force generation of a 4 byte immediate value even if it fits into 8 bits |
| 807 | void subptr_imm32(Register dst, int32_t src); |
| 808 | void subptr(Register dst, Register src); |
| 809 | void subptr(Register dst, RegisterOrConstant src) { |
| 810 | if (src.is_constant()) subptr(dst, (int) src.as_constant()); |
| 811 | else subptr(dst, src.as_register()); |
| 812 | } |
| 813 | |
| 814 | void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } |
| 815 | void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } |
| 816 | |
| 817 | void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } |
| 818 | void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } |
| 819 | |
| 820 | void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } |
| 821 | |
| 822 | |
| 823 | |
| 824 | // Helper functions for statistics gathering. |
| 825 | // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. |
| 826 | void cond_inc32(Condition cond, AddressLiteral counter_addr); |
| 827 | // Unconditional atomic increment. |
| 828 | void atomic_incl(Address counter_addr); |
| 829 | void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1); |
| 830 | #ifdef _LP64 |
| 831 | void atomic_incq(Address counter_addr); |
| 832 | void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1); |
| 833 | #endif |
| 834 | void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; } |
| 835 | void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; } |
| 836 | |
| 837 | void lea(Register dst, AddressLiteral adr); |
| 838 | void lea(Address dst, AddressLiteral adr); |
| 839 | void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } |
| 840 | |
| 841 | void leal32(Register dst, Address src) { leal(dst, src); } |
| 842 | |
| 843 | // Import other testl() methods from the parent class or else |
| 844 | // they will be hidden by the following overriding declaration. |
| 845 | using Assembler::testl; |
| 846 | void testl(Register dst, AddressLiteral src); |
| 847 | |
| 848 | void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
| 849 | void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
| 850 | void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } |
| 851 | void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); } |
| 852 | |
| 853 | void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } |
| 854 | void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); } |
| 855 | void testptr(Register src1, Register src2); |
| 856 | |
| 857 | void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } |
| 858 | void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } |
| 859 | |
| 860 | // Calls |
| 861 | |
| 862 | void call(Label& L, relocInfo::relocType rtype); |
| 863 | void call(Register entry); |
| 864 | |
| 865 | // NOTE: this call transfers to the effective address of entry, NOT |
| 866 | // the address contained by entry, because this is more natural |
| 867 | // for jumps/calls. |
| 868 | void call(AddressLiteral entry); |
| 869 | |
| 870 | // Emit the CompiledIC call idiom |
| 871 | void ic_call(address entry, jint method_index = 0); |
| 872 | |
| 873 | // Jumps |
| 874 | |
| 875 | // NOTE: these jumps transfer to the effective address of dst, NOT |
| 876 | // the address contained by dst, because this is more natural |
| 877 | // for jumps/calls. |
| 878 | void jump(AddressLiteral dst); |
| 879 | void jump_cc(Condition cc, AddressLiteral dst); |
| 880 | |
| 881 | // 32bit can do a case table jump in one instruction but we no longer allow the base |
| 882 | // to be installed in the Address class. This jump transfers to the address |
| 883 | // contained in the location described by entry (not the address of entry). |
| 884 | void jump(ArrayAddress entry); |
| 885 | |
| 886 | // Floating |
| 887 | |
| 888 | void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } |
| 889 | void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
| 890 | void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } |
| 891 | |
| 892 | void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } |
| 893 | void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } |
| 894 | void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
| 895 | |
| 896 | void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } |
| 897 | void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } |
| 898 | void comiss(XMMRegister dst, AddressLiteral src); |
| 899 | |
| 900 | void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } |
| 901 | void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } |
| 902 | void comisd(XMMRegister dst, AddressLiteral src); |
| 903 | |
| 904 | void fadd_s(Address src) { Assembler::fadd_s(src); } |
| 905 | void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } |
| 906 | |
| 907 | void fldcw(Address src) { Assembler::fldcw(src); } |
| 908 | void fldcw(AddressLiteral src); |
| 909 | |
| 910 | void fld_s(int index) { Assembler::fld_s(index); } |
| 911 | void fld_s(Address src) { Assembler::fld_s(src); } |
| 912 | void fld_s(AddressLiteral src); |
| 913 | |
| 914 | void fld_d(Address src) { Assembler::fld_d(src); } |
| 915 | void fld_d(AddressLiteral src); |
| 916 | |
| 917 | void fld_x(Address src) { Assembler::fld_x(src); } |
| 918 | void fld_x(AddressLiteral src); |
| 919 | |
| 920 | void fmul_s(Address src) { Assembler::fmul_s(src); } |
| 921 | void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } |
| 922 | |
| 923 | void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } |
| 924 | void ldmxcsr(AddressLiteral src); |
| 925 | |
| 926 | #ifdef _LP64 |
| 927 | private: |
| 928 | void sha256_AVX2_one_round_compute( |
| 929 | Register reg_old_h, |
| 930 | Register reg_a, |
| 931 | Register reg_b, |
| 932 | Register reg_c, |
| 933 | Register reg_d, |
| 934 | Register reg_e, |
| 935 | Register reg_f, |
| 936 | Register reg_g, |
| 937 | Register reg_h, |
| 938 | int iter); |
| 939 | void sha256_AVX2_four_rounds_compute_first(int start); |
| 940 | void sha256_AVX2_four_rounds_compute_last(int start); |
| 941 | void sha256_AVX2_one_round_and_sched( |
| 942 | XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */ |
| 943 | XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */ |
| 944 | XMMRegister xmm_2, /* ymm6 */ |
| 945 | XMMRegister xmm_3, /* ymm7 */ |
| 946 | Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */ |
| 947 | Register reg_b, /* ebx */ /* full cycle is 8 iterations */ |
| 948 | Register reg_c, /* edi */ |
| 949 | Register reg_d, /* esi */ |
| 950 | Register reg_e, /* r8d */ |
| 951 | Register reg_f, /* r9d */ |
| 952 | Register reg_g, /* r10d */ |
| 953 | Register reg_h, /* r11d */ |
| 954 | int iter); |
| 955 | |
| 956 | void addm(int disp, Register r1, Register r2); |
| 957 | void gfmul(XMMRegister tmp0, XMMRegister t); |
| 958 | void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0, |
| 959 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3); |
| 960 | void generateHtbl_one_block(Register htbl); |
| 961 | void generateHtbl_eight_blocks(Register htbl); |
| 962 | public: |
| 963 | void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 964 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 965 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 966 | bool multi_block, XMMRegister shuf_mask); |
| 967 | void avx_ghash(Register state, Register htbl, Register data, Register blocks); |
| 968 | #endif |
| 969 | |
| 970 | #ifdef _LP64 |
| 971 | private: |
| 972 | void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, |
| 973 | Register e, Register f, Register g, Register h, int iteration); |
| 974 | |
| 975 | void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 976 | Register a, Register b, Register c, Register d, Register e, Register f, |
| 977 | Register g, Register h, int iteration); |
| 978 | |
| 979 | void addmq(int disp, Register r1, Register r2); |
| 980 | public: |
| 981 | void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 982 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 983 | Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, |
| 984 | XMMRegister shuf_mask); |
| 985 | #endif |
| 986 | |
| 987 | void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0, |
| 988 | XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, |
| 989 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 990 | bool multi_block); |
| 991 | |
| 992 | #ifdef _LP64 |
| 993 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 994 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 995 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 996 | bool multi_block, XMMRegister shuf_mask); |
| 997 | #else |
| 998 | void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, |
| 999 | XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, |
| 1000 | Register buf, Register state, Register ofs, Register limit, Register rsp, |
| 1001 | bool multi_block); |
| 1002 | #endif |
| 1003 | |
| 1004 | void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1005 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1006 | Register rax, Register rcx, Register rdx, Register tmp); |
| 1007 | |
| 1008 | #ifdef _LP64 |
| 1009 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1010 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1011 | Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); |
| 1012 | |
| 1013 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1014 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1015 | Register rax, Register rcx, Register rdx, Register r11); |
| 1016 | |
| 1017 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
| 1018 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
| 1019 | Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); |
| 1020 | |
| 1021 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1022 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1023 | Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2, |
| 1024 | Register tmp3, Register tmp4); |
| 1025 | |
| 1026 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1027 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1028 | Register rax, Register rcx, Register rdx, Register tmp1, |
| 1029 | Register tmp2, Register tmp3, Register tmp4); |
| 1030 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1031 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1032 | Register rax, Register rcx, Register rdx, Register tmp1, |
| 1033 | Register tmp2, Register tmp3, Register tmp4); |
| 1034 | #else |
| 1035 | void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1036 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1037 | Register rax, Register rcx, Register rdx, Register tmp1); |
| 1038 | |
| 1039 | void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1040 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1041 | Register rax, Register rcx, Register rdx, Register tmp); |
| 1042 | |
| 1043 | void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, |
| 1044 | XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, |
| 1045 | Register rdx, Register tmp); |
| 1046 | |
| 1047 | void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1048 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1049 | Register rax, Register rbx, Register rdx); |
| 1050 | |
| 1051 | void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1052 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1053 | Register rax, Register rcx, Register rdx, Register tmp); |
| 1054 | |
| 1055 | void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, |
| 1056 | Register edx, Register ebx, Register esi, Register edi, |
| 1057 | Register ebp, Register esp); |
| 1058 | |
| 1059 | void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, |
| 1060 | Register esi, Register edi, Register ebp, Register esp); |
| 1061 | |
| 1062 | void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, |
| 1063 | Register edx, Register ebx, Register esi, Register edi, |
| 1064 | Register ebp, Register esp); |
| 1065 | |
| 1066 | void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, |
| 1067 | XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, |
| 1068 | Register rax, Register rcx, Register rdx, Register tmp); |
| 1069 | #endif |
| 1070 | |
| 1071 | void increase_precision(); |
| 1072 | void restore_precision(); |
| 1073 | |
| 1074 | private: |
| 1075 | |
| 1076 | // these are private because users should use movflt/movdbl instead |
| 1077 | |
| 1078 | void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } |
| 1079 | void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } |
| 1080 | void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } |
| 1081 | void movss(XMMRegister dst, AddressLiteral src); |
| 1082 | |
| 1083 | void movlpd(XMMRegister dst, Address src) { Assembler::movlpd(dst, src); } |
| 1084 | void movlpd(XMMRegister dst, AddressLiteral src); |
| 1085 | |
| 1086 | public: |
| 1087 | |
| 1088 | void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } |
| 1089 | void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } |
| 1090 | void addsd(XMMRegister dst, AddressLiteral src); |
| 1091 | |
| 1092 | void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } |
| 1093 | void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } |
| 1094 | void addss(XMMRegister dst, AddressLiteral src); |
| 1095 | |
| 1096 | void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } |
| 1097 | void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } |
| 1098 | void addpd(XMMRegister dst, AddressLiteral src); |
| 1099 | |
| 1100 | void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } |
| 1101 | void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } |
| 1102 | void divsd(XMMRegister dst, AddressLiteral src); |
| 1103 | |
| 1104 | void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } |
| 1105 | void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } |
| 1106 | void divss(XMMRegister dst, AddressLiteral src); |
| 1107 | |
| 1108 | // Move Unaligned Double Quadword |
| 1109 | void movdqu(Address dst, XMMRegister src); |
| 1110 | void movdqu(XMMRegister dst, Address src); |
| 1111 | void movdqu(XMMRegister dst, XMMRegister src); |
| 1112 | void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); |
| 1113 | // AVX Unaligned forms |
| 1114 | void vmovdqu(Address dst, XMMRegister src); |
| 1115 | void vmovdqu(XMMRegister dst, Address src); |
| 1116 | void vmovdqu(XMMRegister dst, XMMRegister src); |
| 1117 | void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
| 1118 | void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
| 1119 | void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
| 1120 | void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } |
| 1121 | void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); |
| 1122 | |
| 1123 | // Move Aligned Double Quadword |
| 1124 | void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } |
| 1125 | void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } |
| 1126 | void movdqa(XMMRegister dst, AddressLiteral src); |
| 1127 | |
| 1128 | void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } |
| 1129 | void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } |
| 1130 | void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } |
| 1131 | void movsd(XMMRegister dst, AddressLiteral src); |
| 1132 | |
| 1133 | void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); } |
| 1134 | void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); } |
| 1135 | void mulpd(XMMRegister dst, AddressLiteral src); |
| 1136 | |
| 1137 | void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } |
| 1138 | void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } |
| 1139 | void mulsd(XMMRegister dst, AddressLiteral src); |
| 1140 | |
| 1141 | void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } |
| 1142 | void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } |
| 1143 | void mulss(XMMRegister dst, AddressLiteral src); |
| 1144 | |
| 1145 | // Carry-Less Multiplication Quadword |
| 1146 | void pclmulldq(XMMRegister dst, XMMRegister src) { |
| 1147 | // 0x00 - multiply lower 64 bits [0:63] |
| 1148 | Assembler::pclmulqdq(dst, src, 0x00); |
| 1149 | } |
| 1150 | void pclmulhdq(XMMRegister dst, XMMRegister src) { |
| 1151 | // 0x11 - multiply upper 64 bits [64:127] |
| 1152 | Assembler::pclmulqdq(dst, src, 0x11); |
| 1153 | } |
| 1154 | |
| 1155 | void pcmpeqb(XMMRegister dst, XMMRegister src); |
| 1156 | void pcmpeqw(XMMRegister dst, XMMRegister src); |
| 1157 | |
| 1158 | void pcmpestri(XMMRegister dst, Address src, int imm8); |
| 1159 | void pcmpestri(XMMRegister dst, XMMRegister src, int imm8); |
| 1160 | |
| 1161 | void pmovzxbw(XMMRegister dst, XMMRegister src); |
| 1162 | void pmovzxbw(XMMRegister dst, Address src); |
| 1163 | |
| 1164 | void pmovmskb(Register dst, XMMRegister src); |
| 1165 | |
| 1166 | void ptest(XMMRegister dst, XMMRegister src); |
| 1167 | |
| 1168 | void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } |
| 1169 | void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } |
| 1170 | void sqrtsd(XMMRegister dst, AddressLiteral src); |
| 1171 | |
| 1172 | void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } |
| 1173 | void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } |
| 1174 | void sqrtss(XMMRegister dst, AddressLiteral src); |
| 1175 | |
| 1176 | void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } |
| 1177 | void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } |
| 1178 | void subsd(XMMRegister dst, AddressLiteral src); |
| 1179 | |
| 1180 | void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } |
| 1181 | void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } |
| 1182 | void subss(XMMRegister dst, AddressLiteral src); |
| 1183 | |
| 1184 | void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } |
| 1185 | void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } |
| 1186 | void ucomiss(XMMRegister dst, AddressLiteral src); |
| 1187 | |
| 1188 | void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } |
| 1189 | void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } |
| 1190 | void ucomisd(XMMRegister dst, AddressLiteral src); |
| 1191 | |
| 1192 | // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values |
| 1193 | void xorpd(XMMRegister dst, XMMRegister src); |
| 1194 | void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } |
| 1195 | void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
| 1196 | |
| 1197 | // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values |
| 1198 | void xorps(XMMRegister dst, XMMRegister src); |
| 1199 | void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } |
| 1200 | void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); |
| 1201 | |
| 1202 | // Shuffle Bytes |
| 1203 | void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } |
| 1204 | void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } |
| 1205 | void pshufb(XMMRegister dst, AddressLiteral src); |
| 1206 | // AVX 3-operands instructions |
| 1207 | |
| 1208 | void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } |
| 1209 | void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } |
| 1210 | void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1211 | |
| 1212 | void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } |
| 1213 | void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } |
| 1214 | void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1215 | |
| 1216 | void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); |
| 1217 | void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len); |
| 1218 | |
| 1219 | void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1220 | void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
| 1221 | |
| 1222 | void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1223 | void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
| 1224 | |
| 1225 | void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } |
| 1226 | void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } |
| 1227 | void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
| 1228 | |
| 1229 | void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); |
| 1230 | void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); } |
| 1231 | |
| 1232 | void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1233 | |
| 1234 | void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1235 | |
| 1236 | void vpmovzxbw(XMMRegister dst, Address src, int vector_len); |
| 1237 | void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); } |
| 1238 | |
| 1239 | void vpmovmskb(Register dst, XMMRegister src); |
| 1240 | |
| 1241 | void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1242 | void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
| 1243 | |
| 1244 | void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1245 | void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
| 1246 | |
| 1247 | void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1248 | void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); |
| 1249 | |
| 1250 | void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
| 1251 | void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
| 1252 | |
| 1253 | void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
| 1254 | void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
| 1255 | |
| 1256 | void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
| 1257 | void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
| 1258 | |
| 1259 | void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); |
| 1260 | void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); |
| 1261 | |
| 1262 | void vptest(XMMRegister dst, XMMRegister src); |
| 1263 | |
| 1264 | void punpcklbw(XMMRegister dst, XMMRegister src); |
| 1265 | void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } |
| 1266 | |
| 1267 | void pshufd(XMMRegister dst, Address src, int mode); |
| 1268 | void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); } |
| 1269 | |
| 1270 | void pshuflw(XMMRegister dst, XMMRegister src, int mode); |
| 1271 | void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); } |
| 1272 | |
| 1273 | void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
| 1274 | void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } |
| 1275 | void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
| 1276 | |
| 1277 | void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
| 1278 | void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } |
| 1279 | void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
| 1280 | |
| 1281 | void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } |
| 1282 | void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } |
| 1283 | void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1284 | |
| 1285 | void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } |
| 1286 | void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } |
| 1287 | void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1288 | |
| 1289 | void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } |
| 1290 | void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } |
| 1291 | void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1292 | |
| 1293 | void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } |
| 1294 | void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } |
| 1295 | void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1296 | |
| 1297 | void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } |
| 1298 | void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } |
| 1299 | void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1300 | |
| 1301 | void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } |
| 1302 | void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } |
| 1303 | void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1304 | |
| 1305 | void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1306 | void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src); |
| 1307 | |
| 1308 | // AVX Vector instructions |
| 1309 | |
| 1310 | void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
| 1311 | void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } |
| 1312 | void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
| 1313 | |
| 1314 | void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
| 1315 | void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } |
| 1316 | void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
| 1317 | |
| 1318 | void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
| 1319 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
| 1320 | Assembler::vpxor(dst, nds, src, vector_len); |
| 1321 | else |
| 1322 | Assembler::vxorpd(dst, nds, src, vector_len); |
| 1323 | } |
| 1324 | void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { |
| 1325 | if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 |
| 1326 | Assembler::vpxor(dst, nds, src, vector_len); |
| 1327 | else |
| 1328 | Assembler::vxorpd(dst, nds, src, vector_len); |
| 1329 | } |
| 1330 | void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); |
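
// Usage sketch (illustrative only): the dispatch above lets callers clear a
// 256-bit register without testing the AVX level themselves; on AVX1 the
// bitwise-equivalent float-domain vxorpd is emitted instead:
//   vpxor(vec, vec, vec, Assembler::AVX_256bit);   // vec := 0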
| 1331 | |
| 1332 | // Simple version for AVX2 256bit vectors |
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, Assembler::AVX_256bit); }
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, Assembler::AVX_256bit); }
| 1335 | |
| 1336 | void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { |
| 1337 | if (UseAVX > 2) { |
| 1338 | Assembler::vinserti32x4(dst, dst, src, imm8); |
| 1339 | } else if (UseAVX > 1) { |
| 1340 | // vinserti128 is available only in AVX2 |
| 1341 | Assembler::vinserti128(dst, nds, src, imm8); |
| 1342 | } else { |
| 1343 | Assembler::vinsertf128(dst, nds, src, imm8); |
| 1344 | } |
| 1345 | } |
| 1346 | |
| 1347 | void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) { |
| 1348 | if (UseAVX > 2) { |
| 1349 | Assembler::vinserti32x4(dst, dst, src, imm8); |
| 1350 | } else if (UseAVX > 1) { |
| 1351 | // vinserti128 is available only in AVX2 |
| 1352 | Assembler::vinserti128(dst, nds, src, imm8); |
| 1353 | } else { |
| 1354 | Assembler::vinsertf128(dst, nds, src, imm8); |
| 1355 | } |
| 1356 | } |
| 1357 | |
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
| 1359 | if (UseAVX > 2) { |
| 1360 | Assembler::vextracti32x4(dst, src, imm8); |
| 1361 | } else if (UseAVX > 1) { |
| 1362 | // vextracti128 is available only in AVX2 |
| 1363 | Assembler::vextracti128(dst, src, imm8); |
| 1364 | } else { |
| 1365 | Assembler::vextractf128(dst, src, imm8); |
| 1366 | } |
| 1367 | } |
| 1368 | |
void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
| 1370 | if (UseAVX > 2) { |
| 1371 | Assembler::vextracti32x4(dst, src, imm8); |
| 1372 | } else if (UseAVX > 1) { |
| 1373 | // vextracti128 is available only in AVX2 |
| 1374 | Assembler::vextracti128(dst, src, imm8); |
| 1375 | } else { |
| 1376 | Assembler::vextractf128(dst, src, imm8); |
| 1377 | } |
| 1378 | } |
| 1379 | |
| 1380 | // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers |
| 1381 | void vinserti128_high(XMMRegister dst, XMMRegister src) { |
| 1382 | vinserti128(dst, dst, src, 1); |
| 1383 | } |
| 1384 | void vinserti128_high(XMMRegister dst, Address src) { |
| 1385 | vinserti128(dst, dst, src, 1); |
| 1386 | } |
void vextracti128_high(XMMRegister dst, XMMRegister src) {
| 1388 | vextracti128(dst, src, 1); |
| 1389 | } |
void vextracti128_high(Address dst, XMMRegister src) {
| 1391 | vextracti128(dst, src, 1); |
| 1392 | } |
| 1393 | |
| 1394 | void vinsertf128_high(XMMRegister dst, XMMRegister src) { |
| 1395 | if (UseAVX > 2) { |
| 1396 | Assembler::vinsertf32x4(dst, dst, src, 1); |
| 1397 | } else { |
| 1398 | Assembler::vinsertf128(dst, dst, src, 1); |
| 1399 | } |
| 1400 | } |
| 1401 | |
| 1402 | void vinsertf128_high(XMMRegister dst, Address src) { |
| 1403 | if (UseAVX > 2) { |
| 1404 | Assembler::vinsertf32x4(dst, dst, src, 1); |
| 1405 | } else { |
| 1406 | Assembler::vinsertf128(dst, dst, src, 1); |
| 1407 | } |
| 1408 | } |
| 1409 | |
void vextractf128_high(XMMRegister dst, XMMRegister src) {
| 1411 | if (UseAVX > 2) { |
| 1412 | Assembler::vextractf32x4(dst, src, 1); |
| 1413 | } else { |
| 1414 | Assembler::vextractf128(dst, src, 1); |
| 1415 | } |
| 1416 | } |
| 1417 | |
void vextractf128_high(Address dst, XMMRegister src) {
| 1419 | if (UseAVX > 2) { |
| 1420 | Assembler::vextractf32x4(dst, src, 1); |
| 1421 | } else { |
| 1422 | Assembler::vextractf128(dst, src, 1); |
| 1423 | } |
| 1424 | } |
| 1425 | |
| 1426 | // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers |
| 1427 | void vinserti64x4_high(XMMRegister dst, XMMRegister src) { |
| 1428 | Assembler::vinserti64x4(dst, dst, src, 1); |
| 1429 | } |
| 1430 | void vinsertf64x4_high(XMMRegister dst, XMMRegister src) { |
| 1431 | Assembler::vinsertf64x4(dst, dst, src, 1); |
| 1432 | } |
void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
| 1434 | Assembler::vextracti64x4(dst, src, 1); |
| 1435 | } |
void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
| 1437 | Assembler::vextractf64x4(dst, src, 1); |
| 1438 | } |
void vextractf64x4_high(Address dst, XMMRegister src) {
| 1440 | Assembler::vextractf64x4(dst, src, 1); |
| 1441 | } |
| 1442 | void vinsertf64x4_high(XMMRegister dst, Address src) { |
| 1443 | Assembler::vinsertf64x4(dst, dst, src, 1); |
| 1444 | } |
| 1445 | |
| 1446 | // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers |
| 1447 | void vinserti128_low(XMMRegister dst, XMMRegister src) { |
| 1448 | vinserti128(dst, dst, src, 0); |
| 1449 | } |
| 1450 | void vinserti128_low(XMMRegister dst, Address src) { |
| 1451 | vinserti128(dst, dst, src, 0); |
| 1452 | } |
void vextracti128_low(XMMRegister dst, XMMRegister src) {
| 1454 | vextracti128(dst, src, 0); |
| 1455 | } |
void vextracti128_low(Address dst, XMMRegister src) {
| 1457 | vextracti128(dst, src, 0); |
| 1458 | } |
| 1459 | |
| 1460 | void vinsertf128_low(XMMRegister dst, XMMRegister src) { |
| 1461 | if (UseAVX > 2) { |
| 1462 | Assembler::vinsertf32x4(dst, dst, src, 0); |
| 1463 | } else { |
| 1464 | Assembler::vinsertf128(dst, dst, src, 0); |
| 1465 | } |
| 1466 | } |
| 1467 | |
| 1468 | void vinsertf128_low(XMMRegister dst, Address src) { |
| 1469 | if (UseAVX > 2) { |
| 1470 | Assembler::vinsertf32x4(dst, dst, src, 0); |
| 1471 | } else { |
| 1472 | Assembler::vinsertf128(dst, dst, src, 0); |
| 1473 | } |
| 1474 | } |
| 1475 | |
void vextractf128_low(XMMRegister dst, XMMRegister src) {
| 1477 | if (UseAVX > 2) { |
| 1478 | Assembler::vextractf32x4(dst, src, 0); |
| 1479 | } else { |
| 1480 | Assembler::vextractf128(dst, src, 0); |
| 1481 | } |
| 1482 | } |
| 1483 | |
void vextractf128_low(Address dst, XMMRegister src) {
| 1485 | if (UseAVX > 2) { |
| 1486 | Assembler::vextractf32x4(dst, src, 0); |
| 1487 | } else { |
| 1488 | Assembler::vextractf128(dst, src, 0); |
| 1489 | } |
| 1490 | } |
| 1491 | |
| 1492 | // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers |
| 1493 | void vinserti64x4_low(XMMRegister dst, XMMRegister src) { |
| 1494 | Assembler::vinserti64x4(dst, dst, src, 0); |
| 1495 | } |
| 1496 | void vinsertf64x4_low(XMMRegister dst, XMMRegister src) { |
| 1497 | Assembler::vinsertf64x4(dst, dst, src, 0); |
| 1498 | } |
void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
| 1500 | Assembler::vextracti64x4(dst, src, 0); |
| 1501 | } |
void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
| 1503 | Assembler::vextractf64x4(dst, src, 0); |
| 1504 | } |
void vextractf64x4_low(Address dst, XMMRegister src) {
| 1506 | Assembler::vextractf64x4(dst, src, 0); |
| 1507 | } |
| 1508 | void vinsertf64x4_low(XMMRegister dst, Address src) { |
| 1509 | Assembler::vinsertf64x4(dst, dst, src, 0); |
| 1510 | } |
| 1511 | |
| 1512 | // Carry-Less Multiplication Quadword |
| 1513 | void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1514 | // 0x00 - multiply lower 64 bits [0:63] |
| 1515 | Assembler::vpclmulqdq(dst, nds, src, 0x00); |
| 1516 | } |
| 1517 | void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1518 | // 0x11 - multiply upper 64 bits [64:127] |
| 1519 | Assembler::vpclmulqdq(dst, nds, src, 0x11); |
| 1520 | } |
| 1521 | void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1522 | // 0x10 - multiply nds[0:63] and src[64:127] |
| 1523 | Assembler::vpclmulqdq(dst, nds, src, 0x10); |
| 1524 | } |
| 1525 | void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) { |
| 1526 | //0x01 - multiply nds[64:127] and src[0:63] |
| 1527 | Assembler::vpclmulqdq(dst, nds, src, 0x01); |
| 1528 | } |
| 1529 | |
| 1530 | void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
| 1531 | // 0x00 - multiply lower 64 bits [0:63] |
| 1532 | Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len); |
| 1533 | } |
| 1534 | void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { |
| 1535 | // 0x11 - multiply upper 64 bits [64:127] |
| 1536 | Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len); |
| 1537 | } |
| 1538 | |
| 1539 | // Data |
| 1540 | |
| 1541 | void cmov32( Condition cc, Register dst, Address src); |
| 1542 | void cmov32( Condition cc, Register dst, Register src); |
| 1543 | |
| 1544 | void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } |
| 1545 | |
| 1546 | void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
| 1547 | void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } |
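
// Note (informal): LP64_ONLY/NOT_LP64 select exactly one expansion at compile
// time, so cmovptr is a cmovq on 64-bit and a cmov32 on 32-bit, e.g.:
//   cmovptr(Assembler::notZero, rax, rbx);   // pointer-width conditional move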
| 1548 | |
| 1549 | void movoop(Register dst, jobject obj); |
| 1550 | void movoop(Address dst, jobject obj); |
| 1551 | |
| 1552 | void mov_metadata(Register dst, Metadata* obj); |
| 1553 | void mov_metadata(Address dst, Metadata* obj); |
| 1554 | |
| 1555 | void movptr(ArrayAddress dst, Register src); |
| 1556 | // can this do an lea? |
| 1557 | void movptr(Register dst, ArrayAddress src); |
| 1558 | |
| 1559 | void movptr(Register dst, Address src); |
| 1560 | |
| 1561 | #ifdef _LP64 |
| 1562 | void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); |
| 1563 | #else |
| 1564 | void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit |
| 1565 | #endif |
| 1566 | |
| 1567 | void movptr(Register dst, intptr_t src); |
| 1568 | void movptr(Register dst, Register src); |
| 1569 | void movptr(Address dst, intptr_t src); |
| 1570 | |
| 1571 | void movptr(Address dst, Register src); |
| 1572 | |
| 1573 | void movptr(Register dst, RegisterOrConstant src) { |
| 1574 | if (src.is_constant()) movptr(dst, src.as_constant()); |
| 1575 | else movptr(dst, src.as_register()); |
| 1576 | } |
| 1577 | |
| 1578 | #ifdef _LP64 |
// Generally the next two are only used for moving NULL, although there are
// situations in initializing the mark word where they could be used.
// They are dangerous.

// They exist only on LP64, where int32_t and intptr_t are distinct types;
// on 32-bit the two would coincide and make these declarations ambiguous.
| 1585 | |
| 1586 | void movptr(Address dst, int32_t imm32); |
| 1587 | void movptr(Register dst, int32_t imm32); |
| 1588 | #endif // _LP64 |
| 1589 | |
| 1590 | // to avoid hiding movl |
| 1591 | void mov32(AddressLiteral dst, Register src); |
| 1592 | void mov32(Register dst, AddressLiteral src); |
| 1593 | |
| 1594 | // to avoid hiding movb |
| 1595 | void movbyte(ArrayAddress dst, int src); |
| 1596 | |
| 1597 | // Import other mov() methods from the parent class or else |
| 1598 | // they will be hidden by the following overriding declaration. |
| 1599 | using Assembler::movdl; |
| 1600 | using Assembler::movq; |
| 1601 | void movdl(XMMRegister dst, AddressLiteral src); |
| 1602 | void movq(XMMRegister dst, AddressLiteral src); |
| 1603 | |
| 1604 | // Can push value or effective address |
| 1605 | void pushptr(AddressLiteral src); |
| 1606 | |
| 1607 | void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } |
| 1608 | void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } |
| 1609 | |
| 1610 | void pushoop(jobject obj); |
| 1611 | void pushklass(Metadata* obj); |
| 1612 | |
// sign extend as needed: an 'l' (32-bit) value to a ptr-sized element
| 1614 | void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } |
| 1615 | void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } |
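
// e.g. widening a 32-bit field before pointer arithmetic (the operand below
// is a hypothetical illustration):
//   movl2ptr(rcx, Address(rsi, 8));   // movslq on LP64, plain movl on 32-bit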
| 1616 | |
| 1617 | #ifdef COMPILER2 |
// Generic instruction support for C2 code generation in .ad files
| 1619 | void vabsnegd(int opcode, XMMRegister dst, Register scr); |
| 1620 | void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); |
| 1621 | void vabsnegf(int opcode, XMMRegister dst, Register scr); |
| 1622 | void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); |
| 1623 | void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); |
| 1624 | void vextendbw(bool sign, XMMRegister dst, XMMRegister src); |
| 1625 | void vshiftd(int opcode, XMMRegister dst, XMMRegister src); |
| 1626 | void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1627 | void vshiftw(int opcode, XMMRegister dst, XMMRegister src); |
| 1628 | void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1629 | void vshiftq(int opcode, XMMRegister dst, XMMRegister src); |
| 1630 | void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); |
| 1631 | #endif |
| 1632 | |
| 1633 | // C2 compiled method's prolog code. |
| 1634 | void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); |
| 1635 | |
| 1636 | // clear memory of size 'cnt' qwords, starting at 'base'; |
// if 'is_large' is set, do not try to produce a short loop
| 1638 | void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large); |
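
// Call sketch (register choices here are illustrative, not required):
//   clear_mem(rdi, rcx, rax, xmm0, /*is_large=*/false); // zero rcx qwords at [rdi]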
| 1639 | |
| 1640 | // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers |
| 1641 | void xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp); |
| 1642 | |
| 1643 | #ifdef COMPILER2 |
| 1644 | void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
| 1645 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); |
| 1646 | |
| 1647 | // IndexOf strings. |
// Small strings are loaded through the stack if they cross a page boundary.
| 1649 | void string_indexof(Register str1, Register str2, |
| 1650 | Register cnt1, Register cnt2, |
| 1651 | int int_cnt2, Register result, |
| 1652 | XMMRegister vec, Register tmp, |
| 1653 | int ae); |
| 1654 | |
| 1655 | // IndexOf for constant substrings with size >= 8 elements |
// which don't need to be loaded through the stack.
| 1657 | void string_indexofC8(Register str1, Register str2, |
| 1658 | Register cnt1, Register cnt2, |
| 1659 | int int_cnt2, Register result, |
| 1660 | XMMRegister vec, Register tmp, |
| 1661 | int ae); |
| 1662 | |
// Smallest code: no need to load through the stack;
// just check the string tail.
| 1665 | |
| 1666 | // helper function for string_compare |
| 1667 | void load_next_elements(Register elem1, Register elem2, Register str1, Register str2, |
| 1668 | Address::ScaleFactor scale, Address::ScaleFactor scale1, |
| 1669 | Address::ScaleFactor scale2, Register index, int ae); |
| 1670 | // Compare strings. |
| 1671 | void string_compare(Register str1, Register str2, |
| 1672 | Register cnt1, Register cnt2, Register result, |
| 1673 | XMMRegister vec1, int ae); |
| 1674 | |
// Search for a non-ASCII character (negative byte value) in a byte array;
// return true if one is found and false otherwise.
| 1677 | void has_negatives(Register ary1, Register len, |
| 1678 | Register result, Register tmp1, |
| 1679 | XMMRegister vec1, XMMRegister vec2); |
| 1680 | |
| 1681 | // Compare char[] or byte[] arrays. |
| 1682 | void arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
| 1683 | Register limit, Register result, Register chr, |
| 1684 | XMMRegister vec1, XMMRegister vec2, bool is_char); |
| 1685 | |
| 1686 | #endif |
| 1687 | |
| 1688 | // Fill primitive arrays |
| 1689 | void generate_fill(BasicType t, bool aligned, |
| 1690 | Register to, Register value, Register count, |
| 1691 | Register rtmp, XMMRegister xtmp); |
| 1692 | |
| 1693 | void encode_iso_array(Register src, Register dst, Register len, |
| 1694 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, |
| 1695 | XMMRegister tmp4, Register tmp5, Register result); |
| 1696 | |
| 1697 | #ifdef _LP64 |
| 1698 | void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); |
| 1699 | void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, |
| 1700 | Register y, Register y_idx, Register z, |
| 1701 | Register carry, Register product, |
| 1702 | Register idx, Register kdx); |
| 1703 | void multiply_add_128_x_128(Register x_xstart, Register y, Register z, |
| 1704 | Register yz_idx, Register idx, |
| 1705 | Register carry, Register product, int offset); |
| 1706 | void multiply_128_x_128_bmi2_loop(Register y, Register z, |
| 1707 | Register carry, Register carry2, |
| 1708 | Register idx, Register jdx, |
| 1709 | Register yz_idx1, Register yz_idx2, |
| 1710 | Register tmp, Register tmp3, Register tmp4); |
| 1711 | void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, |
| 1712 | Register yz_idx, Register idx, Register jdx, |
| 1713 | Register carry, Register product, |
| 1714 | Register carry2); |
| 1715 | void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, |
| 1716 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); |
| 1717 | void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3, |
| 1718 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg); |
| 1719 | void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, |
| 1720 | Register tmp2); |
| 1721 | void multiply_add_64(Register sum, Register op1, Register op2, Register carry, |
| 1722 | Register rdxReg, Register raxReg); |
| 1723 | void add_one_64(Register z, Register zlen, Register carry, Register tmp1); |
| 1724 | void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, |
| 1725 | Register tmp3, Register tmp4); |
| 1726 | void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, |
| 1727 | Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg); |
| 1728 | |
| 1729 | void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, |
| 1730 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, |
| 1731 | Register raxReg); |
| 1732 | void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1, |
| 1733 | Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, |
| 1734 | Register raxReg); |
| 1735 | void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, |
| 1736 | Register result, Register tmp1, Register tmp2, |
| 1737 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); |
| 1738 | #endif |
| 1739 | |
| 1740 | // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. |
| 1741 | void update_byte_crc32(Register crc, Register val, Register table); |
| 1742 | void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); |
| 1743 | // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic |
| 1744 | // Note on a naming convention: |
| 1745 | // Prefix w = register only used on a Westmere+ architecture |
| 1746 | // Prefix n = register only used on a Nehalem architecture |
| 1747 | #ifdef _LP64 |
| 1748 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
| 1749 | Register tmp1, Register tmp2, Register tmp3); |
| 1750 | #else |
| 1751 | void crc32c_ipl_alg4(Register in_out, uint32_t n, |
| 1752 | Register tmp1, Register tmp2, Register tmp3, |
| 1753 | XMMRegister xtmp1, XMMRegister xtmp2); |
| 1754 | #endif |
| 1755 | void crc32c_pclmulqdq(XMMRegister w_xtmp1, |
| 1756 | Register in_out, |
| 1757 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, |
| 1758 | XMMRegister w_xtmp2, |
| 1759 | Register tmp1, |
| 1760 | Register n_tmp2, Register n_tmp3); |
| 1761 | void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, |
| 1762 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1763 | Register tmp1, Register tmp2, |
| 1764 | Register n_tmp3); |
| 1765 | void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, |
| 1766 | Register in_out1, Register in_out2, Register in_out3, |
| 1767 | Register tmp1, Register tmp2, Register tmp3, |
| 1768 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1769 | Register tmp4, Register tmp5, |
| 1770 | Register n_tmp6); |
| 1771 | void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, |
| 1772 | Register tmp1, Register tmp2, Register tmp3, |
| 1773 | Register tmp4, Register tmp5, Register tmp6, |
| 1774 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, |
| 1775 | bool is_pclmulqdq_supported); |
| 1776 | // Fold 128-bit data chunk |
| 1777 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); |
| 1778 | void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); |
| 1779 | // Fold 8-bit data |
| 1780 | void fold_8bit_crc32(Register crc, Register table, Register tmp); |
| 1781 | void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); |
| 1782 | void fold_128bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); |
| 1783 | |
| 1784 | // Compress char[] array to byte[]. |
| 1785 | void char_array_compress(Register src, Register dst, Register len, |
| 1786 | XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, |
| 1787 | XMMRegister tmp4, Register tmp5, Register result); |
| 1788 | |
| 1789 | // Inflate byte[] array to char[]. |
| 1790 | void byte_array_inflate(Register src, Register dst, Register len, |
| 1791 | XMMRegister tmp1, Register tmp2); |
| 1792 | |
| 1793 | }; |
| 1794 | |
| 1795 | /** |
| 1796 | * class SkipIfEqual: |
| 1797 | * |
* Instantiating this class will result in assembly code being output that
* jumps around any code emitted between the creation of the instance and its
* automatic destruction at the end of a scope block, depending on the value
* of the flag passed to the constructor, which is checked at run-time.
| 1802 | */ |
| 1803 | class SkipIfEqual { |
| 1804 | private: |
| 1805 | MacroAssembler* _masm; |
| 1806 | Label _label; |
| 1807 | |
| 1808 | public: |
| 1809 | SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); |
| 1810 | ~SkipIfEqual(); |
| 1811 | }; |
| 1812 | |
| 1813 | #endif // CPU_X86_MACROASSEMBLER_X86_HPP |
| 1814 | |