| 1 | /* |
| 2 | * Copyright (c) 2016, Intel Corporation. |
| 3 | * Intel Math Library (LIBM) Source Code |
| 4 | * |
| 5 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 6 | * |
| 7 | * This code is free software; you can redistribute it and/or modify it |
| 8 | * under the terms of the GNU General Public License version 2 only, as |
| 9 | * published by the Free Software Foundation. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 22 | * or visit www.oracle.com if you need additional information or have any |
| 23 | * questions. |
| 24 | * |
| 25 | */ |
| 26 | |
| 27 | #include "precompiled.hpp" |
| 28 | #include "asm/assembler.hpp" |
| 29 | #include "asm/assembler.inline.hpp" |
| 30 | #include "macroAssembler_x86.hpp" |
| 31 | #include "utilities/globalDefinitions.hpp" |
| 32 | |
| 33 | /******************************************************************************/ |
| 34 | // ALGORITHM DESCRIPTION - LOG() |
| 35 | // --------------------- |
| 36 | // |
| 37 | // x=2^k * mx, mx in [1,2) |
| 38 | // |
| 39 | // Get B~1/mx based on the output of the rcpss (32-bit code) or rcpps (64-bit code) instruction (B0) |
| 40 | // B = int((B0*2^7+0.5))/2^7 |
| 41 | // |
| 42 | // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) |
| 43 | // |
| 44 | // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and |
| 45 | // p(r) is a degree 7 polynomial |
| 46 | // -log(B) read from data table (high, low parts) |
| 47 | // Result is formed from high and low parts |
| 48 | // |
| 49 | // Special cases: |
| 50 | // log(NaN) = quiet NaN, and raise invalid exception |
| 51 | // log(+INF) = that INF |
| 52 | // log(0) = -INF with divide-by-zero exception raised |
| 53 | // log(1) = +0 |
| 54 | // log(x) = NaN with invalid exception raised if x < -0, including -INF |
| 55 | // |
| 56 | /******************************************************************************/ |
| 57 | |
| 58 | #ifdef _LP64 |
| 59 | // The 64 bit code requires at most SSE2; movddup (SSE3) is emitted only when VM_Version::supports_sse3() is true |
// _L_tbl: lookup table read by the 64-bit fast_log() below through
// movdqu(xmm0, Address(tmp2, edx)), where tmp2 holds this table's address and
// edx is a byte offset that is a multiple of 16.
//
// Layout: 516 juints = 129 entries of 16 bytes. Each entry is two doubles, each
// written as {low 32 bits, high 32 bits}. Per the algorithm description at the
// top of this file the pair is -log(B) split into a high part and a low part.
// The first entry's high part is 0x3fe62e42fefa3800 (leading bits of log(2));
// the last entry is {+0.0, -0.0}.
//
// NOTE(review): fast_log() masks the index with 0xff0000 before shifting, which
// by itself would allow offsets beyond 129 entries; the in-range guarantee
// presumably comes from the rounded reciprocal lying in [0.5, 1] -- confirm.
| 60 | ATTRIBUTE_ALIGNED(16) juint _L_tbl[] = |
| 61 | { |
| 62 | 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
| 63 | 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
| 64 | 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
| 65 | 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
| 66 | 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
| 67 | 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
| 68 | 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
| 69 | 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
| 70 | 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
| 71 | 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
| 72 | 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
| 73 | 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
| 74 | 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
| 75 | 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
| 76 | 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
| 77 | 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
| 78 | 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
| 79 | 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
| 80 | 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
| 81 | 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
| 82 | 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
| 83 | 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
| 84 | 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
| 85 | 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
| 86 | 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
| 87 | 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
| 88 | 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
| 89 | 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
| 90 | 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
| 91 | 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
| 92 | 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
| 93 | 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
| 94 | 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
| 95 | 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
| 96 | 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
| 97 | 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
| 98 | 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
| 99 | 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
| 100 | 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
| 101 | 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
| 102 | 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
| 103 | 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
| 104 | 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
| 105 | 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
| 106 | 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
| 107 | 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
| 108 | 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
| 109 | 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
| 110 | 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
| 111 | 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
| 112 | 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
| 113 | 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
| 114 | 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
| 115 | 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
| 116 | 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
| 117 | 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
| 118 | 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
| 119 | 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
| 120 | 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
| 121 | 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
| 122 | 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
| 123 | 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
| 124 | 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
| 125 | 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
| 126 | 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
| 127 | 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
| 128 | 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
| 129 | 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
| 130 | 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
| 131 | 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
| 132 | 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
| 133 | 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
| 134 | 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
| 135 | 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
| 136 | 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
| 137 | 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
| 138 | 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
| 139 | 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
| 140 | 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
| 141 | 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
| 142 | 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
| 143 | 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
| 144 | 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
| 145 | 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
| 146 | 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
| 147 | 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
| 148 | 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
| 149 | 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
| 150 | 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
| 151 | 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
| 152 | 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
| 153 | 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
| 154 | 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
| 155 | 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
| 156 | 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
| 157 | 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
| 158 | 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
| 159 | 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
| 160 | 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
| 161 | 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
| 162 | 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
| 163 | 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
| 164 | 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 165 | 0x80000000UL |
| 166 | }; |
| 167 | |
| 168 | ATTRIBUTE_ALIGNED(16) juint _log2[] = |
| 169 | { |
| 170 | 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL |
| 171 | }; |
| 172 | |
| 173 | ATTRIBUTE_ALIGNED(16) juint _coeff[] = |
| 174 | { |
| 175 | 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
| 176 | 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
| 177 | 0x00000000UL, 0xbfe00000UL |
| 178 | }; |
| 179 | |
| 180 | //registers, |
| 181 | // input: xmm0 |
| 182 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
| 183 | // rax, rdx, rcx, r8, r11 |
| 184 | |
// Emits the 64-bit SSE instruction sequence that computes log(x) for a double,
// following the ALGORITHM DESCRIPTION - LOG() comment near the top of this file.
//
// Parameters:
//   xmm0          - holds the input x on entry and the result when the emitted
//                   code falls out at B1_5.
//   xmm1..xmm7    - vector scratch registers, all overwritten.
//   eax, ecx, edx - integer scratch registers, overwritten.
//   tmp1          - integer scratch, used once to materialize a 64-bit mask.
//   tmp2          - integer scratch, holds the address of _L_tbl.
// The five integer registers are asserted to be pairwise different.
//
// The emitted code reserves 24 bytes of stack (released again at B1_5):
//   [rsp+0] copy of x, [rsp+8] special-case result, [rsp+16] an integer code
//   (2 or 3) that is written but never read in this function.
//
// NOTE(review): the body mixes the eax/ecx/edx parameters with the fixed names
// rax, rdx and rcx, so callers presumably pass eax=rax, ecx=rcx, edx=rdx --
// confirm at the call site.
| 185 | void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { |
| 186 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
| 187 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
| 188 | Label L_2TAG_PACKET_8_0_2; |
| 189 | Label B1_3, B1_5, start; |
| 190 | |
| 191 | assert_different_registers(tmp1, tmp2, eax, ecx, edx); |
// This jump targets the label bound just below; the three statements in
// between are plain C++ locals holding the addresses of the constant tables.
| 192 | jmp(start); |
| 193 | address L_tbl = (address)_L_tbl; |
| 194 | address log2 = (address)_log2; |
| 195 | address coeff = (address)_coeff; |
| 196 | |
| 197 | bind(start); |
// Reserve the 24-byte scratch area and keep a copy of x for the special paths.
| 198 | subq(rsp, 24); |
| 199 | movsd(Address(rsp, 0), xmm0); |
// xmm2 = 0x3ff0000000000000, the bit pattern of 1.0.
| 200 | mov64(rax, 0x3ff0000000000000); |
| 201 | movdq(xmm2, rax); |
// xmm3 = exponent bits 0x77f, OR-ed onto the bare mantissa of x further down.
| 202 | mov64(rdx, 0x77f0000000000000); |
| 203 | movdq(xmm3, rdx); |
// xmm4 = 0x8000, added to the reciprocal bits before they are truncated.
| 204 | movl(ecx, 32768); |
| 205 | movdl(xmm4, rcx); |
// xmm5 = mask that keeps only the upper 19 bits of a double (high/low split).
| 206 | mov64(tmp1, 0xffffe00000000000); |
| 207 | movdq(xmm5, tmp1); |
| 208 | movdqu(xmm1, xmm0); |
// eax = bits 48..63 of x: sign, 11 exponent bits, top 4 mantissa bits.
| 209 | pextrw(eax, xmm0, 3); |
| 210 | por(xmm0, xmm2); |
// ecx = 0x3fe0, subtracted from the masked exponent word in the main path.
| 211 | movl(ecx, 16352); |
// Shift the upper bits of the double down into the low dword and take an
// approximate reciprocal of that value.
| 212 | psrlq(xmm0, 27); |
| 213 | lea(tmp2, ExternalAddress(L_tbl)); |
| 214 | psrld(xmm0, 2); |
| 215 | rcpps(xmm0, xmm0); |
// The shift pair below clears the sign and exponent bits of xmm1.
| 216 | psllq(xmm1, 12); |
// Shuffle immediate 228 (0xE4) keeps every dword in place: xmm6 = copy of xmm5.
| 217 | pshufd(xmm6, xmm5, 228); |
| 218 | psrlq(xmm1, 12); |
// Unsigned range test on the top word: after subtracting 16, values >= 0x7fe0
// mean a zero exponent field, an all-ones exponent field, or a set sign bit.
| 219 | subl(eax, 16); |
| 220 | cmpl(eax, 32736); |
| 221 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
| 222 | |
// Main path. Also re-entered from the denormal path, which uses a different
// exponent offset in ecx.
| 223 | bind(L_2TAG_PACKET_1_0_2); |
| 224 | paddd(xmm0, xmm4); |
| 225 | por(xmm1, xmm3); |
// edx keeps the rounded reciprocal bits; they become the table offset below.
| 226 | movdl(edx, xmm0); |
| 227 | psllq(xmm0, 29); |
// Split xmm1 into a masked high part (xmm5) and the remainder (xmm1), then
// multiply both by the masked reciprocal in xmm0.
| 228 | pand(xmm5, xmm1); |
| 229 | pand(xmm0, xmm6); |
| 230 | subsd(xmm1, xmm5); |
| 231 | mulpd(xmm5, xmm0); |
// Isolate the exponent field (mask 0x7ff0), remove the offset held in ecx and
// convert the result to double in xmm7.
| 232 | andl(eax, 32752); |
| 233 | subl(eax, ecx); |
| 234 | cvtsi2sdl(xmm7, eax); |
| 235 | mulsd(xmm1, xmm0); |
| 236 | movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL |
| 237 | movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
// Subtract 1.0 from the high product (r = B*mx - 1.0 in the description).
| 238 | subsd(xmm5, xmm2); |
// Bits 16..23 of the reciprocal select a 16-byte entry: (edx & 0xff0000) >> 12.
| 239 | andl(edx, 16711680); |
| 240 | shrl(edx, 12); |
| 241 | movdqu(xmm0, Address(tmp2, edx)); |
| 242 | movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
| 243 | addsd(xmm1, xmm5); |
| 244 | movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
| 245 | mulsd(xmm6, xmm7); |
// Duplicate the low double of xmm1 into both halves of xmm5; without SSE3 the
// same effect is produced with a copy followed by movlhps.
| 246 | if (VM_Version::supports_sse3()) { |
| 247 | movddup(xmm5, xmm1); |
| 248 | } |
| 249 | else { |
| 250 | movdqu(xmm5, xmm1); |
| 251 | movlhps(xmm5, xmm5); |
| 252 | } |
| 253 | mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL |
| 254 | mulsd(xmm3, xmm1); |
| 255 | addsd(xmm0, xmm6); |
| 256 | mulpd(xmm4, xmm5); |
| 257 | mulpd(xmm5, xmm5); |
// Same low-double duplication as above, this time from xmm0 into xmm6.
| 258 | if (VM_Version::supports_sse3()) { |
| 259 | movddup(xmm6, xmm0); |
| 260 | } |
| 261 | else { |
| 262 | movdqu(xmm6, xmm0); |
| 263 | movlhps(xmm6, xmm6); |
| 264 | } |
| 265 | addsd(xmm0, xmm1); |
| 266 | addpd(xmm4, xmm2); |
| 267 | mulpd(xmm3, xmm5); |
| 268 | subsd(xmm6, xmm0); |
| 269 | mulsd(xmm4, xmm1); |
// Shuffle immediate 238 (0xEE) copies the upper double into both halves.
| 270 | pshufd(xmm2, xmm0, 238); |
| 271 | addsd(xmm1, xmm6); |
| 272 | mulsd(xmm5, xmm5); |
| 273 | addsd(xmm7, xmm2); |
| 274 | addpd(xmm4, xmm3); |
| 275 | addsd(xmm1, xmm7); |
| 276 | mulpd(xmm4, xmm5); |
| 277 | addsd(xmm1, xmm4); |
| 278 | pshufd(xmm5, xmm4, 238); |
| 279 | addsd(xmm1, xmm5); |
// Fold the accumulated terms in xmm1 into xmm0, which carries the result.
| 280 | addsd(xmm0, xmm1); |
| 281 | jmp(B1_5); |
| 282 | |
// Special inputs: reload x, undo the subtraction of 16 and classify by top word.
| 283 | bind(L_2TAG_PACKET_0_0_2); |
| 284 | movq(xmm0, Address(rsp, 0)); |
| 285 | movq(xmm1, Address(rsp, 0)); |
| 286 | addl(eax, 16); |
// Top word >= 0x8000: the sign bit is set.
| 287 | cmpl(eax, 32768); |
| 288 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
// Top word < 16: the exponent field is zero (zero or denormal).
| 289 | cmpl(eax, 16); |
| 290 | jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
| 291 | |
// Remaining case is an all-ones exponent field (also reached for NaNs with the
// sign bit set): return x + x.
| 292 | bind(L_2TAG_PACKET_4_0_2); |
| 293 | addsd(xmm0, xmm0); |
| 294 | jmp(B1_5); |
| 295 | |
// Entered with the flags of the compare that precedes the jump to this label
// (high dword with the sign shifted out, against 0xffe00000). A non-zero
// mantissa in either dword takes the x + x path; otherwise fall to the
// 0 * INF path.
| 296 | bind(L_2TAG_PACKET_5_0_2); |
| 297 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
| 298 | cmpl(edx, 0); |
| 299 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
| 300 | jmp(L_2TAG_PACKET_6_0_2); |
| 301 | |
// Zero exponent field with a clear sign bit: zero or denormal.
| 302 | bind(L_2TAG_PACKET_3_0_2); |
| 303 | xorpd(xmm1, xmm1); |
| 304 | addsd(xmm1, xmm0); |
| 305 | movdl(edx, xmm1); |
| 306 | psrlq(xmm1, 32); |
| 307 | movdl(ecx, xmm1); |
| 308 | orl(edx, ecx); |
| 309 | cmpl(edx, 0); |
// All 64 bits zero: take the divide-by-zero path.
| 310 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
// Otherwise multiply x by the double whose top word is 0x47f0 (2^128), redo the
// setup performed at the start, and use 0x47f0 as the exponent offset in ecx.
| 311 | xorpd(xmm1, xmm1); |
| 312 | movl(eax, 18416); |
| 313 | pinsrw(xmm1, eax, 3); |
| 314 | mulsd(xmm0, xmm1); |
| 315 | movdqu(xmm1, xmm0); |
| 316 | pextrw(eax, xmm0, 3); |
| 317 | por(xmm0, xmm2); |
| 318 | psrlq(xmm0, 27); |
| 319 | movl(ecx, 18416); |
| 320 | psrld(xmm0, 2); |
| 321 | rcpps(xmm0, xmm0); |
| 322 | psllq(xmm1, 12); |
| 323 | pshufd(xmm6, xmm5, 228); |
| 324 | psrlq(xmm1, 12); |
| 325 | jmp(L_2TAG_PACKET_1_0_2); |
| 326 | |
// Sign bit set. edx = low dword of x, ecx = high dword shifted left by one
// (which drops the sign bit).
| 327 | bind(L_2TAG_PACKET_2_0_2); |
| 328 | movdl(edx, xmm1); |
| 329 | psrlq(xmm1, 32); |
| 330 | movdl(ecx, xmm1); |
| 331 | addl(ecx, ecx); |
// -2097152 is 0xffe00000: at or above it the exponent field is all ones.
| 332 | cmpl(ecx, -2097152); |
| 333 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
// Every bit other than the sign is zero: take the divide-by-zero path.
| 334 | orl(edx, ecx); |
| 335 | cmpl(edx, 0); |
| 336 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
| 337 | |
// Result = 0.0 * (double with top word 0x7ff0, i.e. +INF), which yields NaN.
| 338 | bind(L_2TAG_PACKET_6_0_2); |
| 339 | xorpd(xmm1, xmm1); |
| 340 | xorpd(xmm0, xmm0); |
| 341 | movl(eax, 32752); |
| 342 | pinsrw(xmm1, eax, 3); |
| 343 | mulsd(xmm0, xmm1); |
// NOTE(review): the value 3 stored here is not read anywhere in this function;
// presumably a leftover error code from the original library -- confirm.
| 344 | movl(Address(rsp, 16), 3); |
| 345 | jmp(L_2TAG_PACKET_8_0_2); |
// Result = (double with top word 0xbff0, i.e. -1.0) / 0.0, which yields -INF.
| 346 | bind(L_2TAG_PACKET_7_0_2); |
| 347 | xorpd(xmm1, xmm1); |
| 348 | xorpd(xmm0, xmm0); |
| 349 | movl(eax, 49136); |
| 350 | pinsrw(xmm0, eax, 3); |
| 351 | divsd(xmm0, xmm1); |
// NOTE(review): as with the 3 above, this 2 is written but never read here.
| 352 | movl(Address(rsp, 16), 2); |
| 353 | |
// Spill the special-case result and immediately reload it into xmm0.
| 354 | bind(L_2TAG_PACKET_8_0_2); |
| 355 | movq(Address(rsp, 8), xmm0); |
| 356 | |
// B1_3 is bound here but no jump in this function targets it.
| 357 | bind(B1_3); |
| 358 | movq(xmm0, Address(rsp, 8)); |
| 359 | |
// Common exit: release the 24-byte scratch area. No return is emitted here.
| 360 | bind(B1_5); |
| 361 | addq(rsp, 24); |
| 362 | } |
| 363 | #else |
| 364 | // The 32 bit code is at most SSE2 compliant |
// _static_const_table_log: every constant used by the 32-bit fast_log() below,
// packed into one array and addressed as byte offsets from its base (held in
// the tmp register). All doubles are written as {low 32 bits, high 32 bits}.
//
// Byte offsets read by fast_log():
//      0 .. 2063  129 lookup entries of 16 bytes, indexed by (edx & 0xff0000) >> 12;
//                 per the algorithm description these are -log(B) high/low pairs
//   2064, 2072    two doubles multiplied with the exponent-derived integer
//   2080          first 16-byte pair of polynomial coefficients
//   2096          second 16-byte pair of polynomial coefficients
//   2112          third 16-byte pair of polynomial coefficients
//   2128          mask 0xffffe00000000000 (stored twice; the code loads 8 bytes)
| 365 | ATTRIBUTE_ALIGNED(16) juint _static_const_table_log[] = |
| 366 | { |
| 367 | 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
| 368 | 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
| 369 | 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
| 370 | 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
| 371 | 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
| 372 | 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
| 373 | 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
| 374 | 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
| 375 | 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
| 376 | 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
| 377 | 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
| 378 | 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
| 379 | 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
| 380 | 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
| 381 | 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
| 382 | 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
| 383 | 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
| 384 | 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
| 385 | 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
| 386 | 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
| 387 | 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
| 388 | 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
| 389 | 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
| 390 | 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
| 391 | 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
| 392 | 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
| 393 | 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
| 394 | 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
| 395 | 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
| 396 | 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
| 397 | 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
| 398 | 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
| 399 | 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
| 400 | 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
| 401 | 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
| 402 | 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
| 403 | 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
| 404 | 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
| 405 | 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
| 406 | 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
| 407 | 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
| 408 | 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
| 409 | 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
| 410 | 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
| 411 | 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
| 412 | 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
| 413 | 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
| 414 | 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
| 415 | 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
| 416 | 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
| 417 | 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
| 418 | 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
| 419 | 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
| 420 | 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
| 421 | 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
| 422 | 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
| 423 | 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
| 424 | 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
| 425 | 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
| 426 | 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
| 427 | 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
| 428 | 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
| 429 | 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
| 430 | 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
| 431 | 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
| 432 | 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
| 433 | 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
| 434 | 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
| 435 | 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
| 436 | 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
| 437 | 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
| 438 | 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
| 439 | 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
| 440 | 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
| 441 | 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
| 442 | 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
| 443 | 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
| 444 | 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
| 445 | 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
| 446 | 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
| 447 | 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
| 448 | 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
| 449 | 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
| 450 | 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
| 451 | 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
| 452 | 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
| 453 | 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
| 454 | 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
| 455 | 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
| 456 | 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
| 457 | 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
| 458 | 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
| 459 | 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
| 460 | 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
| 461 | 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
| 462 | 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
| 463 | 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
| 464 | 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
| 465 | 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
| 466 | 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
| 467 | 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
| 468 | 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
| 469 | 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
| 470 | 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, |
| 471 | 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
| 472 | 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
| 473 | 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, |
| 474 | 0xffffe000UL |
| 475 | }; |
| 476 | //registers, |
| 477 | // input: xmm0 |
| 478 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
| 479 | // rax, rdx, rcx, rbx (tmp) |
| 480 | |
// Emits the 32-bit SSE2 instruction sequence that computes log(x) for a double,
// following the ALGORITHM DESCRIPTION - LOG() comment near the top of this file.
//
// Parameters:
//   xmm0..xmm7    - vector scratch registers, all overwritten.
//   eax, ecx, edx - integer scratch registers, overwritten.
//   tmp           - saved to [rsp+40] on entry, used as the base address of
//                   _static_const_table_log, and reloaded at the end.
// The four integer registers are asserted to be pairwise different.
//
// The emitted code loads x from the stack at [rsp+112] after lowering rsp by
// 104 (that is, from 8 bytes above rsp on entry), not from xmm0. Every path
// ends by pushing the result onto the x87 stack with fld_d.
//
// NOTE(review): rsp is lowered by 104 here but never raised again in this
// function, and no return is emitted; presumably the caller's frame teardown
// restores rsp -- confirm at the call site.
| 481 | void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
| 482 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
| 483 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
| 484 | Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; |
| 485 | Label L_2TAG_PACKET_10_0_2, start; |
| 486 | |
| 487 | assert_different_registers(tmp, eax, ecx, edx); |
// This jump targets the label bound just below; the statement in between is a
// plain C++ local holding the address of the constant table.
| 488 | jmp(start); |
| 489 | address static_const_table = (address)_static_const_table_log; |
| 490 | |
| 491 | bind(start); |
// Reserve 104 bytes of stack, save tmp and point it at the constant table.
| 492 | subl(rsp, 104); |
| 493 | movl(Address(rsp, 40), tmp); |
| 494 | lea(tmp, ExternalAddress(static_const_table)); |
// xmm2 = double whose top word is 0x3ff0, i.e. 1.0.
| 495 | xorpd(xmm2, xmm2); |
| 496 | movl(eax, 16368); |
| 497 | pinsrw(xmm2, eax, 3); |
// xmm3 = double whose top word is 0x77f0; OR-ed onto the bare mantissa later.
| 498 | xorpd(xmm3, xmm3); |
| 499 | movl(edx, 30704); |
| 500 | pinsrw(xmm3, edx, 3); |
// Load x from the stack and keep a second copy in xmm1.
| 501 | movsd(xmm0, Address(rsp, 112)); |
| 502 | movapd(xmm1, xmm0); |
// xmm4 = 0x8000, added to the reciprocal bits before they are truncated.
| 503 | movl(ecx, 32768); |
| 504 | movdl(xmm4, ecx); |
| 505 | movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL |
// eax = bits 48..63 of x: sign, 11 exponent bits, top 4 mantissa bits.
| 506 | pextrw(eax, xmm0, 3); |
| 507 | por(xmm0, xmm2); |
// Shift left by 5 then right by 34 to bring the upper bits of the double into
// the low dword, then take an approximate reciprocal of that value.
| 508 | psllq(xmm0, 5); |
// ecx = 0x3fe0, subtracted from the masked exponent word in the main path.
| 509 | movl(ecx, 16352); |
| 510 | psrlq(xmm0, 34); |
| 511 | rcpss(xmm0, xmm0); |
// The shift pair below clears the sign and exponent bits of xmm1.
| 512 | psllq(xmm1, 12); |
// Shuffle immediate 228 (0xE4) keeps every dword in place: xmm6 = copy of xmm5.
| 513 | pshufd(xmm6, xmm5, 228); |
| 514 | psrlq(xmm1, 12); |
// Unsigned range test on the top word: after subtracting 16, values >= 0x7fe0
// mean a zero exponent field, an all-ones exponent field, or a set sign bit.
| 515 | subl(eax, 16); |
| 516 | cmpl(eax, 32736); |
| 517 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
| 518 | |
// Main path. Also re-entered from the denormal path, which uses a different
// exponent offset in ecx.
| 519 | bind(L_2TAG_PACKET_1_0_2); |
| 520 | paddd(xmm0, xmm4); |
| 521 | por(xmm1, xmm3); |
// edx keeps the rounded reciprocal bits; they become the table offset below.
| 522 | movdl(edx, xmm0); |
| 523 | psllq(xmm0, 29); |
// Split xmm1 into a masked high part (xmm5) and the remainder (xmm1), then
// multiply both by the masked reciprocal in xmm0.
| 524 | pand(xmm5, xmm1); |
| 525 | pand(xmm0, xmm6); |
| 526 | subsd(xmm1, xmm5); |
| 527 | mulpd(xmm5, xmm0); |
// Isolate the exponent field (mask 0x7ff0), remove the offset held in ecx and
// convert the result to double in xmm7.
| 528 | andl(eax, 32752); |
| 529 | subl(eax, ecx); |
| 530 | cvtsi2sdl(xmm7, eax); |
| 531 | mulsd(xmm1, xmm0); |
| 532 | movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL |
| 533 | movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
// Subtract 1.0 from the high product (r = B*mx - 1.0 in the description).
| 534 | subsd(xmm5, xmm2); |
// Bits 16..23 of the reciprocal select a 16-byte entry: (edx & 0xff0000) >> 12.
| 535 | andl(edx, 16711680); |
| 536 | shrl(edx, 12); |
| 537 | movdqu(xmm0, Address(tmp, edx)); |
| 538 | movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
| 539 | addsd(xmm1, xmm5); |
| 540 | movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
| 541 | mulsd(xmm6, xmm7); |
// Shuffle immediate 68 (0x44) copies the low double into both halves.
| 542 | pshufd(xmm5, xmm1, 68); |
| 543 | mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL |
| 544 | mulsd(xmm3, xmm1); |
| 545 | addsd(xmm0, xmm6); |
| 546 | mulpd(xmm4, xmm5); |
| 547 | mulpd(xmm5, xmm5); |
// Immediate 228 again: xmm6 = unmodified copy of xmm0.
| 548 | pshufd(xmm6, xmm0, 228); |
| 549 | addsd(xmm0, xmm1); |
| 550 | addpd(xmm4, xmm2); |
| 551 | mulpd(xmm3, xmm5); |
| 552 | subsd(xmm6, xmm0); |
| 553 | mulsd(xmm4, xmm1); |
// Shuffle immediate 238 (0xEE) copies the upper double into both halves.
| 554 | pshufd(xmm2, xmm0, 238); |
| 555 | addsd(xmm1, xmm6); |
| 556 | mulsd(xmm5, xmm5); |
| 557 | addsd(xmm7, xmm2); |
| 558 | addpd(xmm4, xmm3); |
| 559 | addsd(xmm1, xmm7); |
| 560 | mulpd(xmm4, xmm5); |
| 561 | addsd(xmm1, xmm4); |
| 562 | pshufd(xmm5, xmm4, 238); |
| 563 | addsd(xmm1, xmm5); |
// Fold the accumulated terms in xmm1 into xmm0, which carries the result.
| 564 | addsd(xmm0, xmm1); |
| 565 | jmp(L_2TAG_PACKET_2_0_2); |
| 566 | |
// Special inputs: reload x, undo the subtraction of 16 and classify by top word.
| 567 | bind(L_2TAG_PACKET_0_0_2); |
| 568 | movsd(xmm0, Address(rsp, 112)); |
| 569 | movdqu(xmm1, xmm0); |
| 570 | addl(eax, 16); |
// Top word >= 0x8000: the sign bit is set.
| 571 | cmpl(eax, 32768); |
| 572 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); |
// Top word < 16: the exponent field is zero (zero or denormal).
| 573 | cmpl(eax, 16); |
| 574 | jcc(Assembler::below, L_2TAG_PACKET_4_0_2); |
| 575 | |
// Remaining case is an all-ones exponent field (also reached for NaNs with the
// sign bit set): return x + x.
| 576 | bind(L_2TAG_PACKET_5_0_2); |
| 577 | addsd(xmm0, xmm0); |
| 578 | jmp(L_2TAG_PACKET_2_0_2); |
| 579 | |
// Entered with the flags of the compare that precedes the jump to this label
// (high dword with the sign shifted out, against 0xffe00000). A non-zero
// mantissa in either dword takes the x + x path; otherwise fall to the
// 0 * INF path.
| 580 | bind(L_2TAG_PACKET_6_0_2); |
| 581 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
| 582 | cmpl(edx, 0); |
| 583 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
| 584 | jmp(L_2TAG_PACKET_7_0_2); |
| 585 | |
// Sign bit set. edx = low dword of x, ecx = high dword shifted left by one
// (which drops the sign bit).
| 586 | bind(L_2TAG_PACKET_3_0_2); |
| 587 | movdl(edx, xmm1); |
| 588 | psrlq(xmm1, 32); |
| 589 | movdl(ecx, xmm1); |
| 590 | addl(ecx, ecx); |
// -2097152 is 0xffe00000: at or above it the exponent field is all ones.
| 591 | cmpl(ecx, -2097152); |
| 592 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); |
// Every bit other than the sign is zero: take the divide-by-zero path.
| 593 | orl(edx, ecx); |
| 594 | cmpl(edx, 0); |
| 595 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
| 596 | |
// Result = 0.0 * (double with top word 0x7ff0, i.e. +INF), which yields NaN.
| 597 | bind(L_2TAG_PACKET_7_0_2); |
| 598 | xorpd(xmm1, xmm1); |
| 599 | xorpd(xmm0, xmm0); |
| 600 | movl(eax, 32752); |
| 601 | pinsrw(xmm1, eax, 3); |
// NOTE(review): edx = 3 is not read anywhere in this function; presumably a
// leftover error code from the original library -- confirm.
| 602 | movl(edx, 3); |
| 603 | mulsd(xmm0, xmm1); |
| 604 | |
// Special-case exit: spill the result to [rsp+0], reload the original x into
// xmm0, and push the result onto the x87 stack.
| 605 | bind(L_2TAG_PACKET_9_0_2); |
| 606 | movsd(Address(rsp, 0), xmm0); |
| 607 | movsd(xmm0, Address(rsp, 112)); |
| 608 | fld_d(Address(rsp, 0)); |
| 609 | jmp(L_2TAG_PACKET_10_0_2); |
| 610 | |
// Result = (double with top word 0xbff0, i.e. -1.0) / 0.0, which yields -INF.
| 611 | bind(L_2TAG_PACKET_8_0_2); |
| 612 | xorpd(xmm1, xmm1); |
| 613 | xorpd(xmm0, xmm0); |
| 614 | movl(eax, 49136); |
| 615 | pinsrw(xmm0, eax, 3); |
| 616 | divsd(xmm0, xmm1); |
// NOTE(review): as with the 3 above, edx = 2 is written but never read here.
| 617 | movl(edx, 2); |
| 618 | jmp(L_2TAG_PACKET_9_0_2); |
| 619 | |
// Zero exponent field with a clear sign bit: zero or denormal.
| 620 | bind(L_2TAG_PACKET_4_0_2); |
| 621 | movdl(edx, xmm1); |
| 622 | psrlq(xmm1, 32); |
| 623 | movdl(ecx, xmm1); |
| 624 | orl(edx, ecx); |
| 625 | cmpl(edx, 0); |
// All 64 bits zero: take the divide-by-zero path.
| 626 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
// Otherwise multiply x by the double whose top word is 0x47f0 (2^128), redo the
// setup performed at the start, and use 0x47f0 as the exponent offset in ecx.
| 627 | xorpd(xmm1, xmm1); |
| 628 | movl(eax, 18416); |
| 629 | pinsrw(xmm1, eax, 3); |
| 630 | mulsd(xmm0, xmm1); |
| 631 | movapd(xmm1, xmm0); |
| 632 | pextrw(eax, xmm0, 3); |
| 633 | por(xmm0, xmm2); |
| 634 | psllq(xmm0, 5); |
| 635 | movl(ecx, 18416); |
| 636 | psrlq(xmm0, 34); |
| 637 | rcpss(xmm0, xmm0); |
| 638 | psllq(xmm1, 12); |
| 639 | pshufd(xmm6, xmm5, 228); |
| 640 | psrlq(xmm1, 12); |
| 641 | jmp(L_2TAG_PACKET_1_0_2); |
| 642 | |
// Normal exit: spill the result to [rsp+24] and push it onto the x87 stack.
| 643 | bind(L_2TAG_PACKET_2_0_2); |
| 644 | movsd(Address(rsp, 24), xmm0); |
| 645 | fld_d(Address(rsp, 24)); |
| 646 | |
// Common tail: restore the caller's value of tmp.
| 647 | bind(L_2TAG_PACKET_10_0_2); |
| 648 | movl(tmp, Address(rsp, 40)); |
| 649 | } |
| 650 | #endif |
| 651 | |