1 | /* |
2 | * Copyright (c) 2016, Intel Corporation. |
3 | * Intel Math Library (LIBM) Source Code |
4 | * |
5 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
6 | * |
7 | * This code is free software; you can redistribute it and/or modify it |
8 | * under the terms of the GNU General Public License version 2 only, as |
9 | * published by the Free Software Foundation. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | * |
25 | */ |
26 | |
27 | #include "precompiled.hpp" |
28 | #include "asm/assembler.hpp" |
29 | #include "asm/assembler.inline.hpp" |
30 | #include "macroAssembler_x86.hpp" |
31 | #include "utilities/globalDefinitions.hpp" |
32 | |
33 | /******************************************************************************/ |
34 | // ALGORITHM DESCRIPTION - LOG() |
35 | // --------------------- |
36 | // |
37 | // x=2^k * mx, mx in [1,2) |
38 | // |
39 | // Get B~1/mx based on the output of rcpss instruction (B0) |
40 | // B = int((B0*2^7+0.5))/2^7 |
41 | // |
42 | // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) |
43 | // |
44 | // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and |
45 | // p(r) is a degree 7 polynomial |
46 | // -log(B) read from data table (high, low parts) |
47 | // Result is formed from high and low parts |
48 | // |
49 | // Special cases: |
50 | // log(NaN) = quiet NaN, and raise invalid exception |
51 | // log(+INF) = that INF |
52 | // log(0) = -INF with divide-by-zero exception raised |
53 | // log(1) = +0 |
54 | // log(x) = NaN with invalid exception raised if x < -0, including -INF |
55 | // |
56 | /******************************************************************************/ |
57 | |
58 | #ifdef _LP64 |
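59 | // The 64-bit code requires nothing beyond SSE2; SSE3 (movddup) is used only when VM_Version::supports_sse3() reports it, with an SSE2 fallback otherwise |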
// Lookup table used by the 64-bit fast_log stub.
// Layout: 516 juint words = 129 entries of 16 bytes. Each entry is a pair of
// doubles stored as (low word, high word): first a "high part", then a
// "low part". Per the algorithm description these are the -log(B) values.
// The first entry's high part is 0x3fe62e42fefa3800 (log(2) with its low
// mantissa bits cleared) and the last entry is (+0.0, -0.0).
// fast_log addresses the table with a byte offset that is a multiple of 16
// (bits 16..23 of the rounded reciprocal, shifted right by 12), so neither the
// entry order nor the 16-byte entry size may change.
60 | ATTRIBUTE_ALIGNED(16) juint _L_tbl[] = |
61 | { |
62 | 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
63 | 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
64 | 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
65 | 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
66 | 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
67 | 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
68 | 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
69 | 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
70 | 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
71 | 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
72 | 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
73 | 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
74 | 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
75 | 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
76 | 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
77 | 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
78 | 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
79 | 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
80 | 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
81 | 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
82 | 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
83 | 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
84 | 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
85 | 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
86 | 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
87 | 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
88 | 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
89 | 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
90 | 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
91 | 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
92 | 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
93 | 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
94 | 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
95 | 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
96 | 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
97 | 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
98 | 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
99 | 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
100 | 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
101 | 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
102 | 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
103 | 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
104 | 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
105 | 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
106 | 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
107 | 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
108 | 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
109 | 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
110 | 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
111 | 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
112 | 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
113 | 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
114 | 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
115 | 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
116 | 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
117 | 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
118 | 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
119 | 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
120 | 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
121 | 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
122 | 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
123 | 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
124 | 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
125 | 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
126 | 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
127 | 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
128 | 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
129 | 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
130 | 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
131 | 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
132 | 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
133 | 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
134 | 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
135 | 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
136 | 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
137 | 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
138 | 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
139 | 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
140 | 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
141 | 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
142 | 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
143 | 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
144 | 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
145 | 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
146 | 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
147 | 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
148 | 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
149 | 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
150 | 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
151 | 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
152 | 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
153 | 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
154 | 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
155 | 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
156 | 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
157 | 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
158 | 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
159 | 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
160 | 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
161 | 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
162 | 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
163 | 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
164 | 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
165 | 0x80000000UL |
166 | }; |
167 | |
// Two doubles, each stored as (low word, high word): a high part
// 0x3fa62e42fefa3800 at byte offset 0 and a low part 0x3ceef35793c76730 at
// byte offset 8. Both have the same mantissa as the log(2) pair that opens
// _L_tbl but an exponent field smaller by 4, i.e. they hold log(2)/16.
// fast_log multiplies them by the exponent term converted from eax, which is
// still positioned at bit 4 of the extracted 16-bit word (16*k), which is
// presumably why the constant is pre-divided by 16.
168 | ATTRIBUTE_ALIGNED(16) juint _log2[] = |
169 | { |
170 | 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL |
171 | }; |
172 | |
// Six doubles, each stored as (low word, high word), loaded by fast_log as
// three 16-byte pairs at byte offsets 0, 16 and 32:
//   offset  0: 0x3fc2492492492492 (~ 1/7),  0xbfd0000000000000 (-1/4)
//   offset 16: 0xbfc5555e3d6fb175 (~ -1/6), 0x3fd5555555555555 (~ 1/3)
//   offset 32: 0x3fc999999999999a (~ 1/5),  0xbfe0000000000000 (-1/2)
// These are the coefficients of the polynomial p(r) named in the algorithm
// description; the pairing lets two terms be evaluated per packed instruction.
173 | ATTRIBUTE_ALIGNED(16) juint _coeff[] = |
174 | { |
175 | 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
176 | 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
177 | 0x00000000UL, 0xbfe00000UL |
178 | }; |
179 | |
180 | //registers, |
181 | // input: xmm0 |
182 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
183 | // rax, rdx, rcx, r8, r11 |
184 | |
// Emits the 64-bit instruction sequence that computes the natural logarithm
// of the double in xmm0 and leaves the result in xmm0.
//
// Parameters (registers supplied by the caller):
//   xmm0           - input x on entry, result on exit
//   xmm1..xmm7     - scratch, overwritten
//   eax, ecx, edx  - scratch general-purpose registers, overwritten
//   tmp1           - scratch; used to materialize the 0xffffe00000000000 mask
//   tmp2           - scratch; holds the address of _L_tbl
// NOTE(review): the body also names rax, rdx and rcx directly (mov64, movdq,
// movdl), so it presumably expects eax/edx/ecx to alias those registers -
// confirm against the caller.
//
// The emitted code reserves 24 bytes of stack, keeps a copy of x at [rsp+0],
// and releases the 24 bytes at the end. No return instruction is emitted here.
// Instruction order is significant: registers are reused aggressively and one
// branch (L_2TAG_PACKET_5_0_2) depends on flags set before the jump to it.
185 | void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { |
186 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
187 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
188 | Label L_2TAG_PACKET_8_0_2; |
189 | Label B1_3, B1_5, start; |
190 | |
// The five general-purpose registers must all be distinct.
191 | assert_different_registers(tmp1, tmp2, eax, ecx, edx); |
// Jump to 'start', which is bound right below; the three declarations in
// between are C++-side addresses of the constant tables and emit no code.
192 | jmp(start); |
193 | address L_tbl = (address)_L_tbl; |
194 | address log2 = (address)_log2; |
195 | address coeff = (address)_coeff; |
196 | |
// --- Prologue: save x and set up constants -------------------------------
//   [rsp+0] = x
//   xmm2 = 0x3ff0000000000000 (bit pattern of 1.0)
//   xmm3 = 0x77f0000000000000 (exponent bits OR-ed onto the mantissa later)
//   xmm4 = 32768 (added to the reciprocal estimate; presumably the "+0.5"
//          rounding step of B in the algorithm description)
//   xmm5 = 0xffffe00000000000 (mask keeping sign, exponent and the top 7
//          mantissa bits; used to split values into high and low parts)
//   xmm1 = copy of x, later reduced to its 52 mantissa bits (psllq/psrlq 12)
//   eax  = top 16 bits of x (sign, 11 exponent bits, 4 mantissa bits)
//   ecx  = 16352 (0x3fe0), subtracted from the exponent field later
//   tmp2 = address of _L_tbl
//   xmm0 = x OR 1.0, shifted down and passed through rcpps to get the
//          reciprocal estimate (B0 in the algorithm description)
//   xmm6 = copy of xmm5 (shuffle selector 228 is the identity permutation)
197 | bind(start); |
198 | subq(rsp, 24); |
199 | movsd(Address(rsp, 0), xmm0); |
200 | mov64(rax, 0x3ff0000000000000); |
201 | movdq(xmm2, rax); |
202 | mov64(rdx, 0x77f0000000000000); |
203 | movdq(xmm3, rdx); |
204 | movl(ecx, 32768); |
205 | movdl(xmm4, rcx); |
206 | mov64(tmp1, 0xffffe00000000000); |
207 | movdq(xmm5, tmp1); |
208 | movdqu(xmm1, xmm0); |
209 | pextrw(eax, xmm0, 3); |
210 | por(xmm0, xmm2); |
211 | movl(ecx, 16352); |
212 | psrlq(xmm0, 27); |
213 | lea(tmp2, ExternalAddress(L_tbl)); |
214 | psrld(xmm0, 2); |
215 | rcpps(xmm0, xmm0); |
216 | psllq(xmm1, 12); |
217 | pshufd(xmm6, xmm5, 228); |
218 | psrlq(xmm1, 12); |
// Unsigned test of (top word - 16) >= 32736 (0x7fe0): true when the top word
// is below 16 (exponent field zero: zero or denormal) or at least 0x7ff0
// (INF, NaN, or any value with the sign bit set). Those go to the special path.
219 | subl(eax, 16); |
220 | cmpl(eax, 32736); |
221 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
222 | |
// --- Main path (also entered from the denormal path below) ---------------
// Round the reciprocal estimate, take its bits 16..23 as the table index
// (edx becomes a byte offset that is a multiple of 16), and rebuild it as a
// double B in xmm0. Split the mantissa value in xmm1 into a high part (xmm5)
// and a low part (xmm1), multiply both by B, and form the reduced argument
// r = (B*hi - 1.0) + B*lo in xmm1.
// eax is masked to the exponent field and reduced by ecx, giving 16*k, which
// is converted to double in xmm7.
223 | bind(L_2TAG_PACKET_1_0_2); |
224 | paddd(xmm0, xmm4); |
225 | por(xmm1, xmm3); |
226 | movdl(edx, xmm0); |
227 | psllq(xmm0, 29); |
228 | pand(xmm5, xmm1); |
229 | pand(xmm0, xmm6); |
230 | subsd(xmm1, xmm5); |
231 | mulpd(xmm5, xmm0); |
232 | andl(eax, 32752); |
233 | subl(eax, ecx); |
234 | cvtsi2sdl(xmm7, eax); |
235 | mulsd(xmm1, xmm0); |
236 | movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL |
237 | movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
238 | subsd(xmm5, xmm2); |
239 | andl(edx, 16711680); |
240 | shrl(edx, 12); |
// xmm0 = table entry (high part in the low lane, low part in the high lane).
241 | movdqu(xmm0, Address(tmp2, edx)); |
242 | movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
243 | addsd(xmm1, xmm5); |
244 | movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
// xmm6 = (16*k) * high(log(2)/16): high part of k*log(2).
245 | mulsd(xmm6, xmm7); |
// Duplicate r into both lanes of xmm5 (movddup needs SSE3; the else branch
// is the SSE2 equivalent).
246 | if (VM_Version::supports_sse3()) { |
247 | movddup(xmm5, xmm1); |
248 | } |
249 | else { |
250 | movdqu(xmm5, xmm1); |
251 | movlhps(xmm5, xmm5); |
252 | } |
// xmm7 = (16*k) * low(log(2)/16): low part of k*log(2).
253 | mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL |
// Polynomial evaluation on two lanes at once, interleaved with the
// high-part sum: xmm0 = table_high + k*log(2)_high, then + r.
254 | mulsd(xmm3, xmm1); |
255 | addsd(xmm0, xmm6); |
256 | mulpd(xmm4, xmm5); |
257 | mulpd(xmm5, xmm5); |
// Keep a copy of the high-part sum (before r is added) in xmm6.
258 | if (VM_Version::supports_sse3()) { |
259 | movddup(xmm6, xmm0); |
260 | } |
261 | else { |
262 | movdqu(xmm6, xmm0); |
263 | movlhps(xmm6, xmm6); |
264 | } |
265 | addsd(xmm0, xmm1); |
266 | addpd(xmm4, xmm2); |
267 | mulpd(xmm3, xmm5); |
// xmm6 = (sum before adding r) - (sum after adding r); adding it back to r
// below leaves in xmm1 the part of r that the high-part addition lost.
268 | subsd(xmm6, xmm0); |
269 | mulsd(xmm4, xmm1); |
// Selector 238 copies the upper quadword: xmm2 = low part of the table entry.
270 | pshufd(xmm2, xmm0, 238); |
271 | addsd(xmm1, xmm6); |
272 | mulsd(xmm5, xmm5); |
// Accumulate the low-order terms: table low part + k*log(2) low part +
// rounding remainder + both polynomial lanes, then fold into the result.
273 | addsd(xmm7, xmm2); |
274 | addpd(xmm4, xmm3); |
275 | addsd(xmm1, xmm7); |
276 | mulpd(xmm4, xmm5); |
277 | addsd(xmm1, xmm4); |
278 | pshufd(xmm5, xmm4, 238); |
279 | addsd(xmm1, xmm5); |
280 | addsd(xmm0, xmm1); |
281 | jmp(B1_5); |
282 | |
// --- Special inputs -------------------------------------------------------
// Reload x from the stack and undo the earlier "subl(eax, 16)".
//   top word >= 0x8000 (sign bit set)  -> L_2TAG_PACKET_2_0_2
//   top word <  16 (exponent field 0)  -> L_2TAG_PACKET_3_0_2
//   otherwise (+INF or NaN)            -> fall through
283 | bind(L_2TAG_PACKET_0_0_2); |
284 | movq(xmm0, Address(rsp, 0)); |
285 | movq(xmm1, Address(rsp, 0)); |
286 | addl(eax, 16); |
287 | cmpl(eax, 32768); |
288 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
289 | cmpl(eax, 16); |
290 | jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
291 | |
// +INF or NaN: return x + x.
292 | bind(L_2TAG_PACKET_4_0_2); |
293 | addsd(xmm0, xmm0); |
294 | jmp(B1_5); |
295 | |
// Reached from L_2TAG_PACKET_2_0_2 with the flags of "cmpl(ecx, -2097152)"
// still live (sign bit set, exponent field all ones). A non-zero mantissa in
// either the high dword (first jcc) or the low dword (edx) means NaN, which
// is handled as x + x; otherwise the input is -INF and is treated as invalid.
296 | bind(L_2TAG_PACKET_5_0_2); |
297 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
298 | cmpl(edx, 0); |
299 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); |
300 | jmp(L_2TAG_PACKET_6_0_2); |
301 | |
// Exponent field zero, sign clear: +0 or a positive denormal.
// If all 64 bits of (0 + x) are zero, go to the log(0) case. Otherwise scale
// x by 2^128 (top word 18416 = 0x47f0), repeat the prologue steps on the
// scaled value, and rejoin the main path with ecx = 18416 instead of 16352,
// so that the exponent term computed there comes out as 16*k for the
// unscaled x.
302 | bind(L_2TAG_PACKET_3_0_2); |
303 | xorpd(xmm1, xmm1); |
304 | addsd(xmm1, xmm0); |
305 | movdl(edx, xmm1); |
306 | psrlq(xmm1, 32); |
307 | movdl(ecx, xmm1); |
308 | orl(edx, ecx); |
309 | cmpl(edx, 0); |
310 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
311 | xorpd(xmm1, xmm1); |
312 | movl(eax, 18416); |
313 | pinsrw(xmm1, eax, 3); |
314 | mulsd(xmm0, xmm1); |
315 | movdqu(xmm1, xmm0); |
316 | pextrw(eax, xmm0, 3); |
317 | por(xmm0, xmm2); |
318 | psrlq(xmm0, 27); |
319 | movl(ecx, 18416); |
320 | psrld(xmm0, 2); |
321 | rcpps(xmm0, xmm0); |
322 | psllq(xmm1, 12); |
323 | pshufd(xmm6, xmm5, 228); |
324 | psrlq(xmm1, 12); |
325 | jmp(L_2TAG_PACKET_1_0_2); |
326 | |
// Sign bit set. edx = low dword of x, ecx = high dword doubled (which shifts
// the sign bit out). ecx >= 0xffe00000 (-2097152) means the exponent field is
// all ones (-INF or NaN). Otherwise, all remaining bits zero means x is -0
// (log(0) case); anything else is a negative finite number (invalid case).
327 | bind(L_2TAG_PACKET_2_0_2); |
328 | movdl(edx, xmm1); |
329 | psrlq(xmm1, 32); |
330 | movdl(ecx, xmm1); |
331 | addl(ecx, ecx); |
332 | cmpl(ecx, -2097152); |
333 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
334 | orl(edx, ecx); |
335 | cmpl(edx, 0); |
336 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); |
337 | |
// Invalid input (x < 0): compute 0 * INF (top word 32752 = 0x7ff0), which
// yields NaN and raises the invalid exception. The value 3 stored at [rsp+16]
// is not read again in this function; presumably an error code - confirm.
338 | bind(L_2TAG_PACKET_6_0_2); |
339 | xorpd(xmm1, xmm1); |
340 | xorpd(xmm0, xmm0); |
341 | movl(eax, 32752); |
342 | pinsrw(xmm1, eax, 3); |
343 | mulsd(xmm0, xmm1); |
344 | movl(Address(rsp, 16), 3); |
345 | jmp(L_2TAG_PACKET_8_0_2); |
// log(0): compute -1.0 / 0 (top word 49136 = 0xbff0 is -1.0), which yields
// -INF and raises divide-by-zero. The value 2 stored at [rsp+16] is likewise
// not read again in this function.
346 | bind(L_2TAG_PACKET_7_0_2); |
347 | xorpd(xmm1, xmm1); |
348 | xorpd(xmm0, xmm0); |
349 | movl(eax, 49136); |
350 | pinsrw(xmm0, eax, 3); |
351 | divsd(xmm0, xmm1); |
352 | movl(Address(rsp, 16), 2); |
353 | |
// Error paths: the result goes through [rsp+8] and back into xmm0.
354 | bind(L_2TAG_PACKET_8_0_2); |
355 | movq(Address(rsp, 8), xmm0); |
356 | |
357 | bind(B1_3); |
358 | movq(xmm0, Address(rsp, 8)); |
359 | |
// Common exit: release the 24 bytes reserved at 'start'.
360 | bind(B1_5); |
361 | addq(rsp, 24); |
362 | } |
363 | #else |
364 | // The 32 bit code is at most SSE2 compliant |
// Combined constant table used by the 32-bit fast_log stub, which addresses
// it with hard-coded byte offsets from its base. Each double is stored as
// (low word, high word). Layout (byte offsets):
//      0 .. 2063 : 129 lookup entries of 16 bytes (high part, low part); per
//                  the algorithm description these are the -log(B) values
//   2064         : high part of log(2)/16 (0x3fa62e42fefa3800)
//   2072         : low part of log(2)/16  (0x3ceef35793c76730)
//   2080         : coefficients 0x3fc2492492492492 (~ 1/7), 0xbfd0000000000000 (-1/4)
//   2096         : coefficients 0xbfc5555e3d6fb175 (~ -1/6), 0x3fd5555555555555 (~ 1/3)
//   2112         : coefficients 0x3fc999999999999a (~ 1/5), 0xbfe0000000000000 (-1/2)
//   2128         : mask 0xffffe00000000000, stored twice
// Inserting, removing or reordering words breaks the offsets used by fast_log.
365 | ATTRIBUTE_ALIGNED(16) juint _static_const_table_log[] = |
366 | { |
367 | 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, |
368 | 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, |
369 | 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, |
370 | 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, |
371 | 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, |
372 | 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, |
373 | 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, |
374 | 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, |
375 | 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, |
376 | 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, |
377 | 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, |
378 | 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, |
379 | 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, |
380 | 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, |
381 | 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, |
382 | 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, |
383 | 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, |
384 | 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, |
385 | 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, |
386 | 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, |
387 | 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, |
388 | 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, |
389 | 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, |
390 | 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, |
391 | 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, |
392 | 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, |
393 | 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, |
394 | 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, |
395 | 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, |
396 | 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, |
397 | 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, |
398 | 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, |
399 | 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, |
400 | 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, |
401 | 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, |
402 | 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, |
403 | 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, |
404 | 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, |
405 | 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, |
406 | 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, |
407 | 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, |
408 | 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, |
409 | 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, |
410 | 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, |
411 | 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, |
412 | 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, |
413 | 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, |
414 | 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, |
415 | 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, |
416 | 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, |
417 | 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, |
418 | 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, |
419 | 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, |
420 | 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, |
421 | 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, |
422 | 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, |
423 | 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, |
424 | 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, |
425 | 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, |
426 | 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, |
427 | 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, |
428 | 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, |
429 | 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, |
430 | 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, |
431 | 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, |
432 | 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, |
433 | 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, |
434 | 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, |
435 | 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, |
436 | 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, |
437 | 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, |
438 | 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, |
439 | 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, |
440 | 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, |
441 | 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, |
442 | 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, |
443 | 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, |
444 | 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, |
445 | 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, |
446 | 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, |
447 | 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, |
448 | 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, |
449 | 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, |
450 | 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, |
451 | 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, |
452 | 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, |
453 | 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, |
454 | 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, |
455 | 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, |
456 | 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, |
457 | 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, |
458 | 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, |
459 | 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, |
460 | 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, |
461 | 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, |
462 | 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, |
463 | 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, |
464 | 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, |
465 | 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, |
466 | 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, |
467 | 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, |
468 | 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, |
469 | 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
470 | 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, |
471 | 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, |
472 | 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, |
473 | 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, |
474 | 0xffffe000UL |
475 | }; |
476 | //registers, |
477 | // input: xmm0 |
478 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
479 | // rax, rdx, rcx, rbx (tmp) |
480 | |
// Emits the 32-bit instruction sequence that computes the natural logarithm
// of a double.
//
// Parameters (registers supplied by the caller):
//   xmm0..xmm7     - scratch XMM registers, overwritten
//   eax, ecx, edx  - scratch general-purpose registers, overwritten
//   tmp            - holds the address of _static_const_table_log while the
//                    code runs; its incoming value is saved at [rsp+40] and
//                    restored at the end
//
// Input/output as emitted here:
//   - x is loaded from the stack at [rsp+112] after the code's own 104-byte
//     adjustment of rsp; whatever xmm0 held on entry is overwritten.
//     NOTE(review): presumably this is the caller's argument slot - confirm
//     against the stub that invokes this routine.
//   - the result is pushed on the x87 stack with fld_d.
//   - no instruction here adds the 104 bytes back to rsp and no return is
//     emitted; presumably the caller restores the stack pointer - confirm.
// Instruction order is significant: registers are reused aggressively and one
// branch (L_2TAG_PACKET_6_0_2) depends on flags set before the jump to it.
481 | void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
482 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
483 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
484 | Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; |
485 | Label L_2TAG_PACKET_10_0_2, start; |
486 | |
// The four general-purpose registers must all be distinct.
487 | assert_different_registers(tmp, eax, ecx, edx); |
// Jump to 'start', which is bound right below; the declaration in between is
// a C++-side address of the constant table and emits no code.
488 | jmp(start); |
489 | address static_const_table = (address)_static_const_table_log; |
490 | |
// --- Prologue: save tmp, load x and set up constants ---------------------
//   [rsp+40] = caller's tmp; tmp = address of the constant table
//   xmm2 = double with top word 16368 (0x3ff0), i.e. 1.0
//   xmm3 = double with top word 30704 (0x77f0), OR-ed onto the mantissa later
//   xmm0 = x (from [rsp+112]); xmm1 = copy, later reduced to its 52 mantissa
//          bits (psllq/psrlq 12)
//   xmm4 = 32768 (added to the reciprocal estimate; presumably the "+0.5"
//          rounding step of B in the algorithm description)
//   xmm5 = mask 0xffffe00000000000 from table offset 2128
//   eax  = top 16 bits of x (sign, 11 exponent bits, 4 mantissa bits)
//   ecx  = 16352 (0x3fe0), subtracted from the exponent field later
//   xmm0 = x OR 1.0, shifted and passed through rcpss to get the reciprocal
//          estimate (B0 in the algorithm description)
//   xmm6 = copy of xmm5 (shuffle selector 228 is the identity permutation)
491 | bind(start); |
492 | subl(rsp, 104); |
493 | movl(Address(rsp, 40), tmp); |
494 | lea(tmp, ExternalAddress(static_const_table)); |
495 | xorpd(xmm2, xmm2); |
496 | movl(eax, 16368); |
497 | pinsrw(xmm2, eax, 3); |
498 | xorpd(xmm3, xmm3); |
499 | movl(edx, 30704); |
500 | pinsrw(xmm3, edx, 3); |
501 | movsd(xmm0, Address(rsp, 112)); |
502 | movapd(xmm1, xmm0); |
503 | movl(ecx, 32768); |
504 | movdl(xmm4, ecx); |
505 | movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL |
506 | pextrw(eax, xmm0, 3); |
507 | por(xmm0, xmm2); |
508 | psllq(xmm0, 5); |
509 | movl(ecx, 16352); |
510 | psrlq(xmm0, 34); |
511 | rcpss(xmm0, xmm0); |
512 | psllq(xmm1, 12); |
513 | pshufd(xmm6, xmm5, 228); |
514 | psrlq(xmm1, 12); |
// Unsigned test of (top word - 16) >= 32736 (0x7fe0): true when the top word
// is below 16 (exponent field zero: zero or denormal) or at least 0x7ff0
// (INF, NaN, or any value with the sign bit set). Those go to the special path.
515 | subl(eax, 16); |
516 | cmpl(eax, 32736); |
517 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
518 | |
// --- Main path (also entered from the denormal path below) ---------------
// Round the reciprocal estimate, take its bits 16..23 as the table index
// (edx becomes a byte offset that is a multiple of 16), and rebuild it as a
// double B in xmm0. Split the mantissa value in xmm1 into a high part (xmm5)
// and a low part (xmm1), multiply both by B, and form the reduced argument
// r = (B*hi - 1.0) + B*lo in xmm1.
// eax is masked to the exponent field and reduced by ecx, giving 16*k, which
// is converted to double in xmm7.
519 | bind(L_2TAG_PACKET_1_0_2); |
520 | paddd(xmm0, xmm4); |
521 | por(xmm1, xmm3); |
522 | movdl(edx, xmm0); |
523 | psllq(xmm0, 29); |
524 | pand(xmm5, xmm1); |
525 | pand(xmm0, xmm6); |
526 | subsd(xmm1, xmm5); |
527 | mulpd(xmm5, xmm0); |
528 | andl(eax, 32752); |
529 | subl(eax, ecx); |
530 | cvtsi2sdl(xmm7, eax); |
531 | mulsd(xmm1, xmm0); |
532 | movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL |
533 | movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL |
534 | subsd(xmm5, xmm2); |
535 | andl(edx, 16711680); |
536 | shrl(edx, 12); |
// xmm0 = table entry (high part in the low lane, low part in the high lane).
537 | movdqu(xmm0, Address(tmp, edx)); |
538 | movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL |
539 | addsd(xmm1, xmm5); |
540 | movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL |
// xmm6 = (16*k) * high(log(2)/16): high part of k*log(2).
541 | mulsd(xmm6, xmm7); |
// Selector 68 duplicates the low quadword: both lanes of xmm5 = r.
542 | pshufd(xmm5, xmm1, 68); |
// xmm7 = (16*k) * low(log(2)/16): low part of k*log(2). mulsd reads only the
// 8 bytes at offset 2072.
543 | mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL |
// Polynomial evaluation on two lanes at once, interleaved with the
// high-part sum: xmm0 = table_high + k*log(2)_high, then + r.
544 | mulsd(xmm3, xmm1); |
545 | addsd(xmm0, xmm6); |
546 | mulpd(xmm4, xmm5); |
547 | mulpd(xmm5, xmm5); |
// Keep a copy of the high-part sum (before r is added) in xmm6.
548 | pshufd(xmm6, xmm0, 228); |
549 | addsd(xmm0, xmm1); |
550 | addpd(xmm4, xmm2); |
551 | mulpd(xmm3, xmm5); |
// xmm6 = (sum before adding r) - (sum after adding r); adding it back to r
// below leaves in xmm1 the part of r that the high-part addition lost.
552 | subsd(xmm6, xmm0); |
553 | mulsd(xmm4, xmm1); |
// Selector 238 copies the upper quadword: xmm2 = low part of the table entry.
554 | pshufd(xmm2, xmm0, 238); |
555 | addsd(xmm1, xmm6); |
556 | mulsd(xmm5, xmm5); |
// Accumulate the low-order terms: table low part + k*log(2) low part +
// rounding remainder + both polynomial lanes, then fold into the result.
557 | addsd(xmm7, xmm2); |
558 | addpd(xmm4, xmm3); |
559 | addsd(xmm1, xmm7); |
560 | mulpd(xmm4, xmm5); |
561 | addsd(xmm1, xmm4); |
562 | pshufd(xmm5, xmm4, 238); |
563 | addsd(xmm1, xmm5); |
564 | addsd(xmm0, xmm1); |
565 | jmp(L_2TAG_PACKET_2_0_2); |
566 | |
// --- Special inputs -------------------------------------------------------
// Reload x from the stack and undo the earlier "subl(eax, 16)".
//   top word >= 0x8000 (sign bit set)  -> L_2TAG_PACKET_3_0_2
//   top word <  16 (exponent field 0)  -> L_2TAG_PACKET_4_0_2
//   otherwise (+INF or NaN)            -> fall through
567 | bind(L_2TAG_PACKET_0_0_2); |
568 | movsd(xmm0, Address(rsp, 112)); |
569 | movdqu(xmm1, xmm0); |
570 | addl(eax, 16); |
571 | cmpl(eax, 32768); |
572 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); |
573 | cmpl(eax, 16); |
574 | jcc(Assembler::below, L_2TAG_PACKET_4_0_2); |
575 | |
// +INF or NaN: return x + x through the normal exit.
576 | bind(L_2TAG_PACKET_5_0_2); |
577 | addsd(xmm0, xmm0); |
578 | jmp(L_2TAG_PACKET_2_0_2); |
579 | |
// Reached from L_2TAG_PACKET_3_0_2 with the flags of "cmpl(ecx, -2097152)"
// still live (sign bit set, exponent field all ones). A non-zero mantissa in
// either the high dword (first jcc) or the low dword (edx) means NaN, which
// is handled as x + x; otherwise the input is -INF and is treated as invalid.
580 | bind(L_2TAG_PACKET_6_0_2); |
581 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
582 | cmpl(edx, 0); |
583 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); |
584 | jmp(L_2TAG_PACKET_7_0_2); |
585 | |
// Sign bit set. edx = low dword of x, ecx = high dword doubled (which shifts
// the sign bit out). ecx >= 0xffe00000 (-2097152) means the exponent field is
// all ones (-INF or NaN). Otherwise, all remaining bits zero means x is -0
// (log(0) case); anything else is a negative finite number (invalid case).
586 | bind(L_2TAG_PACKET_3_0_2); |
587 | movdl(edx, xmm1); |
588 | psrlq(xmm1, 32); |
589 | movdl(ecx, xmm1); |
590 | addl(ecx, ecx); |
591 | cmpl(ecx, -2097152); |
592 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); |
593 | orl(edx, ecx); |
594 | cmpl(edx, 0); |
595 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
596 | |
// Invalid input (x < 0): compute 0 * INF (top word 32752 = 0x7ff0), which
// yields NaN and raises the invalid exception. edx = 3 is not read again in
// this function; presumably an error code - confirm.
597 | bind(L_2TAG_PACKET_7_0_2); |
598 | xorpd(xmm1, xmm1); |
599 | xorpd(xmm0, xmm0); |
600 | movl(eax, 32752); |
601 | pinsrw(xmm1, eax, 3); |
602 | movl(edx, 3); |
603 | mulsd(xmm0, xmm1); |
604 | |
// Common tail of both error paths: spill the special result to [rsp+0],
// reload the original x into xmm0, and push the result on the x87 stack.
605 | bind(L_2TAG_PACKET_9_0_2); |
606 | movsd(Address(rsp, 0), xmm0); |
607 | movsd(xmm0, Address(rsp, 112)); |
608 | fld_d(Address(rsp, 0)); |
609 | jmp(L_2TAG_PACKET_10_0_2); |
610 | |
// log(0): compute -1.0 / 0 (top word 49136 = 0xbff0 is -1.0), which yields
// -INF and raises divide-by-zero. edx = 2 is not read again in this function.
611 | bind(L_2TAG_PACKET_8_0_2); |
612 | xorpd(xmm1, xmm1); |
613 | xorpd(xmm0, xmm0); |
614 | movl(eax, 49136); |
615 | pinsrw(xmm0, eax, 3); |
616 | divsd(xmm0, xmm1); |
617 | movl(edx, 2); |
618 | jmp(L_2TAG_PACKET_9_0_2); |
619 | |
// Exponent field zero, sign clear: +0 or a positive denormal.
// If all 64 bits of x are zero, go to the log(0) case. Otherwise scale x by
// 2^128 (top word 18416 = 0x47f0), repeat the prologue steps on the scaled
// value, and rejoin the main path with ecx = 18416 instead of 16352, so that
// the exponent term computed there comes out as 16*k for the unscaled x.
620 | bind(L_2TAG_PACKET_4_0_2); |
621 | movdl(edx, xmm1); |
622 | psrlq(xmm1, 32); |
623 | movdl(ecx, xmm1); |
624 | orl(edx, ecx); |
625 | cmpl(edx, 0); |
626 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); |
627 | xorpd(xmm1, xmm1); |
628 | movl(eax, 18416); |
629 | pinsrw(xmm1, eax, 3); |
630 | mulsd(xmm0, xmm1); |
631 | movapd(xmm1, xmm0); |
632 | pextrw(eax, xmm0, 3); |
633 | por(xmm0, xmm2); |
634 | psllq(xmm0, 5); |
635 | movl(ecx, 18416); |
636 | psrlq(xmm0, 34); |
637 | rcpss(xmm0, xmm0); |
638 | psllq(xmm1, 12); |
639 | pshufd(xmm6, xmm5, 228); |
640 | psrlq(xmm1, 12); |
641 | jmp(L_2TAG_PACKET_1_0_2); |
642 | |
// Normal exit: move the result from xmm0 to the x87 stack via [rsp+24].
643 | bind(L_2TAG_PACKET_2_0_2); |
644 | movsd(Address(rsp, 24), xmm0); |
645 | fld_d(Address(rsp, 24)); |
646 | |
// Common exit: restore the caller's tmp register saved at 'start'.
647 | bind(L_2TAG_PACKET_10_0_2); |
648 | movl(tmp, Address(rsp, 40)); |
649 | } |
650 | #endif |
651 | |