1 | /* |
2 | * Copyright (c) 2016, Intel Corporation. |
3 | * Intel Math Library (LIBM) Source Code |
4 | * |
5 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
6 | * |
7 | * This code is free software; you can redistribute it and/or modify it |
8 | * under the terms of the GNU General Public License version 2 only, as |
9 | * published by the Free Software Foundation. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | * |
25 | */ |
26 | |
27 | #include "precompiled.hpp" |
28 | #include "asm/assembler.hpp" |
29 | #include "asm/assembler.inline.hpp" |
30 | #include "macroAssembler_x86.hpp" |
31 | #include "runtime/stubRoutines.hpp" |
32 | #include "utilities/globalDefinitions.hpp" |
33 | |
34 | /******************************************************************************/ |
35 | // ALGORITHM DESCRIPTION - LOG10() |
36 | // --------------------- |
37 | // |
38 | // Let x=2^k * mx, mx in [1,2) |
39 | // |
40 | // Get B~1/mx based on the output of rcpss instruction (B0) |
41 | // B = int((B0*LH*2^7+0.5))/2^7 |
42 | // LH is a short approximation for log10(e) |
43 | // |
44 | // Reduced argument: r=B*mx-LH (computed accurately in high and low parts) |
45 | // |
46 | // Result: k*log10(2) - log(B) + p(r) |
47 | // p(r) is a degree 7 polynomial |
48 | // -log(B) read from data table (high, low parts) |
49 | // Result is formed from high and low parts |
50 | // |
51 | // Special cases: |
52 | // log10(0) = -INF with divide-by-zero exception raised |
53 | // log10(1) = +0 |
54 | // log10(x) = NaN with invalid exception raised if x < -0, including -INF |
55 | // log10(+INF) = +INF |
56 | // |
57 | /******************************************************************************/ |
58 | |
59 | #ifdef _LP64 |
60 | // The 64 bit code is at most SSE2 compliant |
61 | ATTRIBUTE_ALIGNED(16) juint _HIGHSIGMASK_log10[] = /* two 64-bit AND masks, each stored as (low, high) 32-bit halves; loaded as one 16-byte value into xmm5 by fast_log10 */ |
62 | { /* low qword 0xfffffffff8000000 clears the low 27 bits (ANDed with the mantissa operand); high qword 0xffffe00000000000 clears the low 45 bits (swapped into xmm6 and ANDed with the reciprocal) */ |
63 | 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL |
64 | }; |
65 | |
66 | ATTRIBUTE_ALIGNED(16) juint _LOG10_E[] = /* two doubles, each stored as (low, high) 32-bit halves */ |
67 | { /* first double 0x3fdbc00000000000 = 0.43359375, the short log10(e) approximation LH subtracted when forming r; second double 0x3f5a7a6cbf2e4108 (about 0.0016161) is multiplied by r and added to the result - it appears to be log10(e)/LH - 1 */ |
68 | 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
69 | }; |
70 | |
71 | ATTRIBUTE_ALIGNED(16) juint _L_tbl_log10[] = /* lookup table: 129 entries of 16 bytes (516 juints, 2064 bytes); each entry is two doubles stored as (low, high) 32-bit halves and is fetched with a single movdqu */ |
72 | { /* fast_log10 addresses it as base + edx - 1504 with edx a multiple of 16, then adds the first double as the high part and the second as the low part; the first entry starts with about 0.30103 (log10(2)) and the last entry is all zero. Per the algorithm header these are the -log(B) values (high, low parts) */ |
73 | 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, |
74 | 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, |
75 | 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, |
76 | 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, |
77 | 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, |
78 | 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, |
79 | 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, |
80 | 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, |
81 | 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, |
82 | 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, |
83 | 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, |
84 | 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, |
85 | 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, |
86 | 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, |
87 | 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, |
88 | 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, |
89 | 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, |
90 | 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, |
91 | 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, |
92 | 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, |
93 | 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, |
94 | 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, |
95 | 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, |
96 | 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, |
97 | 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, |
98 | 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, |
99 | 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, |
100 | 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, |
101 | 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, |
102 | 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, |
103 | 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, |
104 | 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, |
105 | 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, |
106 | 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, |
107 | 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, |
108 | 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, |
109 | 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, |
110 | 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, |
111 | 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, |
112 | 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, |
113 | 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, |
114 | 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, |
115 | 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, |
116 | 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, |
117 | 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, |
118 | 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, |
119 | 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, |
120 | 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, |
121 | 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, |
122 | 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, |
123 | 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, |
124 | 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, |
125 | 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, |
126 | 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, |
127 | 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, |
128 | 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, |
129 | 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, |
130 | 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, |
131 | 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, |
132 | 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, |
133 | 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, |
134 | 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, |
135 | 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, |
136 | 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, |
137 | 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, |
138 | 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, |
139 | 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, |
140 | 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, |
141 | 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, |
142 | 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, |
143 | 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, |
144 | 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, |
145 | 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, |
146 | 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, |
147 | 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, |
148 | 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, |
149 | 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, |
150 | 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, |
151 | 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, |
152 | 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, |
153 | 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, |
154 | 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, |
155 | 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, |
156 | 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, |
157 | 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, |
158 | 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, |
159 | 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, |
160 | 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, |
161 | 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, |
162 | 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, |
163 | 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, |
164 | 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, |
165 | 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, |
166 | 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, |
167 | 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, |
168 | 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, |
169 | 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, |
170 | 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, |
171 | 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, |
172 | 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, |
173 | 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, |
174 | 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, |
175 | 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
176 | 0x00000000UL |
177 | }; |
178 | |
179 | ATTRIBUTE_ALIGNED(16) juint _log2_log10[] = /* two doubles, each stored as (low, high) 32-bit halves: the high and low multipliers applied to the integer exponent term in fast_log10 */ |
180 | { /* both have the same mantissa bits as the first _L_tbl_log10 entry with the exponent field lowered by 4, i.e. about log10(2)/16; the 1/16 matches the exponent being kept in bits 4..14 of eax (mask 32752 = 0x7ff0) before cvtsi2sdl */ |
181 | 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL |
182 | }; |
183 | |
184 | ATTRIBUTE_ALIGNED(16) juint _coeff_log10[] = /* six doubles, each stored as (low, high) 32-bit halves: coefficients used when evaluating the polynomial p(r) */ |
185 | { /* fast_log10 loads them as three 16-byte pairs: offset 0 into xmm3, offset 16 into xmm4, offset 32 into xmm2 */ |
186 | 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, |
187 | 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, |
188 | 0xdc77b115UL, 0xbff27af2UL |
189 | }; |
190 | |
191 | // fast_log10, 64-bit variant: emits SSE2 code that computes log10 of the double in xmm0 and leaves the result in xmm0. |
192 | // input: xmm0 |
193 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
194 | // eax, ecx, edx as integer scratch, r11 as table base (original note: rax, rdx, rcx, tmp - r11) |
195 | /* The emitted code reserves 24 bytes below rsp on entry and releases them at B1_5; no return instruction is emitted here. */ |
196 | // Code generated by Intel C compiler for LIBM library |
197 | |
198 | void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) { |
199 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
200 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
201 | Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_5, start; |
202 | |
203 | assert_different_registers(r11, eax, ecx, edx); |
204 | |
205 | address HIGHSIGMASK = (address)_HIGHSIGMASK_log10; |
206 | address LOG10_E = (address)_LOG10_E; |
207 | address L_tbl = (address)_L_tbl_log10; |
208 | address log2 = (address)_log2_log10; |
209 | address coeff = (address)_coeff_log10; |
210 | |
211 | bind(start); |
212 | subq(rsp, 24); /* reserve 24 bytes of stack scratch */ |
213 | movsd(Address(rsp, 0), xmm0); /* keep the original argument; the special-case paths reload it from here */ |
214 | |
215 | bind(B1_2); |
216 | xorpd(xmm2, xmm2); |
217 | movl(eax, 16368); /* 0x3ff0: top 16 bits of the double 1.0 */ |
218 | pinsrw(xmm2, eax, 3); |
219 | movl(ecx, 1054736384); /* 0x3ede0000: single-precision 0.43359375, the short log10(e) value LH */ |
220 | movdl(xmm7, ecx); |
221 | xorpd(xmm3, xmm3); |
222 | movl(edx, 30704); /* 0x77f0: exponent field 0x3ff + 896; appears to offset the exponent that B ends up with after its float bits are shifted into double position below */ |
223 | pinsrw(xmm3, edx, 3); |
224 | movdqu(xmm1, xmm0); |
225 | movl(edx, 32768); /* 0x8000: added to the float bits of the scaled reciprocal before masking (the +0.5 rounding step of the header formula, presumably) */ |
226 | movdl(xmm4, edx); |
227 | movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL |
228 | pextrw(eax, xmm0, 3); /* eax = sign, exponent and top 4 mantissa bits of x */ |
229 | por(xmm0, xmm2); |
230 | movl(ecx, 16352); /* 0x3fe0: exponent offset subtracted from eax on the main path */ |
231 | psrlq(xmm0, 27); |
232 | movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
233 | psrld(xmm0, 2); |
234 | rcpps(xmm0, xmm0); /* approximate reciprocal B0 */ |
235 | psllq(xmm1, 12); /* shift pair: clear sign and exponent, keep the 52 mantissa bits */ |
236 | pshufd(xmm6, xmm5, 78); /* 78 swaps the two qwords: xmm6 low lane = 0xffffe00000000000 mask */ |
237 | psrlq(xmm1, 12); |
238 | subl(eax, 16); |
239 | cmpl(eax, 32736); /* unsigned compare against 0x7fe0: true for zero exponent (wraps), all-ones exponent, or sign bit set */ |
240 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
241 | |
242 | bind(L_2TAG_PACKET_1_0_2); /* main path, also entered from the rescaled-denormal path */ |
243 | mulss(xmm0, xmm7); /* B0 * LH */ |
244 | por(xmm1, xmm3); |
245 | lea(r11, ExternalAddress(L_tbl)); |
246 | andpd(xmm5, xmm1); /* high part of the mantissa operand (low 27 bits cleared) */ |
247 | paddd(xmm0, xmm4); |
248 | subsd(xmm1, xmm5); /* low part of the mantissa operand */ |
249 | movdl(edx, xmm0); |
250 | psllq(xmm0, 29); |
251 | andpd(xmm0, xmm6); /* B: keep only the leading bits */ |
252 | andl(eax, 32752); /* 0x7ff0: isolate the exponent field */ |
253 | subl(eax, ecx); |
254 | cvtsi2sdl(xmm7, eax); /* exponent term as a double (still scaled by 16) */ |
255 | mulpd(xmm5, xmm0); |
256 | mulsd(xmm1, xmm0); |
257 | movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL |
258 | movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL |
259 | subsd(xmm5, xmm2); /* B*mx_high - LH */ |
260 | andl(edx, 16711680); /* 0xff0000: table index bits */ |
261 | shrl(edx, 12); /* index * 16 = byte offset of a 16-byte table entry */ |
262 | movdqu(xmm0, Address(r11, edx, Address::times_1, -1504)); /* index through the edx parameter (was the hard-coded rdx) so the load uses the register the index was computed in; the 32-bit shrl above leaves the upper half of that register zero */ |
263 | movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL |
264 | addsd(xmm1, xmm5); /* reduced argument r */ |
265 | movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL |
266 | mulsd(xmm6, xmm7); |
267 | pshufd(xmm5, xmm1, 68); /* 68 duplicates the low qword: (r, r) */ |
268 | mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL |
269 | mulsd(xmm3, xmm1); |
270 | addsd(xmm0, xmm6); |
271 | mulpd(xmm4, xmm5); |
272 | movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL |
273 | mulpd(xmm5, xmm5); |
274 | addpd(xmm4, xmm2); |
275 | mulpd(xmm3, xmm5); |
276 | pshufd(xmm2, xmm0, 228); /* 228 is the identity shuffle: plain copy */ |
277 | addsd(xmm0, xmm1); |
278 | mulsd(xmm4, xmm1); |
279 | subsd(xmm2, xmm0); |
280 | mulsd(xmm6, xmm1); |
281 | addsd(xmm1, xmm2); |
282 | pshufd(xmm2, xmm0, 238); /* 238 broadcasts the high qword: low part of the table entry */ |
283 | mulsd(xmm5, xmm5); |
284 | addsd(xmm7, xmm2); |
285 | addsd(xmm1, xmm6); |
286 | addpd(xmm4, xmm3); |
287 | addsd(xmm1, xmm7); |
288 | mulpd(xmm4, xmm5); |
289 | addsd(xmm1, xmm4); |
290 | pshufd(xmm5, xmm4, 238); |
291 | addsd(xmm1, xmm5); |
292 | addsd(xmm0, xmm1); /* final result = high part + accumulated low terms */ |
293 | jmp(B1_5); |
294 | |
295 | bind(L_2TAG_PACKET_0_0_2); /* special inputs: reload x and classify */ |
296 | movq(xmm0, Address(rsp, 0)); |
297 | movq(xmm1, Address(rsp, 0)); |
298 | addl(eax, 16); /* undo the earlier subl */ |
299 | cmpl(eax, 32768); /* sign bit set */ |
300 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); |
301 | cmpl(eax, 16); /* exponent field is zero: zero or denormal */ |
302 | jcc(Assembler::below, L_2TAG_PACKET_3_0_2); |
303 | |
304 | bind(L_2TAG_PACKET_4_0_2); /* all-ones exponent (+INF, or NaN of either sign): return x + x */ |
305 | addsd(xmm0, xmm0); |
306 | jmp(B1_5); |
307 | |
308 | bind(L_2TAG_PACKET_5_0_2); /* sign set and exponent all ones; flags still come from the cmpl in L_2TAG_PACKET_2_0_2 */ |
309 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); /* nonzero high mantissa bits: NaN */ |
310 | cmpl(edx, 0); |
311 | jcc(Assembler::above, L_2TAG_PACKET_4_0_2); /* nonzero low mantissa bits: NaN */ |
312 | jmp(L_2TAG_PACKET_6_0_2); /* -INF */ |
313 | |
314 | bind(L_2TAG_PACKET_3_0_2); /* zero or denormal */ |
315 | xorpd(xmm1, xmm1); |
316 | addsd(xmm1, xmm0); |
317 | movdl(edx, xmm1); |
318 | psrlq(xmm1, 32); |
319 | movdl(ecx, xmm1); |
320 | orl(edx, ecx); |
321 | cmpl(edx, 0); |
322 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); /* all bits zero: x is +0 */ |
323 | xorpd(xmm1, xmm1); |
324 | movl(eax, 18416); /* 0x47f0: top 16 bits of the double 2^128 */ |
325 | pinsrw(xmm1, eax, 3); |
326 | mulsd(xmm0, xmm1); /* scale the denormal by 2^128, then redo the entry sequence on the scaled value */ |
327 | xorpd(xmm2, xmm2); |
328 | movl(eax, 16368); |
329 | pinsrw(xmm2, eax, 3); |
330 | movdqu(xmm1, xmm0); |
331 | pextrw(eax, xmm0, 3); |
332 | por(xmm0, xmm2); |
333 | movl(ecx, 18416); /* exponent offset for the scaled value (eax is not decremented by 16 on this path) */ |
334 | psrlq(xmm0, 27); |
335 | movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
336 | psrld(xmm0, 2); |
337 | rcpps(xmm0, xmm0); |
338 | psllq(xmm1, 12); |
339 | pshufd(xmm6, xmm5, 78); |
340 | psrlq(xmm1, 12); |
341 | jmp(L_2TAG_PACKET_1_0_2); |
342 | |
343 | bind(L_2TAG_PACKET_2_0_2); /* sign bit set */ |
344 | movdl(edx, xmm1); /* low 32 bits of x */ |
345 | psrlq(xmm1, 32); |
346 | movdl(ecx, xmm1); /* high 32 bits of x */ |
347 | addl(ecx, ecx); /* shift the sign bit out */ |
348 | cmpl(ecx, -2097152); /* 0xffe00000: exponent field all ones */ |
349 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); |
350 | orl(edx, ecx); |
351 | cmpl(edx, 0); |
352 | jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); /* -0 */ |
353 | |
354 | bind(L_2TAG_PACKET_6_0_2); /* negative input: produce NaN as 0 * INF */ |
355 | xorpd(xmm1, xmm1); |
356 | xorpd(xmm0, xmm0); |
357 | movl(eax, 32752); /* 0x7ff0: top 16 bits of +INF */ |
358 | pinsrw(xmm1, eax, 3); |
359 | mulsd(xmm0, xmm1); |
360 | movl(Address(rsp, 16), 9); /* stored but never read in this function; presumably an error code left over from the LIBM source */ |
361 | jmp(L_2TAG_PACKET_8_0_2); |
362 | |
363 | bind(L_2TAG_PACKET_7_0_2); /* zero input: produce -INF as -1.0 / 0 */ |
364 | xorpd(xmm1, xmm1); |
365 | xorpd(xmm0, xmm0); |
366 | movl(eax, 49136); /* 0xbff0: top 16 bits of -1.0 */ |
367 | pinsrw(xmm0, eax, 3); |
368 | divsd(xmm0, xmm1); |
369 | movl(Address(rsp, 16), 8); /* stored but never read in this function; see the note on the store of 9 above */ |
370 | |
371 | bind(L_2TAG_PACKET_8_0_2); |
372 | movq(Address(rsp, 8), xmm0); /* result is spilled and immediately reloaded below */ |
373 | |
374 | bind(B1_3); |
375 | movq(xmm0, Address(rsp, 8)); |
376 | |
377 | bind(L_2TAG_PACKET_9_0_2); |
378 | |
379 | bind(B1_5); /* common exit: result in xmm0 */ |
380 | addq(rsp, 24); /* release the scratch area */ |
381 | |
382 | } |
383 | #else |
384 | // The 32 bit code is at most SSE2 compliant |
385 | ATTRIBUTE_ALIGNED(16) juint _static_const_table_log10[] = /* single constant block addressed by byte offset from tmp in the 32-bit fast_log10; doubles are stored as (low, high) 32-bit halves */ |
386 | { /* layout by byte offset: 0..2063 lookup table of 129 16-byte (high, low) entries; 2064 exponent multipliers (same values as _log2_log10); 2080 polynomial coefficients (48 bytes); 2128 the two AND masks; 2144 LH followed at 2152 by the linear correction factor */ |
387 | 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, |
388 | 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, |
389 | 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, |
390 | 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, |
391 | 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, |
392 | 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, |
393 | 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, |
394 | 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, |
395 | 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, |
396 | 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, |
397 | 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, |
398 | 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, |
399 | 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, |
400 | 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, |
401 | 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, |
402 | 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, |
403 | 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, |
404 | 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, |
405 | 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, |
406 | 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, |
407 | 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, |
408 | 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, |
409 | 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, |
410 | 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, |
411 | 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, |
412 | 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, |
413 | 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, |
414 | 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, |
415 | 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, |
416 | 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, |
417 | 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, |
418 | 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, |
419 | 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, |
420 | 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, |
421 | 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, |
422 | 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, |
423 | 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, |
424 | 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, |
425 | 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, |
426 | 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, |
427 | 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, |
428 | 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, |
429 | 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, |
430 | 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, |
431 | 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, |
432 | 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, |
433 | 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, |
434 | 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, |
435 | 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, |
436 | 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, |
437 | 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, |
438 | 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, |
439 | 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, |
440 | 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, |
441 | 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, |
442 | 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, |
443 | 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, |
444 | 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, |
445 | 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, |
446 | 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, |
447 | 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, |
448 | 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, |
449 | 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, |
450 | 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, |
451 | 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, |
452 | 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, |
453 | 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, |
454 | 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, |
455 | 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, |
456 | 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, |
457 | 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, |
458 | 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, |
459 | 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, |
460 | 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, |
461 | 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, |
462 | 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, |
463 | 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, |
464 | 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, |
465 | 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, |
466 | 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, |
467 | 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, |
468 | 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, |
469 | 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, |
470 | 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, |
471 | 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, |
472 | 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, |
473 | 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, |
474 | 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, |
475 | 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, |
476 | 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, |
477 | 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, |
478 | 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, |
479 | 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, |
480 | 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, |
481 | 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, |
482 | 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, |
483 | 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, |
484 | 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, |
485 | 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, |
486 | 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, |
487 | 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, |
488 | 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, |
489 | 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, |
490 | 0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL, |
491 | 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, |
492 | 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, |
493 | 0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL, |
494 | 0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL |
495 | }; |
496 | // fast_log10, 32-bit variant: emits SSE2 code computing log10 of a double; registers: |
497 | // input: read from the stack at rsp + 112 after the 104-byte adjustment (the incoming xmm0 value is overwritten by that load); result: pushed on the x87 stack with fld_d |
498 | // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 |
499 | // rax, rdx, rcx, rbx (tmp) - tmp is saved at rsp + 40 on entry, used as the constant-table base, and restored at the end |
500 | |
501 | void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { |
502 | |
503 | Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; |
504 | Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; |
505 | Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start; |
506 | |
507 | assert_different_registers(tmp, eax, ecx, edx); |
508 | |
509 | address static_const_table_log10 = (address)_static_const_table_log10; |
510 | |
511 | bind(start); |
512 | subl(rsp, 104); /* reserve 104 bytes of stack scratch. NOTE(review): no matching addl is emitted in this function; presumably the caller's epilogue restores rsp - confirm */ |
513 | movl(Address(rsp, 40), tmp); /* save tmp; restored at L_2TAG_PACKET_10_0_2 */ |
514 | lea(tmp, ExternalAddress(static_const_table_log10)); |
515 | xorpd(xmm2, xmm2); |
516 | movl(eax, 16368); /* 0x3ff0: top 16 bits of the double 1.0 */ |
517 | pinsrw(xmm2, eax, 3); |
518 | movl(ecx, 1054736384); /* 0x3ede0000: single-precision 0.43359375, the short log10(e) value LH */ |
519 | movdl(xmm7, ecx); |
520 | xorpd(xmm3, xmm3); |
521 | movl(edx, 30704); /* 0x77f0: exponent field 0x3ff + 896; appears to offset the exponent that B ends up with after its float bits are shifted into double position below */ |
522 | pinsrw(xmm3, edx, 3); |
523 | movsd(xmm0, Address(rsp, 112)); /* load the argument x from the stack; 112 = 104 + 8, presumably skipping a saved frame pointer and the return address - confirm against the stub that calls this */ |
524 | movdqu(xmm1, xmm0); |
525 | movl(edx, 32768); /* 0x8000: added to the float bits of the scaled reciprocal before masking */ |
526 | movdl(xmm4, edx); |
527 | movdqu(xmm5, Address(tmp, 2128)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL |
528 | pextrw(eax, xmm0, 3); /* eax = sign, exponent and top 4 mantissa bits of x */ |
529 | por(xmm0, xmm2); |
530 | movl(ecx, 16352); /* 0x3fe0: exponent offset subtracted from eax on the main path */ |
531 | psllq(xmm0, 5); |
532 | movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL |
533 | psrlq(xmm0, 34); |
534 | rcpss(xmm0, xmm0); /* approximate reciprocal B0 */ |
535 | psllq(xmm1, 12); /* shift pair: clear sign and exponent, keep the 52 mantissa bits */ |
536 | pshufd(xmm6, xmm5, 78); /* 78 swaps the two qwords: xmm6 low lane = 0xffffe00000000000 mask */ |
537 | psrlq(xmm1, 12); |
538 | subl(eax, 16); |
539 | cmpl(eax, 32736); /* unsigned compare against 0x7fe0: true for zero exponent (wraps), all-ones exponent, or sign bit set */ |
540 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); |
541 | |
542 | bind(L_2TAG_PACKET_1_0_2); /* main path, also entered from the rescaled-denormal path */ |
543 | mulss(xmm0, xmm7); /* B0 * LH */ |
544 | por(xmm1, xmm3); |
545 | andpd(xmm5, xmm1); /* high part of the mantissa operand (low 27 bits cleared) */ |
546 | paddd(xmm0, xmm4); |
547 | subsd(xmm1, xmm5); /* low part of the mantissa operand */ |
548 | movdl(edx, xmm0); |
549 | psllq(xmm0, 29); |
550 | andpd(xmm0, xmm6); /* B: keep only the leading bits */ |
551 | andl(eax, 32752); /* 0x7ff0: isolate the exponent field */ |
552 | subl(eax, ecx); |
553 | cvtsi2sdl(xmm7, eax); /* exponent term as a double (still scaled by 16) */ |
554 | mulpd(xmm5, xmm0); |
555 | mulsd(xmm1, xmm0); |
556 | movsd(xmm6, Address(tmp, 2064)); //0x509f7800UL, 0x3f934413UL |
557 | movdqu(xmm3, Address(tmp, 2080)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL |
558 | subsd(xmm5, xmm2); /* B*mx_high - LH */ |
559 | andl(edx, 16711680); /* 0xff0000: table index bits */ |
560 | shrl(edx, 12); /* index * 16 = byte offset of a 16-byte table entry */ |
561 | movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504)); /* fetch the (high, low) table entry; -1504 rebases the index range onto the start of the table */ |
562 | movdqu(xmm4, Address(tmp, 2096)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL |
563 | addsd(xmm1, xmm5); /* reduced argument r */ |
564 | movdqu(xmm2, Address(tmp, 2112)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL |
565 | mulsd(xmm6, xmm7); |
566 | pshufd(xmm5, xmm1, 68); /* 68 duplicates the low qword: (r, r) */ |
567 | mulsd(xmm7, Address(tmp, 2072)); //0x1f12b358UL, 0x3cdfef31UL |
568 | mulsd(xmm3, xmm1); |
569 | addsd(xmm0, xmm6); |
570 | mulpd(xmm4, xmm5); |
571 | movsd(xmm6, Address(tmp, 2152)); //0xbf2e4108UL, 0x3f5a7a6cUL |
572 | mulpd(xmm5, xmm5); |
573 | addpd(xmm4, xmm2); |
574 | mulpd(xmm3, xmm5); |
575 | pshufd(xmm2, xmm0, 228); /* 228 is the identity shuffle: plain copy */ |
576 | addsd(xmm0, xmm1); |
577 | mulsd(xmm4, xmm1); |
578 | subsd(xmm2, xmm0); |
579 | mulsd(xmm6, xmm1); |
580 | addsd(xmm1, xmm2); |
581 | pshufd(xmm2, xmm0, 238); /* 238 broadcasts the high qword: low part of the table entry */ |
582 | mulsd(xmm5, xmm5); |
583 | addsd(xmm7, xmm2); |
584 | addsd(xmm1, xmm6); |
585 | addpd(xmm4, xmm3); |
586 | addsd(xmm1, xmm7); |
587 | mulpd(xmm4, xmm5); |
588 | addsd(xmm1, xmm4); |
589 | pshufd(xmm5, xmm4, 238); |
590 | addsd(xmm1, xmm5); |
591 | addsd(xmm0, xmm1); /* final result = high part + accumulated low terms */ |
592 | jmp(L_2TAG_PACKET_2_0_2); |
593 | |
594 | bind(L_2TAG_PACKET_0_0_2); /* special inputs: reload x and classify */ |
595 | movsd(xmm0, Address(rsp, 112)); // reload the original argument from the stack (this is not a table constant) |
596 | movdqu(xmm1, xmm0); |
597 | addl(eax, 16); /* undo the earlier subl */ |
598 | cmpl(eax, 32768); /* sign bit set */ |
599 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); |
600 | cmpl(eax, 16); /* exponent field is zero: zero or denormal */ |
601 | jcc(Assembler::below, L_2TAG_PACKET_4_0_2); |
602 | |
603 | bind(L_2TAG_PACKET_5_0_2); /* all-ones exponent (+INF, or NaN of either sign): return x + x */ |
604 | addsd(xmm0, xmm0); |
605 | jmp(L_2TAG_PACKET_2_0_2); |
606 | |
607 | bind(L_2TAG_PACKET_6_0_2); /* sign set and exponent all ones; flags still come from the cmpl in L_2TAG_PACKET_3_0_2 */ |
608 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); /* nonzero high mantissa bits: NaN */ |
609 | cmpl(edx, 0); |
610 | jcc(Assembler::above, L_2TAG_PACKET_5_0_2); /* nonzero low mantissa bits: NaN */ |
611 | jmp(L_2TAG_PACKET_7_0_2); /* -INF */ |
612 | |
613 | bind(L_2TAG_PACKET_3_0_2); /* sign bit set */ |
614 | movdl(edx, xmm1); /* low 32 bits of x */ |
615 | psrlq(xmm1, 32); |
616 | movdl(ecx, xmm1); /* high 32 bits of x */ |
617 | addl(ecx, ecx); /* shift the sign bit out */ |
618 | cmpl(ecx, -2097152); /* 0xffe00000: exponent field all ones */ |
619 | jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); |
620 | orl(edx, ecx); |
621 | cmpl(edx, 0); |
622 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); /* -0 */ |
623 | |
624 | bind(L_2TAG_PACKET_7_0_2); /* negative input: produce NaN as 0 * INF */ |
625 | xorpd(xmm1, xmm1); |
626 | xorpd(xmm0, xmm0); |
627 | movl(eax, 32752); /* 0x7ff0: top 16 bits of +INF */ |
628 | pinsrw(xmm1, eax, 3); |
629 | movl(edx, 9); /* not read afterwards in this function; presumably an error code left over from the LIBM source */ |
630 | mulsd(xmm0, xmm1); |
631 | |
632 | bind(L_2TAG_PACKET_9_0_2); /* common tail of the error paths: hand the special result to the x87 stack */ |
633 | movsd(Address(rsp, 0), xmm0); |
634 | movsd(xmm0, Address(rsp, 112)); // reload the original argument from the stack (this is not a table constant); xmm0 is not used again in this function |
635 | fld_d(Address(rsp, 0)); |
636 | jmp(L_2TAG_PACKET_10_0_2); |
637 | |
638 | bind(L_2TAG_PACKET_8_0_2); /* zero input: produce -INF as -1.0 / 0 */ |
639 | xorpd(xmm1, xmm1); |
640 | xorpd(xmm0, xmm0); |
641 | movl(eax, 49136); /* 0xbff0: top 16 bits of -1.0 */ |
642 | pinsrw(xmm0, eax, 3); |
643 | divsd(xmm0, xmm1); |
644 | movl(edx, 8); /* not read afterwards in this function; see the note on the value 9 above */ |
645 | jmp(L_2TAG_PACKET_9_0_2); |
646 | |
647 | bind(L_2TAG_PACKET_4_0_2); /* zero or denormal */ |
648 | movdl(edx, xmm1); |
649 | psrlq(xmm1, 32); |
650 | movdl(ecx, xmm1); |
651 | orl(edx, ecx); |
652 | cmpl(edx, 0); |
653 | jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); /* all bits zero: x is +0 */ |
654 | xorpd(xmm1, xmm1); |
655 | movl(eax, 18416); /* 0x47f0: top 16 bits of the double 2^128 */ |
656 | pinsrw(xmm1, eax, 3); |
657 | mulsd(xmm0, xmm1); /* scale the denormal by 2^128, then redo the entry sequence on the scaled value */ |
658 | xorpd(xmm2, xmm2); |
659 | movl(eax, 16368); |
660 | pinsrw(xmm2, eax, 3); |
661 | movdqu(xmm1, xmm0); |
662 | pextrw(eax, xmm0, 3); |
663 | por(xmm0, xmm2); |
664 | movl(ecx, 18416); /* exponent offset for the scaled value (eax is not decremented by 16 on this path) */ |
665 | psllq(xmm0, 5); |
666 | movsd(xmm2, Address(tmp, 2144)); //0x00000000UL, 0x3fdbc000UL |
667 | psrlq(xmm0, 34); |
668 | rcpss(xmm0, xmm0); |
669 | psllq(xmm1, 12); |
670 | pshufd(xmm6, xmm5, 78); |
671 | psrlq(xmm1, 12); |
672 | jmp(L_2TAG_PACKET_1_0_2); |
673 | |
674 | bind(L_2TAG_PACKET_2_0_2); /* normal exit: move the SSE result onto the x87 stack via memory */ |
675 | movsd(Address(rsp, 24), xmm0); |
676 | fld_d(Address(rsp, 24)); |
677 | |
678 | bind(L_2TAG_PACKET_10_0_2); |
679 | movl(tmp, Address(rsp, 40)); /* restore the caller's tmp register */ |
680 | |
681 | } |
682 | #endif |
683 | |