1/*
2* Copyright (c) 2016, Intel Corporation.
3* Intel Math Library (LIBM) Source Code
4*
5* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6*
7* This code is free software; you can redistribute it and/or modify it
8* under the terms of the GNU General Public License version 2 only, as
9* published by the Free Software Foundation.
10*
11* This code is distributed in the hope that it will be useful, but WITHOUT
12* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14* version 2 for more details (a copy is included in the LICENSE file that
15* accompanied this code).
16*
17* You should have received a copy of the GNU General Public License version
18* 2 along with this work; if not, write to the Free Software Foundation,
19* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20*
21* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22* or visit www.oracle.com if you need additional information or have any
23* questions.
24*
25*/
26
27#include "precompiled.hpp"
28#include "asm/assembler.hpp"
29#include "asm/assembler.inline.hpp"
30#include "macroAssembler_x86.hpp"
31#include "runtime/stubRoutines.hpp"
32#include "utilities/globalDefinitions.hpp"
33
34/******************************************************************************/
35// ALGORITHM DESCRIPTION - EXP()
36// ---------------------
37//
38// Description:
39// Let K = 64 (table size).
//     e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
42// where
43// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
44// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
//     values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
47//
//     P(y) is a minimax polynomial approximation of exp(y)-1
//     on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
50//
51// To avoid problems with arithmetic overflow and underflow,
//   the value of 2^n is safely computed as 2^n1 * 2^n2, where n1 is in [-BIAS/2..BIAS/2]
54// where BIAS is a value of exponent bias.
55//
56// Special cases:
57// exp(NaN) = NaN
58// exp(+INF) = +INF
59// exp(-INF) = 0
60// exp(x) = 1 for subnormals
61// for finite argument, only exp(0)=1 is exact
62// For IEEE double
63// if x > 709.782712893383973096 then exp(x) overflow
64// if x < -745.133219101941108420 then exp(x) underflow
65//
66/******************************************************************************/
67
68#ifdef _LP64
69// The 64 bit code is at most SSE2 compliant
70ATTRIBUTE_ALIGNED(16) juint _cv[] =
71{
72 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
73 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
74 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
75 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
76 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
77};
78
79ATTRIBUTE_ALIGNED(16) juint _shifter[] =
80{
81 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
82};
83
84ATTRIBUTE_ALIGNED(16) juint _mmask[] =
85{
86 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
87};
88
89ATTRIBUTE_ALIGNED(16) juint _bias[] =
90{
91 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
92};
93
94ATTRIBUTE_ALIGNED(16) juint _Tbl_addr[] =
95{
96 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
97 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
98 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
99 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
100 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
101 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
102 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
103 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
104 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
105 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
106 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
107 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
108 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
109 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
110 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
111 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
112 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
113 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
114 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
115 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
116 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
117 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
118 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
119 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
120 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
121 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
122 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
123 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
124 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
125 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
126 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
127 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
128 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
129 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
130 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
131 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
132 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
133 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
134 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
135 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
136 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
137 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
138 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
139 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
140 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
141 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
142 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
143 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
144 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
145 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
146 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
147 0x000fa7c1UL
148};
149
150ATTRIBUTE_ALIGNED(16) juint _ALLONES[] =
151{
152 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
153};
154
155ATTRIBUTE_ALIGNED(16) juint _ebias[] =
156{
157 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
158};
159
160ATTRIBUTE_ALIGNED(4) juint _XMAX[] =
161{
162 0xffffffffUL, 0x7fefffffUL
163};
164
165ATTRIBUTE_ALIGNED(4) juint _XMIN[] =
166{
167 0x00000000UL, 0x00100000UL
168};
169
170ATTRIBUTE_ALIGNED(4) juint _INF[] =
171{
172 0x00000000UL, 0x7ff00000UL
173};
174
175ATTRIBUTE_ALIGNED(4) juint _ZERO[] =
176{
177 0x00000000UL, 0x00000000UL
178};
179
180ATTRIBUTE_ALIGNED(4) juint _ONE_val[] =
181{
182 0x00000000UL, 0x3ff00000UL
183};
184
185
186// Registers:
187// input: xmm0
188// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
189// rax, rdx, rcx, tmp - r11
190
191// Code generated by Intel C compiler for LIBM library
192
193void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
194 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
195 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
196 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
197 Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
198
199 assert_different_registers(tmp, eax, ecx, edx);
200 jmp(start);
201 address cv = (address)_cv;
202 address Shifter = (address)_shifter;
203 address mmask = (address)_mmask;
204 address bias = (address)_bias;
205 address Tbl_addr = (address)_Tbl_addr;
206 address ALLONES = (address)_ALLONES;
207 address ebias = (address)_ebias;
208 address XMAX = (address)_XMAX;
209 address XMIN = (address)_XMIN;
210 address INF = (address)_INF;
211 address ZERO = (address)_ZERO;
212 address ONE_val = (address)_ONE_val;
213
214 bind(start);
215 subq(rsp, 24);
216 movsd(Address(rsp, 8), xmm0);
217 unpcklpd(xmm0, xmm0);
218 movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
219 movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
220 movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
221 movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
222 pextrw(eax, xmm0, 3);
223 andl(eax, 32767);
224 movl(edx, 16527);
225 subl(edx, eax);
226 subl(eax, 15504);
227 orl(edx, eax);
228 cmpl(edx, INT_MIN);
229 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
230 mulpd(xmm1, xmm0);
231 addpd(xmm1, xmm6);
232 movapd(xmm7, xmm1);
233 subpd(xmm1, xmm6);
234 mulpd(xmm2, xmm1);
235 movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
236 mulpd(xmm3, xmm1);
237 movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
238 subpd(xmm0, xmm2);
239 movdl(eax, xmm7);
240 movl(ecx, eax);
241 andl(ecx, 63);
242 shll(ecx, 4);
243 sarl(eax, 6);
244 movl(edx, eax);
245 movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
246 pand(xmm7, xmm6);
247 movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
248 paddq(xmm7, xmm6);
249 psllq(xmm7, 46);
250 subpd(xmm0, xmm3);
251 lea(tmp, ExternalAddress(Tbl_addr));
252 movdqu(xmm2, Address(ecx, tmp));
253 mulpd(xmm4, xmm0);
254 movapd(xmm6, xmm0);
255 movapd(xmm1, xmm0);
256 mulpd(xmm6, xmm6);
257 mulpd(xmm0, xmm6);
258 addpd(xmm5, xmm4);
259 mulsd(xmm0, xmm6);
260 mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
261 addsd(xmm1, xmm2);
262 unpckhpd(xmm2, xmm2);
263 mulpd(xmm0, xmm5);
264 addsd(xmm1, xmm0);
265 por(xmm2, xmm7);
266 unpckhpd(xmm0, xmm0);
267 addsd(xmm0, xmm1);
268 addsd(xmm0, xmm6);
269 addl(edx, 894);
270 cmpl(edx, 1916);
271 jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
272 mulsd(xmm0, xmm2);
273 addsd(xmm0, xmm2);
274 jmp(B1_5);
275
276 bind(L_2TAG_PACKET_1_0_2);
277 xorpd(xmm3, xmm3);
278 movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
279 movl(edx, -1022);
280 subl(edx, eax);
281 movdl(xmm5, edx);
282 psllq(xmm4, xmm5);
283 movl(ecx, eax);
284 sarl(eax, 1);
285 pinsrw(xmm3, eax, 3);
286 movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
287 psllq(xmm3, 4);
288 psubd(xmm2, xmm3);
289 mulsd(xmm0, xmm2);
290 cmpl(edx, 52);
291 jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
292 pand(xmm4, xmm2);
293 paddd(xmm3, xmm6);
294 subsd(xmm2, xmm4);
295 addsd(xmm0, xmm2);
296 cmpl(ecx, 1023);
297 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
298 pextrw(ecx, xmm0, 3);
299 andl(ecx, 32768);
300 orl(edx, ecx);
301 cmpl(edx, 0);
302 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
303 movapd(xmm6, xmm0);
304 addsd(xmm0, xmm4);
305 mulsd(xmm0, xmm3);
306 pextrw(ecx, xmm0, 3);
307 andl(ecx, 32752);
308 cmpl(ecx, 0);
309 jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
310 jmp(B1_5);
311
312 bind(L_2TAG_PACKET_5_0_2);
313 mulsd(xmm6, xmm3);
314 mulsd(xmm4, xmm3);
315 movdqu(xmm0, xmm6);
316 pxor(xmm6, xmm4);
317 psrad(xmm6, 31);
318 pshufd(xmm6, xmm6, 85);
319 psllq(xmm0, 1);
320 psrlq(xmm0, 1);
321 pxor(xmm0, xmm6);
322 psrlq(xmm6, 63);
323 paddq(xmm0, xmm6);
324 paddq(xmm0, xmm4);
325 movl(Address(rsp, 0), 15);
326 jmp(L_2TAG_PACKET_6_0_2);
327
328 bind(L_2TAG_PACKET_4_0_2);
329 addsd(xmm0, xmm4);
330 mulsd(xmm0, xmm3);
331 jmp(B1_5);
332
333 bind(L_2TAG_PACKET_3_0_2);
334 addsd(xmm0, xmm4);
335 mulsd(xmm0, xmm3);
336 pextrw(ecx, xmm0, 3);
337 andl(ecx, 32752);
338 cmpl(ecx, 32752);
339 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
340 jmp(B1_5);
341
342 bind(L_2TAG_PACKET_2_0_2);
343 paddd(xmm3, xmm6);
344 addpd(xmm0, xmm2);
345 mulsd(xmm0, xmm3);
346 movl(Address(rsp, 0), 15);
347 jmp(L_2TAG_PACKET_6_0_2);
348
349 bind(L_2TAG_PACKET_8_0_2);
350 cmpl(eax, 2146435072);
351 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
352 movl(eax, Address(rsp, 12));
353 cmpl(eax, INT_MIN);
354 jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
355 movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
356 mulsd(xmm0, xmm0);
357
358 bind(L_2TAG_PACKET_7_0_2);
359 movl(Address(rsp, 0), 14);
360 jmp(L_2TAG_PACKET_6_0_2);
361
362 bind(L_2TAG_PACKET_10_0_2);
363 movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
364 mulsd(xmm0, xmm0);
365 movl(Address(rsp, 0), 15);
366 jmp(L_2TAG_PACKET_6_0_2);
367
368 bind(L_2TAG_PACKET_9_0_2);
369 movl(edx, Address(rsp, 8));
370 cmpl(eax, 2146435072);
371 jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
372 cmpl(edx, 0);
373 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
374 movl(eax, Address(rsp, 12));
375 cmpl(eax, 2146435072);
376 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
377 movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
378 jmp(B1_5);
379
380 bind(L_2TAG_PACKET_12_0_2);
381 movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
382 jmp(B1_5);
383
384 bind(L_2TAG_PACKET_11_0_2);
385 movsd(xmm0, Address(rsp, 8));
386 addsd(xmm0, xmm0);
387 jmp(B1_5);
388
389 bind(L_2TAG_PACKET_0_0_2);
390 movl(eax, Address(rsp, 12));
391 andl(eax, 2147483647);
392 cmpl(eax, 1083179008);
393 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
394 movsd(Address(rsp, 8), xmm0);
395 addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
396 jmp(B1_5);
397
398 bind(L_2TAG_PACKET_6_0_2);
399 movq(Address(rsp, 16), xmm0);
400
401 bind(B1_3);
402 movq(xmm0, Address(rsp, 16));
403
404 bind(B1_5);
405 addq(rsp, 24);
406}
407#else
408// The 32 bit code is at most SSE2 compliant
409ATTRIBUTE_ALIGNED(16) juint _static_const_table[] =
410{
411 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
412 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
413 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
414 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
415 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
416 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
417 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
418 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
419 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
420 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
421 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
422 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
423 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
424 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
425 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
426 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
427 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
428 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
429 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
430 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
431 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
432 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
433 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
434 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
435 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
436 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
437 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
438 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
439 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
440 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
441 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
442 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
443 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
444 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
445 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
446 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
447 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
448 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
449 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
450 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
451 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
452 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
453 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
454 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
455 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
456 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
457 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
458 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
459 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
460 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
461 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
462 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
463 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
464 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
465 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
466 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
467 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
468 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
469 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
470 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
471 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
472 0x00100000UL
473};
474
475//registers,
476// input: (rbp + 8)
477// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
478// rax, rdx, rcx, rbx (tmp)
479
480// Code generated by Intel C compiler for LIBM library
481
482void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
483 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
484 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
485 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
486 Label L_2TAG_PACKET_12_0_2, start;
487
488 assert_different_registers(tmp, eax, ecx, edx);
489 jmp(start);
490 address static_const_table = (address)_static_const_table;
491
492 bind(start);
493 subl(rsp, 120);
494 movl(Address(rsp, 64), tmp);
495 lea(tmp, ExternalAddress(static_const_table));
496 movdqu(xmm0, Address(rsp, 128));
497 unpcklpd(xmm0, xmm0);
498 movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
499 movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
500 movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
501 movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
502 pextrw(eax, xmm0, 3);
503 andl(eax, 32767);
504 movl(edx, 16527);
505 subl(edx, eax);
506 subl(eax, 15504);
507 orl(edx, eax);
508 cmpl(edx, INT_MIN);
509 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
510 mulpd(xmm1, xmm0);
511 addpd(xmm1, xmm6);
512 movapd(xmm7, xmm1);
513 subpd(xmm1, xmm6);
514 mulpd(xmm2, xmm1);
515 movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
516 mulpd(xmm3, xmm1);
517 movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
518 subpd(xmm0, xmm2);
519 movdl(eax, xmm7);
520 movl(ecx, eax);
521 andl(ecx, 63);
522 shll(ecx, 4);
523 sarl(eax, 6);
524 movl(edx, eax);
525 movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
526 pand(xmm7, xmm6);
527 movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
528 paddq(xmm7, xmm6);
529 psllq(xmm7, 46);
530 subpd(xmm0, xmm3);
531 movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
532 mulpd(xmm4, xmm0);
533 movapd(xmm6, xmm0);
534 movapd(xmm1, xmm0);
535 mulpd(xmm6, xmm6);
536 mulpd(xmm0, xmm6);
537 addpd(xmm5, xmm4);
538 mulsd(xmm0, xmm6);
539 mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
540 addsd(xmm1, xmm2);
541 unpckhpd(xmm2, xmm2);
542 mulpd(xmm0, xmm5);
543 addsd(xmm1, xmm0);
544 por(xmm2, xmm7);
545 unpckhpd(xmm0, xmm0);
546 addsd(xmm0, xmm1);
547 addsd(xmm0, xmm6);
548 addl(edx, 894);
549 cmpl(edx, 1916);
550 jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
551 mulsd(xmm0, xmm2);
552 addsd(xmm0, xmm2);
553 jmp(L_2TAG_PACKET_2_0_2);
554
555 bind(L_2TAG_PACKET_1_0_2);
556 fnstcw(Address(rsp, 24));
557 movzwl(edx, Address(rsp, 24));
558 orl(edx, 768);
559 movw(Address(rsp, 28), edx);
560 fldcw(Address(rsp, 28));
561 movl(edx, eax);
562 sarl(eax, 1);
563 subl(edx, eax);
564 movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
565 pandn(xmm6, xmm2);
566 addl(eax, 1023);
567 movdl(xmm3, eax);
568 psllq(xmm3, 52);
569 por(xmm6, xmm3);
570 addl(edx, 1023);
571 movdl(xmm4, edx);
572 psllq(xmm4, 52);
573 movsd(Address(rsp, 8), xmm0);
574 fld_d(Address(rsp, 8));
575 movsd(Address(rsp, 16), xmm6);
576 fld_d(Address(rsp, 16));
577 fmula(1);
578 faddp(1);
579 movsd(Address(rsp, 8), xmm4);
580 fld_d(Address(rsp, 8));
581 fmulp(1);
582 fstp_d(Address(rsp, 8));
583 movsd(xmm0, Address(rsp, 8));
584 fldcw(Address(rsp, 24));
585 pextrw(ecx, xmm0, 3);
586 andl(ecx, 32752);
587 cmpl(ecx, 32752);
588 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
589 cmpl(ecx, 0);
590 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
591 jmp(L_2TAG_PACKET_2_0_2);
592 cmpl(ecx, INT_MIN);
593 jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
594 cmpl(ecx, -1064950997);
595 jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
596 jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
597 movl(edx, Address(rsp, 128));
598 cmpl(edx, -17155601);
599 jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
600 jmp(L_2TAG_PACKET_4_0_2);
601
602 bind(L_2TAG_PACKET_3_0_2);
603 movl(edx, 14);
604 jmp(L_2TAG_PACKET_5_0_2);
605
606 bind(L_2TAG_PACKET_4_0_2);
607 movl(edx, 15);
608
609 bind(L_2TAG_PACKET_5_0_2);
610 movsd(Address(rsp, 0), xmm0);
611 movsd(xmm0, Address(rsp, 128));
612 fld_d(Address(rsp, 0));
613 jmp(L_2TAG_PACKET_6_0_2);
614
615 bind(L_2TAG_PACKET_7_0_2);
616 cmpl(eax, 2146435072);
617 jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
618 movl(eax, Address(rsp, 132));
619 cmpl(eax, INT_MIN);
620 jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
621 movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
622 mulsd(xmm0, xmm0);
623 movl(edx, 14);
624 jmp(L_2TAG_PACKET_5_0_2);
625
626 bind(L_2TAG_PACKET_9_0_2);
627 movsd(xmm0, Address(tmp, 1216));
628 mulsd(xmm0, xmm0);
629 movl(edx, 15);
630 jmp(L_2TAG_PACKET_5_0_2);
631
632 bind(L_2TAG_PACKET_8_0_2);
633 movl(edx, Address(rsp, 128));
634 cmpl(eax, 2146435072);
635 jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
636 cmpl(edx, 0);
637 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
638 movl(eax, Address(rsp, 132));
639 cmpl(eax, 2146435072);
640 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
641 movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
642 jmp(L_2TAG_PACKET_2_0_2);
643
644 bind(L_2TAG_PACKET_11_0_2);
645 movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
646 jmp(L_2TAG_PACKET_2_0_2);
647
648 bind(L_2TAG_PACKET_10_0_2);
649 movsd(xmm0, Address(rsp, 128));
650 addsd(xmm0, xmm0);
651 jmp(L_2TAG_PACKET_2_0_2);
652
653 bind(L_2TAG_PACKET_0_0_2);
654 movl(eax, Address(rsp, 132));
655 andl(eax, 2147483647);
656 cmpl(eax, 1083179008);
657 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
658 movsd(xmm0, Address(rsp, 128));
659 addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
660 jmp(L_2TAG_PACKET_2_0_2);
661
662 bind(L_2TAG_PACKET_2_0_2);
663 movsd(Address(rsp, 48), xmm0);
664 fld_d(Address(rsp, 48));
665
666 bind(L_2TAG_PACKET_6_0_2);
667 movl(tmp, Address(rsp, 64));
668}
669#endif
670