1 | /* |
2 | Copyright (c) 2005-2019 Intel Corporation |
3 | |
4 | Licensed under the Apache License, Version 2.0 (the "License"); |
5 | you may not use this file except in compliance with the License. |
6 | You may obtain a copy of the License at |
7 | |
8 | http://www.apache.org/licenses/LICENSE-2.0 |
9 | |
10 | Unless required by applicable law or agreed to in writing, software |
11 | distributed under the License is distributed on an "AS IS" BASIS, |
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | See the License for the specific language governing permissions and |
14 | limitations under the License. |
15 | */ |
16 | |
// Include a system header first so that the standard library is not included for the first time while "private" is redefined as "public".
18 | #include <cstddef> |
19 | #define private public |
20 | #include "tbb/tbb_machine.h" |
21 | #undef private |
22 | #include "harness_assert.h" |
23 | |
24 | #if ( __TBB_x86_32 || __TBB_x86_64 ) && __TBB_CPU_CTL_ENV_PRESENT && !defined(__TBB_WIN32_USE_CL_BUILTINS) |
25 | |
// Rounding-mode values, expressed in the bit positions that the code below
// masks out of cpu_ctl_env::x87cw.
const int FE_TONEAREST  = 0x0000;
const int FE_DOWNWARD   = 0x0400;
const int FE_UPWARD     = 0x0800;
const int FE_TOWARDZERO = 0x0c00;

// FE_TOWARDZERO has both rounding bits set, so it doubles as the field mask.
const int FE_RND_MODE_MASK = FE_TOWARDZERO;
// The same two-bit field, three bit positions higher, is used for cpu_ctl_env::mxcsr.
const int SSE_RND_MODE_MASK = FE_RND_MODE_MASK << 3;

// Mode bits tracked in mxcsr (DAZ and FTZ) and their combined mask.
const int SSE_DAZ = 0x0040;
const int SSE_FTZ = 0x8000;
const int SSE_MODE_MASK = SSE_DAZ | SSE_FTZ;

// Low six bits of mxcsr.
const int SSE_STATUS_MASK = 0x3F;

// Every DAZ/FTZ combination that the FP-mode helpers cycle through.
const int NumSseModes = 4;
const int SseModes[NumSseModes] = {
    0,
    SSE_DAZ,
    SSE_FTZ,
    SSE_DAZ | SSE_FTZ
};
39 | |
40 | #if _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__ |
// MinGW uses the inline implementation from tbb/machine/linux_intel64.h.
// When inline asm is not available, the library uses out-of-line assembly that is not exported,
// so these functions are reimplemented here.
44 | |
45 | #include <float.h> |
46 | |
47 | inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* fe ) { |
48 | fe->x87cw = short(_control87(0, 0) & _MCW_RC) << 2; |
49 | fe->mxcsr = _mm_getcsr(); |
50 | } |
51 | inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* fe ) { |
52 | ASSERT( (fe->x87cw & FE_RND_MODE_MASK) == ((fe->x87cw & FE_RND_MODE_MASK) >> 2 & _MCW_RC) << 2, "Check float.h constants" ); |
53 | _control87( (fe->x87cw & FE_RND_MODE_MASK) >> 6, _MCW_RC ); |
54 | _mm_setcsr( fe->mxcsr ); |
55 | } |
56 | |
57 | #endif /* _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__ */ |
58 | |
59 | inline int GetRoundingMode ( bool checkConsistency = true ) { |
60 | tbb::internal::cpu_ctl_env ctl; |
61 | ctl.get_env(); |
62 | ASSERT( !checkConsistency || (ctl.mxcsr & SSE_RND_MODE_MASK) >> 3 == (ctl.x87cw & FE_RND_MODE_MASK), NULL ); |
63 | return ctl.x87cw & FE_RND_MODE_MASK; |
64 | } |
65 | |
66 | inline void SetRoundingMode ( int mode ) { |
67 | tbb::internal::cpu_ctl_env ctl; |
68 | ctl.get_env(); |
69 | ctl.mxcsr = (ctl.mxcsr & ~SSE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK) << 3; |
70 | ctl.x87cw = short((ctl.x87cw & ~FE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK)); |
71 | ctl.set_env(); |
72 | } |
73 | |
74 | inline int GetSseMode () { |
75 | tbb::internal::cpu_ctl_env ctl; |
76 | ctl.get_env(); |
77 | return ctl.mxcsr & SSE_MODE_MASK; |
78 | } |
79 | |
80 | inline void SetSseMode ( int mode ) { |
81 | tbb::internal::cpu_ctl_env ctl; |
82 | ctl.get_env(); |
83 | ctl.mxcsr = (ctl.mxcsr & ~SSE_MODE_MASK) | (mode & SSE_MODE_MASK); |
84 | ctl.set_env(); |
85 | } |
86 | |
87 | #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS) |
// This branch exposes a single SSE mode, 0, so the generic FP-mode helpers still work.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = { 0 };

// Always reports mode 0.
inline int GetSseMode () {
    return 0;
}

// Accepts the requested mode and does nothing with it.
inline void SetSseMode ( int /*mode*/ ) {
}
93 | |
94 | const int FE_TONEAREST = _RC_NEAR, |
95 | FE_DOWNWARD = _RC_DOWN, |
96 | FE_UPWARD = _RC_UP, |
97 | FE_TOWARDZERO = _RC_CHOP; |
98 | |
99 | inline int GetRoundingMode ( bool = true ) { |
100 | tbb::internal::cpu_ctl_env ctl; |
101 | ctl.get_env(); |
102 | return ctl.my_ctl; |
103 | } |
104 | inline void SetRoundingMode ( int mode ) { |
105 | tbb::internal::cpu_ctl_env ctl; |
106 | ctl.my_ctl = mode; |
107 | ctl.set_env(); |
108 | } |
109 | |
110 | #else /* Other archs */ |
111 | |
112 | #include <fenv.h> |
113 | |
// Union of the bits used by the four <fenv.h> rounding-mode constants.
const int RND_MODE_MASK = FE_TOWARDZERO
                        | FE_UPWARD
                        | FE_DOWNWARD
                        | FE_TONEAREST;

// This branch exposes a single SSE mode, 0.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = { 0 };
118 | |
// Returns the current rounding mode as reported by fegetround().
// The bool argument exists only for signature parity with the x86 variant and is ignored.
inline int GetRoundingMode ( bool /*checkConsistency*/ = true ) {
    return fegetround();
}

// Requests rounding mode `rnd` through fesetround(); its result is not inspected.
inline void SetRoundingMode ( int rnd ) {
    fesetround(rnd);
}
121 | |
// Always reports mode 0 on this branch.
inline int GetSseMode () {
    return 0;
}

// Accepts the requested mode and does nothing with it.
inline void SetSseMode ( int /*mode*/ ) {
}
124 | |
125 | #endif /* Other archs */ |
126 | |
127 | const int NumRoundingModes = 4; |
128 | const int RoundingModes[NumRoundingModes] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO }; |
129 | const int numFPModes = NumRoundingModes*NumSseModes; |
130 | |
131 | inline void SetFPMode( int mode ) { |
132 | SetRoundingMode( RoundingModes[mode/NumSseModes%NumRoundingModes] ); |
133 | SetSseMode( SseModes[mode%NumSseModes] ); |
134 | } |
135 | |
// Asserts that the current rounding mode and SSE mode are the ones selected by FP mode
// index `mode`, using the same index decomposition as SetFPMode.
// The argument is parenthesized so that an expression such as `m + 1` is evaluated as a
// whole before the division and remainder are applied.
#define AssertFPMode( mode ) { \
    ASSERT( GetRoundingMode() == RoundingModes[(mode)/NumSseModes%NumRoundingModes], "FPU control state has not been set correctly." ); \
    ASSERT( GetSseMode() == SseModes[(mode)%NumSseModes], "SSE control state has not been set correctly." ); \
}
140 | |
141 | inline int SetNextFPMode( int mode, int step = 1 ) { |
142 | const int nextMode = (mode+step)%numFPModes; |
143 | SetFPMode( nextMode ); |
144 | return nextMode; |
145 | } |
146 | |
147 | class FPModeContext { |
148 | int origSse, origRounding; |
149 | int currentMode; |
150 | public: |
151 | FPModeContext(int newMode) { |
152 | origSse = GetSseMode(); |
153 | origRounding = GetRoundingMode(); |
154 | SetFPMode(currentMode = newMode); |
155 | } |
156 | ~FPModeContext() { |
157 | assertFPMode(); |
158 | SetRoundingMode(origRounding); |
159 | SetSseMode(origSse); |
160 | } |
161 | int setNextFPMode() { |
162 | assertFPMode(); |
163 | return currentMode = SetNextFPMode(currentMode); |
164 | } |
165 | void assertFPMode() { |
166 | AssertFPMode(currentMode); |
167 | } |
168 | }; |
169 | |