1/*
2 Copyright (c) 2005-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15*/
16
// Include a system header first so that the standard library is not pulled in for the first time while "private" is redefined as "public".
18#include <cstddef>
19#define private public
20#include "tbb/tbb_machine.h"
21#undef private
22#include "harness_assert.h"
23
24#if ( __TBB_x86_32 || __TBB_x86_64 ) && __TBB_CPU_CTL_ENV_PRESENT && !defined(__TBB_WIN32_USE_CL_BUILTINS)
25
// Rounding-mode selectors. The values are used below as bit patterns that are
// masked directly against cpu_ctl_env::x87cw.
const int FE_TONEAREST  = 0x0000;
const int FE_DOWNWARD   = 0x0400;
const int FE_UPWARD     = 0x0800;
const int FE_TOWARDZERO = 0x0c00;

// FE_TOWARDZERO has both rounding bits set, so it doubles as the field mask.
const int FE_RND_MODE_MASK  = FE_TOWARDZERO;
// The same two-bit field sits three bit positions higher in mxcsr.
const int SSE_RND_MODE_MASK = FE_RND_MODE_MASK << 3;

// Mode flags kept in mxcsr (presumably denormals-are-zero and flush-to-zero,
// going by the names).
const int SSE_DAZ = 0x0040;
const int SSE_FTZ = 0x8000;
const int SSE_MODE_MASK = SSE_DAZ | SSE_FTZ;
// Low six bits of mxcsr (presumably the sticky exception status flags).
const int SSE_STATUS_MASK = 0x3F;

// Every DAZ/FTZ combination that the tests iterate over.
const int NumSseModes = 4;
const int SseModes[NumSseModes] = {
    0,
    SSE_DAZ,
    SSE_FTZ,
    SSE_DAZ | SSE_FTZ
};
39
40#if _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__
// MinGW uses the inline implementation from tbb/machine/linux_intel64.h.
// When inline asm is not available, the library uses out-of-line assembly routines that are not exported,
// so they are reimplemented here.
44
45#include <float.h>
46
47inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* fe ) {
48 fe->x87cw = short(_control87(0, 0) & _MCW_RC) << 2;
49 fe->mxcsr = _mm_getcsr();
50}
51inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* fe ) {
52 ASSERT( (fe->x87cw & FE_RND_MODE_MASK) == ((fe->x87cw & FE_RND_MODE_MASK) >> 2 & _MCW_RC) << 2, "Check float.h constants" );
53 _control87( (fe->x87cw & FE_RND_MODE_MASK) >> 6, _MCW_RC );
54 _mm_setcsr( fe->mxcsr );
55}
56
57#endif /* _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__ */
58
59inline int GetRoundingMode ( bool checkConsistency = true ) {
60 tbb::internal::cpu_ctl_env ctl;
61 ctl.get_env();
62 ASSERT( !checkConsistency || (ctl.mxcsr & SSE_RND_MODE_MASK) >> 3 == (ctl.x87cw & FE_RND_MODE_MASK), NULL );
63 return ctl.x87cw & FE_RND_MODE_MASK;
64}
65
66inline void SetRoundingMode ( int mode ) {
67 tbb::internal::cpu_ctl_env ctl;
68 ctl.get_env();
69 ctl.mxcsr = (ctl.mxcsr & ~SSE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK) << 3;
70 ctl.x87cw = short((ctl.x87cw & ~FE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK));
71 ctl.set_env();
72}
73
74inline int GetSseMode () {
75 tbb::internal::cpu_ctl_env ctl;
76 ctl.get_env();
77 return ctl.mxcsr & SSE_MODE_MASK;
78}
79
80inline void SetSseMode ( int mode ) {
81 tbb::internal::cpu_ctl_env ctl;
82 ctl.get_env();
83 ctl.mxcsr = (ctl.mxcsr & ~SSE_MODE_MASK) | (mode & SSE_MODE_MASK);
84 ctl.set_env();
85}
86
87#elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)
// This configuration exposes no SSE mode bits: a single dummy mode (0) keeps
// the generic mode-iteration code working.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = { 0 };

// Always reports the single dummy SSE mode.
inline int GetSseMode () {
    return 0;
}

// Nothing to set in this configuration; the argument is ignored.
inline void SetSseMode ( int /*mode*/ ) {
}
93
94const int FE_TONEAREST = _RC_NEAR,
95 FE_DOWNWARD = _RC_DOWN,
96 FE_UPWARD = _RC_UP,
97 FE_TOWARDZERO = _RC_CHOP;
98
99inline int GetRoundingMode ( bool = true ) {
100 tbb::internal::cpu_ctl_env ctl;
101 ctl.get_env();
102 return ctl.my_ctl;
103}
104inline void SetRoundingMode ( int mode ) {
105 tbb::internal::cpu_ctl_env ctl;
106 ctl.my_ctl = mode;
107 ctl.set_env();
108}
109
110#else /* Other archs */
111
112#include <fenv.h>
113
// Union of all <fenv.h> rounding-mode values.
const int RND_MODE_MASK = FE_TONEAREST
                        | FE_DOWNWARD
                        | FE_UPWARD
                        | FE_TOWARDZERO;

// No SSE modes are handled on this path: a single dummy mode (0) keeps the
// generic mode-iteration code working.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = { 0 };
118
// Returns the current rounding mode as reported by fegetround().
// The bool parameter exists only for signature compatibility and is ignored.
inline int GetRoundingMode ( bool /*checkConsistency*/ = true ) {
    const int currentRounding = fegetround();
    return currentRounding;
}
// Requests rounding mode `rnd` via fesetround(); the call's result is not checked.
inline void SetRoundingMode ( int rnd ) {
    fesetround( rnd );
}
121
// Always reports the single dummy SSE mode (0) on this path.
inline int GetSseMode () {
    return 0;
}

// Nothing to set on this path; the argument is ignored.
inline void SetSseMode ( int /*mode*/ ) {
}
124
125#endif /* Other archs */
126
127const int NumRoundingModes = 4;
128const int RoundingModes[NumRoundingModes] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO };
129const int numFPModes = NumRoundingModes*NumSseModes;
130
131inline void SetFPMode( int mode ) {
132 SetRoundingMode( RoundingModes[mode/NumSseModes%NumRoundingModes] );
133 SetSseMode( SseModes[mode%NumSseModes] );
134}
135
// Asserts that the current rounding mode and SSE mode are the ones encoded by
// the combined FP mode index `mode` (SSE mode = mode % NumSseModes,
// rounding mode = mode / NumSseModes % NumRoundingModes).
// The argument is parenthesized so that expressions such as `a + b` are decoded
// as a whole. It is evaluated twice, so it must not have side effects.
#define AssertFPMode( mode ) { \
    ASSERT( GetRoundingMode() == RoundingModes[(mode)/NumSseModes%NumRoundingModes], "FPU control state has not been set correctly." ); \
    ASSERT( GetSseMode() == SseModes[(mode)%NumSseModes], "SSE control state has not been set correctly." ); \
}
140
141inline int SetNextFPMode( int mode, int step = 1 ) {
142 const int nextMode = (mode+step)%numFPModes;
143 SetFPMode( nextMode );
144 return nextMode;
145}
146
147class FPModeContext {
148 int origSse, origRounding;
149 int currentMode;
150public:
151 FPModeContext(int newMode) {
152 origSse = GetSseMode();
153 origRounding = GetRoundingMode();
154 SetFPMode(currentMode = newMode);
155 }
156 ~FPModeContext() {
157 assertFPMode();
158 SetRoundingMode(origRounding);
159 SetSseMode(origSse);
160 }
161 int setNextFPMode() {
162 assertFPMode();
163 return currentMode = SetNextFPMode(currentMode);
164 }
165 void assertFPMode() {
166 AssertFPMode(currentMode);
167 }
168};
169