| 1 | /* |
| 2 | Copyright (c) 2005-2019 Intel Corporation |
| 3 | |
| 4 | Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | you may not use this file except in compliance with the License. |
| 6 | You may obtain a copy of the License at |
| 7 | |
| 8 | http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | |
| 10 | Unless required by applicable law or agreed to in writing, software |
| 11 | distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | See the License for the specific language governing permissions and |
| 14 | limitations under the License. |
| 15 | */ |
| 16 | |
// Include a system header first so that the standard library is not included for the first time while "private" is redefined as "public".
| 18 | #include <cstddef> |
| 19 | #define private public |
| 20 | #include "tbb/tbb_machine.h" |
| 21 | #undef private |
| 22 | #include "harness_assert.h" |
| 23 | |
| 24 | #if ( __TBB_x86_32 || __TBB_x86_64 ) && __TBB_CPU_CTL_ENV_PRESENT && !defined(__TBB_WIN32_USE_CL_BUILTINS) |
| 25 | |
// Rounding-mode values as they appear in the rounding field of cpu_ctl_env::x87cw.
const int FE_TONEAREST  = 0x0000;
const int FE_DOWNWARD   = 0x0400;
const int FE_UPWARD     = 0x0800;
const int FE_TOWARDZERO = 0x0c00;

// Mask selecting the rounding field in x87cw; the same field sits 3 bits higher in mxcsr.
const int FE_RND_MODE_MASK  = FE_TOWARDZERO;
const int SSE_RND_MODE_MASK = FE_RND_MODE_MASK << 3;

// mxcsr flag bits toggled by the "SSE modes" below, and the mask covering both of them.
const int SSE_DAZ       = 0x0040;
const int SSE_FTZ       = 0x8000;
const int SSE_MODE_MASK = SSE_DAZ | SSE_FTZ;

// Low six bits of mxcsr.
const int SSE_STATUS_MASK = 0x3F;

// Every combination of the two SSE mode flags.
const int NumSseModes = 4;
const int SseModes[NumSseModes] = {
    0,
    SSE_DAZ,
    SSE_FTZ,
    SSE_DAZ | SSE_FTZ
};
| 39 | |
| 40 | #if _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__ |
// MinGW uses the inline implementation from tbb/machine/linux_intel64.h.
// When inline asm is not available, the library uses out-of-line assembly routines
// that are not exported, so they are reimplemented here.
| 44 | |
| 45 | #include <float.h> |
| 46 | |
| 47 | inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* fe ) { |
| 48 | fe->x87cw = short(_control87(0, 0) & _MCW_RC) << 2; |
| 49 | fe->mxcsr = _mm_getcsr(); |
| 50 | } |
| 51 | inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* fe ) { |
| 52 | ASSERT( (fe->x87cw & FE_RND_MODE_MASK) == ((fe->x87cw & FE_RND_MODE_MASK) >> 2 & _MCW_RC) << 2, "Check float.h constants" ); |
| 53 | _control87( (fe->x87cw & FE_RND_MODE_MASK) >> 6, _MCW_RC ); |
| 54 | _mm_setcsr( fe->mxcsr ); |
| 55 | } |
| 56 | |
| 57 | #endif /* _WIN64 && !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE && !__MINGW64__ */ |
| 58 | |
| 59 | inline int GetRoundingMode ( bool checkConsistency = true ) { |
| 60 | tbb::internal::cpu_ctl_env ctl; |
| 61 | ctl.get_env(); |
| 62 | ASSERT( !checkConsistency || (ctl.mxcsr & SSE_RND_MODE_MASK) >> 3 == (ctl.x87cw & FE_RND_MODE_MASK), NULL ); |
| 63 | return ctl.x87cw & FE_RND_MODE_MASK; |
| 64 | } |
| 65 | |
| 66 | inline void SetRoundingMode ( int mode ) { |
| 67 | tbb::internal::cpu_ctl_env ctl; |
| 68 | ctl.get_env(); |
| 69 | ctl.mxcsr = (ctl.mxcsr & ~SSE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK) << 3; |
| 70 | ctl.x87cw = short((ctl.x87cw & ~FE_RND_MODE_MASK) | (mode & FE_RND_MODE_MASK)); |
| 71 | ctl.set_env(); |
| 72 | } |
| 73 | |
| 74 | inline int GetSseMode () { |
| 75 | tbb::internal::cpu_ctl_env ctl; |
| 76 | ctl.get_env(); |
| 77 | return ctl.mxcsr & SSE_MODE_MASK; |
| 78 | } |
| 79 | |
| 80 | inline void SetSseMode ( int mode ) { |
| 81 | tbb::internal::cpu_ctl_env ctl; |
| 82 | ctl.get_env(); |
| 83 | ctl.mxcsr = (ctl.mxcsr & ~SSE_MODE_MASK) | (mode & SSE_MODE_MASK); |
| 84 | ctl.set_env(); |
| 85 | } |
| 86 | |
| 87 | #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS) |
// This configuration has no SSE mode flags to vary: a single "mode" with value 0.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = {
    0
};
| 90 | |
// Stub for this configuration: the SSE mode is always reported as 0.
inline int GetSseMode () {
    return 0;
}
// Stub for this configuration: the requested SSE mode is ignored.
inline void SetSseMode ( int ) {
}
| 93 | |
| 94 | const int FE_TONEAREST = _RC_NEAR, |
| 95 | FE_DOWNWARD = _RC_DOWN, |
| 96 | FE_UPWARD = _RC_UP, |
| 97 | FE_TOWARDZERO = _RC_CHOP; |
| 98 | |
| 99 | inline int GetRoundingMode ( bool = true ) { |
| 100 | tbb::internal::cpu_ctl_env ctl; |
| 101 | ctl.get_env(); |
| 102 | return ctl.my_ctl; |
| 103 | } |
| 104 | inline void SetRoundingMode ( int mode ) { |
| 105 | tbb::internal::cpu_ctl_env ctl; |
| 106 | ctl.my_ctl = mode; |
| 107 | ctl.set_env(); |
| 108 | } |
| 109 | |
| 110 | #else /* Other archs */ |
| 111 | |
| 112 | #include <fenv.h> |
| 113 | |
// Union of all four <fenv.h> rounding-mode values.
const int RND_MODE_MASK = FE_TONEAREST
                        | FE_DOWNWARD
                        | FE_UPWARD
                        | FE_TOWARDZERO;

// No SSE mode flags to vary on this architecture: a single "mode" with value 0.
const int NumSseModes = 1;
const int SseModes[NumSseModes] = {
    0
};
| 118 | |
// Returns the current rounding mode as reported by fegetround().
// The unnamed bool parameter is accepted and ignored.
inline int GetRoundingMode ( bool = true ) {
    const int currentRounding = fegetround();
    return currentRounding;
}
// Requests rounding mode `rnd` via fesetround(); the call's result is not checked.
inline void SetRoundingMode ( int rnd ) {
    fesetround( rnd );
}
| 121 | |
// Stub for architectures without SSE mode flags: always reports 0.
inline int GetSseMode () {
    return 0;
}
// Stub for architectures without SSE mode flags: the argument is ignored.
inline void SetSseMode ( int ) {
}
| 124 | |
| 125 | #endif /* Other archs */ |
| 126 | |
| 127 | const int NumRoundingModes = 4; |
| 128 | const int RoundingModes[NumRoundingModes] = { FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO }; |
| 129 | const int numFPModes = NumRoundingModes*NumSseModes; |
| 130 | |
| 131 | inline void SetFPMode( int mode ) { |
| 132 | SetRoundingMode( RoundingModes[mode/NumSseModes%NumRoundingModes] ); |
| 133 | SetSseMode( SseModes[mode%NumSseModes] ); |
| 134 | } |
| 135 | |
// Asserts that the current rounding mode and SSE mode are the ones encoded by the combined
// FP mode index `mode` (decoded the same way as in SetFPMode).
// The argument is parenthesized so that an expression such as `m + 1` is decoded as a
// whole instead of being split by the precedence of '/' and '%'.
#define AssertFPMode( mode ) { \
    ASSERT( GetRoundingMode() == RoundingModes[(mode)/NumSseModes%NumRoundingModes], "FPU control state has not been set correctly." ); \
    ASSERT( GetSseMode() == SseModes[(mode)%NumSseModes], "SSE control state has not been set correctly." ); \
}
| 140 | |
| 141 | inline int SetNextFPMode( int mode, int step = 1 ) { |
| 142 | const int nextMode = (mode+step)%numFPModes; |
| 143 | SetFPMode( nextMode ); |
| 144 | return nextMode; |
| 145 | } |
| 146 | |
| 147 | class FPModeContext { |
| 148 | int origSse, origRounding; |
| 149 | int currentMode; |
| 150 | public: |
| 151 | FPModeContext(int newMode) { |
| 152 | origSse = GetSseMode(); |
| 153 | origRounding = GetRoundingMode(); |
| 154 | SetFPMode(currentMode = newMode); |
| 155 | } |
| 156 | ~FPModeContext() { |
| 157 | assertFPMode(); |
| 158 | SetRoundingMode(origRounding); |
| 159 | SetSseMode(origSse); |
| 160 | } |
| 161 | int setNextFPMode() { |
| 162 | assertFPMode(); |
| 163 | return currentMode = SetNextFPMode(currentMode); |
| 164 | } |
| 165 | void assertFPMode() { |
| 166 | AssertFPMode(currentMode); |
| 167 | } |
| 168 | }; |
| 169 | |