| 1 | #pragma once |
| 2 | |
| 3 | #include "ConvectionKernels_Util.h" |
| 4 | |
| 5 | namespace cvtt |
| 6 | { |
| 7 | namespace Internal |
| 8 | { |
| 9 | template<int TVectorSize> |
| 10 | class UnfinishedEndpoints |
| 11 | { |
| 12 | public: |
| 13 | typedef ParallelMath::Float MFloat; |
| 14 | typedef ParallelMath::UInt16 MUInt16; |
| 15 | typedef ParallelMath::UInt15 MUInt15; |
| 16 | typedef ParallelMath::SInt16 MSInt16; |
| 17 | typedef ParallelMath::SInt32 MSInt32; |
| 18 | |
| 19 | UnfinishedEndpoints() |
| 20 | { |
| 21 | } |
| 22 | |
| 23 | UnfinishedEndpoints(const MFloat *base, const MFloat *offset) |
| 24 | { |
| 25 | for (int ch = 0; ch < TVectorSize; ch++) |
| 26 | m_base[ch] = base[ch]; |
| 27 | for (int ch = 0; ch < TVectorSize; ch++) |
| 28 | m_offset[ch] = offset[ch]; |
| 29 | } |
| 30 | |
| 31 | UnfinishedEndpoints(const UnfinishedEndpoints& other) |
| 32 | { |
| 33 | for (int ch = 0; ch < TVectorSize; ch++) |
| 34 | m_base[ch] = other.m_base[ch]; |
| 35 | for (int ch = 0; ch < TVectorSize; ch++) |
| 36 | m_offset[ch] = other.m_offset[ch]; |
| 37 | } |
| 38 | |
| 39 | void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode) |
| 40 | { |
| 41 | float tweakFactors[2]; |
| 42 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
| 43 | |
| 44 | for (int ch = 0; ch < TVectorSize; ch++) |
| 45 | { |
| 46 | MUInt15 channelEPs[2]; |
| 47 | for (int epi = 0; epi < 2; epi++) |
| 48 | { |
| 49 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f); |
| 50 | channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode); |
| 51 | } |
| 52 | |
| 53 | outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]); |
| 54 | outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]); |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode) |
| 59 | { |
| 60 | float tweakFactors[2]; |
| 61 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
| 62 | |
| 63 | for (int ch = 0; ch < TVectorSize; ch++) |
| 64 | { |
| 65 | MSInt16 channelEPs[2]; |
| 66 | for (int epi = 0; epi < 2; epi++) |
| 67 | { |
| 68 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f); |
| 69 | channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode); |
| 70 | } |
| 71 | |
| 72 | outEP0[ch] = channelEPs[0]; |
| 73 | outEP1[ch] = channelEPs[1]; |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1) |
| 78 | { |
| 79 | ParallelMath::RoundTowardNearestForScope roundingMode; |
| 80 | |
| 81 | float tweakFactors[2]; |
| 82 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
| 83 | |
| 84 | for (int ch = 0; ch < TVectorSize; ch++) |
| 85 | { |
| 86 | MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f); |
| 87 | MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f); |
| 88 | outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode); |
| 89 | outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | template<int TNewVectorSize> |
| 94 | UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler) |
| 95 | { |
| 96 | MFloat newBase[TNewVectorSize]; |
| 97 | MFloat newOffset[TNewVectorSize]; |
| 98 | |
| 99 | for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++) |
| 100 | { |
| 101 | newBase[ch] = m_base[ch]; |
| 102 | newOffset[ch] = m_offset[ch]; |
| 103 | } |
| 104 | |
| 105 | MFloat fillerV = ParallelMath::MakeFloat(filler); |
| 106 | |
| 107 | for (int ch = TVectorSize; ch < TNewVectorSize; ch++) |
| 108 | { |
| 109 | newBase[ch] = fillerV; |
| 110 | newOffset[ch] = ParallelMath::MakeFloatZero(); |
| 111 | } |
| 112 | |
| 113 | return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset); |
| 114 | } |
| 115 | |
| 116 | private: |
| 117 | MFloat m_base[TVectorSize]; |
| 118 | MFloat m_offset[TVectorSize]; |
| 119 | }; |
| 120 | } |
| 121 | } |
| 122 | |