| 1 | #pragma once |
| 2 | #ifndef __CVTT_INDEXSELECTOR_H__ |
| 3 | #define __CVTT_INDEXSELECTOR_H__ |
| 4 | |
| 5 | #include "ConvectionKernels_ParallelMath.h" |
| 6 | |
| 7 | namespace cvtt |
| 8 | { |
| 9 | namespace Internal |
| 10 | { |
| 11 | extern const ParallelMath::UInt16 g_weightReciprocals[17]; |
| 12 | |
| 13 | template<int TVectorSize> |
| 14 | class IndexSelector |
| 15 | { |
| 16 | public: |
| 17 | typedef ParallelMath::Float MFloat; |
| 18 | typedef ParallelMath::UInt16 MUInt16; |
| 19 | typedef ParallelMath::UInt15 MUInt15; |
| 20 | typedef ParallelMath::SInt16 MSInt16; |
| 21 | typedef ParallelMath::AInt16 MAInt16; |
| 22 | typedef ParallelMath::SInt32 MSInt32; |
| 23 | typedef ParallelMath::UInt31 MUInt31; |
| 24 | |
| 25 | |
| 26 | template<class TInterpolationEPType, class TColorEPType> |
| 27 | void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) |
| 28 | { |
| 29 | // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. |
| 30 | // We need to select indexes using the color-space endpoints. |
| 31 | |
| 32 | m_isUniform = true; |
| 33 | for (int ch = 1; ch < TVectorSize; ch++) |
| 34 | { |
| 35 | if (channelWeights[ch] != channelWeights[0]) |
| 36 | m_isUniform = false; |
| 37 | } |
| 38 | |
| 39 | // To work with channel weights, we need something where: |
| 40 | // pxDiff = px - ep[0] |
| 41 | // epDiff = ep[1] - ep[0] |
| 42 | // |
| 43 | // weightedEPDiff = epDiff * channelWeights |
| 44 | // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) |
| 45 | // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) |
| 46 | // index = normalizedIndex * maxValue |
| 47 | // |
| 48 | // Equivalent to: |
| 49 | // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) |
| 50 | // index = dot(axis, pxDiff) |
| 51 | |
| 52 | for (int ep = 0; ep < 2; ep++) |
| 53 | for (int ch = 0; ch < TVectorSize; ch++) |
| 54 | m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); |
| 55 | |
| 56 | m_range = range; |
| 57 | m_maxValue = static_cast<float>(range - 1); |
| 58 | |
| 59 | MFloat epDiffWeighted[TVectorSize]; |
| 60 | for (int ch = 0; ch < TVectorSize; ch++) |
| 61 | { |
| 62 | m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); |
| 63 | MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); |
| 64 | epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; |
| 65 | } |
| 66 | |
| 67 | MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; |
| 68 | for (int ch = 1; ch < TVectorSize; ch++) |
| 69 | lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; |
| 70 | |
| 71 | ParallelMath::MakeSafeDenominator(lenSquared); |
| 72 | |
| 73 | MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; |
| 74 | |
| 75 | for (int ch = 0; ch < TVectorSize; ch++) |
| 76 | m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; |
| 77 | } |
| 78 | |
| 79 | template<bool TSigned> |
| 80 | void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) |
| 81 | { |
| 82 | MAInt16 converted[2][TVectorSize]; |
| 83 | for (int epi = 0; epi < 2; epi++) |
| 84 | for (int ch = 0; ch < TVectorSize; ch++) |
| 85 | converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); |
| 86 | |
| 87 | Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); |
| 88 | } |
| 89 | |
| 90 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) |
| 91 | { |
| 92 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); |
| 93 | |
| 94 | for (int ch = 0; ch < numRealChannels; ch++) |
| 95 | { |
| 96 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); |
| 97 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); |
| 98 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) |
| 103 | { |
| 104 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); |
| 105 | |
| 106 | for (int ch = 0; ch < numRealChannels; ch++) |
| 107 | { |
| 108 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); |
| 109 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); |
| 110 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) |
| 115 | { |
| 116 | ReconstructLDR_BC7(index, pixel, TVectorSize); |
| 117 | } |
| 118 | |
| 119 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) |
| 120 | { |
| 121 | ReconstructLDRPrecise(index, pixel, TVectorSize); |
| 122 | } |
| 123 | |
| 124 | MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const |
| 125 | { |
| 126 | MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; |
| 127 | for (int ch = 1; ch < TVectorSize; ch++) |
| 128 | dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; |
| 129 | |
| 130 | return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); |
| 131 | } |
| 132 | |
| 133 | protected: |
| 134 | MAInt16 m_endPoint[2][TVectorSize]; |
| 135 | |
| 136 | private: |
| 137 | MFloat m_origin[TVectorSize]; |
| 138 | MFloat m_axis[TVectorSize]; |
| 139 | int m_range; |
| 140 | float m_maxValue; |
| 141 | bool m_isUniform; |
| 142 | }; |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | #endif |
| 147 | |
| 148 | |