| 1 | #pragma once | 
|---|
| 2 | #ifndef __CVTT_INDEXSELECTOR_H__ | 
|---|
| 3 | #define __CVTT_INDEXSELECTOR_H__ | 
|---|
| 4 |  | 
|---|
| 5 | #include "ConvectionKernels_ParallelMath.h" | 
|---|
| 6 |  | 
|---|
| 7 | namespace cvtt | 
|---|
| 8 | { | 
|---|
| 9 | namespace Internal | 
|---|
| 10 | { | 
|---|
| 11 | extern const ParallelMath::UInt16 g_weightReciprocals[17]; | 
|---|
| 12 |  | 
|---|
| 13 | template<int TVectorSize> | 
|---|
| 14 | class IndexSelector | 
|---|
| 15 | { | 
|---|
| 16 | public: | 
|---|
| 17 | typedef ParallelMath::Float MFloat; | 
|---|
| 18 | typedef ParallelMath::UInt16 MUInt16; | 
|---|
| 19 | typedef ParallelMath::UInt15 MUInt15; | 
|---|
| 20 | typedef ParallelMath::SInt16 MSInt16; | 
|---|
| 21 | typedef ParallelMath::AInt16 MAInt16; | 
|---|
| 22 | typedef ParallelMath::SInt32 MSInt32; | 
|---|
| 23 | typedef ParallelMath::UInt31 MUInt31; | 
|---|
| 24 |  | 
|---|
| 25 |  | 
|---|
| 26 | template<class TInterpolationEPType, class TColorEPType> | 
|---|
| 27 | void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) | 
|---|
| 28 | { | 
|---|
| 29 | // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. | 
|---|
| 30 | // We need to select indexes using the color-space endpoints. | 
|---|
| 31 |  | 
|---|
| 32 | m_isUniform = true; | 
|---|
| 33 | for (int ch = 1; ch < TVectorSize; ch++) | 
|---|
| 34 | { | 
|---|
| 35 | if (channelWeights[ch] != channelWeights[0]) | 
|---|
| 36 | m_isUniform = false; | 
|---|
| 37 | } | 
|---|
| 38 |  | 
|---|
| 39 | // To work with channel weights, we need something where: | 
|---|
| 40 | // pxDiff = px - ep[0] | 
|---|
| 41 | // epDiff = ep[1] - ep[0] | 
|---|
| 42 | // | 
|---|
| 43 | // weightedEPDiff = epDiff * channelWeights | 
|---|
| 44 | // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) | 
|---|
| 45 | // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) | 
|---|
| 46 | // index = normalizedIndex * maxValue | 
|---|
| 47 | // | 
|---|
| 48 | // Equivalent to: | 
|---|
| 49 | // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) | 
|---|
| 50 | // index = dot(axis, pxDiff) | 
|---|
| 51 |  | 
|---|
| 52 | for (int ep = 0; ep < 2; ep++) | 
|---|
| 53 | for (int ch = 0; ch < TVectorSize; ch++) | 
|---|
| 54 | m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); | 
|---|
| 55 |  | 
|---|
| 56 | m_range = range; | 
|---|
| 57 | m_maxValue = static_cast<float>(range - 1); | 
|---|
| 58 |  | 
|---|
| 59 | MFloat epDiffWeighted[TVectorSize]; | 
|---|
| 60 | for (int ch = 0; ch < TVectorSize; ch++) | 
|---|
| 61 | { | 
|---|
| 62 | m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); | 
|---|
| 63 | MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); | 
|---|
| 64 | epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; | 
|---|
| 65 | } | 
|---|
| 66 |  | 
|---|
| 67 | MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; | 
|---|
| 68 | for (int ch = 1; ch < TVectorSize; ch++) | 
|---|
| 69 | lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; | 
|---|
| 70 |  | 
|---|
| 71 | ParallelMath::MakeSafeDenominator(lenSquared); | 
|---|
| 72 |  | 
|---|
| 73 | MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; | 
|---|
| 74 |  | 
|---|
| 75 | for (int ch = 0; ch < TVectorSize; ch++) | 
|---|
| 76 | m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | template<bool TSigned> | 
|---|
| 80 | void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) | 
|---|
| 81 | { | 
|---|
| 82 | MAInt16 converted[2][TVectorSize]; | 
|---|
| 83 | for (int epi = 0; epi < 2; epi++) | 
|---|
| 84 | for (int ch = 0; ch < TVectorSize; ch++) | 
|---|
| 85 | converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); | 
|---|
| 86 |  | 
|---|
| 87 | Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); | 
|---|
| 88 | } | 
|---|
| 89 |  | 
|---|
| 90 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) | 
|---|
| 91 | { | 
|---|
| 92 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); | 
|---|
| 93 |  | 
|---|
| 94 | for (int ch = 0; ch < numRealChannels; ch++) | 
|---|
| 95 | { | 
|---|
| 96 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); | 
|---|
| 97 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); | 
|---|
| 98 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); | 
|---|
| 99 | } | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) | 
|---|
| 103 | { | 
|---|
| 104 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); | 
|---|
| 105 |  | 
|---|
| 106 | for (int ch = 0; ch < numRealChannels; ch++) | 
|---|
| 107 | { | 
|---|
| 108 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); | 
|---|
| 109 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); | 
|---|
| 110 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); | 
|---|
| 111 | } | 
|---|
| 112 | } | 
|---|
| 113 |  | 
|---|
| 114 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) | 
|---|
| 115 | { | 
|---|
| 116 | ReconstructLDR_BC7(index, pixel, TVectorSize); | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) | 
|---|
| 120 | { | 
|---|
| 121 | ReconstructLDRPrecise(index, pixel, TVectorSize); | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const | 
|---|
| 125 | { | 
|---|
| 126 | MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; | 
|---|
| 127 | for (int ch = 1; ch < TVectorSize; ch++) | 
|---|
| 128 | dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; | 
|---|
| 129 |  | 
|---|
| 130 | return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); | 
|---|
| 131 | } | 
|---|
| 132 |  | 
|---|
| 133 | protected: | 
|---|
| 134 | MAInt16 m_endPoint[2][TVectorSize]; | 
|---|
| 135 |  | 
|---|
| 136 | private: | 
|---|
| 137 | MFloat m_origin[TVectorSize]; | 
|---|
| 138 | MFloat m_axis[TVectorSize]; | 
|---|
| 139 | int m_range; | 
|---|
| 140 | float m_maxValue; | 
|---|
| 141 | bool m_isUniform; | 
|---|
| 142 | }; | 
|---|
| 143 | } | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | #endif | 
|---|
| 147 |  | 
|---|
| 148 |  | 
|---|