1 | #pragma once |
2 | #ifndef __CVTT_INDEXSELECTOR_H__ |
3 | #define __CVTT_INDEXSELECTOR_H__ |
4 | |
5 | #include "ConvectionKernels_ParallelMath.h" |
6 | |
7 | namespace cvtt |
8 | { |
9 | namespace Internal |
10 | { |
11 | extern const ParallelMath::UInt16 g_weightReciprocals[17]; |
12 | |
13 | template<int TVectorSize> |
14 | class IndexSelector |
15 | { |
16 | public: |
17 | typedef ParallelMath::Float MFloat; |
18 | typedef ParallelMath::UInt16 MUInt16; |
19 | typedef ParallelMath::UInt15 MUInt15; |
20 | typedef ParallelMath::SInt16 MSInt16; |
21 | typedef ParallelMath::AInt16 MAInt16; |
22 | typedef ParallelMath::SInt32 MSInt32; |
23 | typedef ParallelMath::UInt31 MUInt31; |
24 | |
25 | |
26 | template<class TInterpolationEPType, class TColorEPType> |
27 | void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) |
28 | { |
29 | // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. |
30 | // We need to select indexes using the color-space endpoints. |
31 | |
32 | m_isUniform = true; |
33 | for (int ch = 1; ch < TVectorSize; ch++) |
34 | { |
35 | if (channelWeights[ch] != channelWeights[0]) |
36 | m_isUniform = false; |
37 | } |
38 | |
39 | // To work with channel weights, we need something where: |
40 | // pxDiff = px - ep[0] |
41 | // epDiff = ep[1] - ep[0] |
42 | // |
43 | // weightedEPDiff = epDiff * channelWeights |
44 | // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) |
45 | // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) |
46 | // index = normalizedIndex * maxValue |
47 | // |
48 | // Equivalent to: |
49 | // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) |
50 | // index = dot(axis, pxDiff) |
51 | |
52 | for (int ep = 0; ep < 2; ep++) |
53 | for (int ch = 0; ch < TVectorSize; ch++) |
54 | m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); |
55 | |
56 | m_range = range; |
57 | m_maxValue = static_cast<float>(range - 1); |
58 | |
59 | MFloat epDiffWeighted[TVectorSize]; |
60 | for (int ch = 0; ch < TVectorSize; ch++) |
61 | { |
62 | m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); |
63 | MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); |
64 | epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; |
65 | } |
66 | |
67 | MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; |
68 | for (int ch = 1; ch < TVectorSize; ch++) |
69 | lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; |
70 | |
71 | ParallelMath::MakeSafeDenominator(lenSquared); |
72 | |
73 | MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; |
74 | |
75 | for (int ch = 0; ch < TVectorSize; ch++) |
76 | m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; |
77 | } |
78 | |
79 | template<bool TSigned> |
80 | void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) |
81 | { |
82 | MAInt16 converted[2][TVectorSize]; |
83 | for (int epi = 0; epi < 2; epi++) |
84 | for (int ch = 0; ch < TVectorSize; ch++) |
85 | converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); |
86 | |
87 | Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); |
88 | } |
89 | |
90 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) |
91 | { |
92 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); |
93 | |
94 | for (int ch = 0; ch < numRealChannels; ch++) |
95 | { |
96 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); |
97 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); |
98 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); |
99 | } |
100 | } |
101 | |
102 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) |
103 | { |
104 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); |
105 | |
106 | for (int ch = 0; ch < numRealChannels; ch++) |
107 | { |
108 | MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); |
109 | MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); |
110 | pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); |
111 | } |
112 | } |
113 | |
114 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) |
115 | { |
116 | ReconstructLDR_BC7(index, pixel, TVectorSize); |
117 | } |
118 | |
119 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) |
120 | { |
121 | ReconstructLDRPrecise(index, pixel, TVectorSize); |
122 | } |
123 | |
124 | MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const |
125 | { |
126 | MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; |
127 | for (int ch = 1; ch < TVectorSize; ch++) |
128 | dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; |
129 | |
130 | return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); |
131 | } |
132 | |
133 | protected: |
134 | MAInt16 m_endPoint[2][TVectorSize]; |
135 | |
136 | private: |
137 | MFloat m_origin[TVectorSize]; |
138 | MFloat m_axis[TVectorSize]; |
139 | int m_range; |
140 | float m_maxValue; |
141 | bool m_isUniform; |
142 | }; |
143 | } |
144 | } |
145 | |
146 | #endif |
147 | |
148 | |