1#pragma once
2#ifndef __CVTT_INDEXSELECTOR_H__
3#define __CVTT_INDEXSELECTOR_H__
4
5#include "ConvectionKernels_ParallelMath.h"
6
7namespace cvtt
8{
9 namespace Internal
10 {
11 extern const ParallelMath::UInt16 g_weightReciprocals[17];
12
13 template<int TVectorSize>
14 class IndexSelector
15 {
16 public:
17 typedef ParallelMath::Float MFloat;
18 typedef ParallelMath::UInt16 MUInt16;
19 typedef ParallelMath::UInt15 MUInt15;
20 typedef ParallelMath::SInt16 MSInt16;
21 typedef ParallelMath::AInt16 MAInt16;
22 typedef ParallelMath::SInt32 MSInt32;
23 typedef ParallelMath::UInt31 MUInt31;
24
25
26 template<class TInterpolationEPType, class TColorEPType>
27 void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
28 {
29 // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
30 // We need to select indexes using the color-space endpoints.
31
32 m_isUniform = true;
33 for (int ch = 1; ch < TVectorSize; ch++)
34 {
35 if (channelWeights[ch] != channelWeights[0])
36 m_isUniform = false;
37 }
38
39 // To work with channel weights, we need something where:
40 // pxDiff = px - ep[0]
41 // epDiff = ep[1] - ep[0]
42 //
43 // weightedEPDiff = epDiff * channelWeights
44 // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
45 // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
46 // index = normalizedIndex * maxValue
47 //
48 // Equivalent to:
49 // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
50 // index = dot(axis, pxDiff)
51
52 for (int ep = 0; ep < 2; ep++)
53 for (int ch = 0; ch < TVectorSize; ch++)
54 m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
55
56 m_range = range;
57 m_maxValue = static_cast<float>(range - 1);
58
59 MFloat epDiffWeighted[TVectorSize];
60 for (int ch = 0; ch < TVectorSize; ch++)
61 {
62 m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
63 MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
64 epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
65 }
66
67 MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
68 for (int ch = 1; ch < TVectorSize; ch++)
69 lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
70
71 ParallelMath::MakeSafeDenominator(lenSquared);
72
73 MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
74
75 for (int ch = 0; ch < TVectorSize; ch++)
76 m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
77 }
78
79 template<bool TSigned>
80 void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
81 {
82 MAInt16 converted[2][TVectorSize];
83 for (int epi = 0; epi < 2; epi++)
84 for (int ch = 0; ch < TVectorSize; ch++)
85 converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]);
86
87 Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
88 }
89
90 void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
91 {
92 MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
93
94 for (int ch = 0; ch < numRealChannels; ch++)
95 {
96 MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
97 MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
98 pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));
99 }
100 }
101
102 void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
103 {
104 MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
105
106 for (int ch = 0; ch < numRealChannels; ch++)
107 {
108 MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
109 MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
110 pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));
111 }
112 }
113
114 void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)
115 {
116 ReconstructLDR_BC7(index, pixel, TVectorSize);
117 }
118
119 void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)
120 {
121 ReconstructLDRPrecise(index, pixel, TVectorSize);
122 }
123
124 MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
125 {
126 MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
127 for (int ch = 1; ch < TVectorSize; ch++)
128 dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
129
130 return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
131 }
132
133 protected:
134 MAInt16 m_endPoint[2][TVectorSize];
135
136 private:
137 MFloat m_origin[TVectorSize];
138 MFloat m_axis[TVectorSize];
139 int m_range;
140 float m_maxValue;
141 bool m_isUniform;
142 };
143 }
144}
145
146#endif
147
148