1 | #pragma once |
2 | #ifndef __CVTT_INDEXSELECTORHDR_H__ |
3 | #define __CVTT_INDEXSELECTORHDR_H__ |
4 | |
5 | #include "ConvectionKernels_ParallelMath.h" |
6 | #include "ConvectionKernels_IndexSelector.h" |
7 | |
8 | namespace cvtt |
9 | { |
10 | namespace Internal |
11 | { |
// Only declared in this header. IndexSelectorHDR passes each interpolated signed
// 16-bit channel value through this function before storing it as a reconstructed
// pixel channel.
// NOTE(review): the exact unscaling formula is not visible here - see the definition.
ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v);
// Only declared in this header. Unsigned counterpart: IndexSelectorHDR passes each
// interpolated unsigned 16-bit channel value through this function and then casts
// the UInt15 result to SInt16 for storage.
// NOTE(review): the exact unscaling formula is not visible here - see the definition.
ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v);
14 | |
15 | template<int TVectorSize> |
16 | class IndexSelectorHDR : public IndexSelector<TVectorSize> |
17 | { |
18 | public: |
19 | typedef ParallelMath::UInt15 MUInt15; |
20 | typedef ParallelMath::UInt16 MUInt16; |
21 | typedef ParallelMath::UInt31 MUInt31; |
22 | typedef ParallelMath::SInt16 MSInt16; |
23 | typedef ParallelMath::SInt32 MSInt32; |
24 | typedef ParallelMath::Float MFloat; |
25 | |
26 | private: |
27 | |
28 | MUInt15 InvertSingle(const MUInt15& anIndex) const |
29 | { |
30 | MUInt15 inverted = m_maxValueMinusOne - anIndex; |
31 | return ParallelMath::Select(m_isInverted, inverted, anIndex); |
32 | } |
33 | |
34 | void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const |
35 | { |
36 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); |
37 | |
38 | for (int ch = 0; ch < TVectorSize; ch++) |
39 | { |
40 | MSInt16 ep0 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]); |
41 | MSInt16 ep1 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]); |
42 | |
43 | MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); |
44 | |
45 | pixel32 = ParallelMath::RightShift(pixel32 + ParallelMath::MakeSInt32(32), 6); |
46 | |
47 | pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32)); |
48 | } |
49 | } |
50 | |
51 | void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const |
52 | { |
53 | MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); |
54 | |
55 | for (int ch = 0; ch < TVectorSize; ch++) |
56 | { |
57 | MUInt16 ep0 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]); |
58 | MUInt16 ep1 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]); |
59 | |
60 | MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); |
61 | |
62 | pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6); |
63 | |
64 | pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31))); |
65 | } |
66 | } |
67 | |
68 | MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const |
69 | { |
70 | MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch]; |
71 | return diff * diff; |
72 | } |
73 | |
74 | MFloat ErrorForInterpolator(int index, const MFloat *pixel) const |
75 | { |
76 | MFloat error = ErrorForInterpolatorComponent(index, 0, pixel); |
77 | for (int ch = 1; ch < TVectorSize; ch++) |
78 | error = error + ErrorForInterpolatorComponent(index, ch, pixel); |
79 | return error; |
80 | } |
81 | |
82 | public: |
83 | |
84 | void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights) |
85 | { |
86 | assert(range <= 16); |
87 | |
88 | m_range = range; |
89 | |
90 | m_isInverted = ParallelMath::MakeBoolInt16(false); |
91 | m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1)); |
92 | |
93 | if (!fastIndexing) |
94 | { |
95 | for (int i = 0; i < range; i++) |
96 | { |
97 | MSInt16 recon2CL[TVectorSize]; |
98 | |
99 | if (isSigned) |
100 | ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); |
101 | else |
102 | ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); |
103 | |
104 | for (int ch = 0; ch < TVectorSize; ch++) |
105 | m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch]; |
106 | } |
107 | } |
108 | } |
109 | |
110 | void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const |
111 | { |
112 | ReconstructHDRSignedUninverted(InvertSingle(index), pixel); |
113 | } |
114 | |
115 | void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const |
116 | { |
117 | ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel); |
118 | } |
119 | |
120 | void ConditionalInvert(const ParallelMath::Int16CompFlag &invert) |
121 | { |
122 | m_isInverted = invert; |
123 | } |
124 | |
125 | MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const |
126 | { |
127 | MUInt15 index = ParallelMath::MakeUInt15(0); |
128 | |
129 | MFloat bestError = ErrorForInterpolator(0, pixel); |
130 | for (int i = 1; i < m_range; i++) |
131 | { |
132 | MFloat error = ErrorForInterpolator(i, pixel); |
133 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); |
134 | ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast<uint16_t>(i))); |
135 | bestError = ParallelMath::Min(bestError, error); |
136 | } |
137 | |
138 | return InvertSingle(index); |
139 | } |
140 | |
141 | MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const |
142 | { |
143 | return InvertSingle(this->SelectIndexLDR(pixel, rtn)); |
144 | } |
145 | |
146 | private: |
147 | MFloat m_reconstructedInterpolators[16][TVectorSize]; |
148 | ParallelMath::Int16CompFlag m_isInverted; |
149 | MUInt15 m_maxValueMinusOne; |
150 | int m_range; |
151 | }; |
152 | } |
153 | } |
154 | #endif |
155 | |
156 | |