1#pragma once
2#ifndef __CVTT_ENDPOINTREFINER_H__
3#define __CVTT_ENDPOINTREFINER_H__
4
5#include "ConvectionKernels_ParallelMath.h"
6
7namespace cvtt
8{
9 namespace Internal
10 {
11 // Solve for a, b where v = a*t + b
12 // This allows endpoints to be mapped to where T=0 and T=1
13 // Least squares from totals:
14 // a = (tv - t*v/w)/(tt - t*t/w)
15 // b = (v - a*t)/w
16 template<int TVectorSize>
17 class EndpointRefiner
18 {
19 public:
20 typedef ParallelMath::Float MFloat;
21 typedef ParallelMath::UInt16 MUInt16;
22 typedef ParallelMath::UInt15 MUInt15;
23 typedef ParallelMath::AInt16 MAInt16;
24 typedef ParallelMath::SInt16 MSInt16;
25 typedef ParallelMath::SInt32 MSInt32;
26
27 MFloat m_tv[TVectorSize];
28 MFloat m_v[TVectorSize];
29 MFloat m_tt;
30 MFloat m_t;
31 MFloat m_w;
32 int m_wu;
33
34 float m_rcpMaxIndex;
35 float m_channelWeights[TVectorSize];
36 float m_rcpChannelWeights[TVectorSize];
37
38 void Init(int indexRange, const float channelWeights[TVectorSize])
39 {
40 for (int ch = 0; ch < TVectorSize; ch++)
41 {
42 m_tv[ch] = ParallelMath::MakeFloatZero();
43 m_v[ch] = ParallelMath::MakeFloatZero();
44 }
45 m_tt = ParallelMath::MakeFloatZero();
46 m_t = ParallelMath::MakeFloatZero();
47 m_w = ParallelMath::MakeFloatZero();
48
49 m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
50
51 for (int ch = 0; ch < TVectorSize; ch++)
52 {
53 m_channelWeights[ch] = channelWeights[ch];
54 m_rcpChannelWeights[ch] = 1.0f;
55 if (m_channelWeights[ch] != 0.0f)
56 m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
57 }
58
59 m_wu = 0;
60 }
61
62 void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
63 {
64 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
65
66 for (int ch = 0; ch < TVectorSize; ch++)
67 {
68 MFloat v = pwFloatPixel[ch] * weight;
69
70 m_tv[ch] = m_tv[ch] + t * v;
71 m_v[ch] = m_v[ch] + v;
72 }
73 m_tt = m_tt + weight * t * t;
74 m_t = m_t + weight * t;
75 m_w = m_w + weight;
76 }
77
78 void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
79 {
80 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
81
82 for (int ch = 0; ch < numRealChannels; ch++)
83 {
84 MFloat v = pwFloatPixel[ch];
85
86 m_tv[ch] = m_tv[ch] + t * v;
87 m_v[ch] = m_v[ch] + v;
88 }
89 m_tt = m_tt + t * t;
90 m_t = m_t + t;
91 m_wu++;
92 }
93
94 void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
95 {
96 ContributeUnweightedPW(floatPixel, index, TVectorSize);
97 }
98
99 void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
100 {
101 // a = (tv - t*v/w)/(tt - t*t/w)
102 // b = (v - a*t)/w
103 MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
104
105 ParallelMath::MakeSafeDenominator(w);
106 MFloat wRcp = ParallelMath::Reciprocal(w);
107
108 MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
109
110 ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
111 ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
112
113 for (int ch = 0; ch < TVectorSize; ch++)
114 {
115 /*
116 if (adenom == 0.0)
117 p1 = p2 = er.v / er.w;
118 else
119 {
120 float4 a = (er.tv - er.t*er.v / er.w) / adenom;
121 float4 b = (er.v - a * er.t) / er.w;
122 p1 = b;
123 p2 = a + b;
124 }
125 */
126
127 MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
128 MFloat b = (m_v[ch] - a * m_t) * wRcp;
129
130 MFloat p1 = b;
131 MFloat p2 = a + b;
132
133 ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
134 ParallelMath::ConditionalSet(p2, adenomZero, p1);
135
136 // Unweight
137 float inverseWeight = m_rcpChannelWeights[ch];
138
139 endPoint[0][ch] = p1 * inverseWeight;
140 endPoint[1][ch] = p2 * inverseWeight;
141 }
142 }
143
144 void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
145 {
146 MFloat floatEndPoint[2][TVectorSize];
147 GetRefinedEndpoints(floatEndPoint);
148
149 for (int epi = 0; epi < 2; epi++)
150 for (int ch = 0; ch < TVectorSize; ch++)
151 endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
152 }
153
154 void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
155 {
156 GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
157 }
158
159 void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
160 {
161 MFloat floatEndPoint[2][TVectorSize];
162 GetRefinedEndpoints(floatEndPoint);
163
164 for (int epi = 0; epi < 2; epi++)
165 {
166 for (int ch = 0; ch < TVectorSize; ch++)
167 {
168 MFloat f = floatEndPoint[epi][ch];
169 if (isSigned)
170 endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
171 else
172 endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
173 }
174 }
175 }
176 };
177 }
178}
179
180#endif
181
182