ConvectionKernels_EndpointRefiner.h source code [Godot/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h]

1	#pragma once
2	#ifndef __CVTT_ENDPOINTREFINER_H__
3	#define __CVTT_ENDPOINTREFINER_H__
4
5	#include "ConvectionKernels_ParallelMath.h"
6
7	namespace cvtt
8	{
9	namespace Internal
10	{
11	// Solve for a, b where v = a*t + b
12	// This allows endpoints to be mapped to where T=0 and T=1
13	// Least squares from totals:
14	// a = (tv - t*v/w)/(tt - t*t/w)
15	// b = (v - a*t)/w
16	template<int TVectorSize>
17	class EndpointRefiner
18	{
19	public:
20	typedef ParallelMath::Float MFloat;
21	typedef ParallelMath::UInt16 MUInt16;
22	typedef ParallelMath::UInt15 MUInt15;
23	typedef ParallelMath::AInt16 MAInt16;
24	typedef ParallelMath::SInt16 MSInt16;
25	typedef ParallelMath::SInt32 MSInt32;
26
27	MFloat m_tv[TVectorSize];
28	MFloat m_v[TVectorSize];
29	MFloat m_tt;
30	MFloat m_t;
31	MFloat m_w;
32	int m_wu;
33
34	float m_rcpMaxIndex;
35	float m_channelWeights[TVectorSize];
36	float m_rcpChannelWeights[TVectorSize];
37
38	void Init(int indexRange, const float channelWeights[TVectorSize])
39	{
40	for (int ch = `0`; ch < TVectorSize; ch++)
41	{
42	m_tv[ch] = ParallelMath::MakeFloatZero();
43	m_v[ch] = ParallelMath::MakeFloatZero();
44	}
45	m_tt = ParallelMath::MakeFloatZero();
46	m_t = ParallelMath::MakeFloatZero();
47	m_w = ParallelMath::MakeFloatZero();
48
49	m_rcpMaxIndex = `1.0f` / static_cast<float>(indexRange - `1`);
50
51	for (int ch = `0`; ch < TVectorSize; ch++)
52	{
53	m_channelWeights[ch] = channelWeights[ch];
54	m_rcpChannelWeights[ch] = `1.0f`;
55	if (m_channelWeights[ch] != `0.0f`)
56	m_rcpChannelWeights[ch] = `1.0f` / channelWeights[ch];
57	}
58
59	m_wu = `0`;
60	}
61
62	void ContributePW(const MFloat pwFloatPixel, const* MUInt15 &index, const MFloat &weight)
63	{
64	MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
65
66	for (int ch = `0`; ch < TVectorSize; ch++)
67	{
68	MFloat v = pwFloatPixel[ch] * weight;
69
70	m_tv[ch] = m_tv[ch] + t * v;
71	m_v[ch] = m_v[ch] + v;
72	}
73	m_tt = m_tt + weight * t * t;
74	m_t = m_t + weight * t;
75	m_w = m_w + weight;
76	}
77
78	void ContributeUnweightedPW(const MFloat pwFloatPixel, const* MUInt15 &index, int numRealChannels)
79	{
80	MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
81
82	for (int ch = `0`; ch < numRealChannels; ch++)
83	{
84	MFloat v = pwFloatPixel[ch];
85
86	m_tv[ch] = m_tv[ch] + t * v;
87	m_v[ch] = m_v[ch] + v;
88	}
89	m_tt = m_tt + t * t;
90	m_t = m_t + t;
91	m_wu++;
92	}
93
94	void ContributeUnweightedPW(const MFloat floatPixel, const* MUInt15 &index)
95	{
96	ContributeUnweightedPW(floatPixel, index, TVectorSize);
97	}
98
99	void GetRefinedEndpoints(MFloat endPoint[`2`][TVectorSize])
100	{
101	// a = (tv - tv/w)/(tt - tt/w)
102	// b = (v - at)/w*
103	MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
104
105	ParallelMath::MakeSafeDenominator(w);
106	MFloat wRcp = ParallelMath::Reciprocal(w);
107
108	MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
109
110	ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
111	ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(`1.0f`));
112
113	for (int ch = `0`; ch < TVectorSize; ch++)
114	{
115	/*
116	if (adenom == 0.0)
117	p1 = p2 = er.v / er.w;
118	else
119	{
120	float4 a = (er.tv - er.ter.v / er.w) / adenom;*
121	float4 b = (er.v - a er.t) / er.w;*
122	p1 = b;
123	p2 = a + b;
124	}
125	*/
126
127	MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
128	MFloat b = (m_v[ch] - a * m_t) * wRcp;
129
130	MFloat p1 = b;
131	MFloat p2 = a + b;
132
133	ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
134	ParallelMath::ConditionalSet(p2, adenomZero, p1);
135
136	// Unweight
137	float inverseWeight = m_rcpChannelWeights[ch];
138
139	endPoint[`0`][ch] = p1 * inverseWeight;
140	endPoint[`1`][ch] = p2 * inverseWeight;
141	}
142	}
143
144	void GetRefinedEndpointsLDR(MUInt15 endPoint[`2`][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
145	{
146	MFloat floatEndPoint[`2`][TVectorSize];
147	GetRefinedEndpoints(floatEndPoint);
148
149	for (int epi = `0`; epi < `2`; epi++)
150	for (int ch = `0`; ch < TVectorSize; ch++)
151	endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], `0.0f`, `255.0f`), roundingMode);
152	}
153
154	void GetRefinedEndpointsLDR(MUInt15 endPoint[`2`][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
155	{
156	GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
157	}
158
159	void GetRefinedEndpointsHDR(MSInt16 endPoint[`2`][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
160	{
161	MFloat floatEndPoint[`2`][TVectorSize];
162	GetRefinedEndpoints(floatEndPoint);
163
164	for (int epi = `0`; epi < `2`; epi++)
165	{
166	for (int ch = `0`; ch < TVectorSize; ch++)
167	{
168	MFloat f = floatEndPoint[epi][ch];
169	if (isSigned)
170	endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -`31743.0f`, `31743.0f`), roundingMode));
171	else
172	endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, `0.0f`, `31743.0f`), roundingMode));
173	}
174	}
175	}
176	};
177	}
178	}
179
180	#endif
181
182

Browse the source code of Godot/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h