1 | #pragma once |
2 | |
3 | #include "ConvectionKernels_Util.h" |
4 | |
5 | namespace cvtt |
6 | { |
7 | namespace Internal |
8 | { |
9 | template<int TVectorSize> |
10 | class UnfinishedEndpoints |
11 | { |
12 | public: |
13 | typedef ParallelMath::Float MFloat; |
14 | typedef ParallelMath::UInt16 MUInt16; |
15 | typedef ParallelMath::UInt15 MUInt15; |
16 | typedef ParallelMath::SInt16 MSInt16; |
17 | typedef ParallelMath::SInt32 MSInt32; |
18 | |
19 | UnfinishedEndpoints() |
20 | { |
21 | } |
22 | |
23 | UnfinishedEndpoints(const MFloat *base, const MFloat *offset) |
24 | { |
25 | for (int ch = 0; ch < TVectorSize; ch++) |
26 | m_base[ch] = base[ch]; |
27 | for (int ch = 0; ch < TVectorSize; ch++) |
28 | m_offset[ch] = offset[ch]; |
29 | } |
30 | |
31 | UnfinishedEndpoints(const UnfinishedEndpoints& other) |
32 | { |
33 | for (int ch = 0; ch < TVectorSize; ch++) |
34 | m_base[ch] = other.m_base[ch]; |
35 | for (int ch = 0; ch < TVectorSize; ch++) |
36 | m_offset[ch] = other.m_offset[ch]; |
37 | } |
38 | |
39 | void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode) |
40 | { |
41 | float tweakFactors[2]; |
42 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
43 | |
44 | for (int ch = 0; ch < TVectorSize; ch++) |
45 | { |
46 | MUInt15 channelEPs[2]; |
47 | for (int epi = 0; epi < 2; epi++) |
48 | { |
49 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f); |
50 | channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode); |
51 | } |
52 | |
53 | outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]); |
54 | outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]); |
55 | } |
56 | } |
57 | |
58 | void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode) |
59 | { |
60 | float tweakFactors[2]; |
61 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
62 | |
63 | for (int ch = 0; ch < TVectorSize; ch++) |
64 | { |
65 | MSInt16 channelEPs[2]; |
66 | for (int epi = 0; epi < 2; epi++) |
67 | { |
68 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f); |
69 | channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode); |
70 | } |
71 | |
72 | outEP0[ch] = channelEPs[0]; |
73 | outEP1[ch] = channelEPs[1]; |
74 | } |
75 | } |
76 | |
77 | void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1) |
78 | { |
79 | ParallelMath::RoundTowardNearestForScope roundingMode; |
80 | |
81 | float tweakFactors[2]; |
82 | Util::ComputeTweakFactors(tweak, range, tweakFactors); |
83 | |
84 | for (int ch = 0; ch < TVectorSize; ch++) |
85 | { |
86 | MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f); |
87 | MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f); |
88 | outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode); |
89 | outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode); |
90 | } |
91 | } |
92 | |
93 | template<int TNewVectorSize> |
94 | UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler) |
95 | { |
96 | MFloat newBase[TNewVectorSize]; |
97 | MFloat newOffset[TNewVectorSize]; |
98 | |
99 | for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++) |
100 | { |
101 | newBase[ch] = m_base[ch]; |
102 | newOffset[ch] = m_offset[ch]; |
103 | } |
104 | |
105 | MFloat fillerV = ParallelMath::MakeFloat(filler); |
106 | |
107 | for (int ch = TVectorSize; ch < TNewVectorSize; ch++) |
108 | { |
109 | newBase[ch] = fillerV; |
110 | newOffset[ch] = ParallelMath::MakeFloatZero(); |
111 | } |
112 | |
113 | return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset); |
114 | } |
115 | |
116 | private: |
117 | MFloat m_base[TVectorSize]; |
118 | MFloat m_offset[TVectorSize]; |
119 | }; |
120 | } |
121 | } |
122 | |