1 | #pragma once |
2 | #ifndef __CVTT_BCCOMMON_H__ |
3 | #define __CVTT_BCCOMMON_H__ |
4 | |
5 | #include "ConvectionKernels_AggregatedError.h" |
6 | #include "ConvectionKernels_ParallelMath.h" |
7 | |
8 | namespace cvtt |
9 | { |
10 | namespace Internal |
11 | { |
12 | class BCCommon |
13 | { |
14 | public: |
15 | typedef ParallelMath::Float MFloat; |
16 | typedef ParallelMath::UInt16 MUInt16; |
17 | typedef ParallelMath::UInt15 MUInt15; |
18 | typedef ParallelMath::AInt16 MAInt16; |
19 | typedef ParallelMath::SInt16 MSInt16; |
20 | typedef ParallelMath::SInt32 MSInt32; |
21 | |
22 | static int TweakRoundsForRange(int range); |
23 | |
24 | template<int TVectorSize> |
25 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError) |
26 | { |
27 | for (int ch = 0; ch < numRealChannels; ch++) |
28 | aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch); |
29 | } |
30 | |
31 | template<int TVectorSize> |
32 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError) |
33 | { |
34 | ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError); |
35 | } |
36 | |
37 | template<int TVectorSize> |
38 | static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq) |
39 | { |
40 | AggregatedError<TVectorSize> aggError; |
41 | ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError); |
42 | return aggError.Finalize(flags, channelWeightsSq); |
43 | } |
44 | |
45 | template<int TVectorSize> |
46 | static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) |
47 | { |
48 | MFloat error = ParallelMath::MakeFloatZero(); |
49 | if (flags & Flags::Uniform) |
50 | { |
51 | for (int ch = 0; ch < TVectorSize; ch++) |
52 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]); |
53 | } |
54 | else |
55 | { |
56 | for (int ch = 0; ch < TVectorSize; ch++) |
57 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); |
58 | } |
59 | |
60 | return error; |
61 | } |
62 | |
63 | template<int TVectorSize> |
64 | static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) |
65 | { |
66 | MFloat error = ParallelMath::MakeFloatZero(); |
67 | if (flags & Flags::Uniform) |
68 | { |
69 | for (int ch = 0; ch < TVectorSize; ch++) |
70 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]); |
71 | } |
72 | else |
73 | { |
74 | for (int ch = 0; ch < TVectorSize; ch++) |
75 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); |
76 | } |
77 | |
78 | return error; |
79 | } |
80 | |
81 | template<int TChannelCount> |
82 | static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) |
83 | { |
84 | for (int px = 0; px < 16; px++) |
85 | { |
86 | for (int ch = 0; ch < TChannelCount; ch++) |
87 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; |
88 | } |
89 | } |
90 | |
91 | template<int TChannelCount> |
92 | static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) |
93 | { |
94 | for (int px = 0; px < 16; px++) |
95 | { |
96 | for (int ch = 0; ch < TChannelCount; ch++) |
97 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; |
98 | } |
99 | } |
100 | }; |
101 | } |
102 | } |
103 | |
104 | #endif |
105 | |