| 1 | #pragma once |
| 2 | #ifndef __CVTT_BCCOMMON_H__ |
| 3 | #define __CVTT_BCCOMMON_H__ |
| 4 | |
| 5 | #include "ConvectionKernels_AggregatedError.h" |
| 6 | #include "ConvectionKernels_ParallelMath.h" |
| 7 | |
| 8 | namespace cvtt |
| 9 | { |
| 10 | namespace Internal |
| 11 | { |
| 12 | class BCCommon |
| 13 | { |
| 14 | public: |
| 15 | typedef ParallelMath::Float MFloat; |
| 16 | typedef ParallelMath::UInt16 MUInt16; |
| 17 | typedef ParallelMath::UInt15 MUInt15; |
| 18 | typedef ParallelMath::AInt16 MAInt16; |
| 19 | typedef ParallelMath::SInt16 MSInt16; |
| 20 | typedef ParallelMath::SInt32 MSInt32; |
| 21 | |
| 22 | static int TweakRoundsForRange(int range); |
| 23 | |
| 24 | template<int TVectorSize> |
| 25 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError) |
| 26 | { |
| 27 | for (int ch = 0; ch < numRealChannels; ch++) |
| 28 | aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch); |
| 29 | } |
| 30 | |
| 31 | template<int TVectorSize> |
| 32 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError) |
| 33 | { |
| 34 | ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError); |
| 35 | } |
| 36 | |
| 37 | template<int TVectorSize> |
| 38 | static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq) |
| 39 | { |
| 40 | AggregatedError<TVectorSize> aggError; |
| 41 | ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError); |
| 42 | return aggError.Finalize(flags, channelWeightsSq); |
| 43 | } |
| 44 | |
| 45 | template<int TVectorSize> |
| 46 | static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) |
| 47 | { |
| 48 | MFloat error = ParallelMath::MakeFloatZero(); |
| 49 | if (flags & Flags::Uniform) |
| 50 | { |
| 51 | for (int ch = 0; ch < TVectorSize; ch++) |
| 52 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]); |
| 53 | } |
| 54 | else |
| 55 | { |
| 56 | for (int ch = 0; ch < TVectorSize; ch++) |
| 57 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); |
| 58 | } |
| 59 | |
| 60 | return error; |
| 61 | } |
| 62 | |
| 63 | template<int TVectorSize> |
| 64 | static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) |
| 65 | { |
| 66 | MFloat error = ParallelMath::MakeFloatZero(); |
| 67 | if (flags & Flags::Uniform) |
| 68 | { |
| 69 | for (int ch = 0; ch < TVectorSize; ch++) |
| 70 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]); |
| 71 | } |
| 72 | else |
| 73 | { |
| 74 | for (int ch = 0; ch < TVectorSize; ch++) |
| 75 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); |
| 76 | } |
| 77 | |
| 78 | return error; |
| 79 | } |
| 80 | |
| 81 | template<int TChannelCount> |
| 82 | static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) |
| 83 | { |
| 84 | for (int px = 0; px < 16; px++) |
| 85 | { |
| 86 | for (int ch = 0; ch < TChannelCount; ch++) |
| 87 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | template<int TChannelCount> |
| 92 | static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) |
| 93 | { |
| 94 | for (int px = 0; px < 16; px++) |
| 95 | { |
| 96 | for (int ch = 0; ch < TChannelCount; ch++) |
| 97 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; |
| 98 | } |
| 99 | } |
| 100 | }; |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | #endif |
| 105 | |