1 | /* |
2 | Convection Texture Tools |
3 | Copyright (c) 2018-2019 Eric Lasota |
4 | |
5 | Permission is hereby granted, free of charge, to any person obtaining |
6 | a copy of this software and associated documentation files (the |
7 | "Software"), to deal in the Software without restriction, including |
8 | without limitation the rights to use, copy, modify, merge, publish, |
9 | distribute, sublicense, and/or sell copies of the Software, and to |
10 | permit persons to whom the Software is furnished to do so, subject |
11 | to the following conditions: |
12 | |
13 | The above copyright notice and this permission notice shall be included |
14 | in all copies or substantial portions of the Software. |
15 | |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | */ |
24 | #include "ConvectionKernels_Config.h" |
25 | |
26 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) |
27 | |
28 | #include <stdint.h> |
29 | #include "ConvectionKernels.h" |
30 | #include "ConvectionKernels_Util.h" |
31 | #include "ConvectionKernels_BC67.h" |
32 | #include "ConvectionKernels_ETC.h" |
33 | #include "ConvectionKernels_S3TC.h" |
34 | |
35 | #include <assert.h> |
36 | |
37 | namespace cvtt |
38 | { |
39 | namespace Kernels |
40 | { |
41 | void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan) |
42 | { |
43 | assert(pBlocks); |
44 | assert(pBC); |
45 | |
46 | float channelWeights[4]; |
47 | Util::FillWeights(options, channelWeights); |
48 | |
49 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
50 | { |
51 | Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, encodingPlan, options.refineRoundsBC7); |
52 | pBC += ParallelMath::ParallelSize * 16; |
53 | } |
54 | } |
55 | |
56 | void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) |
57 | { |
58 | assert(pBlocks); |
59 | assert(pBC); |
60 | |
61 | float channelWeights[4]; |
62 | Util::FillWeights(options, channelWeights); |
63 | |
64 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
65 | { |
66 | Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H); |
67 | pBC += ParallelMath::ParallelSize * 16; |
68 | } |
69 | } |
70 | |
71 | void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) |
72 | { |
73 | assert(pBlocks); |
74 | assert(pBC); |
75 | |
76 | float channelWeights[4]; |
77 | Util::FillWeights(options, channelWeights); |
78 | |
79 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
80 | { |
81 | Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H); |
82 | pBC += ParallelMath::ParallelSize * 16; |
83 | } |
84 | } |
85 | |
86 | void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) |
87 | { |
88 | assert(pBlocks); |
89 | assert(pBC); |
90 | |
91 | float channelWeights[4]; |
92 | Util::FillWeights(options, channelWeights); |
93 | |
94 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
95 | { |
96 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); |
97 | pBC += ParallelMath::ParallelSize * 8; |
98 | } |
99 | } |
100 | |
101 | void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) |
102 | { |
103 | assert(pBlocks); |
104 | assert(pBC); |
105 | |
106 | float channelWeights[4]; |
107 | Util::FillWeights(options, channelWeights); |
108 | |
109 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
110 | { |
111 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); |
112 | Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16); |
113 | pBC += ParallelMath::ParallelSize * 16; |
114 | } |
115 | } |
116 | |
117 | void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) |
118 | { |
119 | assert(pBlocks); |
120 | assert(pBC); |
121 | |
122 | float channelWeights[4]; |
123 | Util::FillWeights(options, channelWeights); |
124 | |
125 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
126 | { |
127 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); |
128 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); |
129 | pBC += ParallelMath::ParallelSize * 16; |
130 | } |
131 | } |
132 | |
133 | void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) |
134 | { |
135 | assert(pBlocks); |
136 | assert(pBC); |
137 | |
138 | float channelWeights[4]; |
139 | Util::FillWeights(options, channelWeights); |
140 | |
141 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
142 | { |
143 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC); |
144 | pBC += ParallelMath::ParallelSize * 8; |
145 | } |
146 | } |
147 | |
148 | void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) |
149 | { |
150 | assert(pBlocks); |
151 | assert(pBC); |
152 | |
153 | float channelWeights[4]; |
154 | Util::FillWeights(options, channelWeights); |
155 | |
156 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
157 | { |
158 | PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; |
159 | Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); |
160 | |
161 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC); |
162 | pBC += ParallelMath::ParallelSize * 8; |
163 | } |
164 | } |
165 | |
166 | void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) |
167 | { |
168 | assert(pBlocks); |
169 | assert(pBC); |
170 | |
171 | float channelWeights[4]; |
172 | Util::FillWeights(options, channelWeights); |
173 | |
174 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
175 | { |
176 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); |
177 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC); |
178 | pBC += ParallelMath::ParallelSize * 16; |
179 | } |
180 | } |
181 | |
182 | void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) |
183 | { |
184 | assert(pBlocks); |
185 | assert(pBC); |
186 | |
187 | float channelWeights[4]; |
188 | Util::FillWeights(options, channelWeights); |
189 | |
190 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
191 | { |
192 | PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; |
193 | Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); |
194 | |
195 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC); |
196 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC); |
197 | pBC += ParallelMath::ParallelSize * 16; |
198 | } |
199 | } |
200 | |
201 | void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData) |
202 | { |
203 | assert(pBlocks); |
204 | assert(pBC); |
205 | |
206 | float channelWeights[4]; |
207 | Util::FillWeights(options, channelWeights); |
208 | |
209 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
210 | { |
211 | Internal::ETCComputer::CompressETC1Block(pBC, pBlocks + blockBase, compressionData, options); |
212 | pBC += ParallelMath::ParallelSize * 8; |
213 | } |
214 | } |
215 | |
216 | void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) |
217 | { |
218 | assert(pBlocks); |
219 | assert(pBC); |
220 | |
221 | float channelWeights[4]; |
222 | Util::FillWeights(options, channelWeights); |
223 | |
224 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
225 | { |
226 | Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, false); |
227 | pBC += ParallelMath::ParallelSize * 8; |
228 | } |
229 | } |
230 | |
231 | void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) |
232 | { |
233 | assert(pBlocks); |
234 | assert(pBC); |
235 | |
236 | float channelWeights[4]; |
237 | Util::FillWeights(options, channelWeights); |
238 | |
239 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
240 | { |
241 | Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, true); |
242 | pBC += ParallelMath::ParallelSize * 8; |
243 | } |
244 | } |
245 | |
246 | void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) |
247 | { |
248 | assert(pBlocks); |
249 | assert(pBC); |
250 | |
251 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
252 | { |
253 | Internal::ETCComputer::CompressETC2AlphaBlock(pBC, pBlocks + blockBase, options); |
254 | pBC += ParallelMath::ParallelSize * 8; |
255 | } |
256 | } |
257 | |
258 | void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options) |
259 | { |
260 | assert(pBlocks); |
261 | assert(pBC); |
262 | |
263 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) |
264 | { |
265 | Internal::ETCComputer::CompressEACBlock(pBC, pBlocks + blockBase, isSigned, options); |
266 | pBC += ParallelMath::ParallelSize * 8; |
267 | } |
268 | } |
269 | |
270 | void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) |
271 | { |
272 | uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8]; |
273 | uint8_t colorBlockData[cvtt::NumParallelBlocks * 8]; |
274 | |
275 | EncodeETC2(colorBlockData, pBlocks, options, compressionData); |
276 | EncodeETC2Alpha(alphaBlockData, pBlocks, options); |
277 | |
278 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) |
279 | { |
280 | for (size_t blockData = 0; blockData < 8; blockData++) |
281 | pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData]; |
282 | |
283 | for (size_t blockData = 0; blockData < 8; blockData++) |
284 | pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData]; |
285 | } |
286 | } |
287 | |
288 | void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC) |
289 | { |
290 | assert(pBlocks); |
291 | assert(pBC); |
292 | |
293 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) |
294 | { |
295 | Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC); |
296 | pBC += 16; |
297 | } |
298 | } |
299 | |
300 | void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC) |
301 | { |
302 | assert(pBlocks); |
303 | assert(pBC); |
304 | |
305 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) |
306 | { |
307 | Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false); |
308 | pBC += 16; |
309 | } |
310 | } |
311 | |
312 | void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC) |
313 | { |
314 | assert(pBlocks); |
315 | assert(pBC); |
316 | |
317 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) |
318 | { |
319 | Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true); |
320 | pBC += 16; |
321 | } |
322 | } |
323 | |
324 | ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context) |
325 | { |
326 | return cvtt::Internal::ETCComputer::AllocETC1Data(allocFunc, context); |
327 | } |
328 | |
329 | void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc) |
330 | { |
331 | cvtt::Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc); |
332 | } |
333 | |
334 | ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options) |
335 | { |
336 | return cvtt::Internal::ETCComputer::AllocETC2Data(allocFunc, context, options); |
337 | } |
338 | |
339 | void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc) |
340 | { |
341 | cvtt::Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc); |
342 | } |
343 | } |
344 | } |
345 | |
346 | #endif |
347 | |