1/*
2Convection Texture Tools
3Copyright (c) 2018 Eric Lasota
4
5Permission is hereby granted, free of charge, to any person obtaining
6a copy of this software and associated documentation files (the
7"Software"), to deal in the Software without restriction, including
8without limitation the rights to use, copy, modify, merge, publish,
9distribute, sublicense, and/or sell copies of the Software, and to
10permit persons to whom the Software is furnished to do so, subject
11to the following conditions:
12
13The above copyright notice and this permission notice shall be included
14in all copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23*/
24#pragma once
25#ifndef __CVTT_CONVECTION_KERNELS__
26#define __CVTT_CONVECTION_KERNELS__
27
28#include <stddef.h>
29#include <stdint.h>
30
31namespace cvtt
32{
33 namespace Flags
34 {
35 // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
36 const uint32_t BC7_FastIndexing = 0x008;
37
38 // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks)
39 const uint32_t BC7_TrySingleColor = 0x010;
40
41 // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other
42 const uint32_t BC7_RespectPunchThrough = 0x020;
43
44 // Use fast indexing in HDR formats (faster, worse quality)
45 const uint32_t BC6H_FastIndexing = 0x040;
46
47 // Exhaustive search RGB orderings when encoding BC1-BC3 (much slower, better quality)
48 const uint32_t S3TC_Exhaustive = 0x080;
49
50 // Penalize distant endpoints, improving quality on inaccurate GPU decoders
51 const uint32_t S3TC_Paranoid = 0x100;
52
53 // Uniform color channel importance
54 const uint32_t Uniform = 0x200;
55
56 // Use fake BT.709 color space for etc2comp compatibility (slower)
57 const uint32_t ETC_UseFakeBT709 = 0x400;
58
59 // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
60 const uint32_t ETC_FakeBT709Accurate = 0x800;
61
62 // Misc useful default flag combinations
63 const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
64 const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
65 const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
66 const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
67 const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
68 const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
69 }
70
71 const unsigned int NumParallelBlocks = 8;
72
73 struct Options
74 {
75 uint32_t flags; // Bitmask of cvtt::Flags values
76 float threshold; // Alpha test threshold for BC1
77 float redWeight; // Red channel importance
78 float greenWeight; // Green channel importance
79 float blueWeight; // Blue channel importance
80 float alphaWeight; // Alpha channel importance
81
82 int refineRoundsBC7; // Number of refine rounds for BC7
83 int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3)
84 int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
85 int refineRoundsS3TC; // Number of refine rounds for S3TC RGB
86
87 int seedPoints; // Number of seed points (min 1, max 4)
88
89 Options()
90 : flags(Flags::Default)
91 , threshold(0.5f)
92 , redWeight(0.2125f / 0.7154f)
93 , greenWeight(1.0f)
94 , blueWeight(0.0721f / 0.7154f)
95 , alphaWeight(1.0f)
96 , refineRoundsBC7(2)
97 , refineRoundsBC6H(3)
98 , refineRoundsIIC(8)
99 , refineRoundsS3TC(2)
100 , seedPoints(4)
101 {
102 }
103 };
104
105 struct BC7FineTuningParams
106 {
107 // Seed point counts for each mode+configuration combination
108 uint8_t mode0SP[16];
109 uint8_t mode1SP[64];
110 uint8_t mode2SP[64];
111 uint8_t mode3SP[64];
112 uint8_t mode4SP[4][2];
113 uint8_t mode5SP[4];
114 uint8_t mode6SP;
115 uint8_t mode7SP[64];
116
117 BC7FineTuningParams()
118 {
119 for (int i = 0; i < 16; i++)
120 this->mode0SP[i] = 4;
121
122 for (int i = 0; i < 64; i++)
123 {
124 this->mode1SP[i] = 4;
125 this->mode2SP[i] = 4;
126 this->mode3SP[i] = 4;
127 this->mode7SP[i] = 4;
128 }
129
130 for (int i = 0; i < 4; i++)
131 {
132 for (int j = 0; j < 2; j++)
133 this->mode4SP[i][j] = 4;
134
135 this->mode5SP[i] = 4;
136 }
137
138 this->mode6SP = 4;
139 }
140 };
141
142 struct BC7EncodingPlan
143 {
144 static const int kNumRGBAShapes = 129;
145 static const int kNumRGBShapes = 243;
146
147 uint64_t mode1PartitionEnabled;
148 uint64_t mode2PartitionEnabled;
149 uint64_t mode3PartitionEnabled;
150 uint16_t mode0PartitionEnabled;
151 uint64_t mode7RGBAPartitionEnabled;
152 uint64_t mode7RGBPartitionEnabled;
153 uint8_t mode4SP[4][2];
154 uint8_t mode5SP[4];
155 bool mode6Enabled;
156
157 uint8_t seedPointsForShapeRGB[kNumRGBShapes];
158 uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];
159
160 uint8_t rgbaShapeList[kNumRGBAShapes];
161 uint8_t rgbaNumShapesToEvaluate;
162
163 uint8_t rgbShapeList[kNumRGBShapes];
164 uint8_t rgbNumShapesToEvaluate;
165
166 BC7EncodingPlan()
167 {
168 for (int i = 0; i < kNumRGBShapes; i++)
169 {
170 this->rgbShapeList[i] = i;
171 this->seedPointsForShapeRGB[i] = 4;
172 }
173 this->rgbNumShapesToEvaluate = kNumRGBShapes;
174
175 for (int i = 0; i < kNumRGBAShapes; i++)
176 {
177 this->rgbaShapeList[i] = i;
178 this->seedPointsForShapeRGBA[i] = 4;
179 }
180 this->rgbaNumShapesToEvaluate = kNumRGBAShapes;
181
182
183 this->mode0PartitionEnabled = 0xffff;
184 this->mode1PartitionEnabled = 0xffffffffffffffffULL;
185 this->mode2PartitionEnabled = 0xffffffffffffffffULL;
186 this->mode3PartitionEnabled = 0xffffffffffffffffULL;
187 this->mode6Enabled = true;
188 this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
189 this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;
190
191 for (int i = 0; i < 4; i++)
192 {
193 for (int j = 0; j < 2; j++)
194 this->mode4SP[i][j] = 4;
195
196 this->mode5SP[i] = 4;
197 }
198 }
199 };
200
 // RGBA block for unsigned 8-bit formats: 16 pixels of 4 uint8_t components each.
 // Used as encoder input by the U8-based Kernels::Encode* functions and as the
 // output of Kernels::DecodeBC7.
 // NOTE(review): the 16 pixels presumably form a 4x4 texel block - confirm ordering in the implementation.
 struct PixelBlockU8
 {
 uint8_t m_pixels[16][4];
 };
206
 // RGBA input block for signed 8-bit formats: 16 pixels of 4 int8_t components each.
 // Taken as input by Kernels::EncodeBC4S and Kernels::EncodeBC5S.
 struct PixelBlockS8
 {
 int8_t m_pixels[16][4];
 };
212
 // Single-channel block: 16 int16_t values, one per pixel.
 // Taken as input by Kernels::EncodeETC2Alpha11.
 struct PixelBlockScalarS16
 {
 int16_t m_pixels[16];
 };
217
 // RGBA block for half-precision float formats (bit-cast to int16_t):
 // 16 pixels of 4 components each.
 // Used as input by Kernels::EncodeBC6HU / EncodeBC6HS and as the output of
 // Kernels::DecodeBC6HU / DecodeBC6HS.
 struct PixelBlockF16
 {
 int16_t m_pixels[16][4];
 };
223
 // Opaque handle type for the temporary storage used by ETC2 compression.
 // The constructor is protected, so only derived classes can construct it;
 // callers obtain a pointer from Kernels::AllocETC2Data, pass it to the ETC2
 // encode functions, and hand it back to Kernels::ReleaseETC2Data.
 class ETC2CompressionData
 {
 protected:
 ETC2CompressionData() {}
 };
229
 // Opaque handle type for the temporary storage used by ETC1 compression.
 // The constructor is protected, so only derived classes can construct it;
 // callers obtain a pointer from Kernels::AllocETC1Data, pass it to
 // Kernels::EncodeETC1, and hand it back to Kernels::ReleaseETC1Data.
 class ETC1CompressionData
 {
 protected:
 ETC1CompressionData() {}
 };
235
 // Block encode/decode entry points plus the helpers needed to configure and
 // allocate their supporting data. Only declarations live here.
 namespace Kernels
 {
 // Caller-supplied allocation callback: receives an opaque context pointer and a size,
 // and returns a pointer to the allocated storage.
 // NOTE(review): size is presumably in bytes - confirm against the implementation.
 typedef void* allocFunc_t(void *context, size_t size);
 // Caller-supplied release callback: receives the context, the pointer to release, and a size.
 // NOTE(review): size presumably matches the size passed at allocation - confirm.
 typedef void freeFunc_t(void *context, void* ptr, size_t size);

 // NOTE: All functions accept and output NumParallelBlocks blocks at once
 // For every encoder, pBC is the writable output buffer and pBlocks the read-only source blocks.
 void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
 void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
 void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
 void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
 void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
 void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
 void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
 void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
 void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
 // BC7 additionally takes an encoding plan (see the ConfigureBC7EncodingPlan* functions below).
 void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan);
 // The ETC1/ETC2 color encoders take a compressionData handle obtained from AllocETC1Data / AllocETC2Data.
 void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData);
 void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData);
 void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
 void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);

 // The alpha-only ETC2 encoders do not take a compressionData handle.
 // EncodeETC2Alpha11 reads single-channel int16_t blocks; isSigned selects the signed variant
 // (NOTE(review): exact effect of isSigned is not visible here - confirm).
 void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options);
 void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options);

 // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
 // The plan is written into encodingPlan.
 void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);

 // Generates a BC7 encoding plan from fine-tuning parameters.
 // NOTE(review): the meaning of the bool result is not visible here - presumably success/failure; confirm.
 bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);

 // ETC compression requires temporary storage that normally consumes a large amount of stack space.
 // To allocate and release it, use one of these functions.
 // NOTE(review): the Release functions take a freeFunc but no context argument; presumably the
 // context given at allocation is retained with the data - confirm against the implementation.
 ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
 void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);

 ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context);
 void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);

 // Decoders: pBlocks is the writable output, pBC the read-only compressed input.
 void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC);
 void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);
 void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC);
 }
278}
279
280#endif
281