ConvectionKernels.h source code [Godot/thirdparty/cvtt/ConvectionKernels.h]

1	/*
2	Convection Texture Tools
3	Copyright (c) 2018 Eric Lasota
4
5	Permission is hereby granted, free of charge, to any person obtaining
6	a copy of this software and associated documentation files (the
7	"Software"), to deal in the Software without restriction, including
8	without limitation the rights to use, copy, modify, merge, publish,
9	distribute, sublicense, and/or sell copies of the Software, and to
10	permit persons to whom the Software is furnished to do so, subject
11	to the following conditions:
12
13	The above copyright notice and this permission notice shall be included
14	in all copies or substantial portions of the Software.
15
16	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23	*/
24	#pragma once
25	#ifndef __CVTT_CONVECTION_KERNELS__
26	#define __CVTT_CONVECTION_KERNELS__
27
28	#include <stddef.h>
29	#include <stdint.h>
30
31	namespace cvtt
32	{
// Bit flags that tune encoder speed/quality trade-offs.
// Combine them with bitwise OR and store the result in Options::flags.
namespace Flags
{
    // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
    const uint32_t BC7_FastIndexing = 0x008;

    // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks)
    const uint32_t BC7_TrySingleColor = 0x010;

    // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other
    const uint32_t BC7_RespectPunchThrough = 0x020;

    // Use fast indexing in HDR formats (faster, worse quality)
    const uint32_t BC6H_FastIndexing = 0x040;

    // Exhaustive search RGB orderings when encoding BC1-BC3 (much slower, better quality)
    const uint32_t S3TC_Exhaustive = 0x080;

    // Penalize distant endpoints, improving quality on inaccurate GPU decoders
    const uint32_t S3TC_Paranoid = 0x100;

    // Uniform color channel importance
    const uint32_t Uniform = 0x200;

    // Use fake BT.709 color space for etc2comp compatibility (slower)
    const uint32_t ETC_UseFakeBT709 = 0x400;

    // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
    const uint32_t ETC_FakeBT709Accurate = 0x800;

    // Misc useful default flag combinations.
    // Note: Fastest and Faster resolve to the same mask, as do Fast and Default.
    const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
    const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
}
70
// Number of blocks that each kernel call accepts and outputs at once.
const unsigned int NumParallelBlocks = 8;
72
73	struct Options
74	{
75	uint32_t flags; // Bitmask of cvtt::Flags values
76	float threshold; // Alpha test threshold for BC1
77	float redWeight; // Red channel importance
78	float greenWeight; // Green channel importance
79	float blueWeight; // Blue channel importance
80	float alphaWeight; // Alpha channel importance
81
82	int refineRoundsBC7; // Number of refine rounds for BC7
83	int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3)
84	int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
85	int refineRoundsS3TC; // Number of refine rounds for S3TC RGB
86
87	int seedPoints; // Number of seed points (min 1, max 4)
88
89	Options()
90	: flags(Flags::Default)
91	, threshold(`0.5f`)
92	, redWeight(`0.2125f` / `0.7154f`)
93	, greenWeight(`1.0f`)
94	, blueWeight(`0.0721f` / `0.7154f`)
95	, alphaWeight(`1.0f`)
96	, refineRoundsBC7(`2`)
97	, refineRoundsBC6H(`3`)
98	, refineRoundsIIC(`8`)
99	, refineRoundsS3TC(`2`)
100	, seedPoints(`4`)
101	{
102	}
103	};
104
// Per-mode seed point counts used to derive a BC7 encoding plan.
struct BC7FineTuningParams
{
    // Seed point counts for each mode+configuration combination
    uint8_t mode0SP[16];
    uint8_t mode1SP[64];
    uint8_t mode2SP[64];
    uint8_t mode3SP[64];
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    uint8_t mode6SP;
    uint8_t mode7SP[64];

    // Initializes every seed point count to 4.
    BC7FineTuningParams()
    {
        for (int i = 0; i < 16; i++)
            this->mode0SP[i] = 4;

        // Modes 1, 2, 3 and 7 all have 64 entries, so they share one loop.
        for (int i = 0; i < 64; i++)
        {
            this->mode1SP[i] = 4;
            this->mode2SP[i] = 4;
            this->mode3SP[i] = 4;
            this->mode7SP[i] = 4;
        }

        // Modes 4 and 5 share the same outer dimension of 4.
        for (int i = 0; i < 4; i++)
        {
            for (int j = 0; j < 2; j++)
                this->mode4SP[i][j] = 4;

            this->mode5SP[i] = 4;
        }

        this->mode6SP = 4;
    }
};
141
// Describes which BC7 modes, partitions and shapes the encoder should try
// and how many seed points to use for each.
struct BC7EncodingPlan
{
    static const int kNumRGBAShapes = 129;
    static const int kNumRGBShapes = 243;

    // Enable masks per mode (presumably one bit per partition -- confirm against the encoder).
    uint64_t mode1PartitionEnabled;
    uint64_t mode2PartitionEnabled;
    uint64_t mode3PartitionEnabled;
    uint16_t mode0PartitionEnabled;
    uint64_t mode7RGBAPartitionEnabled;
    uint64_t mode7RGBPartitionEnabled;
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    bool mode6Enabled;

    uint8_t seedPointsForShapeRGB[kNumRGBShapes];
    uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];

    uint8_t rgbaShapeList[kNumRGBAShapes];
    uint8_t rgbaNumShapesToEvaluate;

    uint8_t rgbShapeList[kNumRGBShapes];
    uint8_t rgbNumShapesToEvaluate;

    // Builds the default plan: every shape is listed in index order with
    // 4 seed points, every enable mask is all-ones, and mode 6 is enabled.
    BC7EncodingPlan()
    {
        // Both shape counts are below 256, so the indices fit in uint8_t;
        // the casts only make the narrowing explicit.
        for (int i = 0; i < kNumRGBShapes; i++)
        {
            this->rgbShapeList[i] = static_cast<uint8_t>(i);
            this->seedPointsForShapeRGB[i] = 4;
        }
        this->rgbNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBShapes);

        for (int i = 0; i < kNumRGBAShapes; i++)
        {
            this->rgbaShapeList[i] = static_cast<uint8_t>(i);
            this->seedPointsForShapeRGBA[i] = 4;
        }
        this->rgbaNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBAShapes);

        this->mode0PartitionEnabled = 0xffff;
        this->mode1PartitionEnabled = 0xffffffffffffffffULL;
        this->mode2PartitionEnabled = 0xffffffffffffffffULL;
        this->mode3PartitionEnabled = 0xffffffffffffffffULL;
        this->mode6Enabled = true;
        this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
        this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;

        for (int i = 0; i < 4; i++)
        {
            for (int j = 0; j < 2; j++)
                this->mode4SP[i][j] = 4;

            this->mode5SP[i] = 4;
        }
    }
};
200
// RGBA input block for unsigned 8-bit formats:
// 16 pixels, each with 4 uint8_t channel values.
struct PixelBlockU8
{
    uint8_t m_pixels[16][4];
};
206
// RGBA input block for signed 8-bit formats:
// 16 pixels, each with 4 int8_t channel values.
struct PixelBlockS8
{
    int8_t m_pixels[16][4];
};
212
// Single-channel input block: 16 pixels, one int16_t value each.
struct PixelBlockScalarS16
{
    int16_t m_pixels[16];
};
217
// RGBA input block for half-precision float formats (bit-cast to int16_t):
// 16 pixels, each with 4 channel values stored as raw 16-bit patterns.
struct PixelBlockF16
{
    int16_t m_pixels[16][4];
};
223
// Handle type for the temporary storage used by the ETC2 encoders.
// The constructor is protected, so code outside the class hierarchy cannot
// create an instance directly; only a derived class can construct one.
class ETC2CompressionData
{
protected:
    ETC2CompressionData() {}
};
229
// Handle type for the temporary storage used by the ETC1 encoder.
// The constructor is protected, so code outside the class hierarchy cannot
// create an instance directly; only a derived class can construct one.
class ETC1CompressionData
{
protected:
    ETC1CompressionData() {}
};
235
236	namespace Kernels
237	{
238	typedef void* allocFunc_t(void *context, size_t size);
239	typedef void freeFunc_t(void context, void** ptr, size_t size);
240
241	// NOTE: All functions accept and output NumParallelBlocks blocks at once
242	void EncodeBC1(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options);
243	void EncodeBC2(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options);
244	void EncodeBC3(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options);
245	void EncodeBC4U(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options);
246	void EncodeBC4S(uint8_t pBC, const* PixelBlockS8 pBlocks, const* Options &options);
247	void EncodeBC5U(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options);
248	void EncodeBC5S(uint8_t pBC, const* PixelBlockS8 pBlocks, const* Options &options);
249	void EncodeBC6HU(uint8_t pBC, const* PixelBlockF16 pBlocks, const* Options &options);
250	void EncodeBC6HS(uint8_t pBC, const* PixelBlockF16 pBlocks, const* Options &options);
251	void EncodeBC7(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options, const BC7EncodingPlan &encodingPlan);
252	void EncodeETC1(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options, ETC1CompressionData *compressionData);
253	void EncodeETC2(uint8_t pBC, const* PixelBlockU8 pBlocks, const* Options &options, ETC2CompressionData *compressionData);
254	void EncodeETC2RGBA(uint8_t pBC, const* PixelBlockU8 pBlocks, const* cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
255	void EncodeETC2PunchthroughAlpha(uint8_t pBC, const* PixelBlockU8 pBlocks, const* cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
256
257	void EncodeETC2Alpha(uint8_t pBC, const* PixelBlockU8 pBlocks, const* cvtt::Options &options);
258	void EncodeETC2Alpha11(uint8_t pBC, const* PixelBlockScalarS16 pBlocks, bool* isSigned, const cvtt::Options &options);
259
260	// Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
261	void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);
262
263	// Generates a BC7 encoding plan from fine-tuning parameters.
264	bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);
265
266	// ETC compression requires temporary storage that normally consumes a large amount of stack space.
267	// To allocate and release it, use one of these functions.
268	ETC2CompressionData AllocETC2Data(allocFunc_t allocFunc, void* context, const* cvtt::Options &options);
269	void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);
270
271	ETC1CompressionData AllocETC1Data(allocFunc_t allocFunc, void* *context);
272	void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);
273
274	void DecodeBC6HU(PixelBlockF16 pBlocks, const* uint8_t *pBC);
275	void DecodeBC6HS(PixelBlockF16 pBlocks, const* uint8_t *pBC);
276	void DecodeBC7(PixelBlockU8 pBlocks, const* uint8_t *pBC);
277	}
278	}
279
280	#endif
281

Browse the source code of Godot/thirdparty/cvtt/ConvectionKernels.h