1 | /* |
2 | Convection Texture Tools |
3 | Copyright (c) 2018-2019 Eric Lasota |
4 | |
5 | Permission is hereby granted, free of charge, to any person obtaining |
6 | a copy of this software and associated documentation files (the |
7 | "Software"), to deal in the Software without restriction, including |
8 | without limitation the rights to use, copy, modify, merge, publish, |
9 | distribute, sublicense, and/or sell copies of the Software, and to |
10 | permit persons to whom the Software is furnished to do so, subject |
11 | to the following conditions: |
12 | |
13 | The above copyright notice and this permission notice shall be included |
14 | in all copies or substantial portions of the Software. |
15 | |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | |
24 | ------------------------------------------------------------------------------------- |
25 | |
26 | Portions based on DirectX Texture Library (DirectXTex) |
27 | |
28 | Copyright (c) Microsoft Corporation. All rights reserved. |
29 | Licensed under the MIT License. |
30 | |
31 | http://go.microsoft.com/fwlink/?LinkId=248926 |
32 | */ |
33 | #include "ConvectionKernels_Config.h" |
34 | |
35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) |
36 | |
37 | #include "ConvectionKernels_BC67.h" |
38 | |
39 | #include "ConvectionKernels_AggregatedError.h" |
40 | #include "ConvectionKernels_BCCommon.h" |
41 | #include "ConvectionKernels_BC7_Prio.h" |
42 | #include "ConvectionKernels_BC7_SingleColor.h" |
43 | #include "ConvectionKernels_BC6H_IO.h" |
44 | #include "ConvectionKernels_EndpointRefiner.h" |
45 | #include "ConvectionKernels_EndpointSelector.h" |
46 | #include "ConvectionKernels_IndexSelectorHDR.h" |
47 | #include "ConvectionKernels_ParallelMath.h" |
48 | #include "ConvectionKernels_UnfinishedEndpoints.h" |
49 | |
50 | namespace cvtt |
51 | { |
52 | namespace Internal |
53 | { |
// Types shared by the BC6H and BC7 encoders.
namespace BC67
{
    // Short aliases for the ParallelMath vector types used by the members below.
    typedef ParallelMath::Float MFloat;
    typedef ParallelMath::UInt15 MUInt15;

    // One candidate encoding of a block: the chosen mode, its error, the
    // endpoints and the per-pixel indexes.
    // NOTE(review): every member is a ParallelMath type, so each field presumably
    // holds one value per parallel lane (one per block being encoded) - confirm.
    struct WorkInfo
    {
        MUInt15 m_mode;         // Mode number of this candidate
        MFloat m_error;         // Error associated with this candidate
        MUInt15 m_ep[3][2][4];  // Endpoints; NOTE(review): presumably [subset][endpoint][channel] - confirm
        MUInt15 m_indexes[16];  // 16 index values; NOTE(review): presumably one per pixel of the 4x4 block
        MUInt15 m_indexes2[16]; // Second set of 16 index values; NOTE(review): presumably the separate alpha/secondary index plane - confirm

        // The two alternatives share storage: a candidate carries either a
        // partition, or an index selector plus rotation, never both.
        // NOTE(review): which one is valid presumably depends on m_mode - confirm with the users of m_u.
        union
        {
            MUInt15 m_partition;
            struct IndexSelectorAndRotation
            {
                MUInt15 m_indexSelector;
                MUInt15 m_rotation;
            } m_isr;
        } m_u;
    };
}
78 | |
// Bit-layout description of the BC6H block header, one table row per mode.
namespace BC6HData
{
    // Names the header field that a descriptor entry refers to.
    // The R/G/B prefix is the color channel; the W/X/Y/Z suffix selects one of
    // up to four endpoint values per channel.
    // NOTE(review): presumably W/X are the two endpoints of the first subset and
    // Y/Z those of the second subset, as in the BC6H format spec - confirm.
    enum EField
    {
        NA, // N/A
        M,  // Mode
        D,  // Shape
        RW,
        RX,
        RY,
        RZ,
        GW,
        GX,
        GY,
        GZ,
        BW,
        BX,
        BY,
        BZ,
    };

    // One descriptor entry: a field and the bit index within that field.
    struct ModeDescriptor
    {
        EField m_eField; // Field this entry belongs to
        uint8_t m_uBit;  // Bit number inside m_eField (0 = least significant, judging by the ascending runs below)
    };

    // g_modeDescriptors[mode][i] gives the field and field-bit for entry i of
    // the given mode: 14 modes, 82 entries each.
    // The rows are in the same order as the mode IDs listed in each row's
    // comment (0x00, 0x01, 0x02, 0x06, ...); the numbers after the dash are the
    // endpoint precisions of that mode.
    // Modes 11-14 only use the first 65 entries; the remaining 17 are NA.
    // NOTE(review): entry i presumably describes bit i of the encoded block
    // header - confirm against the packing code that consumes this table.
    const ModeDescriptor g_modeDescriptors[14][82] =
    {
        {   // Mode 1 (0x00) - 10 5 5 5
            { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 2 (0x01) - 7 6 6 6
            { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 3 (0x02) - 11 5 4 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 4 (0x06) - 11 4 5 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 5 (0x0a) - 11 4 4 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 6 (0x0e) - 9 5 5 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 7 (0x12) - 8 6 5 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 8 (0x16) - 8 5 6 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 9 (0x1a) - 8 5 5 6
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 10 (0x1e) - 6 6 6 6
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 11 (0x03) - 10 10
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 12 (0x07) - 11 9
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 13 (0x0b) - 12 8
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 14 (0x0f) - 16 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },
            { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },
            { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },
            { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },
    };
}
277 | |
278 | namespace BC7Data |
279 | { |
// How a BC7 mode carries alpha; stored in BC7ModeInfo::m_alphaMode.
enum AlphaMode
{
    AlphaMode_Combined = 0, // Used by g_modes entries 6 and 7 (alpha bits present, no separate alpha index bits)
    AlphaMode_Separate = 1, // Used by g_modes entries 4 and 5 (alpha bits plus separate alpha index bits)
    AlphaMode_None     = 2, // Used by g_modes entries 0-3 (zero alpha bits)
};
286 | |
// How a BC7 mode carries P-bits; stored in BC7ModeInfo::m_pBitMode.
enum PBitMode
{
    PBitMode_PerEndpoint = 0, // Used by g_modes entries 0, 3, 6 and 7
    PBitMode_PerSubset   = 1, // Used by g_modes entry 1
    PBitMode_None        = 2  // Used by g_modes entries 2, 4 and 5
};
293 | |
// Layout parameters of one BC7 mode; g_modes holds one of these per mode.
// The member order matters: g_modes initializes these positionally.
struct BC7ModeInfo
{
    PBitMode m_pBitMode;     // P-bit arrangement for this mode
    AlphaMode m_alphaMode;   // How (or whether) alpha is stored
    int m_rgbBits;           // RGB endpoint bits; NOTE(review): presumably per channel and excluding the P-bit - confirm
    int m_alphaBits;         // Alpha endpoint bits; 0 for the AlphaMode_None entries of g_modes
    int m_partitionBits;     // Bits used for the partition; 0 for the single-subset entries of g_modes
    int m_numSubsets;        // Number of subsets (1, 2 or 3 in g_modes)
    int m_indexBits;         // Bits per index (2, 3 or 4 in g_modes)
    int m_alphaIndexBits;    // Bits per alpha index; nonzero only for the AlphaMode_Separate entries of g_modes
    bool m_hasIndexSelector; // True only for mode 4 in g_modes
};
306 | |
307 | BC7ModeInfo g_modes[] = |
308 | { |
309 | { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false }, // 0 |
310 | { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false }, // 1 |
311 | { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false }, // 2 |
312 | { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false }, // 3 (Mode reference has an error, P-bit is really per-endpoint) |
313 | |
314 | { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true }, // 4 |
315 | { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false }, // 5 |
316 | { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6 |
317 | { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false } // 7 |
318 | }; |
319 | |
// Weight tables for 2-, 3- and 4-bit indexes: 2^bits entries each, running from 0 to 64.
// NOTE(review): presumably the BC7 interpolation weights (in 64ths) - confirm with the interpolation code.
const int g_weight2[4] = { 0, 21, 43, 64 };
const int g_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const int g_weight4[16] =
{
    0, 4, 9, 13, 17, 21, 26, 30,
    34, 38, 43, 47, 51, 55, 60, 64
};

// Weight table lookup by index bit count; there is no table for 0 or 1 bits.
const int *g_weightTables[5] =
{
    NULL,       // 0 bits
    NULL,       // 1 bit
    g_weight2,  // 2 bits
    g_weight3,  // 3 bits
    g_weight4   // 4 bits
};
332 | |
// Parameters of one BC6H mode; g_hdrModes holds one of these per mode.
// The member order matters: g_hdrModes initializes these positionally.
struct BC6HModeInfo
{
    uint16_t m_modeID;  // Mode identifier (the hex value shown in the g_modeDescriptors row comments)
    bool m_partitioned; // False for the last four entries of g_hdrModes, whose descriptor rows use no Y/Z endpoint fields
    bool m_transformed; // NOTE(review): presumably whether the secondary endpoints are stored as deltas - confirm
    int m_aPrec;        // First precision figure of the mode (e.g. 10 for the "10 5 5 5" mode)
    int m_bPrec[3];     // Remaining precision figures, one per color channel (e.g. 5, 5, 5)
};
341 | |
// Tells whether any g_hdrModes entry has a given m_aPrec.
// Indexed as [partitioned][precision]: the first index is 0 for unpartitioned
// modes and 1 for partitioned modes, the second is the precision (0 through 16).
bool g_hdrModesExistForPrecision[2][17] =
{
    // Unpartitioned: precisions 10, 11, 12 and 16
    {
        false, false, false, false, false,  // 0-4
        false, false, false, false, false,  // 5-9
        true,  true,  true,                 // 10-12
        false, false, false,                // 13-15
        true                                // 16
    },
    // Partitioned: precisions 6 through 11
    {
        false, false, false, false, false, false,  // 0-5
        true,  true,  true,  true,  true,  true,   // 6-11
        false, false, false, false, false          // 12-16
    },
};
349 | |
350 | BC6HModeInfo g_hdrModes[] = |
351 | { |
352 | { 0x00, true, true, 10,{ 5, 5, 5 } }, |
353 | { 0x01, true, true, 7,{ 6, 6, 6 } }, |
354 | { 0x02, true, true, 11,{ 5, 4, 4 } }, |
355 | { 0x06, true, true, 11,{ 4, 5, 4 } }, |
356 | { 0x0a, true, true, 11,{ 4, 4, 5 } }, |
357 | { 0x0e, true, true, 9,{ 5, 5, 5 } }, |
358 | { 0x12, true, true, 8,{ 6, 5, 5 } }, |
359 | { 0x16, true, true, 8,{ 5, 6, 5 } }, |
360 | { 0x1a, true, true, 8,{ 5, 5, 6 } }, |
361 | { 0x1e, true, false, 6,{ 6, 6, 6 } }, |
362 | { 0x03, false, false, 10,{ 10, 10, 10 } }, |
363 | { 0x07, false, true, 11,{ 9, 9, 9 } }, |
364 | { 0x0b, false, true, 12,{ 8, 8, 8 } }, |
365 | { 0x0f, false, true, 16,{ 4, 4, 4 } }, |
366 | }; |
367 | |
368 | const int g_maxHDRPrecision = 16; |
369 | |
370 | static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]); |
371 | |
// One 16-bit mask per partition number (64 partitions).
// NOTE(review): presumably bit i is the subset (0 or 1) of pixel i for
// two-subset partitions - confirm with the code that reads this table.
static uint16_t g_partitionMap[64] =
{
    0xCCCC, 0x8888, 0xEEEE, 0xECC8,  //  0- 3
    0xC880, 0xFEEC, 0xFEC8, 0xEC80,  //  4- 7
    0xC800, 0xFFEC, 0xFE80, 0xE800,  //  8-11
    0xFFE8, 0xFF00, 0xFFF0, 0xF000,  // 12-15
    0xF710, 0x008E, 0x7100, 0x08CE,  // 16-19
    0x008C, 0x7310, 0x3100, 0x8CCE,  // 20-23
    0x088C, 0x3110, 0x6666, 0x366C,  // 24-27
    0x17E8, 0x0FF0, 0x718E, 0x399C,  // 28-31
    0xAAAA, 0xF0F0, 0x5A5A, 0x33CC,  // 32-35
    0x3C3C, 0x55AA, 0x9696, 0xA55A,  // 36-39
    0x73CE, 0x13C8, 0x324C, 0x3BDC,  // 40-43
    0x6996, 0xC33C, 0x9966, 0x0660,  // 44-47
    0x0272, 0x04E4, 0x4E40, 0x2720,  // 48-51
    0xC936, 0x936C, 0x39C6, 0x639C,  // 52-55
    0x9336, 0x9CC6, 0x817E, 0xE718,  // 56-59
    0xCCF0, 0x0FCC, 0x7744, 0xEE22,  // 60-63
};
391 | |
// One 32-bit mask per partition number (64 partitions).
// NOTE(review): presumably two bits per pixel giving the subset (0-2) for
// three-subset partitions - confirm with the code that reads this table.
static uint32_t g_partitionMap2[64] =
{
    /*  0 */ 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
    /*  4 */ 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
    /*  8 */ 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
    /* 12 */ 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
    /* 16 */ 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
    /* 20 */ 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
    /* 24 */ 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
    /* 28 */ 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
    /* 32 */ 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
    /* 36 */ 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
    /* 40 */ 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
    /* 44 */ 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
    /* 48 */ 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
    /* 52 */ 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
    /* 56 */ 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
    /* 60 */ 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
};
411 | |
// One fix-up index per partition number (64 partitions); every value is 2, 6, 8 or 15.
// NOTE(review): presumably the anchor pixel of the second subset for
// two-subset partitions - confirm with the code that reads this table.
static int g_fixupIndexes2[64] =
{
    15, 15, 15, 15, 15, 15, 15, 15,  //  0- 7
    15, 15, 15, 15, 15, 15, 15, 15,  //  8-15
    15,  2,  8,  2,  2,  8,  8, 15,  // 16-23
     2,  8,  2,  2,  8,  8,  2,  2,  // 24-31

    15, 15,  6,  8,  2,  8, 15, 15,  // 32-39
     2,  8,  2,  2,  2, 15, 15,  6,  // 40-47
     6,  2,  6,  8, 15, 15,  2,  2,  // 48-55
    15, 15, 15, 15, 15,  2,  2, 15,  // 56-63
};
432 | |
// A pair of fix-up indexes per partition number (64 partitions).
// NOTE(review): presumably the anchor pixels of the second and third subsets
// for three-subset partitions - confirm with the code that reads this table.
static int g_fixupIndexes3[64][2] =
{
    {  3, 15 }, {  3,  8 }, { 15,  8 }, { 15,  3 },  //  0- 3
    {  8, 15 }, {  3, 15 }, { 15,  3 }, { 15,  8 },  //  4- 7
    {  8, 15 }, {  8, 15 }, {  6, 15 }, {  6, 15 },  //  8-11
    {  6, 15 }, {  5, 15 }, {  3, 15 }, {  3,  8 },  // 12-15
    {  3, 15 }, {  3,  8 }, {  8, 15 }, { 15,  3 },  // 16-19
    {  3, 15 }, {  3,  8 }, {  6, 15 }, { 10,  8 },  // 20-23
    {  5,  3 }, {  8, 15 }, {  8,  6 }, {  6, 10 },  // 24-27
    {  8, 15 }, {  5, 15 }, { 15, 10 }, { 15,  8 },  // 28-31

    {  8, 15 }, { 15,  3 }, {  3, 15 }, {  5, 10 },  // 32-35
    {  6, 10 }, { 10,  8 }, {  8,  9 }, { 15, 10 },  // 36-39
    { 15,  6 }, {  3, 15 }, { 15,  8 }, {  5, 15 },  // 40-43
    { 15,  3 }, { 15,  6 }, { 15,  6 }, { 15,  8 },  // 44-47
    {  3, 15 }, { 15,  3 }, {  5, 15 }, {  5, 15 },  // 48-51
    {  5, 15 }, {  8, 15 }, {  5, 15 }, { 10, 15 },  // 52-55
    {  5, 15 }, { 10, 15 }, {  8, 15 }, { 13, 15 },  // 56-59
    { 15,  3 }, { 12, 15 }, {  3, 15 }, {  3,  8 },  // 60-63
};
453 | |
// Flat pool of "fragments": each source row below is one fragment, a strictly
// ascending list of values in the range 0..15.
// NOTE(review): the values are presumably pixel positions within the 4x4 block - confirm.
// The trailing comment on every row is "start offset, length" of that fragment
// within this array; g_shapeRanges stores the same (offset, length) pairs, so the
// two must be kept in sync if a row is ever added, removed or resized.
static const unsigned char g_fragments[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16
    0, 1, 2, 3, // 16, 4
    0, 1, 4, // 20, 3
    0, 1, 2, 4, // 23, 4
    2, 3, 7, // 27, 3
    1, 2, 3, 7, // 30, 4
    0, 1, 2, 3, 4, 5, 6, 7, // 34, 8
    0, 1, 4, 8, // 42, 4
    0, 1, 2, 4, 5, 8, // 46, 6
    0, 1, 2, 3, 4, 5, 6, 8, // 52, 8
    1, 4, 5, 6, 9, // 60, 5
    2, 5, 6, 7, 10, // 65, 5
    5, 6, 9, 10, // 70, 4
    2, 3, 7, 11, // 74, 4
    1, 2, 3, 6, 7, 11, // 78, 6
    0, 1, 2, 3, 5, 6, 7, 11, // 84, 8
    0, 1, 2, 3, 8, 9, 10, 11, // 92, 8
    2, 3, 6, 7, 8, 9, 10, 11, // 100, 8
    4, 5, 6, 7, 8, 9, 10, 11, // 108, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12
    0, 4, 8, 12, // 128, 4
    0, 2, 3, 4, 6, 7, 8, 12, // 132, 8
    0, 1, 2, 4, 5, 8, 9, 12, // 140, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10
    3, 6, 7, 8, 9, 12, // 158, 6
    3, 5, 6, 7, 8, 9, 10, 12, // 164, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12
    0, 1, 2, 5, 6, 7, 11, 12, // 184, 8
    5, 8, 9, 10, 13, // 192, 5
    8, 12, 13, // 197, 3
    4, 8, 12, 13, // 200, 4
    2, 3, 6, 9, 12, 13, // 204, 6
    0, 1, 2, 3, 8, 9, 12, 13, // 210, 8
    0, 1, 4, 5, 8, 9, 12, 13, // 218, 8
    2, 3, 6, 7, 8, 9, 12, 13, // 226, 8
    2, 3, 5, 6, 9, 10, 12, 13, // 234, 8
    0, 3, 6, 7, 9, 10, 12, 13, // 242, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13
    2, 3, 4, 7, 8, 11, 12, 13, // 275, 8
    1, 2, 6, 7, 8, 11, 12, 13, // 283, 8
    2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10
    2, 3, 4, 5, 10, 11, 12, 13, // 301, 8
    0, 1, 6, 7, 10, 11, 12, 13, // 309, 8
    6, 9, 10, 11, 14, // 317, 5
    0, 2, 4, 6, 8, 10, 12, 14, // 322, 8
    1, 3, 5, 7, 8, 10, 12, 14, // 330, 8
    1, 3, 4, 6, 9, 11, 12, 14, // 338, 8
    0, 2, 5, 7, 9, 11, 12, 14, // 346, 8
    0, 3, 4, 5, 8, 9, 13, 14, // 354, 8
    2, 3, 4, 7, 8, 9, 13, 14, // 362, 8
    1, 2, 5, 6, 9, 10, 13, 14, // 370, 8
    0, 3, 4, 7, 9, 10, 13, 14, // 378, 8
    0, 3, 5, 6, 8, 11, 13, 14, // 386, 8
    1, 2, 4, 7, 8, 11, 13, 14, // 394, 8
    0, 1, 4, 7, 10, 11, 13, 14, // 402, 8
    0, 3, 6, 7, 10, 11, 13, 14, // 410, 8
    8, 12, 13, 14, // 418, 4
    1, 2, 3, 7, 8, 12, 13, 14, // 422, 8
    4, 8, 9, 12, 13, 14, // 430, 6
    0, 4, 5, 8, 9, 12, 13, 14, // 436, 8
    1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10
    2, 6, 8, 9, 10, 12, 13, 14, // 454, 8
    0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12
    0, 7, 9, 10, 11, 12, 13, 14, // 474, 8
    1, 2, 3, 4, 5, 6, 8, 15, // 482, 8
    3, 7, 11, 15, // 490, 4
    0, 1, 3, 4, 5, 7, 11, 15, // 494, 8
    0, 4, 5, 10, 11, 15, // 502, 6
    1, 2, 3, 6, 7, 10, 11, 15, // 508, 8
    0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10
    0, 4, 5, 6, 9, 10, 11, 15, // 526, 8
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12
    1, 2, 4, 5, 8, 9, 12, 15, // 546, 8
    2, 3, 5, 6, 8, 9, 12, 15, // 554, 8
    0, 3, 5, 6, 9, 10, 12, 15, // 562, 8
    1, 2, 4, 7, 9, 10, 12, 15, // 570, 8
    1, 2, 5, 6, 8, 11, 12, 15, // 578, 8
    0, 3, 4, 7, 8, 11, 12, 15, // 586, 8
    0, 1, 5, 6, 10, 11, 12, 15, // 594, 8
    1, 2, 6, 7, 10, 11, 12, 15, // 602, 8
    1, 3, 4, 6, 8, 10, 13, 15, // 610, 8
    0, 2, 5, 7, 8, 10, 13, 15, // 618, 8
    0, 2, 4, 6, 9, 11, 13, 15, // 626, 8
    1, 3, 5, 7, 9, 11, 13, 15, // 634, 8
    0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11
    2, 3, 4, 5, 8, 9, 14, 15, // 653, 8
    0, 1, 6, 7, 8, 9, 14, 15, // 661, 8
    0, 1, 5, 10, 14, 15, // 669, 6
    0, 3, 4, 5, 9, 10, 14, 15, // 675, 8
    0, 1, 5, 6, 9, 10, 14, 15, // 683, 8
    11, 14, 15, // 691, 3
    7, 11, 14, 15, // 694, 4
    1, 2, 4, 5, 8, 11, 14, 15, // 698, 8
    0, 1, 4, 7, 8, 11, 14, 15, // 706, 8
    0, 1, 4, 5, 10, 11, 14, 15, // 714, 8
    2, 3, 6, 7, 10, 11, 14, 15, // 722, 8
    4, 5, 6, 7, 10, 11, 14, 15, // 730, 8
    0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10
    0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13
    0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11
    3, 4, 8, 9, 10, 13, 14, 15, // 784, 8
    11, 13, 14, 15, // 792, 4
    0, 1, 2, 4, 11, 13, 14, 15, // 796, 8
    0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10
    7, 10, 11, 13, 14, 15, // 814, 6
    3, 6, 7, 10, 11, 13, 14, 15, // 820, 8
    1, 5, 9, 10, 11, 13, 14, 15, // 828, 8
    1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12
    12, 13, 14, 15, // 848, 4
    0, 1, 2, 3, 12, 13, 14, 15, // 852, 8
    0, 1, 4, 5, 12, 13, 14, 15, // 860, 8
    4, 5, 6, 7, 12, 13, 14, 15, // 868, 8
    4, 8, 9, 10, 12, 13, 14, 15, // 876, 8
    0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10
    0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12
    0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12
    0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11
    0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11
    7, 9, 10, 11, 12, 13, 14, 15, // 940, 8
    3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10
    2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12
    8, 9, 10, 11, 12, 13, 14, 15, // 970, 8
    0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12
    0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13
    3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12
    2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13
    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12
    0, 2, // 1040, 2
    1, 3, // 1042, 2
    0, 1, 4, 5, // 1044, 4
    0, 1, 2, 4, 5, // 1048, 5
    2, 3, 6, // 1053, 3
    0, 2, 4, 6, // 1056, 4
    1, 2, 5, 6, // 1060, 4
    0, 1, 2, 3, 5, 6, // 1064, 6
    0, 1, 2, 4, 5, 6, // 1070, 6
    0, 1, 2, 3, 4, 5, 6, // 1076, 7
    0, 3, 4, 7, // 1083, 4
    0, 1, 2, 3, 4, 7, // 1087, 6
    1, 3, 5, 7, // 1093, 4
    2, 3, 6, 7, // 1097, 4
    1, 2, 3, 6, 7, // 1101, 5
    1, 2, 3, 5, 6, 7, // 1106, 6
    0, 1, 2, 3, 5, 6, 7, // 1112, 7
    4, 5, 6, 7, // 1119, 4
    0, 8, // 1123, 2
    0, 1, 4, 5, 8, // 1125, 5
    0, 1, 8, 9, // 1130, 4
    4, 5, 8, 9, // 1134, 4
    0, 1, 4, 5, 8, 9, // 1138, 6
    2, 6, 8, 9, // 1144, 4
    6, 7, 8, 9, // 1148, 4
    0, 2, 4, 6, 8, 10, // 1152, 6
    1, 2, 5, 6, 9, 10, // 1158, 6
    0, 3, 4, 7, 9, 10, // 1164, 6
    0, 1, 2, 8, 9, 10, // 1170, 6
    4, 5, 6, 8, 9, 10, // 1176, 6
    3, 11, // 1182, 2
    2, 3, 6, 7, 11, // 1184, 5
    0, 3, 8, 11, // 1189, 4
    0, 3, 4, 7, 8, 11, // 1193, 6
    1, 3, 5, 7, 9, 11, // 1199, 6
    2, 3, 10, 11, // 1205, 4
    1, 5, 10, 11, // 1209, 4
    4, 5, 10, 11, // 1213, 4
    6, 7, 10, 11, // 1217, 4
    2, 3, 6, 7, 10, 11, // 1221, 6
    1, 2, 3, 9, 10, 11, // 1227, 6
    5, 6, 7, 9, 10, 11, // 1233, 6
    8, 9, 10, 11, // 1239, 4
    4, 12, // 1243, 2
    0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8
    8, 9, 12, // 1253, 3
    0, 4, 5, 8, 9, 12, // 1256, 6
    0, 1, 4, 5, 8, 9, 12, // 1262, 7
    2, 3, 5, 6, 8, 9, 12, // 1269, 7
    1, 5, 9, 13, // 1276, 4
    6, 7, 9, 13, // 1280, 4
    1, 4, 7, 10, 13, // 1284, 5
    1, 6, 8, 11, 13, // 1289, 5
    0, 1, 12, 13, // 1294, 4
    4, 5, 12, 13, // 1298, 4
    0, 1, 6, 7, 12, 13, // 1302, 6
    0, 1, 4, 8, 12, 13, // 1308, 6
    8, 9, 12, 13, // 1314, 4
    4, 8, 9, 12, 13, // 1318, 5
    4, 5, 8, 9, 12, 13, // 1323, 6
    0, 4, 5, 8, 9, 12, 13, // 1329, 7
    0, 1, 6, 10, 12, 13, // 1336, 6
    3, 6, 7, 9, 10, 12, 13, // 1342, 7
    0, 1, 10, 11, 12, 13, // 1349, 6
    2, 4, 7, 9, 14, // 1355, 5
    4, 5, 10, 14, // 1360, 4
    2, 6, 10, 14, // 1364, 4
    2, 5, 8, 11, 14, // 1368, 5
    0, 2, 12, 14, // 1373, 4
    8, 10, 12, 14, // 1377, 4
    4, 6, 8, 10, 12, 14, // 1381, 6
    13, 14, // 1387, 2
    9, 10, 13, 14, // 1389, 4
    5, 6, 9, 10, 13, 14, // 1393, 6
    0, 1, 2, 12, 13, 14, // 1399, 6
    4, 5, 6, 12, 13, 14, // 1405, 6
    8, 9, 12, 13, 14, // 1411, 5
    8, 9, 10, 12, 13, 14, // 1416, 6
    7, 15, // 1422, 2
    0, 5, 10, 15, // 1424, 4
    0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8
    10, 11, 15, // 1436, 3
    0, 1, 5, 6, 10, 11, 15, // 1439, 7
    3, 6, 7, 10, 11, 15, // 1446, 6
    12, 15, // 1452, 2
    0, 3, 12, 15, // 1454, 4
    4, 7, 12, 15, // 1458, 4
    0, 3, 6, 9, 12, 15, // 1462, 6
    0, 3, 5, 10, 12, 15, // 1468, 6
    8, 11, 12, 15, // 1474, 4
    5, 6, 8, 11, 12, 15, // 1478, 6
    4, 7, 8, 11, 12, 15, // 1484, 6
    1, 3, 13, 15, // 1490, 4
    9, 11, 13, 15, // 1494, 4
    5, 7, 9, 11, 13, 15, // 1498, 6
    2, 3, 14, 15, // 1504, 4
    2, 3, 4, 5, 14, 15, // 1508, 6
    6, 7, 14, 15, // 1514, 4
    2, 3, 5, 9, 14, 15, // 1518, 6
    2, 3, 8, 9, 14, 15, // 1524, 6
    10, 14, 15, // 1530, 3
    0, 4, 5, 9, 10, 14, 15, // 1533, 7
    2, 3, 7, 11, 14, 15, // 1540, 6
    10, 11, 14, 15, // 1546, 4
    7, 10, 11, 14, 15, // 1550, 5
    6, 7, 10, 11, 14, 15, // 1555, 6
    1, 2, 3, 13, 14, 15, // 1561, 6
    5, 6, 7, 13, 14, 15, // 1567, 6
    10, 11, 13, 14, 15, // 1573, 5
    9, 10, 11, 13, 14, 15, // 1578, 6
    0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8
    9, 10, 12, 13, 14, 15, // 1592, 6
    8, 11, 12, 13, 14, 15, // 1598, 6
    3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8
};
// Per-shape lookup into g_fragments.
// Each entry is { start, length }: the offset of the shape's first pixel index
// inside g_fragments and the number of consecutive pixel indices that belong
// to the shape. The table is indexed by the shape numbers stored in the
// g_shapeList* arrays.
static const int g_shapeRanges[][2] =
{
    { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
    { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
    { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
    { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
    { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
    { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
    { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
    { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
    { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
    { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
    { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
    { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
    { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
    { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
    { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
    { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
    { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
    { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
    { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
    { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
    { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
    { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
    { 1604, 8 },
};
// Single { 0, 16 } entry, identical to g_shapeRanges[0].
// NOTE(review): not referenced in the visible part of this file; presumably the
// one-subset counterpart of g_shapes2/g_shapes3 - confirm before relying on it.
static const int g_shapes1[][2] =
{
    { 0, 16 }
};
// 64 entries of two shape numbers each. Every value lies in 1..128, the range
// enumerated by g_shapeList2, so the values can be used as g_shapeRanges indices.
// NOTE(review): presumably indexed by two-subset partition number, with one
// shape per subset - confirm against the code that consumes this table.
static const int g_shapes2[64][2] =
{
    { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },
    { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },
    { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },
    { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },
    { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },
    { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },
    { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },
    { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },
};
// 64 entries of three shape numbers each.
// NOTE(review): presumably indexed by three-subset partition number, with one
// g_shapeRanges index per subset (the values appear to be drawn from
// g_shapeList3) - confirm against the code that consumes this table.
static const int g_shapes3[64][3] =
{
    { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },
    { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },
    { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },
    { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },
    { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },
    { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },
    { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },
    { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },
};
752 | |
// Shape numbers (g_shapeRanges indices) evaluated for modes with one subset.
static const int g_shapeList1[] =
{
    0,
};
757 | |
// Shape numbers (g_shapeRanges indices) evaluated for modes with two subsets:
// the contiguous run 1..128.
static const int g_shapeList2[] =
{
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
    34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
    45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
    67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
    78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
    89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
    100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122, 123, 124, 125, 126, 127, 128,
};
773 | |
// Combined one- and two-subset shape numbers: the contiguous run 0..128
// (g_shapeList1 followed by g_shapeList2). Its element count, g_numShapes12,
// sizes the per-shape RGBA endpoint storage in SinglePlaneTemporaries.
static const int g_shapeList12[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
    77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
    99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    121, 122, 123, 124, 125, 126, 127, 128,
};
789 | |
// Shape numbers (g_shapeRanges indices) evaluated for three-subset modes that
// have 64 partitions.
static const int g_shapeList3[] =
{
    1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,
    33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,
    110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,
    136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
    147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
    158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
    169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
    202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
    213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
    235, 236, 237, 238, 239, 240, 241, 242,
};
806 | |
// Shape numbers (g_shapeRanges indices) evaluated for three-subset modes that
// have only 16 partitions.
static const int g_shapeList3Short[] =
{
    1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,
    106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,
    171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,
    233, 237, 240,
};
814 | |
// Every shape number: the contiguous run 0..242. Its element count,
// g_numShapesAll, sizes the per-shape arrays in SinglePlaneTemporaries.
static const int g_shapeListAll[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
    77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
    99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
    132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
    143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
    154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
    187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
    198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
    209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
    220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
    231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
    242,
};
841 | |
842 | static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]); |
843 | static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]); |
844 | static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]); |
845 | static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]); |
846 | static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]); |
847 | static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]); |
848 | static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]); |
849 | } |
850 | |
851 | struct PackingVector |
852 | { |
853 | uint32_t m_vector[4]; |
854 | int m_offset; |
855 | |
856 | void Init() |
857 | { |
858 | for (int i = 0; i < 4; i++) |
859 | m_vector[i] = 0; |
860 | |
861 | m_offset = 0; |
862 | } |
863 | |
864 | void InitPacked(const uint32_t *v, int bits) |
865 | { |
866 | for (int b = 0; b < bits; b += 32) |
867 | m_vector[b / 32] = v[b / 32]; |
868 | |
869 | m_offset = bits; |
870 | } |
871 | |
872 | inline void Pack(ParallelMath::ScalarUInt16 value, int bits) |
873 | { |
874 | int vOffset = m_offset >> 5; |
875 | int bitOffset = m_offset & 0x1f; |
876 | |
877 | m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff); |
878 | |
879 | int overflowBits = bitOffset + bits - 32; |
880 | if (overflowBits > 0) |
881 | m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits)); |
882 | |
883 | m_offset += bits; |
884 | } |
885 | |
886 | inline void Flush(uint8_t* output) |
887 | { |
888 | assert(m_offset == 128); |
889 | |
890 | for (int v = 0; v < 4; v++) |
891 | { |
892 | uint32_t chunk = m_vector[v]; |
893 | for (int b = 0; b < 4; b++) |
894 | output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff); |
895 | } |
896 | } |
897 | }; |
898 | |
899 | |
900 | struct UnpackingVector |
901 | { |
902 | uint32_t m_vector[4]; |
903 | |
904 | void Init(const uint8_t *bytes) |
905 | { |
906 | for (int i = 0; i < 4; i++) |
907 | m_vector[i] = 0; |
908 | |
909 | for (int b = 0; b < 16; b++) |
910 | m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8)); |
911 | } |
912 | |
913 | inline void UnpackStart(uint32_t *v, int bits) |
914 | { |
915 | for (int b = 0; b < bits; b += 32) |
916 | v[b / 32] = m_vector[b / 32]; |
917 | |
918 | int entriesShifted = bits / 32; |
919 | int carry = bits % 32; |
920 | |
921 | for (int i = entriesShifted; i < 4; i++) |
922 | m_vector[i - entriesShifted] = m_vector[i]; |
923 | |
924 | int entriesRemaining = 4 - entriesShifted; |
925 | if (carry) |
926 | { |
927 | uint32_t bitMask = (1 << carry) - 1; |
928 | for (int i = 0; i < entriesRemaining; i++) |
929 | { |
930 | m_vector[i] >>= carry; |
931 | if (i != entriesRemaining - 1) |
932 | m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry); |
933 | } |
934 | } |
935 | } |
936 | |
937 | inline ParallelMath::ScalarUInt16 Unpack(int bits) |
938 | { |
939 | uint32_t bitMask = (1 << bits) - 1; |
940 | |
941 | ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask); |
942 | |
943 | for (int i = 0; i < 4; i++) |
944 | { |
945 | m_vector[i] >>= bits; |
946 | if (i != 3) |
947 | m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits); |
948 | } |
949 | |
950 | return result; |
951 | } |
952 | }; |
953 | |
954 | ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned) |
955 | { |
956 | if (isSigned) |
957 | { |
958 | ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f)); |
959 | return (v * 32.0f + offset) / 31.0f; |
960 | } |
961 | else |
962 | return (v * 64.0f + 30.0f) / 31.0f; |
963 | } |
964 | |
965 | ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v) |
966 | { |
967 | #ifdef CVTT_ENABLE_ASSERTS |
968 | for (int i = 0; i < ParallelMath::ParallelSize; i++) |
969 | assert(ParallelMath::Extract(v, i) != -32768) |
970 | #endif |
971 | |
972 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0)); |
973 | ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v)); |
974 | |
975 | ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31)); |
976 | ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5); |
977 | ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted); |
978 | ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768)); |
979 | |
980 | return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits; |
981 | } |
982 | |
// Converts an unsigned, scaled HDR value: computes (v * 31) >> 6 with a
// widened multiply and narrows the result to UInt15.
ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)
{
    return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6));
}
987 | |
988 | void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned) |
989 | { |
990 | for (int epi = 0; epi < 2; epi++) |
991 | { |
992 | for (int ch = 0; ch < 3; ch++) |
993 | { |
994 | if (isSigned) |
995 | outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch]))); |
996 | else |
997 | outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch]))); |
998 | } |
999 | } |
1000 | } |
1001 | |
// Scratch storage used while evaluating single-plane BC7 modes.
// Arrays with a g_numShapes* bound are indexed by shape number (g_shapeRanges index).
struct SinglePlaneTemporaries
{
    // Initial endpoint estimates per shape, before tweaking and quantization.
    UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];
    UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];

    // One entry per g_fragments element.
    // NOTE(review): presumably the best index found for each pixel of each shape - confirm.
    ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];
    // Best endpoints found per shape: [shape][endpoint 0/1][channel].
    ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];
    // Lowest error found per shape; reset to FLT_MAX before each mode is evaluated.
    ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];
};
1011 | } |
1012 | } |
1013 | |
1014 | void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]) |
1015 | { |
1016 | ParallelMath::RoundTowardNearestForScope roundingMode; |
1017 | |
1018 | float tf[2]; |
1019 | Util::ComputeTweakFactors(tweak, range, tf); |
1020 | |
1021 | MFloat base = ParallelMath::ToFloat(original[0]); |
1022 | MFloat offs = ParallelMath::ToFloat(original[1]) - base; |
1023 | |
1024 | result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode); |
1025 | result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode); |
1026 | } |
1027 | |
1028 | void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels) |
1029 | { |
1030 | for (int ch = 0; ch < channels; ch++) |
1031 | color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8); |
1032 | } |
1033 | |
1034 | void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels) |
1035 | { |
1036 | int16_t addend; |
1037 | if (p) |
1038 | addend = ((1 << (8 - bits)) - 1); |
1039 | else |
1040 | addend = 255; |
1041 | |
1042 | for (int ch = 0; ch < channels; ch++) |
1043 | { |
1044 | MUInt16 ch16 = ParallelMath::LosslessCast<MUInt16>::Cast(color[ch]); |
1045 | ch16 = ParallelMath::RightShift((ch16 << (bits + 1)) - ch16 + addend, 9); |
1046 | ch16 = (ch16 << 1) | ParallelMath::MakeUInt16(p); |
1047 | color[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ch16); |
1048 | } |
1049 | } |
1050 | |
1051 | void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels) |
1052 | { |
1053 | for (int ch = 0; ch < channels; ch++) |
1054 | { |
1055 | MUInt15 clr = color[ch]; |
1056 | clr = clr << (8 - bits); |
1057 | color[ch] = clr | ParallelMath::RightShift(clr, bits); |
1058 | } |
1059 | } |
1060 | |
1061 | void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]) |
1062 | { |
1063 | for (int j = 0; j < 2; j++) |
1064 | { |
1065 | QuantizeP(ep[j], 4, p[j], 3); |
1066 | Unquantize(ep[j], 5, 3); |
1067 | ep[j][3] = ParallelMath::MakeUInt15(255); |
1068 | } |
1069 | } |
1070 | |
1071 | void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p) |
1072 | { |
1073 | for (int j = 0; j < 2; j++) |
1074 | { |
1075 | QuantizeP(ep[j], 6, p, 3); |
1076 | Unquantize(ep[j], 7, 3); |
1077 | ep[j][3] = ParallelMath::MakeUInt15(255); |
1078 | } |
1079 | } |
1080 | |
1081 | void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4]) |
1082 | { |
1083 | for (int j = 0; j < 2; j++) |
1084 | { |
1085 | Quantize(ep[j], 5, 3); |
1086 | Unquantize(ep[j], 5, 3); |
1087 | ep[j][3] = ParallelMath::MakeUInt15(255); |
1088 | } |
1089 | } |
1090 | |
1091 | void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]) |
1092 | { |
1093 | for (int j = 0; j < 2; j++) |
1094 | { |
1095 | QuantizeP(ep[j], 7, p[j], 3); |
1096 | ep[j][3] = ParallelMath::MakeUInt15(255); |
1097 | } |
1098 | } |
1099 | |
1100 | void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]) |
1101 | { |
1102 | for (int j = 0; j < 2; j++) |
1103 | { |
1104 | Quantize(epRGB[j], 5, 3); |
1105 | Unquantize(epRGB[j], 5, 3); |
1106 | |
1107 | Quantize(epA + j, 6, 1); |
1108 | Unquantize(epA + j, 6, 1); |
1109 | } |
1110 | } |
1111 | |
1112 | void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]) |
1113 | { |
1114 | for (int j = 0; j < 2; j++) |
1115 | { |
1116 | Quantize(epRGB[j], 7, 3); |
1117 | Unquantize(epRGB[j], 7, 3); |
1118 | } |
1119 | |
1120 | // Alpha is full precision |
1121 | (void)epA; |
1122 | } |
1123 | |
1124 | void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]) |
1125 | { |
1126 | for (int j = 0; j < 2; j++) |
1127 | QuantizeP(ep[j], 7, p[j], 4); |
1128 | } |
1129 | |
1130 | void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]) |
1131 | { |
1132 | for (int j = 0; j < 2; j++) |
1133 | { |
1134 | QuantizeP(ep[j], 5, p[j], 4); |
1135 | Unquantize(ep[j], 6, 4); |
1136 | } |
1137 | } |
1138 | |
1139 | void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn) |
1140 | { |
1141 | MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX); |
1142 | |
1143 | MUInt15 intAverage[4]; |
1144 | for (int ch = 0; ch < 4; ch++) |
1145 | intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn); |
1146 | |
1147 | MUInt15 eps[2][4]; |
1148 | MUInt15 reconstructed[4]; |
1149 | MUInt15 index = ParallelMath::MakeUInt15(0); |
1150 | |
1151 | for (int epi = 0; epi < 2; epi++) |
1152 | { |
1153 | for (int ch = 0; ch < 3; ch++) |
1154 | eps[epi][ch] = ParallelMath::MakeUInt15(0); |
1155 | eps[epi][3] = ParallelMath::MakeUInt15(255); |
1156 | } |
1157 | |
1158 | for (int ch = 0; ch < 3; ch++) |
1159 | reconstructed[ch] = ParallelMath::MakeUInt15(0); |
1160 | reconstructed[3] = ParallelMath::MakeUInt15(255); |
1161 | |
1162 | // Depending on the target index and parity bits, there are multiple valid solid colors. |
1163 | // We want to find the one closest to the actual average. |
1164 | MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX); |
1165 | for (int t = 0; t < numTables; t++) |
1166 | { |
1167 | const cvtt::Tables::BC7SC::Table& table = *(tables[t]); |
1168 | |
1169 | ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits]; |
1170 | |
1171 | MUInt15 candidateReconstructed[4]; |
1172 | MUInt15 candidateEPs[2][4]; |
1173 | |
1174 | for (int i = 0; i < ParallelMath::ParallelSize; i++) |
1175 | { |
1176 | for (int ch = 0; ch < numRealChannels; ch++) |
1177 | { |
1178 | ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i); |
1179 | assert(avgValue >= 0 && avgValue <= 255); |
1180 | |
1181 | const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue]; |
1182 | |
1183 | ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min); |
1184 | ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max); |
1185 | ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor); |
1186 | } |
1187 | } |
1188 | |
1189 | MFloat avgError = ParallelMath::MakeFloatZero(); |
1190 | for (int ch = 0; ch < numRealChannels; ch++) |
1191 | { |
1192 | MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch]; |
1193 | avgError = avgError + delta * delta * channelWeightsSq[ch]; |
1194 | } |
1195 | |
1196 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError)); |
1197 | better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations |
1198 | |
1199 | if (ParallelMath::AnySet(better)) |
1200 | { |
1201 | ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError); |
1202 | |
1203 | MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index); |
1204 | |
1205 | ParallelMath::ConditionalSet(index, better, candidateIndex); |
1206 | |
1207 | for (int ch = 0; ch < numRealChannels; ch++) |
1208 | ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]); |
1209 | |
1210 | for (int epi = 0; epi < 2; epi++) |
1211 | for (int ch = 0; ch < numRealChannels; ch++) |
1212 | ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]); |
1213 | } |
1214 | } |
1215 | |
1216 | AggregatedError<4> aggError; |
1217 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1218 | { |
1219 | int px = fragmentStart[pxi]; |
1220 | |
1221 | BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); |
1222 | } |
1223 | |
1224 | MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError; |
1225 | |
1226 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError)); |
1227 | if (ParallelMath::AnySet(better)) |
1228 | { |
1229 | shapeBestError = ParallelMath::Min(shapeBestError, error); |
1230 | for (int epi = 0; epi < 2; epi++) |
1231 | { |
1232 | for (int ch = 0; ch < numRealChannels; ch++) |
1233 | ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]); |
1234 | } |
1235 | |
1236 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1237 | ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index); |
1238 | } |
1239 | } |
1240 | |
1241 | void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) |
1242 | { |
1243 | if (numRefineRounds < 1) |
1244 | numRefineRounds = 1; |
1245 | |
1246 | float channelWeightsSq[4]; |
1247 | |
1248 | for (int ch = 0; ch < 4; ch++) |
1249 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
1250 | |
1251 | SinglePlaneTemporaries temps; |
1252 | |
1253 | MUInt15 maxAlpha = ParallelMath::MakeUInt15(0); |
1254 | MUInt15 minAlpha = ParallelMath::MakeUInt15(255); |
1255 | ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true); |
1256 | for (int px = 0; px < 16; px++) |
1257 | { |
1258 | MUInt15 a = pixels[px][3]; |
1259 | maxAlpha = ParallelMath::Max(maxAlpha, a); |
1260 | minAlpha = ParallelMath::Min(minAlpha, a); |
1261 | |
1262 | isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255)))); |
1263 | } |
1264 | |
1265 | ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255)); |
1266 | ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha); |
1267 | |
1268 | bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha); |
1269 | |
1270 | // Try RGB modes if any block has a min alpha 251 or higher |
1271 | bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha)); |
1272 | |
1273 | // Try mode 7 if any block has alpha. |
1274 | // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints |
1275 | // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific |
1276 | // situations, and only by at most 1 unit of error per pixel. |
1277 | bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0); |
1278 | |
1279 | MFloat preWeightedPixels[16][4]; |
1280 | |
1281 | BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); |
1282 | |
1283 | // Get initial RGB endpoints |
1284 | if (allowRGBModes) |
1285 | { |
1286 | const uint8_t *shapeList = encodingPlan.rgbShapeList; |
1287 | int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate; |
1288 | |
1289 | for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) |
1290 | { |
1291 | int shape = shapeList[shapeIter]; |
1292 | |
1293 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
1294 | int shapeSize = BC7Data::g_shapeRanges[shape][1]; |
1295 | |
1296 | EndpointSelector<3, 8> epSelector; |
1297 | |
1298 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
1299 | { |
1300 | for (int spx = 0; spx < shapeSize; spx++) |
1301 | { |
1302 | int px = BC7Data::g_fragments[shapeStart + spx]; |
1303 | epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); |
1304 | } |
1305 | epSelector.FinishPass(epPass); |
1306 | } |
1307 | temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights); |
1308 | } |
1309 | } |
1310 | |
1311 | // Get initial RGBA endpoints |
1312 | { |
1313 | const uint8_t *shapeList = encodingPlan.rgbaShapeList; |
1314 | int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate; |
1315 | |
1316 | for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) |
1317 | { |
1318 | int shape = shapeList[shapeIter]; |
1319 | |
1320 | if (anyBlockHasAlpha || !allowRGBModes) |
1321 | { |
1322 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
1323 | int shapeSize = BC7Data::g_shapeRanges[shape][1]; |
1324 | |
1325 | EndpointSelector<4, 8> epSelector; |
1326 | |
1327 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
1328 | { |
1329 | for (int spx = 0; spx < shapeSize; spx++) |
1330 | { |
1331 | int px = BC7Data::g_fragments[shapeStart + spx]; |
1332 | epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); |
1333 | } |
1334 | epSelector.FinishPass(epPass); |
1335 | } |
1336 | temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights); |
1337 | } |
1338 | else |
1339 | { |
1340 | temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255); |
1341 | } |
1342 | } |
1343 | } |
1344 | |
1345 | for (uint16_t mode = 0; mode <= 7; mode++) |
1346 | { |
1347 | if (mode == 4 || mode == 5) |
1348 | continue; |
1349 | |
1350 | if (mode < 4 && !allowRGBModes) |
1351 | continue; |
1352 | |
1353 | if (mode == 7 && !allowMode7) |
1354 | continue; |
1355 | |
1356 | uint64_t partitionEnabledBits = 0; |
1357 | switch (mode) |
1358 | { |
1359 | case 0: |
1360 | partitionEnabledBits = encodingPlan.mode0PartitionEnabled; |
1361 | break; |
1362 | case 1: |
1363 | partitionEnabledBits = encodingPlan.mode1PartitionEnabled; |
1364 | break; |
1365 | case 2: |
1366 | partitionEnabledBits = encodingPlan.mode2PartitionEnabled; |
1367 | break; |
1368 | case 3: |
1369 | partitionEnabledBits = encodingPlan.mode3PartitionEnabled; |
1370 | break; |
1371 | case 6: |
1372 | partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; |
1373 | break; |
1374 | case 7: |
1375 | if (anyBlockHasAlpha) |
1376 | partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; |
1377 | else |
1378 | partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled; |
1379 | break; |
1380 | default: |
1381 | break; |
1382 | } |
1383 | |
1384 | bool isRGB = (mode < 4); |
1385 | |
1386 | unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits; |
1387 | int numSubsets = BC7Data::g_modes[mode].m_numSubsets; |
1388 | int indexPrec = BC7Data::g_modes[mode].m_indexBits; |
1389 | |
1390 | int parityBitMax = 1; |
1391 | if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
1392 | parityBitMax = 4; |
1393 | else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset) |
1394 | parityBitMax = 2; |
1395 | |
1396 | int numRealChannels = isRGB ? 3 : 4; |
1397 | |
1398 | int numShapes; |
1399 | const int *shapeList; |
1400 | |
1401 | if (numSubsets == 1) |
1402 | { |
1403 | numShapes = BC7Data::g_numShapes1; |
1404 | shapeList = BC7Data::g_shapeList1; |
1405 | } |
1406 | else if (numSubsets == 2) |
1407 | { |
1408 | numShapes = BC7Data::g_numShapes2; |
1409 | shapeList = BC7Data::g_shapeList2; |
1410 | } |
1411 | else |
1412 | { |
1413 | assert(numSubsets == 3); |
1414 | if (numPartitions == 16) |
1415 | { |
1416 | numShapes = BC7Data::g_numShapes3Short; |
1417 | shapeList = BC7Data::g_shapeList3Short; |
1418 | } |
1419 | else |
1420 | { |
1421 | assert(numPartitions == 64); |
1422 | numShapes = BC7Data::g_numShapes3; |
1423 | shapeList = BC7Data::g_shapeList3; |
1424 | } |
1425 | } |
1426 | |
1427 | for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++) |
1428 | temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX); |
1429 | |
1430 | for (int shapeIter = 0; shapeIter < numShapes; shapeIter++) |
1431 | { |
1432 | int shape = shapeList[shapeIter]; |
1433 | |
1434 | int numTweakRounds = 0; |
1435 | if (isRGB) |
1436 | numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape]; |
1437 | else |
1438 | numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape]; |
1439 | |
1440 | if (numTweakRounds == 0) |
1441 | continue; |
1442 | |
1443 | if (numTweakRounds > MaxTweakRounds) |
1444 | numTweakRounds = MaxTweakRounds; |
1445 | |
1446 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
1447 | int shapeLength = BC7Data::g_shapeRanges[shape][1]; |
1448 | |
1449 | AggregatedError<1> alphaAggError; |
1450 | if (isRGB && anyBlockHasAlpha) |
1451 | { |
1452 | MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) }; |
1453 | |
1454 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1455 | { |
1456 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
1457 | MUInt15 original[1] = { pixels[px][3] }; |
1458 | BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError); |
1459 | } |
1460 | } |
1461 | |
1462 | float alphaWeightsSq[1] = { channelWeightsSq[3] }; |
1463 | MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq); |
1464 | |
1465 | MUInt15 tweakBaseEP[MaxTweakRounds][2][4]; |
1466 | |
1467 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
1468 | { |
1469 | if (isRGB) |
1470 | { |
1471 | temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); |
1472 | tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255); |
1473 | } |
1474 | else |
1475 | { |
1476 | temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); |
1477 | } |
1478 | } |
1479 | |
1480 | ParallelMath::Int16CompFlag punchThroughInvalid[4]; |
1481 | for (int pIter = 0; pIter < parityBitMax; pIter++) |
1482 | { |
1483 | punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false); |
1484 | |
1485 | if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7)) |
1486 | { |
1487 | // Modes 6 and 7 have parity bits that affect alpha |
1488 | if (pIter == 0) |
1489 | punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha); |
1490 | else if (pIter == parityBitMax - 1) |
1491 | punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha); |
1492 | else |
1493 | punchThroughInvalid[pIter] = isPunchThrough; |
1494 | } |
1495 | } |
1496 | |
1497 | for (int pIter = 0; pIter < parityBitMax; pIter++) |
1498 | { |
1499 | if (ParallelMath::AllSet(punchThroughInvalid[pIter])) |
1500 | continue; |
1501 | |
1502 | bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]); |
1503 | |
1504 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
1505 | { |
1506 | uint16_t p[2]; |
1507 | p[0] = (pIter & 1); |
1508 | p[1] = ((pIter >> 1) & 1); |
1509 | |
1510 | MUInt15 ep[2][4]; |
1511 | |
1512 | for (int epi = 0; epi < 2; epi++) |
1513 | for (int ch = 0; ch < 4; ch++) |
1514 | ep[epi][ch] = tweakBaseEP[tweak][epi][ch]; |
1515 | |
1516 | for (int refine = 0; refine < numRefineRounds; refine++) |
1517 | { |
1518 | switch (mode) |
1519 | { |
1520 | case 0: |
1521 | CompressEndpoints0(ep, p); |
1522 | break; |
1523 | case 1: |
1524 | CompressEndpoints1(ep, p[0]); |
1525 | break; |
1526 | case 2: |
1527 | CompressEndpoints2(ep); |
1528 | break; |
1529 | case 3: |
1530 | CompressEndpoints3(ep, p); |
1531 | break; |
1532 | case 6: |
1533 | CompressEndpoints6(ep, p); |
1534 | break; |
1535 | case 7: |
1536 | CompressEndpoints7(ep, p); |
1537 | break; |
1538 | default: |
1539 | assert(false); |
1540 | break; |
1541 | }; |
1542 | |
1543 | MFloat shapeError = ParallelMath::MakeFloatZero(); |
1544 | |
1545 | IndexSelector<4> indexSelector; |
1546 | indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec); |
1547 | |
1548 | EndpointRefiner<4> epRefiner; |
1549 | epRefiner.Init(1 << indexPrec, channelWeights); |
1550 | |
1551 | MUInt15 indexes[16]; |
1552 | |
1553 | AggregatedError<4> aggError; |
1554 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1555 | { |
1556 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
1557 | |
1558 | MUInt15 index; |
1559 | MUInt15 reconstructed[4]; |
1560 | |
1561 | index = indexSelector.SelectIndexLDR(floatPixels[px], rtn); |
1562 | indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels); |
1563 | |
1564 | if (flags & cvtt::Flags::BC7_FastIndexing) |
1565 | BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); |
1566 | else |
1567 | { |
1568 | MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); |
1569 | |
1570 | MUInt15 altIndexes[2]; |
1571 | altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
1572 | altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1))); |
1573 | |
1574 | for (int ii = 0; ii < 2; ii++) |
1575 | { |
1576 | indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels); |
1577 | |
1578 | MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); |
1579 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error)); |
1580 | error = ParallelMath::Min(error, altError); |
1581 | ParallelMath::ConditionalSet(index, better, altIndexes[ii]); |
1582 | } |
1583 | |
1584 | shapeError = shapeError + error; |
1585 | } |
1586 | |
1587 | if (refine != numRefineRounds - 1) |
1588 | epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels); |
1589 | |
1590 | indexes[pxi] = index; |
1591 | } |
1592 | |
1593 | if (flags & cvtt::Flags::BC7_FastIndexing) |
1594 | shapeError = aggError.Finalize(flags, channelWeightsSq); |
1595 | |
1596 | if (isRGB) |
1597 | shapeError = shapeError + staticAlphaError; |
1598 | |
1599 | ParallelMath::FloatCompFlag shapeErrorBetter; |
1600 | ParallelMath::Int16CompFlag shapeErrorBetter16; |
1601 | |
1602 | shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]); |
1603 | shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter); |
1604 | |
1605 | if (ParallelMath::AnySet(shapeErrorBetter16)) |
1606 | { |
1607 | bool punchThroughOK = true; |
1608 | if (needPunchThroughCheck) |
1609 | { |
1610 | shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16); |
1611 | shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16); |
1612 | |
1613 | if (!ParallelMath::AnySet(shapeErrorBetter16)) |
1614 | punchThroughOK = false; |
1615 | } |
1616 | |
1617 | if (punchThroughOK) |
1618 | { |
1619 | ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError); |
1620 | for (int epi = 0; epi < 2; epi++) |
1621 | for (int ch = 0; ch < numRealChannels; ch++) |
1622 | ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]); |
1623 | |
1624 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1625 | ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]); |
1626 | } |
1627 | } |
1628 | |
1629 | if (refine != numRefineRounds - 1) |
1630 | epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn); |
1631 | } // refine |
1632 | } // tweak |
1633 | } // p |
1634 | |
1635 | if (flags & cvtt::Flags::BC7_TrySingleColor) |
1636 | { |
1637 | MUInt15 total[4]; |
1638 | for (int ch = 0; ch < 4; ch++) |
1639 | total[ch] = ParallelMath::MakeUInt15(0); |
1640 | |
1641 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1642 | { |
1643 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
1644 | for (int ch = 0; ch < 4; ch++) |
1645 | total[ch] = total[ch] + pixels[pxi][ch]; |
1646 | } |
1647 | |
1648 | MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength)); |
1649 | MFloat average[4]; |
1650 | for (int ch = 0; ch < 4; ch++) |
1651 | average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength; |
1652 | |
1653 | const uint8_t *fragment = BC7Data::g_fragments + shapeStart; |
1654 | MFloat &shapeBestError = temps.shapeBestError[shape]; |
1655 | MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape]; |
1656 | MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart; |
1657 | |
1658 | const cvtt::Tables::BC7SC::Table **scTables = NULL; |
1659 | int numSCTables = 0; |
1660 | |
1661 | const cvtt::Tables::BC7SC::Table *tables0[] = |
1662 | { |
1663 | &cvtt::Tables::BC7SC::g_mode0_p00_i1, |
1664 | &cvtt::Tables::BC7SC::g_mode0_p00_i2, |
1665 | &cvtt::Tables::BC7SC::g_mode0_p00_i3, |
1666 | &cvtt::Tables::BC7SC::g_mode0_p01_i1, |
1667 | &cvtt::Tables::BC7SC::g_mode0_p01_i2, |
1668 | &cvtt::Tables::BC7SC::g_mode0_p01_i3, |
1669 | &cvtt::Tables::BC7SC::g_mode0_p10_i1, |
1670 | &cvtt::Tables::BC7SC::g_mode0_p10_i2, |
1671 | &cvtt::Tables::BC7SC::g_mode0_p10_i3, |
1672 | &cvtt::Tables::BC7SC::g_mode0_p11_i1, |
1673 | &cvtt::Tables::BC7SC::g_mode0_p11_i2, |
1674 | &cvtt::Tables::BC7SC::g_mode0_p11_i3, |
1675 | }; |
1676 | |
1677 | const cvtt::Tables::BC7SC::Table *tables1[] = |
1678 | { |
1679 | &cvtt::Tables::BC7SC::g_mode1_p0_i1, |
1680 | &cvtt::Tables::BC7SC::g_mode1_p0_i2, |
1681 | &cvtt::Tables::BC7SC::g_mode1_p0_i3, |
1682 | &cvtt::Tables::BC7SC::g_mode1_p1_i1, |
1683 | &cvtt::Tables::BC7SC::g_mode1_p1_i2, |
1684 | &cvtt::Tables::BC7SC::g_mode1_p1_i3, |
1685 | }; |
1686 | |
1687 | const cvtt::Tables::BC7SC::Table *tables2[] = |
1688 | { |
1689 | &cvtt::Tables::BC7SC::g_mode2, |
1690 | }; |
1691 | |
1692 | const cvtt::Tables::BC7SC::Table *tables3[] = |
1693 | { |
1694 | &cvtt::Tables::BC7SC::g_mode3_p0, |
1695 | &cvtt::Tables::BC7SC::g_mode3_p1, |
1696 | }; |
1697 | |
1698 | const cvtt::Tables::BC7SC::Table *tables6[] = |
1699 | { |
1700 | &cvtt::Tables::BC7SC::g_mode6_p0_i1, |
1701 | &cvtt::Tables::BC7SC::g_mode6_p0_i2, |
1702 | &cvtt::Tables::BC7SC::g_mode6_p0_i3, |
1703 | &cvtt::Tables::BC7SC::g_mode6_p0_i4, |
1704 | &cvtt::Tables::BC7SC::g_mode6_p0_i5, |
1705 | &cvtt::Tables::BC7SC::g_mode6_p0_i6, |
1706 | &cvtt::Tables::BC7SC::g_mode6_p0_i7, |
1707 | &cvtt::Tables::BC7SC::g_mode6_p1_i1, |
1708 | &cvtt::Tables::BC7SC::g_mode6_p1_i2, |
1709 | &cvtt::Tables::BC7SC::g_mode6_p1_i3, |
1710 | &cvtt::Tables::BC7SC::g_mode6_p1_i4, |
1711 | &cvtt::Tables::BC7SC::g_mode6_p1_i5, |
1712 | &cvtt::Tables::BC7SC::g_mode6_p1_i6, |
1713 | &cvtt::Tables::BC7SC::g_mode6_p1_i7, |
1714 | }; |
1715 | |
1716 | const cvtt::Tables::BC7SC::Table *tables7[] = |
1717 | { |
1718 | &cvtt::Tables::BC7SC::g_mode7_p00, |
1719 | &cvtt::Tables::BC7SC::g_mode7_p01, |
1720 | &cvtt::Tables::BC7SC::g_mode7_p10, |
1721 | &cvtt::Tables::BC7SC::g_mode7_p11, |
1722 | }; |
1723 | |
1724 | switch (mode) |
1725 | { |
1726 | case 0: |
1727 | { |
1728 | scTables = tables0; |
1729 | numSCTables = sizeof(tables0) / sizeof(tables0[0]); |
1730 | } |
1731 | break; |
1732 | case 1: |
1733 | { |
1734 | scTables = tables1; |
1735 | numSCTables = sizeof(tables1) / sizeof(tables1[0]); |
1736 | } |
1737 | break; |
1738 | case 2: |
1739 | { |
1740 | |
1741 | scTables = tables2; |
1742 | numSCTables = sizeof(tables2) / sizeof(tables2[0]); |
1743 | } |
1744 | break; |
1745 | case 3: |
1746 | { |
1747 | scTables = tables3; |
1748 | numSCTables = sizeof(tables3) / sizeof(tables3[0]); |
1749 | } |
1750 | break; |
1751 | case 6: |
1752 | { |
1753 | scTables = tables6; |
1754 | numSCTables = sizeof(tables6) / sizeof(tables6[0]); |
1755 | } |
1756 | break; |
1757 | case 7: |
1758 | { |
1759 | scTables = tables7; |
1760 | numSCTables = sizeof(tables7) / sizeof(tables7[0]); |
1761 | } |
1762 | break; |
1763 | default: |
1764 | assert(false); |
1765 | break; |
1766 | } |
1767 | |
1768 | TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn); |
1769 | } |
1770 | } // shapeIter |
1771 | |
1772 | uint64_t partitionsEnabledBits = 0xffffffffffffffffULL; |
1773 | |
1774 | switch (mode) |
1775 | { |
1776 | case 0: |
1777 | partitionsEnabledBits = encodingPlan.mode0PartitionEnabled; |
1778 | break; |
1779 | case 1: |
1780 | partitionsEnabledBits = encodingPlan.mode1PartitionEnabled; |
1781 | break; |
1782 | case 2: |
1783 | partitionsEnabledBits = encodingPlan.mode2PartitionEnabled; |
1784 | break; |
1785 | case 3: |
1786 | partitionsEnabledBits = encodingPlan.mode3PartitionEnabled; |
1787 | break; |
1788 | case 6: |
1789 | partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; |
1790 | break; |
1791 | case 7: |
1792 | if (anyBlockHasAlpha) |
1793 | partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; |
1794 | else |
1795 | partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled; |
1796 | break; |
1797 | default: |
1798 | break; |
1799 | }; |
1800 | |
1801 | for (uint16_t partition = 0; partition < numPartitions; partition++) |
1802 | { |
1803 | if (((partitionsEnabledBits >> partition) & 1) == 0) |
1804 | continue; |
1805 | |
1806 | const int *partitionShapes; |
1807 | if (numSubsets == 1) |
1808 | partitionShapes = BC7Data::g_shapes1[partition]; |
1809 | else if (numSubsets == 2) |
1810 | partitionShapes = BC7Data::g_shapes2[partition]; |
1811 | else |
1812 | { |
1813 | assert(numSubsets == 3); |
1814 | partitionShapes = BC7Data::g_shapes3[partition]; |
1815 | } |
1816 | |
1817 | MFloat totalError = ParallelMath::MakeFloatZero(); |
1818 | for (int subset = 0; subset < numSubsets; subset++) |
1819 | totalError = totalError + temps.shapeBestError[partitionShapes[subset]]; |
1820 | |
1821 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error); |
1822 | ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); |
1823 | |
1824 | if (mode == 7 && anyBlockHasAlpha) |
1825 | { |
1826 | // Some lanes could be better, but we filter them out to ensure consistency with scalar |
1827 | bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0); |
1828 | |
1829 | if (!isRGBAllowedForThisPartition) |
1830 | { |
1831 | errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha); |
1832 | errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16); |
1833 | } |
1834 | } |
1835 | |
1836 | if (ParallelMath::AnySet(errorBetter16)) |
1837 | { |
1838 | for (int subset = 0; subset < numSubsets; subset++) |
1839 | { |
1840 | int shape = partitionShapes[subset]; |
1841 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
1842 | int shapeLength = BC7Data::g_shapeRanges[shape][1]; |
1843 | |
1844 | for (int epi = 0; epi < 2; epi++) |
1845 | for (int ch = 0; ch < 4; ch++) |
1846 | ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]); |
1847 | |
1848 | for (int pxi = 0; pxi < shapeLength; pxi++) |
1849 | { |
1850 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
1851 | ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]); |
1852 | } |
1853 | } |
1854 | |
1855 | ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError); |
1856 | ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); |
1857 | ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition)); |
1858 | } |
1859 | } |
1860 | } |
1861 | } |
1862 | |
1863 | void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) |
1864 | { |
1865 | // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that. |
1866 | // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to |
1867 | // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases: |
1868 | // - Separate alpha channel, then weighted RGB |
1869 | // - Alpha+2 other channels, then the independent channel |
1870 | if (numRefineRounds < 1) |
1871 | numRefineRounds = 1; |
1872 | |
1873 | float channelWeightsSq[4]; |
1874 | for (int ch = 0; ch < 4; ch++) |
1875 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
1876 | |
1877 | for (uint16_t mode = 4; mode <= 5; mode++) |
1878 | { |
1879 | int numSP[2] = { 0, 0 }; |
1880 | |
1881 | for (uint16_t rotation = 0; rotation < 4; rotation++) |
1882 | { |
1883 | if (mode == 4) |
1884 | { |
1885 | numSP[0] = encodingPlan.mode4SP[rotation][0]; |
1886 | numSP[1] = encodingPlan.mode4SP[rotation][1]; |
1887 | } |
1888 | else |
1889 | numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation]; |
1890 | |
1891 | if (numSP[0] == 0 && numSP[1] == 0) |
1892 | continue; |
1893 | |
1894 | int alphaChannel = (rotation + 3) & 3; |
1895 | int redChannel = (rotation == 1) ? 3 : 0; |
1896 | int greenChannel = (rotation == 2) ? 3 : 1; |
1897 | int blueChannel = (rotation == 3) ? 3 : 2; |
1898 | |
1899 | MUInt15 rotatedRGB[16][3]; |
1900 | MFloat floatRotatedRGB[16][3]; |
1901 | |
1902 | for (int px = 0; px < 16; px++) |
1903 | { |
1904 | rotatedRGB[px][0] = pixels[px][redChannel]; |
1905 | rotatedRGB[px][1] = pixels[px][greenChannel]; |
1906 | rotatedRGB[px][2] = pixels[px][blueChannel]; |
1907 | |
1908 | for (int ch = 0; ch < 3; ch++) |
1909 | floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]); |
1910 | } |
1911 | |
1912 | uint16_t maxIndexSelector = (mode == 4) ? 2 : 1; |
1913 | |
1914 | float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] }; |
1915 | float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] }; |
1916 | float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] }; |
1917 | float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] }; |
1918 | |
1919 | float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error |
1920 | |
1921 | MFloat preWeightedRotatedRGB[16][3]; |
1922 | BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights); |
1923 | |
1924 | for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++) |
1925 | { |
1926 | int numTweakRounds = numSP[indexSelector]; |
1927 | |
1928 | if (numTweakRounds <= 0) |
1929 | continue; |
1930 | |
1931 | if (numTweakRounds > MaxTweakRounds) |
1932 | numTweakRounds = MaxTweakRounds; |
1933 | |
1934 | EndpointSelector<3, 8> rgbSelector; |
1935 | |
1936 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
1937 | { |
1938 | for (int px = 0; px < 16; px++) |
1939 | rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f)); |
1940 | |
1941 | rgbSelector.FinishPass(epPass); |
1942 | } |
1943 | |
1944 | MUInt15 alphaRange[2]; |
1945 | |
1946 | alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel]; |
1947 | for (int px = 1; px < 16; px++) |
1948 | { |
1949 | alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]); |
1950 | alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]); |
1951 | } |
1952 | |
1953 | int rgbPrec = 0; |
1954 | int alphaPrec = 0; |
1955 | |
1956 | if (mode == 4) |
1957 | { |
1958 | rgbPrec = indexSelector ? 3 : 2; |
1959 | alphaPrec = indexSelector ? 2 : 3; |
1960 | } |
1961 | else |
1962 | rgbPrec = alphaPrec = 2; |
1963 | |
1964 | UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights); |
1965 | |
1966 | MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX); |
1967 | MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX); |
1968 | |
1969 | MUInt15 bestRGBIndexes[16]; |
1970 | MUInt15 bestAlphaIndexes[16]; |
1971 | MUInt15 bestEP[2][4]; |
1972 | |
1973 | for (int px = 0; px < 16; px++) |
1974 | bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0); |
1975 | |
1976 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
1977 | { |
1978 | MUInt15 rgbEP[2][3]; |
1979 | MUInt15 alphaEP[2]; |
1980 | |
1981 | unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]); |
1982 | |
1983 | TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP); |
1984 | |
1985 | for (int refine = 0; refine < numRefineRounds; refine++) |
1986 | { |
1987 | if (mode == 4) |
1988 | CompressEndpoints4(rgbEP, alphaEP); |
1989 | else |
1990 | CompressEndpoints5(rgbEP, alphaEP); |
1991 | |
1992 | |
1993 | IndexSelector<1> alphaIndexSelector; |
1994 | IndexSelector<3> rgbIndexSelector; |
1995 | |
1996 | { |
1997 | MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } }; |
1998 | alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec); |
1999 | } |
2000 | rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec); |
2001 | |
2002 | EndpointRefiner<3> rgbRefiner; |
2003 | EndpointRefiner<1> alphaRefiner; |
2004 | |
2005 | rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights); |
2006 | alphaRefiner.Init(1 << alphaPrec, uniformWeight); |
2007 | |
2008 | MFloat errorRGB = ParallelMath::MakeFloatZero(); |
2009 | MFloat errorA = ParallelMath::MakeFloatZero(); |
2010 | |
2011 | MUInt15 rgbIndexes[16]; |
2012 | MUInt15 alphaIndexes[16]; |
2013 | |
2014 | AggregatedError<3> rgbAggError; |
2015 | AggregatedError<1> alphaAggError; |
2016 | |
2017 | for (int px = 0; px < 16; px++) |
2018 | { |
2019 | MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn); |
2020 | MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn); |
2021 | |
2022 | MUInt15 reconstructedRGB[3]; |
2023 | MUInt15 reconstructedAlpha[1]; |
2024 | |
2025 | rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB); |
2026 | alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha); |
2027 | |
2028 | if (flags & cvtt::Flags::BC7_FastIndexing) |
2029 | { |
2030 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError); |
2031 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError); |
2032 | } |
2033 | else |
2034 | { |
2035 | AggregatedError<3> baseRGBAggError; |
2036 | AggregatedError<1> baseAlphaAggError; |
2037 | |
2038 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError); |
2039 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError); |
2040 | |
2041 | MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq); |
2042 | MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
2043 | |
2044 | MUInt15 altRGBIndexes[2]; |
2045 | MUInt15 altAlphaIndexes[2]; |
2046 | |
2047 | altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
2048 | altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1))); |
2049 | |
2050 | altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
2051 | altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1))); |
2052 | |
2053 | for (int ii = 0; ii < 2; ii++) |
2054 | { |
2055 | rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB); |
2056 | alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha); |
2057 | |
2058 | AggregatedError<3> altRGBAggError; |
2059 | AggregatedError<1> altAlphaAggError; |
2060 | |
2061 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError); |
2062 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError); |
2063 | |
2064 | MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq); |
2065 | MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
2066 | |
2067 | ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError)); |
2068 | ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError)); |
2069 | |
2070 | rgbError = ParallelMath::Min(altRGBError, rgbError); |
2071 | alphaError = ParallelMath::Min(altAlphaError, alphaError); |
2072 | |
2073 | ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]); |
2074 | ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]); |
2075 | } |
2076 | |
2077 | errorRGB = errorRGB + rgbError; |
2078 | errorA = errorA + alphaError; |
2079 | } |
2080 | |
2081 | if (refine != numRefineRounds - 1) |
2082 | { |
2083 | rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex); |
2084 | alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex); |
2085 | } |
2086 | |
2087 | if (flags & Flags::BC7_FastIndexing) |
2088 | { |
2089 | errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq); |
2090 | errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
2091 | } |
2092 | |
2093 | rgbIndexes[px] = rgbIndex; |
2094 | alphaIndexes[px] = alphaIndex; |
2095 | } |
2096 | |
2097 | ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError); |
2098 | ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError); |
2099 | |
2100 | ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter); |
2101 | ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter); |
2102 | |
2103 | if (ParallelMath::AnySet(rgbBetterInt16)) |
2104 | { |
2105 | bestRGBError = ParallelMath::Min(errorRGB, bestRGBError); |
2106 | |
2107 | for (int px = 0; px < 16; px++) |
2108 | ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]); |
2109 | |
2110 | for (int ep = 0; ep < 2; ep++) |
2111 | { |
2112 | for (int ch = 0; ch < 3; ch++) |
2113 | ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]); |
2114 | } |
2115 | } |
2116 | |
2117 | if (ParallelMath::AnySet(alphaBetterInt16)) |
2118 | { |
2119 | bestAlphaError = ParallelMath::Min(errorA, bestAlphaError); |
2120 | |
2121 | for (int px = 0; px < 16; px++) |
2122 | ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]); |
2123 | |
2124 | for (int ep = 0; ep < 2; ep++) |
2125 | ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]); |
2126 | } |
2127 | |
2128 | if (refine != numRefineRounds - 1) |
2129 | { |
2130 | rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn); |
2131 | |
2132 | MUInt15 alphaEPTemp[2][1]; |
2133 | alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn); |
2134 | |
2135 | for (int i = 0; i < 2; i++) |
2136 | alphaEP[i] = alphaEPTemp[i][0]; |
2137 | } |
2138 | } // refine |
2139 | } // tweak |
2140 | |
2141 | MFloat combinedError = bestRGBError + bestAlphaError; |
2142 | |
2143 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error); |
2144 | ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); |
2145 | |
2146 | work.m_error = ParallelMath::Min(combinedError, work.m_error); |
2147 | |
2148 | ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); |
2149 | ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation)); |
2150 | ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector)); |
2151 | |
2152 | for (int px = 0; px < 16; px++) |
2153 | { |
2154 | ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]); |
2155 | ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]); |
2156 | } |
2157 | |
2158 | for (int ep = 0; ep < 2; ep++) |
2159 | for (int ch = 0; ch < 4; ch++) |
2160 | ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]); |
2161 | } |
2162 | } |
2163 | } |
2164 | } |
2165 | |
2166 | template<class T> |
2167 | void cvtt::Internal::BC7Computer::Swap(T& a, T& b) |
2168 | { |
2169 | T temp = a; |
2170 | a = b; |
2171 | b = temp; |
2172 | } |
2173 | |
2174 | void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds) |
2175 | { |
2176 | MUInt15 pixels[16][4]; |
2177 | MFloat floatPixels[16][4]; |
2178 | |
2179 | for (int px = 0; px < 16; px++) |
2180 | { |
2181 | for (int ch = 0; ch < 4; ch++) |
2182 | ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); |
2183 | } |
2184 | |
2185 | for (int px = 0; px < 16; px++) |
2186 | { |
2187 | for (int ch = 0; ch < 4; ch++) |
2188 | floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); |
2189 | } |
2190 | |
2191 | BC67::WorkInfo work; |
2192 | memset(&work, 0, sizeof(work)); |
2193 | |
2194 | work.m_error = ParallelMath::MakeFloat(FLT_MAX); |
2195 | |
2196 | { |
2197 | ParallelMath::RoundTowardNearestForScope rtn; |
2198 | TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); |
2199 | TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); |
2200 | } |
2201 | |
2202 | for (int block = 0; block < ParallelMath::ParallelSize; block++) |
2203 | { |
2204 | PackingVector pv; |
2205 | pv.Init(); |
2206 | |
2207 | ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block); |
2208 | ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block); |
2209 | ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block); |
2210 | |
2211 | const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode]; |
2212 | |
2213 | ParallelMath::ScalarUInt16 indexes[16]; |
2214 | ParallelMath::ScalarUInt16 indexes2[16]; |
2215 | ParallelMath::ScalarUInt16 endPoints[3][2][4]; |
2216 | |
2217 | for (int i = 0; i < 16; i++) |
2218 | { |
2219 | indexes[i] = ParallelMath::Extract(work.m_indexes[i], block); |
2220 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2221 | indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block); |
2222 | } |
2223 | |
2224 | for (int subset = 0; subset < 3; subset++) |
2225 | { |
2226 | for (int ep = 0; ep < 2; ep++) |
2227 | { |
2228 | for (int ch = 0; ch < 4; ch++) |
2229 | endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block); |
2230 | } |
2231 | } |
2232 | |
2233 | int fixups[3] = { 0, 0, 0 }; |
2234 | |
2235 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2236 | { |
2237 | bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0); |
2238 | bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0); |
2239 | |
2240 | if (flipRGB) |
2241 | { |
2242 | uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; |
2243 | for (int px = 0; px < 16; px++) |
2244 | indexes[px] = highIndex - indexes[px]; |
2245 | } |
2246 | |
2247 | if (flipAlpha) |
2248 | { |
2249 | uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1; |
2250 | for (int px = 0; px < 16; px++) |
2251 | indexes2[px] = highIndex - indexes2[px]; |
2252 | } |
2253 | |
2254 | if (indexSelector) |
2255 | Swap(flipRGB, flipAlpha); |
2256 | |
2257 | if (flipRGB) |
2258 | { |
2259 | for (int ch = 0; ch < 3; ch++) |
2260 | Swap(endPoints[0][0][ch], endPoints[0][1][ch]); |
2261 | } |
2262 | if (flipAlpha) |
2263 | Swap(endPoints[0][0][3], endPoints[0][1][3]); |
2264 | |
2265 | } |
2266 | else |
2267 | { |
2268 | if (modeInfo.m_numSubsets == 2) |
2269 | fixups[1] = BC7Data::g_fixupIndexes2[partition]; |
2270 | else if (modeInfo.m_numSubsets == 3) |
2271 | { |
2272 | fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; |
2273 | fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; |
2274 | } |
2275 | |
2276 | bool flip[3] = { false, false, false }; |
2277 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2278 | flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0); |
2279 | |
2280 | if (flip[0] || flip[1] || flip[2]) |
2281 | { |
2282 | uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; |
2283 | for (int px = 0; px < 16; px++) |
2284 | { |
2285 | int subset = 0; |
2286 | if (modeInfo.m_numSubsets == 2) |
2287 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
2288 | else if (modeInfo.m_numSubsets == 3) |
2289 | subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; |
2290 | |
2291 | if (flip[subset]) |
2292 | indexes[px] = highIndex - indexes[px]; |
2293 | } |
2294 | |
2295 | int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3; |
2296 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2297 | { |
2298 | if (flip[subset]) |
2299 | for (int ch = 0; ch < maxCH; ch++) |
2300 | Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]); |
2301 | } |
2302 | } |
2303 | } |
2304 | |
2305 | pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1); |
2306 | |
2307 | if (modeInfo.m_partitionBits) |
2308 | pv.Pack(partition, modeInfo.m_partitionBits); |
2309 | |
2310 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2311 | { |
2312 | ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block); |
2313 | pv.Pack(rotation, 2); |
2314 | } |
2315 | |
2316 | if (modeInfo.m_hasIndexSelector) |
2317 | pv.Pack(indexSelector, 1); |
2318 | |
2319 | // Encode RGB |
2320 | for (int ch = 0; ch < 3; ch++) |
2321 | { |
2322 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2323 | { |
2324 | for (int ep = 0; ep < 2; ep++) |
2325 | { |
2326 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch]; |
2327 | epPart >>= (8 - modeInfo.m_rgbBits); |
2328 | |
2329 | pv.Pack(epPart, modeInfo.m_rgbBits); |
2330 | } |
2331 | } |
2332 | } |
2333 | |
2334 | // Encode alpha |
2335 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2336 | { |
2337 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2338 | { |
2339 | for (int ep = 0; ep < 2; ep++) |
2340 | { |
2341 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3]; |
2342 | epPart >>= (8 - modeInfo.m_alphaBits); |
2343 | |
2344 | pv.Pack(epPart, modeInfo.m_alphaBits); |
2345 | } |
2346 | } |
2347 | } |
2348 | |
2349 | // Encode parity bits |
2350 | if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) |
2351 | { |
2352 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2353 | { |
2354 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0]; |
2355 | epPart >>= (7 - modeInfo.m_rgbBits); |
2356 | epPart &= 1; |
2357 | |
2358 | pv.Pack(epPart, 1); |
2359 | } |
2360 | } |
2361 | else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
2362 | { |
2363 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2364 | { |
2365 | for (int ep = 0; ep < 2; ep++) |
2366 | { |
2367 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0]; |
2368 | epPart >>= (7 - modeInfo.m_rgbBits); |
2369 | epPart &= 1; |
2370 | |
2371 | pv.Pack(epPart, 1); |
2372 | } |
2373 | } |
2374 | } |
2375 | |
2376 | // Encode indexes |
2377 | for (int px = 0; px < 16; px++) |
2378 | { |
2379 | int bits = modeInfo.m_indexBits; |
2380 | if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) |
2381 | bits--; |
2382 | |
2383 | pv.Pack(indexes[px], bits); |
2384 | } |
2385 | |
2386 | // Encode secondary indexes |
2387 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2388 | { |
2389 | for (int px = 0; px < 16; px++) |
2390 | { |
2391 | int bits = modeInfo.m_alphaIndexBits; |
2392 | if (px == 0) |
2393 | bits--; |
2394 | |
2395 | pv.Pack(indexes2[px], bits); |
2396 | } |
2397 | } |
2398 | |
2399 | pv.Flush(packedBlocks); |
2400 | |
2401 | packedBlocks += 16; |
2402 | } |
2403 | } |
2404 | |
2405 | void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock) |
2406 | { |
2407 | UnpackingVector pv; |
2408 | pv.Init(packedBlock); |
2409 | |
2410 | int mode = 8; |
2411 | for (int i = 0; i < 8; i++) |
2412 | { |
2413 | if (pv.Unpack(1) == 1) |
2414 | { |
2415 | mode = i; |
2416 | break; |
2417 | } |
2418 | } |
2419 | |
2420 | if (mode > 7) |
2421 | { |
2422 | for (int px = 0; px < 16; px++) |
2423 | for (int ch = 0; ch < 4; ch++) |
2424 | output.m_pixels[px][ch] = 0; |
2425 | |
2426 | return; |
2427 | } |
2428 | |
2429 | const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode]; |
2430 | |
2431 | int partition = 0; |
2432 | if (modeInfo.m_partitionBits) |
2433 | partition = pv.Unpack(modeInfo.m_partitionBits); |
2434 | |
2435 | int rotation = 0; |
2436 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2437 | rotation = pv.Unpack(2); |
2438 | |
2439 | int indexSelector = 0; |
2440 | if (modeInfo.m_hasIndexSelector) |
2441 | indexSelector = pv.Unpack(1); |
2442 | |
2443 | // Resolve fixups |
2444 | int fixups[3] = { 0, 0, 0 }; |
2445 | |
2446 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate) |
2447 | { |
2448 | if (modeInfo.m_numSubsets == 2) |
2449 | fixups[1] = BC7Data::g_fixupIndexes2[partition]; |
2450 | else if (modeInfo.m_numSubsets == 3) |
2451 | { |
2452 | fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; |
2453 | fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; |
2454 | } |
2455 | } |
2456 | |
2457 | int endPoints[3][2][4]; |
2458 | |
2459 | // Decode RGB |
2460 | for (int ch = 0; ch < 3; ch++) |
2461 | { |
2462 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2463 | { |
2464 | for (int ep = 0; ep < 2; ep++) |
2465 | endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits)); |
2466 | } |
2467 | } |
2468 | |
2469 | // Decode alpha |
2470 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2471 | { |
2472 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2473 | { |
2474 | for (int ep = 0; ep < 2; ep++) |
2475 | endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits)); |
2476 | } |
2477 | } |
2478 | else |
2479 | { |
2480 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2481 | { |
2482 | for (int ep = 0; ep < 2; ep++) |
2483 | endPoints[subset][ep][3] = 255; |
2484 | } |
2485 | } |
2486 | |
2487 | int parityBits = 0; |
2488 | |
2489 | // Decode parity bits |
2490 | if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) |
2491 | { |
2492 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2493 | { |
2494 | int p = pv.Unpack(1); |
2495 | |
2496 | for (int ep = 0; ep < 2; ep++) |
2497 | { |
2498 | for (int ch = 0; ch < 3; ch++) |
2499 | endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); |
2500 | |
2501 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2502 | endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); |
2503 | } |
2504 | } |
2505 | |
2506 | parityBits = 1; |
2507 | } |
2508 | else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
2509 | { |
2510 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2511 | { |
2512 | for (int ep = 0; ep < 2; ep++) |
2513 | { |
2514 | int p = pv.Unpack(1); |
2515 | |
2516 | for (int ch = 0; ch < 3; ch++) |
2517 | endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); |
2518 | |
2519 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2520 | endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); |
2521 | } |
2522 | } |
2523 | |
2524 | parityBits = 1; |
2525 | } |
2526 | |
2527 | // Fill endpoint bits |
2528 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
2529 | { |
2530 | for (int ep = 0; ep < 2; ep++) |
2531 | { |
2532 | for (int ch = 0; ch < 3; ch++) |
2533 | endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits)); |
2534 | |
2535 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2536 | endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits)); |
2537 | } |
2538 | } |
2539 | |
2540 | int indexes[16]; |
2541 | int indexes2[16]; |
2542 | |
2543 | // Decode indexes |
2544 | for (int px = 0; px < 16; px++) |
2545 | { |
2546 | int bits = modeInfo.m_indexBits; |
2547 | if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) |
2548 | bits--; |
2549 | |
2550 | indexes[px] = pv.Unpack(bits); |
2551 | } |
2552 | |
2553 | // Decode secondary indexes |
2554 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2555 | { |
2556 | for (int px = 0; px < 16; px++) |
2557 | { |
2558 | int bits = modeInfo.m_alphaIndexBits; |
2559 | if (px == 0) |
2560 | bits--; |
2561 | |
2562 | indexes2[px] = pv.Unpack(bits); |
2563 | } |
2564 | } |
2565 | else |
2566 | { |
2567 | for (int px = 0; px < 16; px++) |
2568 | indexes2[px] = 0; |
2569 | } |
2570 | |
2571 | const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits]; |
2572 | const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits]; |
2573 | |
2574 | // Decode each pixel |
2575 | for (int px = 0; px < 16; px++) |
2576 | { |
2577 | int rgbWeight = 0; |
2578 | int alphaWeight = 0; |
2579 | |
2580 | int rgbIndex = indexes[px]; |
2581 | |
2582 | rgbWeight = rgbWeights[indexes[px]]; |
2583 | |
2584 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) |
2585 | alphaWeight = rgbWeight; |
2586 | else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
2587 | alphaWeight = alphaWeights[indexes2[px]]; |
2588 | |
2589 | if (indexSelector == 1) |
2590 | { |
2591 | int temp = rgbWeight; |
2592 | rgbWeight = alphaWeight; |
2593 | alphaWeight = temp; |
2594 | } |
2595 | |
2596 | int pixel[4] = { 0, 0, 0, 255 }; |
2597 | |
2598 | int subset = 0; |
2599 | |
2600 | if (modeInfo.m_numSubsets == 2) |
2601 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
2602 | else if (modeInfo.m_numSubsets == 3) |
2603 | subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; |
2604 | |
2605 | for (int ch = 0; ch < 3; ch++) |
2606 | pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6; |
2607 | |
2608 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
2609 | pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6; |
2610 | |
2611 | if (rotation != 0) |
2612 | { |
2613 | int ch = rotation - 1; |
2614 | int temp = pixel[ch]; |
2615 | pixel[ch] = pixel[3]; |
2616 | pixel[3] = temp; |
2617 | } |
2618 | |
2619 | for (int ch = 0; ch < 4; ch++) |
2620 | output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]); |
2621 | } |
2622 | } |
2623 | |
2624 | cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru) |
2625 | { |
2626 | assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744)))); |
2627 | assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL))); |
2628 | |
2629 | // Expand to full range |
2630 | ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0)); |
2631 | MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL)); |
2632 | |
2633 | absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision); |
2634 | |
2635 | MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem); |
2636 | |
2637 | return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16); |
2638 | } |
2639 | |
2640 | cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru) |
2641 | { |
2642 | MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru); |
2643 | return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision)); |
2644 | } |
2645 | |
2646 | void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL) |
2647 | { |
2648 | MSInt16 zero = ParallelMath::MakeSInt16(0); |
2649 | |
2650 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero); |
2651 | MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp)); |
2652 | |
2653 | MSInt16 unq; |
2654 | MUInt15 absUnq; |
2655 | |
2656 | if (precision >= 16) |
2657 | { |
2658 | unq = comp; |
2659 | absUnq = absComp; |
2660 | } |
2661 | else |
2662 | { |
2663 | MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2)); |
2664 | ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); |
2665 | ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); |
2666 | |
2667 | absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1))); |
2668 | ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0)); |
2669 | ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff)); |
2670 | |
2671 | unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq)); |
2672 | } |
2673 | |
2674 | outUnquantized = unq; |
2675 | |
2676 | MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5)); |
2677 | |
2678 | outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq)); |
2679 | } |
2680 | |
2681 | void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished) |
2682 | { |
2683 | MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp); |
2684 | if (precision < 15) |
2685 | { |
2686 | MUInt15 zero = ParallelMath::MakeUInt15(0); |
2687 | MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2)); |
2688 | |
2689 | ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); |
2690 | ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); |
2691 | |
2692 | unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision)); |
2693 | |
2694 | ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0)); |
2695 | ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff)); |
2696 | } |
2697 | |
2698 | outUnquantized = unq; |
2699 | outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6)); |
2700 | } |
2701 | |
2702 | void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) |
2703 | { |
2704 | MSInt16 unquantizedEP[2][3]; |
2705 | MSInt16 finishedUnquantizedEP[2][3]; |
2706 | |
2707 | { |
2708 | ParallelMath::RoundUpForScope ru; |
2709 | |
2710 | for (int epi = 0; epi < 2; epi++) |
2711 | { |
2712 | for (int ch = 0; ch < 3; ch++) |
2713 | { |
2714 | MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru); |
2715 | UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); |
2716 | quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); |
2717 | } |
2718 | } |
2719 | } |
2720 | |
2721 | indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); |
2722 | indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights); |
2723 | |
2724 | MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); |
2725 | |
2726 | MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); |
2727 | |
2728 | ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); |
2729 | |
2730 | if (ParallelMath::AnySet(invert)) |
2731 | { |
2732 | ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); |
2733 | |
2734 | indexSelector.ConditionalInvert(invert); |
2735 | |
2736 | for (int ch = 0; ch < 3; ch++) |
2737 | { |
2738 | MAInt16 firstEP = quantizedEndPoints[0][ch]; |
2739 | MAInt16 secondEP = quantizedEndPoints[1][ch]; |
2740 | |
2741 | quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); |
2742 | quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); |
2743 | } |
2744 | } |
2745 | |
2746 | indexes[fixupIndex] = index; |
2747 | } |
2748 | |
2749 | void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) |
2750 | { |
2751 | MUInt16 unquantizedEP[2][3]; |
2752 | MUInt16 finishedUnquantizedEP[2][3]; |
2753 | |
2754 | { |
2755 | ParallelMath::RoundUpForScope ru; |
2756 | |
2757 | for (int epi = 0; epi < 2; epi++) |
2758 | { |
2759 | for (int ch = 0; ch < 3; ch++) |
2760 | { |
2761 | MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru); |
2762 | UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); |
2763 | quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); |
2764 | } |
2765 | } |
2766 | } |
2767 | |
2768 | indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); |
2769 | indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights); |
2770 | |
2771 | MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); |
2772 | |
2773 | MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); |
2774 | |
2775 | ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); |
2776 | |
2777 | if (ParallelMath::AnySet(invert)) |
2778 | { |
2779 | ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); |
2780 | |
2781 | indexSelector.ConditionalInvert(invert); |
2782 | |
2783 | for (int ch = 0; ch < 3; ch++) |
2784 | { |
2785 | MAInt16 firstEP = quantizedEndPoints[0][ch]; |
2786 | MAInt16 secondEP = quantizedEndPoints[1][ch]; |
2787 | |
2788 | quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); |
2789 | quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); |
2790 | } |
2791 | } |
2792 | |
2793 | indexes[fixupIndex] = index; |
2794 | } |
2795 | |
2796 | void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal) |
2797 | { |
2798 | ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); |
2799 | |
2800 | MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); |
2801 | |
2802 | for (int ch = 0; ch < 3; ch++) |
2803 | { |
2804 | outEncodedEPs[0][0][ch] = ep0[0][ch]; |
2805 | outEncodedEPs[0][1][ch] = ep0[1][ch]; |
2806 | outEncodedEPs[1][0][ch] = ep1[0][ch]; |
2807 | outEncodedEPs[1][1][ch] = ep1[1][ch]; |
2808 | |
2809 | if (isTransformed) |
2810 | { |
2811 | for (int subset = 0; subset < 2; subset++) |
2812 | { |
2813 | for (int epi = 0; epi < 2; epi++) |
2814 | { |
2815 | if (epi == 0 && subset == 0) |
2816 | continue; |
2817 | |
2818 | MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask); |
2819 | |
2820 | MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]); |
2821 | |
2822 | outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); |
2823 | |
2824 | MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask); |
2825 | allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); |
2826 | } |
2827 | } |
2828 | } |
2829 | |
2830 | if (!ParallelMath::AnySet(allLegal)) |
2831 | break; |
2832 | } |
2833 | |
2834 | outIsLegal = allLegal; |
2835 | } |
2836 | |
2837 | void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal) |
2838 | { |
2839 | ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); |
2840 | |
2841 | MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); |
2842 | |
2843 | for (int ch = 0; ch < 3; ch++) |
2844 | { |
2845 | outEncodedEPs[0][ch] = ep[0][ch]; |
2846 | outEncodedEPs[1][ch] = ep[1][ch]; |
2847 | |
2848 | if (isTransformed) |
2849 | { |
2850 | MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask); |
2851 | |
2852 | MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]); |
2853 | |
2854 | outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); |
2855 | |
2856 | MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask); |
2857 | allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); |
2858 | } |
2859 | } |
2860 | |
2861 | outIsLegal = allLegal; |
2862 | } |
2863 | |
2864 | void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds) |
2865 | { |
2866 | if (numTweakRounds < 1) |
2867 | numTweakRounds = 1; |
2868 | else if (numTweakRounds > MaxTweakRounds) |
2869 | numTweakRounds = MaxTweakRounds; |
2870 | |
2871 | if (numRefineRounds < 1) |
2872 | numRefineRounds = 1; |
2873 | else if (numRefineRounds > MaxRefineRounds) |
2874 | numRefineRounds = MaxRefineRounds; |
2875 | |
2876 | bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0); |
2877 | float channelWeightsSq[3]; |
2878 | |
2879 | ParallelMath::RoundTowardNearestForScope rtn; |
2880 | |
2881 | MSInt16 pixels[16][3]; |
2882 | MFloat floatPixels2CL[16][3]; |
2883 | MFloat floatPixelsLinearWeighted[16][3]; |
2884 | |
2885 | MSInt16 low15Bits = ParallelMath::MakeSInt16(32767); |
2886 | |
2887 | for (int ch = 0; ch < 3; ch++) |
2888 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
2889 | |
2890 | for (int px = 0; px < 16; px++) |
2891 | { |
2892 | for (int ch = 0; ch < 3; ch++) |
2893 | { |
2894 | MSInt16 pixelValue; |
2895 | ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue); |
2896 | |
2897 | // Convert from sign+magnitude to 2CL |
2898 | if (isSigned) |
2899 | { |
2900 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0)); |
2901 | MSInt16 magnitude = (pixelValue & low15Bits); |
2902 | ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude); |
2903 | pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743)); |
2904 | } |
2905 | else |
2906 | pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0)); |
2907 | |
2908 | pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743)); |
2909 | |
2910 | pixels[px][ch] = pixelValue; |
2911 | floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue); |
2912 | floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch]; |
2913 | } |
2914 | } |
2915 | |
2916 | MFloat preWeightedPixels[16][3]; |
2917 | |
2918 | BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights); |
2919 | |
2920 | MAInt16 bestEndPoints[2][2][3]; |
2921 | MUInt15 bestIndexes[16]; |
2922 | MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); |
2923 | MUInt15 bestMode = ParallelMath::MakeUInt15(0); |
2924 | MUInt15 bestPartition = ParallelMath::MakeUInt15(0); |
2925 | |
2926 | for (int px = 0; px < 16; px++) |
2927 | bestIndexes[px] = ParallelMath::MakeUInt15(0); |
2928 | |
2929 | for (int subset = 0; subset < 2; subset++) |
2930 | for (int epi = 0; epi < 2; epi++) |
2931 | for (int ch = 0; ch < 3; ch++) |
2932 | bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0); |
2933 | |
2934 | UnfinishedEndpoints<3> partitionedUFEP[32][2]; |
2935 | UnfinishedEndpoints<3> singleUFEP; |
2936 | |
2937 | // Generate UFEP for partitions |
2938 | for (int p = 0; p < 32; p++) |
2939 | { |
2940 | int partitionMask = BC7Data::g_partitionMap[p]; |
2941 | |
2942 | EndpointSelector<3, 8> epSelectors[2]; |
2943 | |
2944 | for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) |
2945 | { |
2946 | for (int px = 0; px < 16; px++) |
2947 | { |
2948 | int subset = (partitionMask >> px) & 1; |
2949 | epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); |
2950 | } |
2951 | |
2952 | for (int subset = 0; subset < 2; subset++) |
2953 | epSelectors[subset].FinishPass(pass); |
2954 | } |
2955 | |
2956 | for (int subset = 0; subset < 2; subset++) |
2957 | partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights); |
2958 | } |
2959 | |
2960 | // Generate UFEP for single |
2961 | { |
2962 | EndpointSelector<3, 8> epSelector; |
2963 | |
2964 | for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) |
2965 | { |
2966 | for (int px = 0; px < 16; px++) |
2967 | epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); |
2968 | |
2969 | epSelector.FinishPass(pass); |
2970 | } |
2971 | |
2972 | singleUFEP = epSelector.GetEndpoints(channelWeights); |
2973 | } |
2974 | |
2975 | for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++) |
2976 | { |
2977 | bool partitioned = (partitionedInt == 1); |
2978 | |
2979 | for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--) |
2980 | { |
2981 | if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec]) |
2982 | continue; |
2983 | |
2984 | int numPartitions = partitioned ? 32 : 1; |
2985 | int numSubsets = partitioned ? 2 : 1; |
2986 | int indexBits = partitioned ? 3 : 4; |
2987 | int indexRange = (1 << indexBits); |
2988 | |
2989 | for (int p = 0; p < numPartitions; p++) |
2990 | { |
2991 | int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0; |
2992 | |
2993 | const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds; |
2994 | |
2995 | MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3]; |
2996 | MUInt15 metaIndexes[MaxMetaRounds][16]; |
2997 | MFloat metaError[MaxMetaRounds][2]; |
2998 | |
2999 | bool roundValid[MaxMetaRounds][2]; |
3000 | |
3001 | for (int r = 0; r < MaxMetaRounds; r++) |
3002 | for (int subset = 0; subset < 2; subset++) |
3003 | roundValid[r][subset] = true; |
3004 | |
3005 | for (int subset = 0; subset < numSubsets; subset++) |
3006 | { |
3007 | for (int tweak = 0; tweak < MaxTweakRounds; tweak++) |
3008 | { |
3009 | EndpointRefiner<3> refiners[2]; |
3010 | |
3011 | bool abortRemainingRefines = false; |
3012 | for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++) |
3013 | { |
3014 | int metaRound = tweak * MaxRefineRounds + refinePass; |
3015 | |
3016 | if (tweak >= numTweakRounds || refinePass >= numRefineRounds) |
3017 | abortRemainingRefines = true; |
3018 | |
3019 | if (abortRemainingRefines) |
3020 | { |
3021 | roundValid[metaRound][subset] = false; |
3022 | continue; |
3023 | } |
3024 | |
3025 | MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound]; |
3026 | MUInt15(&mrIndexes)[16] = metaIndexes[metaRound]; |
3027 | |
3028 | MSInt16 endPointsColorSpace[2][3]; |
3029 | |
3030 | if (refinePass == 0) |
3031 | { |
3032 | UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP; |
3033 | |
3034 | if (isSigned) |
3035 | ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); |
3036 | else |
3037 | ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); |
3038 | } |
3039 | else |
3040 | refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn); |
3041 | |
3042 | refiners[subset].Init(indexRange, channelWeights); |
3043 | |
3044 | int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p]; |
3045 | |
3046 | IndexSelectorHDR<3> indexSelector; |
3047 | if (isSigned) |
3048 | QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); |
3049 | else |
3050 | QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); |
3051 | |
3052 | if (metaRound > 0) |
3053 | { |
3054 | ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false); |
3055 | |
3056 | for (int prevRound = 0; prevRound < metaRound; prevRound++) |
3057 | { |
3058 | MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset]; |
3059 | |
3060 | ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true); |
3061 | |
3062 | for (int epi = 0; epi < 2; epi++) |
3063 | for (int ch = 0; ch < 3; ch++) |
3064 | same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch])); |
3065 | |
3066 | anySame = (anySame | same); |
3067 | if (ParallelMath::AllSet(anySame)) |
3068 | break; |
3069 | } |
3070 | |
3071 | if (ParallelMath::AllSet(anySame)) |
3072 | { |
3073 | roundValid[metaRound][subset] = false; |
3074 | continue; |
3075 | } |
3076 | } |
3077 | |
3078 | MFloat subsetError = ParallelMath::MakeFloatZero(); |
3079 | |
3080 | { |
3081 | for (int px = 0; px < 16; px++) |
3082 | { |
3083 | if (subset != ((partitionMask >> px) & 1)) |
3084 | continue; |
3085 | |
3086 | MUInt15 index; |
3087 | if (px == fixupIndex) |
3088 | index = mrIndexes[px]; |
3089 | else |
3090 | { |
3091 | index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn); |
3092 | mrIndexes[px] = index; |
3093 | } |
3094 | |
3095 | MSInt16 reconstructed[3]; |
3096 | if (isSigned) |
3097 | indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed); |
3098 | else |
3099 | indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed); |
3100 | |
3101 | subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq)); |
3102 | |
3103 | if (refinePass != numRefineRounds - 1) |
3104 | refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index); |
3105 | } |
3106 | } |
3107 | |
3108 | metaError[metaRound][subset] = subsetError; |
3109 | } |
3110 | } |
3111 | } |
3112 | |
3113 | // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme |
3114 | int numMeta1 = partitioned ? MaxMetaRounds : 1; |
3115 | for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++) |
3116 | { |
3117 | if (!roundValid[meta0][0]) |
3118 | continue; |
3119 | |
3120 | for (int meta1 = 0; meta1 < numMeta1; meta1++) |
3121 | { |
3122 | MFloat combinedError = metaError[meta0][0]; |
3123 | if (partitioned) |
3124 | { |
3125 | if (!roundValid[meta1][1]) |
3126 | continue; |
3127 | |
3128 | combinedError = combinedError + metaError[meta1][1]; |
3129 | } |
3130 | |
3131 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError); |
3132 | if (!ParallelMath::AnySet(errorBetter)) |
3133 | continue; |
3134 | |
3135 | ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter); |
3136 | |
3137 | // Figure out if this is encodable |
3138 | for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++) |
3139 | { |
3140 | const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode]; |
3141 | |
3142 | if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec) |
3143 | continue; |
3144 | |
3145 | MAInt16 encodedEPs[2][2][3]; |
3146 | ParallelMath::Int16CompFlag isLegal; |
3147 | if (partitioned) |
3148 | EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal); |
3149 | else |
3150 | EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal); |
3151 | |
3152 | ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal); |
3153 | if (!ParallelMath::AnySet(isLegalAndBetter)) |
3154 | continue; |
3155 | |
3156 | ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter); |
3157 | |
3158 | ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError); |
3159 | ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode))); |
3160 | ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p))); |
3161 | |
3162 | for (int subset = 0; subset < numSubsets; subset++) |
3163 | { |
3164 | for (int epi = 0; epi < 2; epi++) |
3165 | { |
3166 | for (int ch = 0; ch < 3; ch++) |
3167 | ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]); |
3168 | } |
3169 | } |
3170 | |
3171 | for (int px = 0; px < 16; px++) |
3172 | { |
3173 | int subset = ((partitionMask >> px) & 1); |
3174 | if (subset == 0) |
3175 | ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]); |
3176 | else |
3177 | ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]); |
3178 | } |
3179 | |
3180 | needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter); |
3181 | if (!ParallelMath::AnySet(needsCommit)) |
3182 | break; |
3183 | } |
3184 | } |
3185 | } |
3186 | } |
3187 | } |
3188 | } |
3189 | |
3190 | // At this point, everything should be set |
3191 | for (int block = 0; block < ParallelMath::ParallelSize; block++) |
3192 | { |
3193 | ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block); |
3194 | ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block); |
3195 | int32_t eps[2][2][3]; |
3196 | ParallelMath::ScalarUInt16 indexes[16]; |
3197 | |
3198 | const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; |
3199 | |
3200 | const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode]; |
3201 | |
3202 | const size_t = modeInfo.m_partitioned ? 82 : 65; |
3203 | |
3204 | for (int subset = 0; subset < 2; subset++) |
3205 | { |
3206 | for (int epi = 0; epi < 2; epi++) |
3207 | { |
3208 | for (int ch = 0; ch < 3; ch++) |
3209 | eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block); |
3210 | } |
3211 | } |
3212 | |
3213 | for (int px = 0; px < 16; px++) |
3214 | indexes[px] = ParallelMath::Extract(bestIndexes[px], block); |
3215 | |
3216 | uint16_t modeID = modeInfo.m_modeID; |
3217 | |
3218 | PackingVector pv; |
3219 | pv.Init(); |
3220 | |
3221 | for (size_t i = 0; i < headerBits; i++) { |
3222 | int32_t codedValue = 0; |
3223 | switch (desc[i].m_eField) { |
3224 | case BC6HData::M: |
3225 | codedValue = modeID; |
3226 | break; |
3227 | case BC6HData::D: |
3228 | codedValue = partition; |
3229 | break; |
3230 | case BC6HData::RW: |
3231 | codedValue = eps[0][0][0]; |
3232 | break; |
3233 | case BC6HData::RX: |
3234 | codedValue = eps[0][1][0]; |
3235 | break; |
3236 | case BC6HData::RY: |
3237 | codedValue = eps[1][0][0]; |
3238 | break; |
3239 | case BC6HData::RZ: |
3240 | codedValue = eps[1][1][0]; |
3241 | break; |
3242 | case BC6HData::GW: |
3243 | codedValue = eps[0][0][1]; |
3244 | break; |
3245 | case BC6HData::GX: |
3246 | codedValue = eps[0][1][1]; |
3247 | break; |
3248 | case BC6HData::GY: |
3249 | codedValue = eps[1][0][1]; |
3250 | break; |
3251 | case BC6HData::GZ: |
3252 | codedValue = eps[1][1][1]; |
3253 | break; |
3254 | case BC6HData::BW: |
3255 | codedValue = eps[0][0][2]; |
3256 | break; |
3257 | case BC6HData::BX: |
3258 | codedValue = eps[0][1][2]; |
3259 | break; |
3260 | case BC6HData::BY: |
3261 | codedValue = eps[1][0][2]; |
3262 | break; |
3263 | case BC6HData::BZ: |
3264 | codedValue = eps[1][1][2]; |
3265 | break; |
3266 | default: |
3267 | assert(false); |
3268 | break; |
3269 | } |
3270 | pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1); |
3271 | } |
3272 | |
3273 | int fixupIndex1 = 0; |
3274 | int indexBits = 4; |
3275 | if (modeInfo.m_partitioned) |
3276 | { |
3277 | fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; |
3278 | indexBits = 3; |
3279 | } |
3280 | |
3281 | for (int px = 0; px < 16; px++) |
3282 | { |
3283 | ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block); |
3284 | if (px == 0 || px == fixupIndex1) |
3285 | pv.Pack(index, indexBits - 1); |
3286 | else |
3287 | pv.Pack(index, indexBits); |
3288 | } |
3289 | |
3290 | pv.Flush(packedBlocks + 16 * block); |
3291 | } |
3292 | } |
3293 | |
3294 | void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits) |
3295 | { |
3296 | if (v & (1 << (bits - 1))) |
3297 | v |= -(1 << bits); |
3298 | } |
3299 | |
3300 | void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned) |
3301 | { |
3302 | UnpackingVector pv; |
3303 | pv.Init(pBC); |
3304 | |
3305 | int numModeBits = 2; |
3306 | int modeBits = pv.Unpack(2); |
3307 | if (modeBits != 0 && modeBits != 1) |
3308 | { |
3309 | modeBits |= pv.Unpack(3) << 2; |
3310 | numModeBits += 3; |
3311 | } |
3312 | |
3313 | int mode = -1; |
3314 | for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++) |
3315 | { |
3316 | if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits) |
3317 | { |
3318 | mode = possibleMode; |
3319 | break; |
3320 | } |
3321 | } |
3322 | |
3323 | if (mode < 0) |
3324 | { |
3325 | for (int px = 0; px < 16; px++) |
3326 | { |
3327 | for (int ch = 0; ch < 3; ch++) |
3328 | output.m_pixels[px][ch] = 0; |
3329 | output.m_pixels[px][3] = 0x3c00; // 1.0 |
3330 | } |
3331 | return; |
3332 | } |
3333 | |
3334 | const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; |
3335 | const size_t = modeInfo.m_partitioned ? 82 : 65; |
3336 | const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode]; |
3337 | |
3338 | int32_t partition = 0; |
3339 | int32_t eps[2][2][3]; |
3340 | |
3341 | for (int subset = 0; subset < 2; subset++) |
3342 | for (int epi = 0; epi < 2; epi++) |
3343 | for (int ch = 0; ch < 3; ch++) |
3344 | eps[subset][epi][ch] = 0; |
3345 | |
3346 | for (size_t i = numModeBits; i < headerBits; i++) { |
3347 | int32_t *pCodedValue = NULL; |
3348 | |
3349 | switch (desc[i].m_eField) { |
3350 | case BC6HData::D: |
3351 | pCodedValue = &partition; |
3352 | break; |
3353 | case BC6HData::RW: |
3354 | pCodedValue = &eps[0][0][0]; |
3355 | break; |
3356 | case BC6HData::RX: |
3357 | pCodedValue = &eps[0][1][0]; |
3358 | break; |
3359 | case BC6HData::RY: |
3360 | pCodedValue = &eps[1][0][0]; |
3361 | break; |
3362 | case BC6HData::RZ: |
3363 | pCodedValue = &eps[1][1][0]; |
3364 | break; |
3365 | case BC6HData::GW: |
3366 | pCodedValue = &eps[0][0][1]; |
3367 | break; |
3368 | case BC6HData::GX: |
3369 | pCodedValue = &eps[0][1][1]; |
3370 | break; |
3371 | case BC6HData::GY: |
3372 | pCodedValue = &eps[1][0][1]; |
3373 | break; |
3374 | case BC6HData::GZ: |
3375 | pCodedValue = &eps[1][1][1]; |
3376 | break; |
3377 | case BC6HData::BW: |
3378 | pCodedValue = &eps[0][0][2]; |
3379 | break; |
3380 | case BC6HData::BX: |
3381 | pCodedValue = &eps[0][1][2]; |
3382 | break; |
3383 | case BC6HData::BY: |
3384 | pCodedValue = &eps[1][0][2]; |
3385 | break; |
3386 | case BC6HData::BZ: |
3387 | pCodedValue = &eps[1][1][2]; |
3388 | break; |
3389 | default: |
3390 | assert(false); |
3391 | break; |
3392 | } |
3393 | |
3394 | (*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit; |
3395 | } |
3396 | |
3397 | uint16_t modeID = modeInfo.m_modeID; |
3398 | |
3399 | int fixupIndex1 = 0; |
3400 | int indexBits = 4; |
3401 | int numSubsets = 1; |
3402 | if (modeInfo.m_partitioned) |
3403 | { |
3404 | fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; |
3405 | indexBits = 3; |
3406 | numSubsets = 2; |
3407 | } |
3408 | |
3409 | int indexes[16]; |
3410 | for (int px = 0; px < 16; px++) |
3411 | { |
3412 | if (px == 0 || px == fixupIndex1) |
3413 | indexes[px] = pv.Unpack(indexBits - 1); |
3414 | else |
3415 | indexes[px] = pv.Unpack(indexBits); |
3416 | } |
3417 | |
3418 | if (modeInfo.m_partitioned) |
3419 | { |
3420 | for (int ch = 0; ch < 3; ch++) |
3421 | { |
3422 | if (isSigned) |
3423 | SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); |
3424 | if (modeInfo.m_transformed || isSigned) |
3425 | { |
3426 | SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); |
3427 | SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]); |
3428 | SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]); |
3429 | } |
3430 | } |
3431 | } |
3432 | else |
3433 | { |
3434 | for (int ch = 0; ch < 3; ch++) |
3435 | { |
3436 | if (isSigned) |
3437 | SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); |
3438 | if (modeInfo.m_transformed || isSigned) |
3439 | SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); |
3440 | } |
3441 | } |
3442 | |
3443 | int aPrec = modeInfo.m_aPrec; |
3444 | |
3445 | if (modeInfo.m_transformed) |
3446 | { |
3447 | for (int ch = 0; ch < 3; ch++) |
3448 | { |
3449 | int wrapMask = (1 << aPrec) - 1; |
3450 | |
3451 | eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask); |
3452 | if (isSigned) |
3453 | SignExtendSingle(eps[0][1][ch], aPrec); |
3454 | |
3455 | if (modeInfo.m_partitioned) |
3456 | { |
3457 | eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask); |
3458 | eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask); |
3459 | |
3460 | if (isSigned) |
3461 | { |
3462 | SignExtendSingle(eps[1][0][ch], aPrec); |
3463 | SignExtendSingle(eps[1][1][ch], aPrec); |
3464 | } |
3465 | } |
3466 | } |
3467 | } |
3468 | |
3469 | // Unquantize endpoints |
3470 | for (int subset = 0; subset < numSubsets; subset++) |
3471 | { |
3472 | for (int epi = 0; epi < 2; epi++) |
3473 | { |
3474 | for (int ch = 0; ch < 3; ch++) |
3475 | { |
3476 | int &v = eps[subset][epi][ch]; |
3477 | |
3478 | if (isSigned) |
3479 | { |
3480 | if (aPrec >= 16) |
3481 | { |
3482 | // Nothing |
3483 | } |
3484 | else |
3485 | { |
3486 | bool s = false; |
3487 | int comp = v; |
3488 | if (v < 0) |
3489 | { |
3490 | s = true; |
3491 | comp = -comp; |
3492 | } |
3493 | |
3494 | int unq = 0; |
3495 | if (comp == 0) |
3496 | unq = 0; |
3497 | else if (comp >= ((1 << (aPrec - 1)) - 1)) |
3498 | unq = 0x7fff; |
3499 | else |
3500 | unq = ((comp << 15) + 0x4000) >> (aPrec - 1); |
3501 | |
3502 | if (s) |
3503 | unq = -unq; |
3504 | |
3505 | v = unq; |
3506 | } |
3507 | } |
3508 | else |
3509 | { |
3510 | if (aPrec >= 15) |
3511 | { |
3512 | // Nothing |
3513 | } |
3514 | else if (v == 0) |
3515 | { |
3516 | // Nothing |
3517 | } |
3518 | else if (v == ((1 << aPrec) - 1)) |
3519 | v = 0xffff; |
3520 | else |
3521 | v = ((v << 16) + 0x8000) >> aPrec; |
3522 | } |
3523 | } |
3524 | } |
3525 | } |
3526 | |
3527 | const int *weights = BC7Data::g_weightTables[indexBits]; |
3528 | |
3529 | for (int px = 0; px < 16; px++) |
3530 | { |
3531 | int subset = 0; |
3532 | if (modeInfo.m_partitioned) |
3533 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
3534 | |
3535 | int w = weights[indexes[px]]; |
3536 | for (int ch = 0; ch < 3; ch++) |
3537 | { |
3538 | int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6; |
3539 | |
3540 | if (isSigned) |
3541 | { |
3542 | if (comp < 0) |
3543 | comp = -(((-comp) * 31) >> 5); |
3544 | else |
3545 | comp = (comp * 31) >> 5; |
3546 | |
3547 | int s = 0; |
3548 | if (comp < 0) |
3549 | { |
3550 | s = 0x8000; |
3551 | comp = -comp; |
3552 | } |
3553 | |
3554 | output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp); |
3555 | } |
3556 | else |
3557 | { |
3558 | comp = (comp * 31) >> 6; |
3559 | output.m_pixels[px][ch] = static_cast<uint16_t>(comp); |
3560 | } |
3561 | } |
3562 | output.m_pixels[px][3] = 0x3c00; // 1.0 |
3563 | } |
3564 | } |
3565 | |
3566 | void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality) |
3567 | { |
3568 | static const int kMaxQuality = 100; |
3569 | |
3570 | if (quality < 1) |
3571 | quality = 1; |
3572 | else if (quality > kMaxQuality) |
3573 | quality = kMaxQuality; |
3574 | |
3575 | const int numRGBModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality; |
3576 | const int numRGBAModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality; |
3577 | |
3578 | const uint16_t *prioLists[] = { cvtt::Tables::BC7Prio::g_bc7PrioCodesRGB, cvtt::Tables::BC7Prio::g_bc7PrioCodesRGBA }; |
3579 | const int prioListSizes[] = { numRGBModes, numRGBAModes }; |
3580 | |
3581 | BC7FineTuningParams ftParams; |
3582 | memset(&ftParams, 0, sizeof(ftParams)); |
3583 | |
3584 | for (int listIndex = 0; listIndex < 2; listIndex++) |
3585 | { |
3586 | int prioListSize = prioListSizes[listIndex]; |
3587 | const uint16_t *prioList = prioLists[listIndex]; |
3588 | |
3589 | for (int prioIndex = 0; prioIndex < prioListSize; prioIndex++) |
3590 | { |
3591 | const uint16_t packedMode = prioList[prioIndex]; |
3592 | |
3593 | uint8_t seedPoints = static_cast<uint8_t>(cvtt::Tables::BC7Prio::UnpackSeedPointCount(packedMode)); |
3594 | int mode = cvtt::Tables::BC7Prio::UnpackMode(packedMode); |
3595 | |
3596 | switch (mode) |
3597 | { |
3598 | case 0: |
3599 | ftParams.mode0SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
3600 | break; |
3601 | case 1: |
3602 | ftParams.mode1SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
3603 | break; |
3604 | case 2: |
3605 | ftParams.mode2SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
3606 | break; |
3607 | case 3: |
3608 | ftParams.mode3SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
3609 | break; |
3610 | case 4: |
3611 | ftParams.mode4SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)][cvtt::Tables::BC7Prio::UnpackIndexSelector(packedMode)] = seedPoints; |
3612 | break; |
3613 | case 5: |
3614 | ftParams.mode5SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)] = seedPoints; |
3615 | break; |
3616 | case 6: |
3617 | ftParams.mode6SP = seedPoints; |
3618 | break; |
3619 | case 7: |
3620 | ftParams.mode7SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
3621 | break; |
3622 | } |
3623 | } |
3624 | } |
3625 | |
3626 | ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, ftParams); |
3627 | } |
3628 | |
3629 | // Generates a BC7 encoding plan from fine-tuning parameters. |
3630 | bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams ¶ms) |
3631 | { |
3632 | memset(&encodingPlan, 0, sizeof(encodingPlan)); |
3633 | |
3634 | // Mode 0 |
3635 | for (int partition = 0; partition < 16; partition++) |
3636 | { |
3637 | uint8_t sp = params.mode0SP[partition]; |
3638 | if (sp == 0) |
3639 | continue; |
3640 | |
3641 | encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition; |
3642 | |
3643 | for (int subset = 0; subset < 3; subset++) |
3644 | { |
3645 | int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; |
3646 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
3647 | } |
3648 | } |
3649 | |
3650 | // Mode 1 |
3651 | for (int partition = 0; partition < 64; partition++) |
3652 | { |
3653 | uint8_t sp = params.mode1SP[partition]; |
3654 | if (sp == 0) |
3655 | continue; |
3656 | |
3657 | encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
3658 | |
3659 | for (int subset = 0; subset < 2; subset++) |
3660 | { |
3661 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
3662 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
3663 | } |
3664 | } |
3665 | |
3666 | // Mode 2 |
3667 | for (int partition = 0; partition < 64; partition++) |
3668 | { |
3669 | uint8_t sp = params.mode2SP[partition]; |
3670 | if (sp == 0) |
3671 | continue; |
3672 | |
3673 | encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
3674 | |
3675 | for (int subset = 0; subset < 3; subset++) |
3676 | { |
3677 | int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; |
3678 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
3679 | } |
3680 | } |
3681 | |
3682 | // Mode 3 |
3683 | for (int partition = 0; partition < 64; partition++) |
3684 | { |
3685 | uint8_t sp = params.mode3SP[partition]; |
3686 | if (sp == 0) |
3687 | continue; |
3688 | |
3689 | encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
3690 | |
3691 | for (int subset = 0; subset < 2; subset++) |
3692 | { |
3693 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
3694 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
3695 | } |
3696 | } |
3697 | |
3698 | // Mode 4 |
3699 | for (int rotation = 0; rotation < 4; rotation++) |
3700 | { |
3701 | for (int indexMode = 0; indexMode < 2; indexMode++) |
3702 | encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode]; |
3703 | } |
3704 | |
3705 | // Mode 5 |
3706 | for (int rotation = 0; rotation < 4; rotation++) |
3707 | encodingPlan.mode5SP[rotation] = params.mode5SP[rotation]; |
3708 | |
3709 | // Mode 6 |
3710 | { |
3711 | uint8_t sp = params.mode6SP; |
3712 | if (sp != 0) |
3713 | { |
3714 | encodingPlan.mode6Enabled = true; |
3715 | |
3716 | int shape = cvtt::Internal::BC7Data::g_shapes1[0][0]; |
3717 | encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); |
3718 | } |
3719 | } |
3720 | |
3721 | // Mode 7 |
3722 | for (int partition = 0; partition < 64; partition++) |
3723 | { |
3724 | uint8_t sp = params.mode7SP[partition]; |
3725 | if (sp == 0) |
3726 | continue; |
3727 | |
3728 | encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition; |
3729 | |
3730 | for (int subset = 0; subset < 2; subset++) |
3731 | { |
3732 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
3733 | encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); |
3734 | } |
3735 | } |
3736 | |
3737 | for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++) |
3738 | { |
3739 | if (encodingPlan.seedPointsForShapeRGB[i] > 0) |
3740 | { |
3741 | encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i; |
3742 | encodingPlan.rgbNumShapesToEvaluate++; |
3743 | } |
3744 | } |
3745 | |
3746 | for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++) |
3747 | { |
3748 | if (encodingPlan.seedPointsForShapeRGBA[i] > 0) |
3749 | { |
3750 | encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i; |
3751 | encodingPlan.rgbaNumShapesToEvaluate++; |
3752 | } |
3753 | } |
3754 | |
3755 | encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled); |
3756 | |
3757 | return true; |
3758 | } |
3759 | |
3760 | #endif |
3761 | |