| 1 | /* |
| 2 | Convection Texture Tools |
| 3 | Copyright (c) 2018-2019 Eric Lasota |
| 4 | |
| 5 | Permission is hereby granted, free of charge, to any person obtaining |
| 6 | a copy of this software and associated documentation files (the |
| 7 | "Software"), to deal in the Software without restriction, including |
| 8 | without limitation the rights to use, copy, modify, merge, publish, |
| 9 | distribute, sublicense, and/or sell copies of the Software, and to |
| 10 | permit persons to whom the Software is furnished to do so, subject |
| 11 | to the following conditions: |
| 12 | |
| 13 | The above copyright notice and this permission notice shall be included |
| 14 | in all copies or substantial portions of the Software. |
| 15 | |
| 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 23 | |
| 24 | ------------------------------------------------------------------------------------- |
| 25 | |
| 26 | Portions based on DirectX Texture Library (DirectXTex) |
| 27 | |
| 28 | Copyright (c) Microsoft Corporation. All rights reserved. |
| 29 | Licensed under the MIT License. |
| 30 | |
| 31 | http://go.microsoft.com/fwlink/?LinkId=248926 |
| 32 | */ |
| 33 | #include "ConvectionKernels_Config.h" |
| 34 | |
| 35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) |
| 36 | |
| 37 | #include "ConvectionKernels_BC67.h" |
| 38 | |
| 39 | #include "ConvectionKernels_AggregatedError.h" |
| 40 | #include "ConvectionKernels_BCCommon.h" |
| 41 | #include "ConvectionKernels_BC7_Prio.h" |
| 42 | #include "ConvectionKernels_BC7_SingleColor.h" |
| 43 | #include "ConvectionKernels_BC6H_IO.h" |
| 44 | #include "ConvectionKernels_EndpointRefiner.h" |
| 45 | #include "ConvectionKernels_EndpointSelector.h" |
| 46 | #include "ConvectionKernels_IndexSelectorHDR.h" |
| 47 | #include "ConvectionKernels_ParallelMath.h" |
| 48 | #include "ConvectionKernels_UnfinishedEndpoints.h" |
| 49 | |
| 50 | namespace cvtt |
| 51 | { |
| 52 | namespace Internal |
| 53 | { |
| 54 | namespace BC67 |
| 55 | { |
| 56 | typedef ParallelMath::Float MFloat; |
| 57 | typedef ParallelMath::UInt15 MUInt15; |
| 58 | |
| 59 | struct WorkInfo |
| 60 | { |
| 61 | MUInt15 m_mode; |
| 62 | MFloat m_error; |
| 63 | MUInt15 m_ep[3][2][4]; |
| 64 | MUInt15 m_indexes[16]; |
| 65 | MUInt15 m_indexes2[16]; |
| 66 | |
| 67 | union |
| 68 | { |
| 69 | MUInt15 m_partition; |
| 70 | struct IndexSelectorAndRotation |
| 71 | { |
| 72 | MUInt15 m_indexSelector; |
| 73 | MUInt15 m_rotation; |
| 74 | } m_isr; |
| 75 | } m_u; |
| 76 | }; |
| 77 | } |
| 78 | |
// Descriptor tables for the BC6H modes.
namespace BC6HData
{
    // Identifies which field a descriptor slot refers to.
    // The R/G/B prefix with a W/X/Y/Z suffix presumably names the four
    // endpoint values of each colour channel - TODO confirm.
    enum EField
    {
        NA = 0,     // N/A (slot not used by the mode)
        M  = 1,     // Mode
        D  = 2,     // Shape
        RW = 3,
        RX = 4,
        RY = 5,
        RZ = 6,
        GW = 7,
        GX = 8,
        GY = 9,
        GZ = 10,
        BW = 11,
        BX = 12,
        BY = 13,
        BZ = 14,
    };

    // One descriptor slot: a field plus a bit number within that field.
    struct ModeDescriptor
    {
        EField  m_eField;   // field the slot belongs to
        uint8_t m_uBit;     // bit index inside m_eField
    };

    // g_modeDescriptors[mode][slot]: 14 modes with 82 slots each.
    // Modes 11-14 only use the first 65 slots; the remainder is { NA, 0 }.
    // Each header comment gives the mode number, the value in parentheses
    // and the precision list as written in the original table.
    const ModeDescriptor g_modeDescriptors[14][82] =
    {
        // Mode 1 (0x00): 10 5 5 5
        {
            { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 2 (0x01): 7 6 6 6
        {
            { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 3 (0x02): 11 5 4 4
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 4 (0x06): 11 4 5 4
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 5 (0x0a): 11 4 4 5
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 6 (0x0e): 9 5 5 5
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 7 (0x12): 8 6 5 5
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 8 (0x16): 8 5 6 5
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 9 (0x1a): 8 5 5 6
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 10 (0x1e): 6 6 6 6
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        // Mode 11 (0x03): 10 10
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        // Mode 12 (0x07): 11 9
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        // Mode 13 (0x0b): 12 8
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        // Mode 14 (0x0f): 16 4
        {
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },
            { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },
            { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },
            { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },
    };
}
| 277 | |
| 278 | namespace BC7Data |
| 279 | { |
// Alpha handling options referenced by BC7ModeInfo::m_alphaMode.
enum AlphaMode
{
    AlphaMode_Combined = 0,
    AlphaMode_Separate = 1,
    AlphaMode_None     = 2,
};

// P-bit options referenced by BC7ModeInfo::m_pBitMode.
enum PBitMode
{
    PBitMode_PerEndpoint = 0,
    PBitMode_PerSubset   = 1,
    PBitMode_None        = 2
};

// Static parameters of one BC7 mode. The field order is significant:
// g_modes below is aggregate-initialised positionally.
struct BC7ModeInfo
{
    PBitMode  m_pBitMode;
    AlphaMode m_alphaMode;
    int       m_rgbBits;
    int       m_alphaBits;
    int       m_partitionBits;
    int       m_numSubsets;
    int       m_indexBits;
    int       m_alphaIndexBits;
    bool      m_hasIndexSelector;
};

// One entry per BC7 mode, indexed by mode number (0-7).
// Columns: pBitMode, alphaMode, rgbBits, alphaBits, partitionBits,
//          numSubsets, indexBits, alphaIndexBits, hasIndexSelector
BC7ModeInfo g_modes[] =
{
    // Modes 0-3: no alpha
    { PBitMode_PerEndpoint, AlphaMode_None,     4, 0, 4, 3, 3, 0, false },  // mode 0
    { PBitMode_PerSubset,   AlphaMode_None,     6, 0, 6, 2, 3, 0, false },  // mode 1
    { PBitMode_None,        AlphaMode_None,     5, 0, 6, 3, 2, 0, false },  // mode 2
    { PBitMode_PerEndpoint, AlphaMode_None,     7, 0, 6, 2, 2, 0, false },  // mode 3 (Mode reference has an error, P-bit is really per-endpoint)

    // Modes 4-7: with alpha
    { PBitMode_None,        AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true  },  // mode 4
    { PBitMode_None,        AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false },  // mode 5
    { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false },  // mode 6
    { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false }   // mode 7
};
| 319 | |
// Weight tables with 4, 8 and 16 entries, each running from 0 to 64.
const int g_weight2[] = { 0, 21, 43, 64 };
const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const int g_weight4[] =
{
    0, 4, 9, 13, 17, 21, 26, 30,
    34, 38, 43, 47, 51, 55, 60, 64
};

// Lookup from slot number to weight table; slots 0 and 1 have no table.
// Presumably indexed by the number of index bits (2, 3 or 4) - TODO confirm.
const int *g_weightTables[] =
{
    NULL,       // 0
    NULL,       // 1
    g_weight2,  // 2
    g_weight3,  // 3
    g_weight4   // 4
};
| 332 | |
// Static parameters of one BC6H mode. The field order is significant:
// g_hdrModes below is aggregate-initialised positionally.
struct BC6HModeInfo
{
    uint16_t m_modeID;
    bool     m_partitioned;
    bool     m_transformed;
    int      m_aPrec;
    int      m_bPrec[3];
};

// Indexed as [partitioned][precision]: true where g_hdrModes contains a mode
// with that m_partitioned flag and that m_aPrec value.
bool g_hdrModesExistForPrecision[2][17] =
{
    {   // partitioned == false
        false, false, false, false, false, false, false, false, false, false,  // 0-9
        true,  true,  true,                                                    // 10-12
        false, false, false,                                                   // 13-15
        true,                                                                  // 16
    },
    {   // partitioned == true
        false, false, false, false, false, false,                              // 0-5
        true,  true,  true,  true,  true,  true,                               // 6-11
        false, false, false, false, false,                                     // 12-16
    },
};

// Columns: modeID, partitioned, transformed, aPrec, { bPrec[0], bPrec[1], bPrec[2] }
BC6HModeInfo g_hdrModes[] =
{
    // partitioned modes
    { 0x00, true,  true,  10, {  5,  5,  5 } },
    { 0x01, true,  true,   7, {  6,  6,  6 } },
    { 0x02, true,  true,  11, {  5,  4,  4 } },
    { 0x06, true,  true,  11, {  4,  5,  4 } },
    { 0x0a, true,  true,  11, {  4,  4,  5 } },
    { 0x0e, true,  true,   9, {  5,  5,  5 } },
    { 0x12, true,  true,   8, {  6,  5,  5 } },
    { 0x16, true,  true,   8, {  5,  6,  5 } },
    { 0x1a, true,  true,   8, {  5,  5,  6 } },
    { 0x1e, true,  false,  6, {  6,  6,  6 } },

    // non-partitioned modes
    { 0x03, false, false, 10, { 10, 10, 10 } },
    { 0x07, false, true,  11, {  9,  9,  9 } },
    { 0x0b, false, true,  12, {  8,  8,  8 } },
    { 0x0f, false, true,  16, {  4,  4,  4 } },
};

// Largest valid second index of g_hdrModesExistForPrecision.
const int g_maxHDRPrecision = 16;

// Number of entries in g_hdrModes.
static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(BC6HModeInfo);
| 371 | |
// 64 16-bit masks, four per row.
// NOTE(review): presumably one bit per pixel of a 4x4 block for the
// two-subset partitions - confirm against the code that reads this table.
static uint16_t g_partitionMap[64] =
{
    0xCCCC, 0x8888, 0xEEEE, 0xECC8,     //  0 -  3
    0xC880, 0xFEEC, 0xFEC8, 0xEC80,     //  4 -  7
    0xC800, 0xFFEC, 0xFE80, 0xE800,     //  8 - 11
    0xFFE8, 0xFF00, 0xFFF0, 0xF000,     // 12 - 15
    0xF710, 0x008E, 0x7100, 0x08CE,     // 16 - 19
    0x008C, 0x7310, 0x3100, 0x8CCE,     // 20 - 23
    0x088C, 0x3110, 0x6666, 0x366C,     // 24 - 27
    0x17E8, 0x0FF0, 0x718E, 0x399C,     // 28 - 31
    0xAAAA, 0xF0F0, 0x5A5A, 0x33CC,     // 32 - 35
    0x3C3C, 0x55AA, 0x9696, 0xA55A,     // 36 - 39
    0x73CE, 0x13C8, 0x324C, 0x3BDC,     // 40 - 43
    0x6996, 0xC33C, 0x9966, 0x0660,     // 44 - 47
    0x0272, 0x04E4, 0x4E40, 0x2720,     // 48 - 51
    0xC936, 0x936C, 0x39C6, 0x639C,     // 52 - 55
    0x9336, 0x9CC6, 0x817E, 0xE718,     // 56 - 59
    0xCCF0, 0x0FCC, 0x7744, 0xEE22,     // 60 - 63
};
| 391 | |
// 64 32-bit values, four per row.
// NOTE(review): presumably two bits per pixel of a 4x4 block for the
// three-subset partitions - confirm against the code that reads this table.
static uint32_t g_partitionMap2[64] =
{
    0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,     //  0 -  3
    0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,     //  4 -  7
    0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,     //  8 - 11
    0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,     // 12 - 15
    0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,     // 16 - 19
    0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,     // 20 - 23
    0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,     // 24 - 27
    0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,     // 28 - 31
    0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,     // 32 - 35
    0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,     // 36 - 39
    0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,     // 40 - 43
    0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,     // 44 - 47
    0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,     // 48 - 51
    0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,     // 52 - 55
    0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,     // 56 - 59
    0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,     // 60 - 63
};
| 411 | |
// 64 fix-up indexes, sixteen per row; every value is a pixel number in 0..15.
// NOTE(review): presumably indexed by partition for the two-subset case - confirm.
static int g_fixupIndexes2[64] =
{
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,     //  0 - 15
    15,  2,  8,  2,  2,  8,  8, 15,  2,  8,  2,  2,  8,  8,  2,  2,     // 16 - 31
    15, 15,  6,  8,  2,  8, 15, 15,  2,  8,  2,  2,  2, 15, 15,  6,     // 32 - 47
     6,  2,  6,  8, 15, 15,  2,  2, 15, 15, 15, 15, 15,  2,  2, 15,     // 48 - 63
};
| 432 | |
// 64 pairs of fix-up indexes, four pairs per row; every value is a pixel number in 0..15.
// NOTE(review): presumably indexed by partition for the three-subset case,
// one entry per additional subset - confirm.
static int g_fixupIndexes3[64][2] =
{
    {  3, 15 }, {  3,  8 }, { 15,  8 }, { 15,  3 },     //  0 -  3
    {  8, 15 }, {  3, 15 }, { 15,  3 }, { 15,  8 },     //  4 -  7
    {  8, 15 }, {  8, 15 }, {  6, 15 }, {  6, 15 },     //  8 - 11
    {  6, 15 }, {  5, 15 }, {  3, 15 }, {  3,  8 },     // 12 - 15
    {  3, 15 }, {  3,  8 }, {  8, 15 }, { 15,  3 },     // 16 - 19
    {  3, 15 }, {  3,  8 }, {  6, 15 }, { 10,  8 },     // 20 - 23
    {  5,  3 }, {  8, 15 }, {  8,  6 }, {  6, 10 },     // 24 - 27
    {  8, 15 }, {  5, 15 }, { 15, 10 }, { 15,  8 },     // 28 - 31

    {  8, 15 }, { 15,  3 }, {  3, 15 }, {  5, 10 },     // 32 - 35
    {  6, 10 }, { 10,  8 }, {  8,  9 }, { 15, 10 },     // 36 - 39
    { 15,  6 }, {  3, 15 }, { 15,  8 }, {  5, 15 },     // 40 - 43
    { 15,  3 }, { 15,  6 }, { 15,  6 }, { 15,  8 },     // 44 - 47
    {  3, 15 }, { 15,  3 }, {  5, 15 }, {  5, 15 },     // 48 - 51
    {  5, 15 }, {  8, 15 }, {  5, 15 }, { 10, 15 },     // 52 - 55
    {  5, 15 }, { 10, 15 }, {  8, 15 }, { 13, 15 },     // 56 - 59
    { 15,  3 }, { 12, 15 }, {  3, 15 }, {  3,  8 },     // 60 - 63
};
| 453 | |
| 454 | static const unsigned char g_fragments[] = |
| 455 | { |
| 456 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16 |
| 457 | 0, 1, 2, 3, // 16, 4 |
| 458 | 0, 1, 4, // 20, 3 |
| 459 | 0, 1, 2, 4, // 23, 4 |
| 460 | 2, 3, 7, // 27, 3 |
| 461 | 1, 2, 3, 7, // 30, 4 |
| 462 | 0, 1, 2, 3, 4, 5, 6, 7, // 34, 8 |
| 463 | 0, 1, 4, 8, // 42, 4 |
| 464 | 0, 1, 2, 4, 5, 8, // 46, 6 |
| 465 | 0, 1, 2, 3, 4, 5, 6, 8, // 52, 8 |
| 466 | 1, 4, 5, 6, 9, // 60, 5 |
| 467 | 2, 5, 6, 7, 10, // 65, 5 |
| 468 | 5, 6, 9, 10, // 70, 4 |
| 469 | 2, 3, 7, 11, // 74, 4 |
| 470 | 1, 2, 3, 6, 7, 11, // 78, 6 |
| 471 | 0, 1, 2, 3, 5, 6, 7, 11, // 84, 8 |
| 472 | 0, 1, 2, 3, 8, 9, 10, 11, // 92, 8 |
| 473 | 2, 3, 6, 7, 8, 9, 10, 11, // 100, 8 |
| 474 | 4, 5, 6, 7, 8, 9, 10, 11, // 108, 8 |
| 475 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12 |
| 476 | 0, 4, 8, 12, // 128, 4 |
| 477 | 0, 2, 3, 4, 6, 7, 8, 12, // 132, 8 |
| 478 | 0, 1, 2, 4, 5, 8, 9, 12, // 140, 8 |
| 479 | 0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10 |
| 480 | 3, 6, 7, 8, 9, 12, // 158, 6 |
| 481 | 3, 5, 6, 7, 8, 9, 10, 12, // 164, 8 |
| 482 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12 |
| 483 | 0, 1, 2, 5, 6, 7, 11, 12, // 184, 8 |
| 484 | 5, 8, 9, 10, 13, // 192, 5 |
| 485 | 8, 12, 13, // 197, 3 |
| 486 | 4, 8, 12, 13, // 200, 4 |
| 487 | 2, 3, 6, 9, 12, 13, // 204, 6 |
| 488 | 0, 1, 2, 3, 8, 9, 12, 13, // 210, 8 |
| 489 | 0, 1, 4, 5, 8, 9, 12, 13, // 218, 8 |
| 490 | 2, 3, 6, 7, 8, 9, 12, 13, // 226, 8 |
| 491 | 2, 3, 5, 6, 9, 10, 12, 13, // 234, 8 |
| 492 | 0, 3, 6, 7, 9, 10, 12, 13, // 242, 8 |
| 493 | 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12 |
| 494 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13 |
| 495 | 2, 3, 4, 7, 8, 11, 12, 13, // 275, 8 |
| 496 | 1, 2, 6, 7, 8, 11, 12, 13, // 283, 8 |
| 497 | 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10 |
| 498 | 2, 3, 4, 5, 10, 11, 12, 13, // 301, 8 |
| 499 | 0, 1, 6, 7, 10, 11, 12, 13, // 309, 8 |
| 500 | 6, 9, 10, 11, 14, // 317, 5 |
| 501 | 0, 2, 4, 6, 8, 10, 12, 14, // 322, 8 |
| 502 | 1, 3, 5, 7, 8, 10, 12, 14, // 330, 8 |
| 503 | 1, 3, 4, 6, 9, 11, 12, 14, // 338, 8 |
| 504 | 0, 2, 5, 7, 9, 11, 12, 14, // 346, 8 |
| 505 | 0, 3, 4, 5, 8, 9, 13, 14, // 354, 8 |
| 506 | 2, 3, 4, 7, 8, 9, 13, 14, // 362, 8 |
| 507 | 1, 2, 5, 6, 9, 10, 13, 14, // 370, 8 |
| 508 | 0, 3, 4, 7, 9, 10, 13, 14, // 378, 8 |
| 509 | 0, 3, 5, 6, 8, 11, 13, 14, // 386, 8 |
| 510 | 1, 2, 4, 7, 8, 11, 13, 14, // 394, 8 |
| 511 | 0, 1, 4, 7, 10, 11, 13, 14, // 402, 8 |
| 512 | 0, 3, 6, 7, 10, 11, 13, 14, // 410, 8 |
| 513 | 8, 12, 13, 14, // 418, 4 |
| 514 | 1, 2, 3, 7, 8, 12, 13, 14, // 422, 8 |
| 515 | 4, 8, 9, 12, 13, 14, // 430, 6 |
| 516 | 0, 4, 5, 8, 9, 12, 13, 14, // 436, 8 |
| 517 | 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10 |
| 518 | 2, 6, 8, 9, 10, 12, 13, 14, // 454, 8 |
| 519 | 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12 |
| 520 | 0, 7, 9, 10, 11, 12, 13, 14, // 474, 8 |
| 521 | 1, 2, 3, 4, 5, 6, 8, 15, // 482, 8 |
| 522 | 3, 7, 11, 15, // 490, 4 |
| 523 | 0, 1, 3, 4, 5, 7, 11, 15, // 494, 8 |
| 524 | 0, 4, 5, 10, 11, 15, // 502, 6 |
| 525 | 1, 2, 3, 6, 7, 10, 11, 15, // 508, 8 |
| 526 | 0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10 |
| 527 | 0, 4, 5, 6, 9, 10, 11, 15, // 526, 8 |
| 528 | 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12 |
| 529 | 1, 2, 4, 5, 8, 9, 12, 15, // 546, 8 |
| 530 | 2, 3, 5, 6, 8, 9, 12, 15, // 554, 8 |
| 531 | 0, 3, 5, 6, 9, 10, 12, 15, // 562, 8 |
| 532 | 1, 2, 4, 7, 9, 10, 12, 15, // 570, 8 |
| 533 | 1, 2, 5, 6, 8, 11, 12, 15, // 578, 8 |
| 534 | 0, 3, 4, 7, 8, 11, 12, 15, // 586, 8 |
| 535 | 0, 1, 5, 6, 10, 11, 12, 15, // 594, 8 |
| 536 | 1, 2, 6, 7, 10, 11, 12, 15, // 602, 8 |
| 537 | 1, 3, 4, 6, 8, 10, 13, 15, // 610, 8 |
| 538 | 0, 2, 5, 7, 8, 10, 13, 15, // 618, 8 |
| 539 | 0, 2, 4, 6, 9, 11, 13, 15, // 626, 8 |
| 540 | 1, 3, 5, 7, 9, 11, 13, 15, // 634, 8 |
| 541 | 0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11 |
| 542 | 2, 3, 4, 5, 8, 9, 14, 15, // 653, 8 |
| 543 | 0, 1, 6, 7, 8, 9, 14, 15, // 661, 8 |
| 544 | 0, 1, 5, 10, 14, 15, // 669, 6 |
| 545 | 0, 3, 4, 5, 9, 10, 14, 15, // 675, 8 |
| 546 | 0, 1, 5, 6, 9, 10, 14, 15, // 683, 8 |
| 547 | 11, 14, 15, // 691, 3 |
| 548 | 7, 11, 14, 15, // 694, 4 |
| 549 | 1, 2, 4, 5, 8, 11, 14, 15, // 698, 8 |
| 550 | 0, 1, 4, 7, 8, 11, 14, 15, // 706, 8 |
| 551 | 0, 1, 4, 5, 10, 11, 14, 15, // 714, 8 |
| 552 | 2, 3, 6, 7, 10, 11, 14, 15, // 722, 8 |
| 553 | 4, 5, 6, 7, 10, 11, 14, 15, // 730, 8 |
| 554 | 0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10 |
| 555 | 0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12 |
| 556 | 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13 |
| 557 | 0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11 |
| 558 | 3, 4, 8, 9, 10, 13, 14, 15, // 784, 8 |
| 559 | 11, 13, 14, 15, // 792, 4 |
| 560 | 0, 1, 2, 4, 11, 13, 14, 15, // 796, 8 |
| 561 | 0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10 |
| 562 | 7, 10, 11, 13, 14, 15, // 814, 6 |
| 563 | 3, 6, 7, 10, 11, 13, 14, 15, // 820, 8 |
| 564 | 1, 5, 9, 10, 11, 13, 14, 15, // 828, 8 |
| 565 | 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12 |
| 566 | 12, 13, 14, 15, // 848, 4 |
| 567 | 0, 1, 2, 3, 12, 13, 14, 15, // 852, 8 |
| 568 | 0, 1, 4, 5, 12, 13, 14, 15, // 860, 8 |
| 569 | 4, 5, 6, 7, 12, 13, 14, 15, // 868, 8 |
| 570 | 4, 8, 9, 10, 12, 13, 14, 15, // 876, 8 |
| 571 | 0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10 |
| 572 | 0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12 |
| 573 | 0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12 |
| 574 | 0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11 |
| 575 | 0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11 |
| 576 | 7, 9, 10, 11, 12, 13, 14, 15, // 940, 8 |
| 577 | 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10 |
| 578 | 2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12 |
| 579 | 8, 9, 10, 11, 12, 13, 14, 15, // 970, 8 |
| 580 | 0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12 |
| 581 | 0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13 |
| 582 | 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12 |
| 583 | 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13 |
| 584 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12 |
| 585 | 0, 2, // 1040, 2 |
| 586 | 1, 3, // 1042, 2 |
| 587 | 0, 1, 4, 5, // 1044, 4 |
| 588 | 0, 1, 2, 4, 5, // 1048, 5 |
| 589 | 2, 3, 6, // 1053, 3 |
| 590 | 0, 2, 4, 6, // 1056, 4 |
| 591 | 1, 2, 5, 6, // 1060, 4 |
| 592 | 0, 1, 2, 3, 5, 6, // 1064, 6 |
| 593 | 0, 1, 2, 4, 5, 6, // 1070, 6 |
| 594 | 0, 1, 2, 3, 4, 5, 6, // 1076, 7 |
| 595 | 0, 3, 4, 7, // 1083, 4 |
| 596 | 0, 1, 2, 3, 4, 7, // 1087, 6 |
| 597 | 1, 3, 5, 7, // 1093, 4 |
| 598 | 2, 3, 6, 7, // 1097, 4 |
| 599 | 1, 2, 3, 6, 7, // 1101, 5 |
| 600 | 1, 2, 3, 5, 6, 7, // 1106, 6 |
| 601 | 0, 1, 2, 3, 5, 6, 7, // 1112, 7 |
| 602 | 4, 5, 6, 7, // 1119, 4 |
| 603 | 0, 8, // 1123, 2 |
| 604 | 0, 1, 4, 5, 8, // 1125, 5 |
| 605 | 0, 1, 8, 9, // 1130, 4 |
| 606 | 4, 5, 8, 9, // 1134, 4 |
| 607 | 0, 1, 4, 5, 8, 9, // 1138, 6 |
| 608 | 2, 6, 8, 9, // 1144, 4 |
| 609 | 6, 7, 8, 9, // 1148, 4 |
| 610 | 0, 2, 4, 6, 8, 10, // 1152, 6 |
| 611 | 1, 2, 5, 6, 9, 10, // 1158, 6 |
| 612 | 0, 3, 4, 7, 9, 10, // 1164, 6 |
| 613 | 0, 1, 2, 8, 9, 10, // 1170, 6 |
| 614 | 4, 5, 6, 8, 9, 10, // 1176, 6 |
| 615 | 3, 11, // 1182, 2 |
| 616 | 2, 3, 6, 7, 11, // 1184, 5 |
| 617 | 0, 3, 8, 11, // 1189, 4 |
| 618 | 0, 3, 4, 7, 8, 11, // 1193, 6 |
| 619 | 1, 3, 5, 7, 9, 11, // 1199, 6 |
| 620 | 2, 3, 10, 11, // 1205, 4 |
| 621 | 1, 5, 10, 11, // 1209, 4 |
| 622 | 4, 5, 10, 11, // 1213, 4 |
| 623 | 6, 7, 10, 11, // 1217, 4 |
| 624 | 2, 3, 6, 7, 10, 11, // 1221, 6 |
| 625 | 1, 2, 3, 9, 10, 11, // 1227, 6 |
| 626 | 5, 6, 7, 9, 10, 11, // 1233, 6 |
| 627 | 8, 9, 10, 11, // 1239, 4 |
| 628 | 4, 12, // 1243, 2 |
| 629 | 0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8 |
| 630 | 8, 9, 12, // 1253, 3 |
| 631 | 0, 4, 5, 8, 9, 12, // 1256, 6 |
| 632 | 0, 1, 4, 5, 8, 9, 12, // 1262, 7 |
| 633 | 2, 3, 5, 6, 8, 9, 12, // 1269, 7 |
| 634 | 1, 5, 9, 13, // 1276, 4 |
| 635 | 6, 7, 9, 13, // 1280, 4 |
| 636 | 1, 4, 7, 10, 13, // 1284, 5 |
| 637 | 1, 6, 8, 11, 13, // 1289, 5 |
| 638 | 0, 1, 12, 13, // 1294, 4 |
| 639 | 4, 5, 12, 13, // 1298, 4 |
| 640 | 0, 1, 6, 7, 12, 13, // 1302, 6 |
| 641 | 0, 1, 4, 8, 12, 13, // 1308, 6 |
| 642 | 8, 9, 12, 13, // 1314, 4 |
| 643 | 4, 8, 9, 12, 13, // 1318, 5 |
| 644 | 4, 5, 8, 9, 12, 13, // 1323, 6 |
| 645 | 0, 4, 5, 8, 9, 12, 13, // 1329, 7 |
| 646 | 0, 1, 6, 10, 12, 13, // 1336, 6 |
| 647 | 3, 6, 7, 9, 10, 12, 13, // 1342, 7 |
| 648 | 0, 1, 10, 11, 12, 13, // 1349, 6 |
| 649 | 2, 4, 7, 9, 14, // 1355, 5 |
| 650 | 4, 5, 10, 14, // 1360, 4 |
| 651 | 2, 6, 10, 14, // 1364, 4 |
| 652 | 2, 5, 8, 11, 14, // 1368, 5 |
| 653 | 0, 2, 12, 14, // 1373, 4 |
| 654 | 8, 10, 12, 14, // 1377, 4 |
| 655 | 4, 6, 8, 10, 12, 14, // 1381, 6 |
| 656 | 13, 14, // 1387, 2 |
| 657 | 9, 10, 13, 14, // 1389, 4 |
| 658 | 5, 6, 9, 10, 13, 14, // 1393, 6 |
| 659 | 0, 1, 2, 12, 13, 14, // 1399, 6 |
| 660 | 4, 5, 6, 12, 13, 14, // 1405, 6 |
| 661 | 8, 9, 12, 13, 14, // 1411, 5 |
| 662 | 8, 9, 10, 12, 13, 14, // 1416, 6 |
| 663 | 7, 15, // 1422, 2 |
| 664 | 0, 5, 10, 15, // 1424, 4 |
| 665 | 0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8 |
| 666 | 10, 11, 15, // 1436, 3 |
| 667 | 0, 1, 5, 6, 10, 11, 15, // 1439, 7 |
| 668 | 3, 6, 7, 10, 11, 15, // 1446, 6 |
| 669 | 12, 15, // 1452, 2 |
| 670 | 0, 3, 12, 15, // 1454, 4 |
| 671 | 4, 7, 12, 15, // 1458, 4 |
| 672 | 0, 3, 6, 9, 12, 15, // 1462, 6 |
| 673 | 0, 3, 5, 10, 12, 15, // 1468, 6 |
| 674 | 8, 11, 12, 15, // 1474, 4 |
| 675 | 5, 6, 8, 11, 12, 15, // 1478, 6 |
| 676 | 4, 7, 8, 11, 12, 15, // 1484, 6 |
| 677 | 1, 3, 13, 15, // 1490, 4 |
| 678 | 9, 11, 13, 15, // 1494, 4 |
| 679 | 5, 7, 9, 11, 13, 15, // 1498, 6 |
| 680 | 2, 3, 14, 15, // 1504, 4 |
| 681 | 2, 3, 4, 5, 14, 15, // 1508, 6 |
| 682 | 6, 7, 14, 15, // 1514, 4 |
| 683 | 2, 3, 5, 9, 14, 15, // 1518, 6 |
| 684 | 2, 3, 8, 9, 14, 15, // 1524, 6 |
| 685 | 10, 14, 15, // 1530, 3 |
| 686 | 0, 4, 5, 9, 10, 14, 15, // 1533, 7 |
| 687 | 2, 3, 7, 11, 14, 15, // 1540, 6 |
| 688 | 10, 11, 14, 15, // 1546, 4 |
| 689 | 7, 10, 11, 14, 15, // 1550, 5 |
| 690 | 6, 7, 10, 11, 14, 15, // 1555, 6 |
| 691 | 1, 2, 3, 13, 14, 15, // 1561, 6 |
| 692 | 5, 6, 7, 13, 14, 15, // 1567, 6 |
| 693 | 10, 11, 13, 14, 15, // 1573, 5 |
| 694 | 9, 10, 11, 13, 14, 15, // 1578, 6 |
| 695 | 0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8 |
| 696 | 9, 10, 12, 13, 14, 15, // 1592, 6 |
| 697 | 8, 11, 12, 13, 14, 15, // 1598, 6 |
| 698 | 3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8 |
| 699 | }; |
// Per-shape slice of g_fragments. Each row is { start, length }: `start` is the
// offset of the shape's first pixel index inside g_fragments and `length` is the
// number of consecutive pixel indices that belong to the shape. Rows are indexed
// by shape number (the values stored in the g_shapes* / g_shapeList* tables).
static const int g_shapeRanges[][2] =
{
    { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
    { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
    { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
    { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
    { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
    { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
    { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
    { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
    { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
    { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
    { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
    { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
    { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
    { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
    { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
    { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
    { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
    { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
    { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
    { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
    { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
    { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
    { 1604, 8 },
};
// Single-row companion to g_shapes2 / g_shapes3.
// NOTE(review): the only row, { 0, 16 }, is identical to g_shapeRanges[0]; whether
// the two columns are meant as a { start, length } range or as shape indices is
// not visible from this part of the file - confirm against the users of this table.
static const int g_shapes1[][2] =
{
    { 0, 16 }
};
// 64 rows of two shape numbers each. Every value is a valid row index into
// g_shapeRanges; e.g. row 0 = { 33, 96 } names two 8-pixel ranges that together
// cover 16 pixels.
// Presumably row N lists the shape of each subset for two-subset partition N -
// TODO confirm against the code that indexes this table.
static const int g_shapes2[64][2] =
{
    { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },
    { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },
    { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },
    { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },
    { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },
    { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },
    { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },
    { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },
};
// 64 rows of three shape numbers each. Every value is a valid row index into
// g_shapeRanges; e.g. row 0 = { 148, 160, 240 } names ranges of 5, 5 and 6 pixels
// that together cover 16 pixels.
// Presumably row N lists the shape of each subset for three-subset partition N -
// TODO confirm against the code that indexes this table.
static const int g_shapes3[64][3] =
{
    { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },
    { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },
    { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },
    { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },
    { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },
    { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },
    { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },
    { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },
};
| 752 | |
// Shape-number list containing only shape 0 (g_shapeRanges[0] spans all 16
// fragment entries). Presumably the shapes needed by single-subset modes -
// TODO confirm against callers.
static const int g_shapeList1[] =
{
    0,
};
| 757 | |
// Shape-number list holding the consecutive values 1 through 128.
// Presumably the set of shapes referenced by g_shapes2 - TODO confirm.
static const int g_shapeList2[] =
{
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
    34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
    45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
    67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
    78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
    89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
    100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
    111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122, 123, 124, 125, 126, 127, 128,
};
| 773 | |
// Shape-number list holding the consecutive values 0 through 128, i.e. the
// contents of g_shapeList1 followed by the contents of g_shapeList2.
// Its length (g_numShapes12) sizes SinglePlaneTemporaries::unfinishedRGBA.
static const int g_shapeList12[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
    77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
    99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    121, 122, 123, 124, 125, 126, 127, 128,
};
| 789 | |
// Ascending list of shape numbers: a sparse selection below 129 followed by the
// consecutive values 129 through 242.
// Appears to be the set of distinct shapes referenced by g_shapes3 - TODO confirm.
static const int g_shapeList3[] =
{
    1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,
    33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,
    110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,
    136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
    147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
    158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
    169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
    202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
    213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
    235, 236, 237, 238, 239, 240, 241, 242,
};
| 806 | |
// Ascending list of the 36 distinct shape numbers that occur in the first 16 rows
// of g_shapes3.
// Presumably used where only the first 16 three-subset partitions are available -
// TODO confirm against callers.
static const int g_shapeList3Short[] =
{
    1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,
    106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,
    171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,
    233, 237, 240,
};
| 814 | |
// Every shape number, 0 through 242, in ascending order (one per g_shapeRanges row).
// Its length (g_numShapesAll) sizes the per-shape arrays in SinglePlaneTemporaries.
static const int g_shapeListAll[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
    77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
    99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
    132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
    143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
    154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
    187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
    198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
    209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
    220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
    231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
    242,
};
| 841 | |
| 842 | static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]); |
| 843 | static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]); |
| 844 | static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]); |
| 845 | static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]); |
| 846 | static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]); |
| 847 | static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]); |
| 848 | static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]); |
| 849 | } |
| 850 | |
| 851 | struct PackingVector |
| 852 | { |
| 853 | uint32_t m_vector[4]; |
| 854 | int m_offset; |
| 855 | |
| 856 | void Init() |
| 857 | { |
| 858 | for (int i = 0; i < 4; i++) |
| 859 | m_vector[i] = 0; |
| 860 | |
| 861 | m_offset = 0; |
| 862 | } |
| 863 | |
| 864 | void InitPacked(const uint32_t *v, int bits) |
| 865 | { |
| 866 | for (int b = 0; b < bits; b += 32) |
| 867 | m_vector[b / 32] = v[b / 32]; |
| 868 | |
| 869 | m_offset = bits; |
| 870 | } |
| 871 | |
| 872 | inline void Pack(ParallelMath::ScalarUInt16 value, int bits) |
| 873 | { |
| 874 | int vOffset = m_offset >> 5; |
| 875 | int bitOffset = m_offset & 0x1f; |
| 876 | |
| 877 | m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff); |
| 878 | |
| 879 | int overflowBits = bitOffset + bits - 32; |
| 880 | if (overflowBits > 0) |
| 881 | m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits)); |
| 882 | |
| 883 | m_offset += bits; |
| 884 | } |
| 885 | |
| 886 | inline void Flush(uint8_t* output) |
| 887 | { |
| 888 | assert(m_offset == 128); |
| 889 | |
| 890 | for (int v = 0; v < 4; v++) |
| 891 | { |
| 892 | uint32_t chunk = m_vector[v]; |
| 893 | for (int b = 0; b < 4; b++) |
| 894 | output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff); |
| 895 | } |
| 896 | } |
| 897 | }; |
| 898 | |
| 899 | |
| 900 | struct UnpackingVector |
| 901 | { |
| 902 | uint32_t m_vector[4]; |
| 903 | |
| 904 | void Init(const uint8_t *bytes) |
| 905 | { |
| 906 | for (int i = 0; i < 4; i++) |
| 907 | m_vector[i] = 0; |
| 908 | |
| 909 | for (int b = 0; b < 16; b++) |
| 910 | m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8)); |
| 911 | } |
| 912 | |
| 913 | inline void UnpackStart(uint32_t *v, int bits) |
| 914 | { |
| 915 | for (int b = 0; b < bits; b += 32) |
| 916 | v[b / 32] = m_vector[b / 32]; |
| 917 | |
| 918 | int entriesShifted = bits / 32; |
| 919 | int carry = bits % 32; |
| 920 | |
| 921 | for (int i = entriesShifted; i < 4; i++) |
| 922 | m_vector[i - entriesShifted] = m_vector[i]; |
| 923 | |
| 924 | int entriesRemaining = 4 - entriesShifted; |
| 925 | if (carry) |
| 926 | { |
| 927 | uint32_t bitMask = (1 << carry) - 1; |
| 928 | for (int i = 0; i < entriesRemaining; i++) |
| 929 | { |
| 930 | m_vector[i] >>= carry; |
| 931 | if (i != entriesRemaining - 1) |
| 932 | m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry); |
| 933 | } |
| 934 | } |
| 935 | } |
| 936 | |
| 937 | inline ParallelMath::ScalarUInt16 Unpack(int bits) |
| 938 | { |
| 939 | uint32_t bitMask = (1 << bits) - 1; |
| 940 | |
| 941 | ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask); |
| 942 | |
| 943 | for (int i = 0; i < 4; i++) |
| 944 | { |
| 945 | m_vector[i] >>= bits; |
| 946 | if (i != 3) |
| 947 | m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits); |
| 948 | } |
| 949 | |
| 950 | return result; |
| 951 | } |
| 952 | }; |
| 953 | |
| 954 | ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned) |
| 955 | { |
| 956 | if (isSigned) |
| 957 | { |
| 958 | ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f)); |
| 959 | return (v * 32.0f + offset) / 31.0f; |
| 960 | } |
| 961 | else |
| 962 | return (v * 64.0f + 30.0f) / 31.0f; |
| 963 | } |
| 964 | |
| 965 | ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v) |
| 966 | { |
| 967 | #ifdef CVTT_ENABLE_ASSERTS |
| 968 | for (int i = 0; i < ParallelMath::ParallelSize; i++) |
| 969 | assert(ParallelMath::Extract(v, i) != -32768) |
| 970 | #endif |
| 971 | |
| 972 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0)); |
| 973 | ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v)); |
| 974 | |
| 975 | ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31)); |
| 976 | ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5); |
| 977 | ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted); |
| 978 | ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768)); |
| 979 | |
| 980 | return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits; |
| 981 | } |
| 982 | |
| 983 | ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v) |
| 984 | { |
| 985 | return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6)); |
| 986 | } |
| 987 | |
| 988 | void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned) |
| 989 | { |
| 990 | for (int epi = 0; epi < 2; epi++) |
| 991 | { |
| 992 | for (int ch = 0; ch < 3; ch++) |
| 993 | { |
| 994 | if (isSigned) |
| 995 | outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch]))); |
| 996 | else |
| 997 | outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch]))); |
| 998 | } |
| 999 | } |
| 1000 | } |
| 1001 | |
// Scratch storage for the single-plane BC7 search (TrySinglePlane creates one
// instance on its stack).
struct SinglePlaneTemporaries
{
    // Initial RGB endpoint estimates, indexed by shape number.
    UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];
    // Initial RGBA endpoint estimates, indexed by shape number; sized for the
    // shapes in g_shapeList12 only.
    UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];

    // One slot per g_fragments entry; presumably the best index found for each
    // pixel of each shape - TODO confirm against the code that fills it.
    ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];
    // Per-shape [endpoint][channel] values; presumably the best endpoints found
    // so far for that shape - TODO confirm.
    ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];
    // Per-shape error; presumably the error that goes with shapeBestEP - TODO confirm.
    ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];
};
| 1011 | } |
| 1012 | } |
| 1013 | |
| 1014 | void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]) |
| 1015 | { |
| 1016 | ParallelMath::RoundTowardNearestForScope roundingMode; |
| 1017 | |
| 1018 | float tf[2]; |
| 1019 | Util::ComputeTweakFactors(tweak, range, tf); |
| 1020 | |
| 1021 | MFloat base = ParallelMath::ToFloat(original[0]); |
| 1022 | MFloat offs = ParallelMath::ToFloat(original[1]) - base; |
| 1023 | |
| 1024 | result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode); |
| 1025 | result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode); |
| 1026 | } |
| 1027 | |
| 1028 | void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels) |
| 1029 | { |
| 1030 | for (int ch = 0; ch < channels; ch++) |
| 1031 | color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8); |
| 1032 | } |
| 1033 | |
| 1034 | void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels) |
| 1035 | { |
| 1036 | int16_t addend; |
| 1037 | if (p) |
| 1038 | addend = ((1 << (8 - bits)) - 1); |
| 1039 | else |
| 1040 | addend = 255; |
| 1041 | |
| 1042 | for (int ch = 0; ch < channels; ch++) |
| 1043 | { |
| 1044 | MUInt16 ch16 = ParallelMath::LosslessCast<MUInt16>::Cast(color[ch]); |
| 1045 | ch16 = ParallelMath::RightShift((ch16 << (bits + 1)) - ch16 + addend, 9); |
| 1046 | ch16 = (ch16 << 1) | ParallelMath::MakeUInt16(p); |
| 1047 | color[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ch16); |
| 1048 | } |
| 1049 | } |
| 1050 | |
| 1051 | void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels) |
| 1052 | { |
| 1053 | for (int ch = 0; ch < channels; ch++) |
| 1054 | { |
| 1055 | MUInt15 clr = color[ch]; |
| 1056 | clr = clr << (8 - bits); |
| 1057 | color[ch] = clr | ParallelMath::RightShift(clr, bits); |
| 1058 | } |
| 1059 | } |
| 1060 | |
| 1061 | void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]) |
| 1062 | { |
| 1063 | for (int j = 0; j < 2; j++) |
| 1064 | { |
| 1065 | QuantizeP(ep[j], 4, p[j], 3); |
| 1066 | Unquantize(ep[j], 5, 3); |
| 1067 | ep[j][3] = ParallelMath::MakeUInt15(255); |
| 1068 | } |
| 1069 | } |
| 1070 | |
| 1071 | void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p) |
| 1072 | { |
| 1073 | for (int j = 0; j < 2; j++) |
| 1074 | { |
| 1075 | QuantizeP(ep[j], 6, p, 3); |
| 1076 | Unquantize(ep[j], 7, 3); |
| 1077 | ep[j][3] = ParallelMath::MakeUInt15(255); |
| 1078 | } |
| 1079 | } |
| 1080 | |
| 1081 | void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4]) |
| 1082 | { |
| 1083 | for (int j = 0; j < 2; j++) |
| 1084 | { |
| 1085 | Quantize(ep[j], 5, 3); |
| 1086 | Unquantize(ep[j], 5, 3); |
| 1087 | ep[j][3] = ParallelMath::MakeUInt15(255); |
| 1088 | } |
| 1089 | } |
| 1090 | |
| 1091 | void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]) |
| 1092 | { |
| 1093 | for (int j = 0; j < 2; j++) |
| 1094 | { |
| 1095 | QuantizeP(ep[j], 7, p[j], 3); |
| 1096 | ep[j][3] = ParallelMath::MakeUInt15(255); |
| 1097 | } |
| 1098 | } |
| 1099 | |
| 1100 | void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]) |
| 1101 | { |
| 1102 | for (int j = 0; j < 2; j++) |
| 1103 | { |
| 1104 | Quantize(epRGB[j], 5, 3); |
| 1105 | Unquantize(epRGB[j], 5, 3); |
| 1106 | |
| 1107 | Quantize(epA + j, 6, 1); |
| 1108 | Unquantize(epA + j, 6, 1); |
| 1109 | } |
| 1110 | } |
| 1111 | |
| 1112 | void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]) |
| 1113 | { |
| 1114 | for (int j = 0; j < 2; j++) |
| 1115 | { |
| 1116 | Quantize(epRGB[j], 7, 3); |
| 1117 | Unquantize(epRGB[j], 7, 3); |
| 1118 | } |
| 1119 | |
| 1120 | // Alpha is full precision |
| 1121 | (void)epA; |
| 1122 | } |
| 1123 | |
| 1124 | void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]) |
| 1125 | { |
| 1126 | for (int j = 0; j < 2; j++) |
| 1127 | QuantizeP(ep[j], 7, p[j], 4); |
| 1128 | } |
| 1129 | |
| 1130 | void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]) |
| 1131 | { |
| 1132 | for (int j = 0; j < 2; j++) |
| 1133 | { |
| 1134 | QuantizeP(ep[j], 5, p[j], 4); |
| 1135 | Unquantize(ep[j], 6, 4); |
| 1136 | } |
| 1137 | } |
| 1138 | |
| 1139 | void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn) |
| 1140 | { |
| 1141 | MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX); |
| 1142 | |
| 1143 | MUInt15 intAverage[4]; |
| 1144 | for (int ch = 0; ch < 4; ch++) |
| 1145 | intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn); |
| 1146 | |
| 1147 | MUInt15 eps[2][4]; |
| 1148 | MUInt15 reconstructed[4]; |
| 1149 | MUInt15 index = ParallelMath::MakeUInt15(0); |
| 1150 | |
| 1151 | for (int epi = 0; epi < 2; epi++) |
| 1152 | { |
| 1153 | for (int ch = 0; ch < 3; ch++) |
| 1154 | eps[epi][ch] = ParallelMath::MakeUInt15(0); |
| 1155 | eps[epi][3] = ParallelMath::MakeUInt15(255); |
| 1156 | } |
| 1157 | |
| 1158 | for (int ch = 0; ch < 3; ch++) |
| 1159 | reconstructed[ch] = ParallelMath::MakeUInt15(0); |
| 1160 | reconstructed[3] = ParallelMath::MakeUInt15(255); |
| 1161 | |
| 1162 | // Depending on the target index and parity bits, there are multiple valid solid colors. |
| 1163 | // We want to find the one closest to the actual average. |
| 1164 | MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX); |
| 1165 | for (int t = 0; t < numTables; t++) |
| 1166 | { |
| 1167 | const cvtt::Tables::BC7SC::Table& table = *(tables[t]); |
| 1168 | |
| 1169 | ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits]; |
| 1170 | |
| 1171 | MUInt15 candidateReconstructed[4]; |
| 1172 | MUInt15 candidateEPs[2][4]; |
| 1173 | |
| 1174 | for (int i = 0; i < ParallelMath::ParallelSize; i++) |
| 1175 | { |
| 1176 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1177 | { |
| 1178 | ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i); |
| 1179 | assert(avgValue >= 0 && avgValue <= 255); |
| 1180 | |
| 1181 | const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue]; |
| 1182 | |
| 1183 | ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min); |
| 1184 | ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max); |
| 1185 | ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor); |
| 1186 | } |
| 1187 | } |
| 1188 | |
| 1189 | MFloat avgError = ParallelMath::MakeFloatZero(); |
| 1190 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1191 | { |
| 1192 | MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch]; |
| 1193 | avgError = avgError + delta * delta * channelWeightsSq[ch]; |
| 1194 | } |
| 1195 | |
| 1196 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError)); |
| 1197 | better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations |
| 1198 | |
| 1199 | if (ParallelMath::AnySet(better)) |
| 1200 | { |
| 1201 | ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError); |
| 1202 | |
| 1203 | MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index); |
| 1204 | |
| 1205 | ParallelMath::ConditionalSet(index, better, candidateIndex); |
| 1206 | |
| 1207 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1208 | ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]); |
| 1209 | |
| 1210 | for (int epi = 0; epi < 2; epi++) |
| 1211 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1212 | ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]); |
| 1213 | } |
| 1214 | } |
| 1215 | |
| 1216 | AggregatedError<4> aggError; |
| 1217 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1218 | { |
| 1219 | int px = fragmentStart[pxi]; |
| 1220 | |
| 1221 | BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); |
| 1222 | } |
| 1223 | |
| 1224 | MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError; |
| 1225 | |
| 1226 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError)); |
| 1227 | if (ParallelMath::AnySet(better)) |
| 1228 | { |
| 1229 | shapeBestError = ParallelMath::Min(shapeBestError, error); |
| 1230 | for (int epi = 0; epi < 2; epi++) |
| 1231 | { |
| 1232 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1233 | ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]); |
| 1234 | } |
| 1235 | |
| 1236 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1237 | ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index); |
| 1238 | } |
| 1239 | } |
| 1240 | |
| 1241 | void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) |
| 1242 | { |
| 1243 | if (numRefineRounds < 1) |
| 1244 | numRefineRounds = 1; |
| 1245 | |
| 1246 | float channelWeightsSq[4]; |
| 1247 | |
| 1248 | for (int ch = 0; ch < 4; ch++) |
| 1249 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
| 1250 | |
| 1251 | SinglePlaneTemporaries temps; |
| 1252 | |
| 1253 | MUInt15 maxAlpha = ParallelMath::MakeUInt15(0); |
| 1254 | MUInt15 minAlpha = ParallelMath::MakeUInt15(255); |
| 1255 | ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true); |
| 1256 | for (int px = 0; px < 16; px++) |
| 1257 | { |
| 1258 | MUInt15 a = pixels[px][3]; |
| 1259 | maxAlpha = ParallelMath::Max(maxAlpha, a); |
| 1260 | minAlpha = ParallelMath::Min(minAlpha, a); |
| 1261 | |
| 1262 | isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255)))); |
| 1263 | } |
| 1264 | |
| 1265 | ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255)); |
| 1266 | ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha); |
| 1267 | |
| 1268 | bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha); |
| 1269 | |
| 1270 | // Try RGB modes if any block has a min alpha 251 or higher |
| 1271 | bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha)); |
| 1272 | |
| 1273 | // Try mode 7 if any block has alpha. |
| 1274 | // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints |
| 1275 | // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific |
| 1276 | // situations, and only by at most 1 unit of error per pixel. |
| 1277 | bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0); |
| 1278 | |
| 1279 | MFloat preWeightedPixels[16][4]; |
| 1280 | |
| 1281 | BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); |
| 1282 | |
| 1283 | // Get initial RGB endpoints |
| 1284 | if (allowRGBModes) |
| 1285 | { |
| 1286 | const uint8_t *shapeList = encodingPlan.rgbShapeList; |
| 1287 | int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate; |
| 1288 | |
| 1289 | for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) |
| 1290 | { |
| 1291 | int shape = shapeList[shapeIter]; |
| 1292 | |
| 1293 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
| 1294 | int shapeSize = BC7Data::g_shapeRanges[shape][1]; |
| 1295 | |
| 1296 | EndpointSelector<3, 8> epSelector; |
| 1297 | |
| 1298 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
| 1299 | { |
| 1300 | for (int spx = 0; spx < shapeSize; spx++) |
| 1301 | { |
| 1302 | int px = BC7Data::g_fragments[shapeStart + spx]; |
| 1303 | epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); |
| 1304 | } |
| 1305 | epSelector.FinishPass(epPass); |
| 1306 | } |
| 1307 | temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights); |
| 1308 | } |
| 1309 | } |
| 1310 | |
| 1311 | // Get initial RGBA endpoints |
| 1312 | { |
| 1313 | const uint8_t *shapeList = encodingPlan.rgbaShapeList; |
| 1314 | int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate; |
| 1315 | |
| 1316 | for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) |
| 1317 | { |
| 1318 | int shape = shapeList[shapeIter]; |
| 1319 | |
| 1320 | if (anyBlockHasAlpha || !allowRGBModes) |
| 1321 | { |
| 1322 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
| 1323 | int shapeSize = BC7Data::g_shapeRanges[shape][1]; |
| 1324 | |
| 1325 | EndpointSelector<4, 8> epSelector; |
| 1326 | |
| 1327 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
| 1328 | { |
| 1329 | for (int spx = 0; spx < shapeSize; spx++) |
| 1330 | { |
| 1331 | int px = BC7Data::g_fragments[shapeStart + spx]; |
| 1332 | epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); |
| 1333 | } |
| 1334 | epSelector.FinishPass(epPass); |
| 1335 | } |
| 1336 | temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights); |
| 1337 | } |
| 1338 | else |
| 1339 | { |
| 1340 | temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255); |
| 1341 | } |
| 1342 | } |
| 1343 | } |
| 1344 | |
| 1345 | for (uint16_t mode = 0; mode <= 7; mode++) |
| 1346 | { |
| 1347 | if (mode == 4 || mode == 5) |
| 1348 | continue; |
| 1349 | |
| 1350 | if (mode < 4 && !allowRGBModes) |
| 1351 | continue; |
| 1352 | |
| 1353 | if (mode == 7 && !allowMode7) |
| 1354 | continue; |
| 1355 | |
| 1356 | uint64_t partitionEnabledBits = 0; |
| 1357 | switch (mode) |
| 1358 | { |
| 1359 | case 0: |
| 1360 | partitionEnabledBits = encodingPlan.mode0PartitionEnabled; |
| 1361 | break; |
| 1362 | case 1: |
| 1363 | partitionEnabledBits = encodingPlan.mode1PartitionEnabled; |
| 1364 | break; |
| 1365 | case 2: |
| 1366 | partitionEnabledBits = encodingPlan.mode2PartitionEnabled; |
| 1367 | break; |
| 1368 | case 3: |
| 1369 | partitionEnabledBits = encodingPlan.mode3PartitionEnabled; |
| 1370 | break; |
| 1371 | case 6: |
| 1372 | partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; |
| 1373 | break; |
| 1374 | case 7: |
| 1375 | if (anyBlockHasAlpha) |
| 1376 | partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; |
| 1377 | else |
| 1378 | partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled; |
| 1379 | break; |
| 1380 | default: |
| 1381 | break; |
| 1382 | } |
| 1383 | |
| 1384 | bool isRGB = (mode < 4); |
| 1385 | |
| 1386 | unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits; |
| 1387 | int numSubsets = BC7Data::g_modes[mode].m_numSubsets; |
| 1388 | int indexPrec = BC7Data::g_modes[mode].m_indexBits; |
| 1389 | |
| 1390 | int parityBitMax = 1; |
| 1391 | if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
| 1392 | parityBitMax = 4; |
| 1393 | else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset) |
| 1394 | parityBitMax = 2; |
| 1395 | |
| 1396 | int numRealChannels = isRGB ? 3 : 4; |
| 1397 | |
| 1398 | int numShapes; |
| 1399 | const int *shapeList; |
| 1400 | |
| 1401 | if (numSubsets == 1) |
| 1402 | { |
| 1403 | numShapes = BC7Data::g_numShapes1; |
| 1404 | shapeList = BC7Data::g_shapeList1; |
| 1405 | } |
| 1406 | else if (numSubsets == 2) |
| 1407 | { |
| 1408 | numShapes = BC7Data::g_numShapes2; |
| 1409 | shapeList = BC7Data::g_shapeList2; |
| 1410 | } |
| 1411 | else |
| 1412 | { |
| 1413 | assert(numSubsets == 3); |
| 1414 | if (numPartitions == 16) |
| 1415 | { |
| 1416 | numShapes = BC7Data::g_numShapes3Short; |
| 1417 | shapeList = BC7Data::g_shapeList3Short; |
| 1418 | } |
| 1419 | else |
| 1420 | { |
| 1421 | assert(numPartitions == 64); |
| 1422 | numShapes = BC7Data::g_numShapes3; |
| 1423 | shapeList = BC7Data::g_shapeList3; |
| 1424 | } |
| 1425 | } |
| 1426 | |
| 1427 | for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++) |
| 1428 | temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX); |
| 1429 | |
| 1430 | for (int shapeIter = 0; shapeIter < numShapes; shapeIter++) |
| 1431 | { |
| 1432 | int shape = shapeList[shapeIter]; |
| 1433 | |
| 1434 | int numTweakRounds = 0; |
| 1435 | if (isRGB) |
| 1436 | numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape]; |
| 1437 | else |
| 1438 | numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape]; |
| 1439 | |
| 1440 | if (numTweakRounds == 0) |
| 1441 | continue; |
| 1442 | |
| 1443 | if (numTweakRounds > MaxTweakRounds) |
| 1444 | numTweakRounds = MaxTweakRounds; |
| 1445 | |
| 1446 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
| 1447 | int shapeLength = BC7Data::g_shapeRanges[shape][1]; |
| 1448 | |
| 1449 | AggregatedError<1> alphaAggError; |
| 1450 | if (isRGB && anyBlockHasAlpha) |
| 1451 | { |
| 1452 | MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) }; |
| 1453 | |
| 1454 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1455 | { |
| 1456 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
| 1457 | MUInt15 original[1] = { pixels[px][3] }; |
| 1458 | BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError); |
| 1459 | } |
| 1460 | } |
| 1461 | |
| 1462 | float alphaWeightsSq[1] = { channelWeightsSq[3] }; |
| 1463 | MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq); |
| 1464 | |
| 1465 | MUInt15 tweakBaseEP[MaxTweakRounds][2][4]; |
| 1466 | |
| 1467 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
| 1468 | { |
| 1469 | if (isRGB) |
| 1470 | { |
| 1471 | temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); |
| 1472 | tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255); |
| 1473 | } |
| 1474 | else |
| 1475 | { |
| 1476 | temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); |
| 1477 | } |
| 1478 | } |
| 1479 | |
| 1480 | ParallelMath::Int16CompFlag punchThroughInvalid[4]; |
| 1481 | for (int pIter = 0; pIter < parityBitMax; pIter++) |
| 1482 | { |
| 1483 | punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false); |
| 1484 | |
| 1485 | if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7)) |
| 1486 | { |
| 1487 | // Modes 6 and 7 have parity bits that affect alpha |
| 1488 | if (pIter == 0) |
| 1489 | punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha); |
| 1490 | else if (pIter == parityBitMax - 1) |
| 1491 | punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha); |
| 1492 | else |
| 1493 | punchThroughInvalid[pIter] = isPunchThrough; |
| 1494 | } |
| 1495 | } |
| 1496 | |
| 1497 | for (int pIter = 0; pIter < parityBitMax; pIter++) |
| 1498 | { |
| 1499 | if (ParallelMath::AllSet(punchThroughInvalid[pIter])) |
| 1500 | continue; |
| 1501 | |
| 1502 | bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]); |
| 1503 | |
| 1504 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
| 1505 | { |
| 1506 | uint16_t p[2]; |
| 1507 | p[0] = (pIter & 1); |
| 1508 | p[1] = ((pIter >> 1) & 1); |
| 1509 | |
| 1510 | MUInt15 ep[2][4]; |
| 1511 | |
| 1512 | for (int epi = 0; epi < 2; epi++) |
| 1513 | for (int ch = 0; ch < 4; ch++) |
| 1514 | ep[epi][ch] = tweakBaseEP[tweak][epi][ch]; |
| 1515 | |
| 1516 | for (int refine = 0; refine < numRefineRounds; refine++) |
| 1517 | { |
| 1518 | switch (mode) |
| 1519 | { |
| 1520 | case 0: |
| 1521 | CompressEndpoints0(ep, p); |
| 1522 | break; |
| 1523 | case 1: |
| 1524 | CompressEndpoints1(ep, p[0]); |
| 1525 | break; |
| 1526 | case 2: |
| 1527 | CompressEndpoints2(ep); |
| 1528 | break; |
| 1529 | case 3: |
| 1530 | CompressEndpoints3(ep, p); |
| 1531 | break; |
| 1532 | case 6: |
| 1533 | CompressEndpoints6(ep, p); |
| 1534 | break; |
| 1535 | case 7: |
| 1536 | CompressEndpoints7(ep, p); |
| 1537 | break; |
| 1538 | default: |
| 1539 | assert(false); |
| 1540 | break; |
| 1541 | }; |
| 1542 | |
| 1543 | MFloat shapeError = ParallelMath::MakeFloatZero(); |
| 1544 | |
| 1545 | IndexSelector<4> indexSelector; |
| 1546 | indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec); |
| 1547 | |
| 1548 | EndpointRefiner<4> epRefiner; |
| 1549 | epRefiner.Init(1 << indexPrec, channelWeights); |
| 1550 | |
| 1551 | MUInt15 indexes[16]; |
| 1552 | |
| 1553 | AggregatedError<4> aggError; |
| 1554 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1555 | { |
| 1556 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
| 1557 | |
| 1558 | MUInt15 index; |
| 1559 | MUInt15 reconstructed[4]; |
| 1560 | |
| 1561 | index = indexSelector.SelectIndexLDR(floatPixels[px], rtn); |
| 1562 | indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels); |
| 1563 | |
| 1564 | if (flags & cvtt::Flags::BC7_FastIndexing) |
| 1565 | BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); |
| 1566 | else |
| 1567 | { |
| 1568 | MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); |
| 1569 | |
| 1570 | MUInt15 altIndexes[2]; |
| 1571 | altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
| 1572 | altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1))); |
| 1573 | |
| 1574 | for (int ii = 0; ii < 2; ii++) |
| 1575 | { |
| 1576 | indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels); |
| 1577 | |
| 1578 | MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); |
| 1579 | ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error)); |
| 1580 | error = ParallelMath::Min(error, altError); |
| 1581 | ParallelMath::ConditionalSet(index, better, altIndexes[ii]); |
| 1582 | } |
| 1583 | |
| 1584 | shapeError = shapeError + error; |
| 1585 | } |
| 1586 | |
| 1587 | if (refine != numRefineRounds - 1) |
| 1588 | epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels); |
| 1589 | |
| 1590 | indexes[pxi] = index; |
| 1591 | } |
| 1592 | |
| 1593 | if (flags & cvtt::Flags::BC7_FastIndexing) |
| 1594 | shapeError = aggError.Finalize(flags, channelWeightsSq); |
| 1595 | |
| 1596 | if (isRGB) |
| 1597 | shapeError = shapeError + staticAlphaError; |
| 1598 | |
| 1599 | ParallelMath::FloatCompFlag shapeErrorBetter; |
| 1600 | ParallelMath::Int16CompFlag shapeErrorBetter16; |
| 1601 | |
| 1602 | shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]); |
| 1603 | shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter); |
| 1604 | |
| 1605 | if (ParallelMath::AnySet(shapeErrorBetter16)) |
| 1606 | { |
| 1607 | bool punchThroughOK = true; |
| 1608 | if (needPunchThroughCheck) |
| 1609 | { |
| 1610 | shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16); |
| 1611 | shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16); |
| 1612 | |
| 1613 | if (!ParallelMath::AnySet(shapeErrorBetter16)) |
| 1614 | punchThroughOK = false; |
| 1615 | } |
| 1616 | |
| 1617 | if (punchThroughOK) |
| 1618 | { |
| 1619 | ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError); |
| 1620 | for (int epi = 0; epi < 2; epi++) |
| 1621 | for (int ch = 0; ch < numRealChannels; ch++) |
| 1622 | ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]); |
| 1623 | |
| 1624 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1625 | ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]); |
| 1626 | } |
| 1627 | } |
| 1628 | |
| 1629 | if (refine != numRefineRounds - 1) |
| 1630 | epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn); |
| 1631 | } // refine |
| 1632 | } // tweak |
| 1633 | } // p |
| 1634 | |
| 1635 | if (flags & cvtt::Flags::BC7_TrySingleColor) |
| 1636 | { |
| 1637 | MUInt15 total[4]; |
| 1638 | for (int ch = 0; ch < 4; ch++) |
| 1639 | total[ch] = ParallelMath::MakeUInt15(0); |
| 1640 | |
| 1641 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1642 | { |
| 1643 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
| 1644 | for (int ch = 0; ch < 4; ch++) |
| 1645 | total[ch] = total[ch] + pixels[pxi][ch]; |
| 1646 | } |
| 1647 | |
| 1648 | MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength)); |
| 1649 | MFloat average[4]; |
| 1650 | for (int ch = 0; ch < 4; ch++) |
| 1651 | average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength; |
| 1652 | |
| 1653 | const uint8_t *fragment = BC7Data::g_fragments + shapeStart; |
| 1654 | MFloat &shapeBestError = temps.shapeBestError[shape]; |
| 1655 | MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape]; |
| 1656 | MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart; |
| 1657 | |
| 1658 | const cvtt::Tables::BC7SC::Table **scTables = NULL; |
| 1659 | int numSCTables = 0; |
| 1660 | |
| 1661 | const cvtt::Tables::BC7SC::Table *tables0[] = |
| 1662 | { |
| 1663 | &cvtt::Tables::BC7SC::g_mode0_p00_i1, |
| 1664 | &cvtt::Tables::BC7SC::g_mode0_p00_i2, |
| 1665 | &cvtt::Tables::BC7SC::g_mode0_p00_i3, |
| 1666 | &cvtt::Tables::BC7SC::g_mode0_p01_i1, |
| 1667 | &cvtt::Tables::BC7SC::g_mode0_p01_i2, |
| 1668 | &cvtt::Tables::BC7SC::g_mode0_p01_i3, |
| 1669 | &cvtt::Tables::BC7SC::g_mode0_p10_i1, |
| 1670 | &cvtt::Tables::BC7SC::g_mode0_p10_i2, |
| 1671 | &cvtt::Tables::BC7SC::g_mode0_p10_i3, |
| 1672 | &cvtt::Tables::BC7SC::g_mode0_p11_i1, |
| 1673 | &cvtt::Tables::BC7SC::g_mode0_p11_i2, |
| 1674 | &cvtt::Tables::BC7SC::g_mode0_p11_i3, |
| 1675 | }; |
| 1676 | |
| 1677 | const cvtt::Tables::BC7SC::Table *tables1[] = |
| 1678 | { |
| 1679 | &cvtt::Tables::BC7SC::g_mode1_p0_i1, |
| 1680 | &cvtt::Tables::BC7SC::g_mode1_p0_i2, |
| 1681 | &cvtt::Tables::BC7SC::g_mode1_p0_i3, |
| 1682 | &cvtt::Tables::BC7SC::g_mode1_p1_i1, |
| 1683 | &cvtt::Tables::BC7SC::g_mode1_p1_i2, |
| 1684 | &cvtt::Tables::BC7SC::g_mode1_p1_i3, |
| 1685 | }; |
| 1686 | |
| 1687 | const cvtt::Tables::BC7SC::Table *tables2[] = |
| 1688 | { |
| 1689 | &cvtt::Tables::BC7SC::g_mode2, |
| 1690 | }; |
| 1691 | |
| 1692 | const cvtt::Tables::BC7SC::Table *tables3[] = |
| 1693 | { |
| 1694 | &cvtt::Tables::BC7SC::g_mode3_p0, |
| 1695 | &cvtt::Tables::BC7SC::g_mode3_p1, |
| 1696 | }; |
| 1697 | |
| 1698 | const cvtt::Tables::BC7SC::Table *tables6[] = |
| 1699 | { |
| 1700 | &cvtt::Tables::BC7SC::g_mode6_p0_i1, |
| 1701 | &cvtt::Tables::BC7SC::g_mode6_p0_i2, |
| 1702 | &cvtt::Tables::BC7SC::g_mode6_p0_i3, |
| 1703 | &cvtt::Tables::BC7SC::g_mode6_p0_i4, |
| 1704 | &cvtt::Tables::BC7SC::g_mode6_p0_i5, |
| 1705 | &cvtt::Tables::BC7SC::g_mode6_p0_i6, |
| 1706 | &cvtt::Tables::BC7SC::g_mode6_p0_i7, |
| 1707 | &cvtt::Tables::BC7SC::g_mode6_p1_i1, |
| 1708 | &cvtt::Tables::BC7SC::g_mode6_p1_i2, |
| 1709 | &cvtt::Tables::BC7SC::g_mode6_p1_i3, |
| 1710 | &cvtt::Tables::BC7SC::g_mode6_p1_i4, |
| 1711 | &cvtt::Tables::BC7SC::g_mode6_p1_i5, |
| 1712 | &cvtt::Tables::BC7SC::g_mode6_p1_i6, |
| 1713 | &cvtt::Tables::BC7SC::g_mode6_p1_i7, |
| 1714 | }; |
| 1715 | |
| 1716 | const cvtt::Tables::BC7SC::Table *tables7[] = |
| 1717 | { |
| 1718 | &cvtt::Tables::BC7SC::g_mode7_p00, |
| 1719 | &cvtt::Tables::BC7SC::g_mode7_p01, |
| 1720 | &cvtt::Tables::BC7SC::g_mode7_p10, |
| 1721 | &cvtt::Tables::BC7SC::g_mode7_p11, |
| 1722 | }; |
| 1723 | |
| 1724 | switch (mode) |
| 1725 | { |
| 1726 | case 0: |
| 1727 | { |
| 1728 | scTables = tables0; |
| 1729 | numSCTables = sizeof(tables0) / sizeof(tables0[0]); |
| 1730 | } |
| 1731 | break; |
| 1732 | case 1: |
| 1733 | { |
| 1734 | scTables = tables1; |
| 1735 | numSCTables = sizeof(tables1) / sizeof(tables1[0]); |
| 1736 | } |
| 1737 | break; |
| 1738 | case 2: |
| 1739 | { |
| 1740 | |
| 1741 | scTables = tables2; |
| 1742 | numSCTables = sizeof(tables2) / sizeof(tables2[0]); |
| 1743 | } |
| 1744 | break; |
| 1745 | case 3: |
| 1746 | { |
| 1747 | scTables = tables3; |
| 1748 | numSCTables = sizeof(tables3) / sizeof(tables3[0]); |
| 1749 | } |
| 1750 | break; |
| 1751 | case 6: |
| 1752 | { |
| 1753 | scTables = tables6; |
| 1754 | numSCTables = sizeof(tables6) / sizeof(tables6[0]); |
| 1755 | } |
| 1756 | break; |
| 1757 | case 7: |
| 1758 | { |
| 1759 | scTables = tables7; |
| 1760 | numSCTables = sizeof(tables7) / sizeof(tables7[0]); |
| 1761 | } |
| 1762 | break; |
| 1763 | default: |
| 1764 | assert(false); |
| 1765 | break; |
| 1766 | } |
| 1767 | |
| 1768 | TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn); |
| 1769 | } |
| 1770 | } // shapeIter |
| 1771 | |
| 1772 | uint64_t partitionsEnabledBits = 0xffffffffffffffffULL; |
| 1773 | |
| 1774 | switch (mode) |
| 1775 | { |
| 1776 | case 0: |
| 1777 | partitionsEnabledBits = encodingPlan.mode0PartitionEnabled; |
| 1778 | break; |
| 1779 | case 1: |
| 1780 | partitionsEnabledBits = encodingPlan.mode1PartitionEnabled; |
| 1781 | break; |
| 1782 | case 2: |
| 1783 | partitionsEnabledBits = encodingPlan.mode2PartitionEnabled; |
| 1784 | break; |
| 1785 | case 3: |
| 1786 | partitionsEnabledBits = encodingPlan.mode3PartitionEnabled; |
| 1787 | break; |
| 1788 | case 6: |
| 1789 | partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; |
| 1790 | break; |
| 1791 | case 7: |
| 1792 | if (anyBlockHasAlpha) |
| 1793 | partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; |
| 1794 | else |
| 1795 | partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled; |
| 1796 | break; |
| 1797 | default: |
| 1798 | break; |
| 1799 | }; |
| 1800 | |
| 1801 | for (uint16_t partition = 0; partition < numPartitions; partition++) |
| 1802 | { |
| 1803 | if (((partitionsEnabledBits >> partition) & 1) == 0) |
| 1804 | continue; |
| 1805 | |
| 1806 | const int *partitionShapes; |
| 1807 | if (numSubsets == 1) |
| 1808 | partitionShapes = BC7Data::g_shapes1[partition]; |
| 1809 | else if (numSubsets == 2) |
| 1810 | partitionShapes = BC7Data::g_shapes2[partition]; |
| 1811 | else |
| 1812 | { |
| 1813 | assert(numSubsets == 3); |
| 1814 | partitionShapes = BC7Data::g_shapes3[partition]; |
| 1815 | } |
| 1816 | |
| 1817 | MFloat totalError = ParallelMath::MakeFloatZero(); |
| 1818 | for (int subset = 0; subset < numSubsets; subset++) |
| 1819 | totalError = totalError + temps.shapeBestError[partitionShapes[subset]]; |
| 1820 | |
| 1821 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error); |
| 1822 | ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); |
| 1823 | |
| 1824 | if (mode == 7 && anyBlockHasAlpha) |
| 1825 | { |
| 1826 | // Some lanes could be better, but we filter them out to ensure consistency with scalar |
| 1827 | bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0); |
| 1828 | |
| 1829 | if (!isRGBAllowedForThisPartition) |
| 1830 | { |
| 1831 | errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha); |
| 1832 | errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16); |
| 1833 | } |
| 1834 | } |
| 1835 | |
| 1836 | if (ParallelMath::AnySet(errorBetter16)) |
| 1837 | { |
| 1838 | for (int subset = 0; subset < numSubsets; subset++) |
| 1839 | { |
| 1840 | int shape = partitionShapes[subset]; |
| 1841 | int shapeStart = BC7Data::g_shapeRanges[shape][0]; |
| 1842 | int shapeLength = BC7Data::g_shapeRanges[shape][1]; |
| 1843 | |
| 1844 | for (int epi = 0; epi < 2; epi++) |
| 1845 | for (int ch = 0; ch < 4; ch++) |
| 1846 | ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]); |
| 1847 | |
| 1848 | for (int pxi = 0; pxi < shapeLength; pxi++) |
| 1849 | { |
| 1850 | int px = BC7Data::g_fragments[shapeStart + pxi]; |
| 1851 | ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]); |
| 1852 | } |
| 1853 | } |
| 1854 | |
| 1855 | ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError); |
| 1856 | ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); |
| 1857 | ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition)); |
| 1858 | } |
| 1859 | } |
| 1860 | } |
| 1861 | } |
| 1862 | |
| 1863 | void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) |
| 1864 | { |
| 1865 | // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that. |
| 1866 | // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to |
| 1867 | // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases: |
| 1868 | // - Separate alpha channel, then weighted RGB |
| 1869 | // - Alpha+2 other channels, then the independent channel |
| 1870 | if (numRefineRounds < 1) |
| 1871 | numRefineRounds = 1; |
| 1872 | |
| 1873 | float channelWeightsSq[4]; |
| 1874 | for (int ch = 0; ch < 4; ch++) |
| 1875 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
| 1876 | |
| 1877 | for (uint16_t mode = 4; mode <= 5; mode++) |
| 1878 | { |
| 1879 | int numSP[2] = { 0, 0 }; |
| 1880 | |
| 1881 | for (uint16_t rotation = 0; rotation < 4; rotation++) |
| 1882 | { |
| 1883 | if (mode == 4) |
| 1884 | { |
| 1885 | numSP[0] = encodingPlan.mode4SP[rotation][0]; |
| 1886 | numSP[1] = encodingPlan.mode4SP[rotation][1]; |
| 1887 | } |
| 1888 | else |
| 1889 | numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation]; |
| 1890 | |
| 1891 | if (numSP[0] == 0 && numSP[1] == 0) |
| 1892 | continue; |
| 1893 | |
| 1894 | int alphaChannel = (rotation + 3) & 3; |
| 1895 | int redChannel = (rotation == 1) ? 3 : 0; |
| 1896 | int greenChannel = (rotation == 2) ? 3 : 1; |
| 1897 | int blueChannel = (rotation == 3) ? 3 : 2; |
| 1898 | |
| 1899 | MUInt15 rotatedRGB[16][3]; |
| 1900 | MFloat floatRotatedRGB[16][3]; |
| 1901 | |
| 1902 | for (int px = 0; px < 16; px++) |
| 1903 | { |
| 1904 | rotatedRGB[px][0] = pixels[px][redChannel]; |
| 1905 | rotatedRGB[px][1] = pixels[px][greenChannel]; |
| 1906 | rotatedRGB[px][2] = pixels[px][blueChannel]; |
| 1907 | |
| 1908 | for (int ch = 0; ch < 3; ch++) |
| 1909 | floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]); |
| 1910 | } |
| 1911 | |
| 1912 | uint16_t maxIndexSelector = (mode == 4) ? 2 : 1; |
| 1913 | |
| 1914 | float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] }; |
| 1915 | float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] }; |
| 1916 | float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] }; |
| 1917 | float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] }; |
| 1918 | |
| 1919 | float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error |
| 1920 | |
| 1921 | MFloat preWeightedRotatedRGB[16][3]; |
| 1922 | BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights); |
| 1923 | |
| 1924 | for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++) |
| 1925 | { |
| 1926 | int numTweakRounds = numSP[indexSelector]; |
| 1927 | |
| 1928 | if (numTweakRounds <= 0) |
| 1929 | continue; |
| 1930 | |
| 1931 | if (numTweakRounds > MaxTweakRounds) |
| 1932 | numTweakRounds = MaxTweakRounds; |
| 1933 | |
| 1934 | EndpointSelector<3, 8> rgbSelector; |
| 1935 | |
| 1936 | for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) |
| 1937 | { |
| 1938 | for (int px = 0; px < 16; px++) |
| 1939 | rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f)); |
| 1940 | |
| 1941 | rgbSelector.FinishPass(epPass); |
| 1942 | } |
| 1943 | |
| 1944 | MUInt15 alphaRange[2]; |
| 1945 | |
| 1946 | alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel]; |
| 1947 | for (int px = 1; px < 16; px++) |
| 1948 | { |
| 1949 | alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]); |
| 1950 | alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]); |
| 1951 | } |
| 1952 | |
| 1953 | int rgbPrec = 0; |
| 1954 | int alphaPrec = 0; |
| 1955 | |
| 1956 | if (mode == 4) |
| 1957 | { |
| 1958 | rgbPrec = indexSelector ? 3 : 2; |
| 1959 | alphaPrec = indexSelector ? 2 : 3; |
| 1960 | } |
| 1961 | else |
| 1962 | rgbPrec = alphaPrec = 2; |
| 1963 | |
| 1964 | UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights); |
| 1965 | |
| 1966 | MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX); |
| 1967 | MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX); |
| 1968 | |
| 1969 | MUInt15 bestRGBIndexes[16]; |
| 1970 | MUInt15 bestAlphaIndexes[16]; |
| 1971 | MUInt15 bestEP[2][4]; |
| 1972 | |
| 1973 | for (int px = 0; px < 16; px++) |
| 1974 | bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0); |
| 1975 | |
| 1976 | for (int tweak = 0; tweak < numTweakRounds; tweak++) |
| 1977 | { |
| 1978 | MUInt15 rgbEP[2][3]; |
| 1979 | MUInt15 alphaEP[2]; |
| 1980 | |
| 1981 | unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]); |
| 1982 | |
| 1983 | TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP); |
| 1984 | |
| 1985 | for (int refine = 0; refine < numRefineRounds; refine++) |
| 1986 | { |
| 1987 | if (mode == 4) |
| 1988 | CompressEndpoints4(rgbEP, alphaEP); |
| 1989 | else |
| 1990 | CompressEndpoints5(rgbEP, alphaEP); |
| 1991 | |
| 1992 | |
| 1993 | IndexSelector<1> alphaIndexSelector; |
| 1994 | IndexSelector<3> rgbIndexSelector; |
| 1995 | |
| 1996 | { |
| 1997 | MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } }; |
| 1998 | alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec); |
| 1999 | } |
| 2000 | rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec); |
| 2001 | |
| 2002 | EndpointRefiner<3> rgbRefiner; |
| 2003 | EndpointRefiner<1> alphaRefiner; |
| 2004 | |
| 2005 | rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights); |
| 2006 | alphaRefiner.Init(1 << alphaPrec, uniformWeight); |
| 2007 | |
| 2008 | MFloat errorRGB = ParallelMath::MakeFloatZero(); |
| 2009 | MFloat errorA = ParallelMath::MakeFloatZero(); |
| 2010 | |
| 2011 | MUInt15 rgbIndexes[16]; |
| 2012 | MUInt15 alphaIndexes[16]; |
| 2013 | |
| 2014 | AggregatedError<3> rgbAggError; |
| 2015 | AggregatedError<1> alphaAggError; |
| 2016 | |
| 2017 | for (int px = 0; px < 16; px++) |
| 2018 | { |
| 2019 | MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn); |
| 2020 | MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn); |
| 2021 | |
| 2022 | MUInt15 reconstructedRGB[3]; |
| 2023 | MUInt15 reconstructedAlpha[1]; |
| 2024 | |
| 2025 | rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB); |
| 2026 | alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha); |
| 2027 | |
| 2028 | if (flags & cvtt::Flags::BC7_FastIndexing) |
| 2029 | { |
| 2030 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError); |
| 2031 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError); |
| 2032 | } |
| 2033 | else |
| 2034 | { |
| 2035 | AggregatedError<3> baseRGBAggError; |
| 2036 | AggregatedError<1> baseAlphaAggError; |
| 2037 | |
| 2038 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError); |
| 2039 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError); |
| 2040 | |
| 2041 | MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq); |
| 2042 | MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
| 2043 | |
| 2044 | MUInt15 altRGBIndexes[2]; |
| 2045 | MUInt15 altAlphaIndexes[2]; |
| 2046 | |
| 2047 | altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
| 2048 | altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1))); |
| 2049 | |
| 2050 | altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); |
| 2051 | altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1))); |
| 2052 | |
| 2053 | for (int ii = 0; ii < 2; ii++) |
| 2054 | { |
| 2055 | rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB); |
| 2056 | alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha); |
| 2057 | |
| 2058 | AggregatedError<3> altRGBAggError; |
| 2059 | AggregatedError<1> altAlphaAggError; |
| 2060 | |
| 2061 | BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError); |
| 2062 | BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError); |
| 2063 | |
| 2064 | MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq); |
| 2065 | MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
| 2066 | |
| 2067 | ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError)); |
| 2068 | ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError)); |
| 2069 | |
| 2070 | rgbError = ParallelMath::Min(altRGBError, rgbError); |
| 2071 | alphaError = ParallelMath::Min(altAlphaError, alphaError); |
| 2072 | |
| 2073 | ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]); |
| 2074 | ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]); |
| 2075 | } |
| 2076 | |
| 2077 | errorRGB = errorRGB + rgbError; |
| 2078 | errorA = errorA + alphaError; |
| 2079 | } |
| 2080 | |
| 2081 | if (refine != numRefineRounds - 1) |
| 2082 | { |
| 2083 | rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex); |
| 2084 | alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex); |
| 2085 | } |
| 2086 | |
| 2087 | if (flags & Flags::BC7_FastIndexing) |
| 2088 | { |
| 2089 | errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq); |
| 2090 | errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq); |
| 2091 | } |
| 2092 | |
| 2093 | rgbIndexes[px] = rgbIndex; |
| 2094 | alphaIndexes[px] = alphaIndex; |
| 2095 | } |
| 2096 | |
| 2097 | ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError); |
| 2098 | ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError); |
| 2099 | |
| 2100 | ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter); |
| 2101 | ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter); |
| 2102 | |
| 2103 | if (ParallelMath::AnySet(rgbBetterInt16)) |
| 2104 | { |
| 2105 | bestRGBError = ParallelMath::Min(errorRGB, bestRGBError); |
| 2106 | |
| 2107 | for (int px = 0; px < 16; px++) |
| 2108 | ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]); |
| 2109 | |
| 2110 | for (int ep = 0; ep < 2; ep++) |
| 2111 | { |
| 2112 | for (int ch = 0; ch < 3; ch++) |
| 2113 | ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]); |
| 2114 | } |
| 2115 | } |
| 2116 | |
| 2117 | if (ParallelMath::AnySet(alphaBetterInt16)) |
| 2118 | { |
| 2119 | bestAlphaError = ParallelMath::Min(errorA, bestAlphaError); |
| 2120 | |
| 2121 | for (int px = 0; px < 16; px++) |
| 2122 | ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]); |
| 2123 | |
| 2124 | for (int ep = 0; ep < 2; ep++) |
| 2125 | ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]); |
| 2126 | } |
| 2127 | |
| 2128 | if (refine != numRefineRounds - 1) |
| 2129 | { |
| 2130 | rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn); |
| 2131 | |
| 2132 | MUInt15 alphaEPTemp[2][1]; |
| 2133 | alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn); |
| 2134 | |
| 2135 | for (int i = 0; i < 2; i++) |
| 2136 | alphaEP[i] = alphaEPTemp[i][0]; |
| 2137 | } |
| 2138 | } // refine |
| 2139 | } // tweak |
| 2140 | |
| 2141 | MFloat combinedError = bestRGBError + bestAlphaError; |
| 2142 | |
| 2143 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error); |
| 2144 | ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); |
| 2145 | |
| 2146 | work.m_error = ParallelMath::Min(combinedError, work.m_error); |
| 2147 | |
| 2148 | ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); |
| 2149 | ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation)); |
| 2150 | ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector)); |
| 2151 | |
| 2152 | for (int px = 0; px < 16; px++) |
| 2153 | { |
| 2154 | ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]); |
| 2155 | ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]); |
| 2156 | } |
| 2157 | |
| 2158 | for (int ep = 0; ep < 2; ep++) |
| 2159 | for (int ch = 0; ch < 4; ch++) |
| 2160 | ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]); |
| 2161 | } |
| 2162 | } |
| 2163 | } |
| 2164 | } |
| 2165 | |
| 2166 | template<class T> |
| 2167 | void cvtt::Internal::BC7Computer::Swap(T& a, T& b) |
| 2168 | { |
| 2169 | T temp = a; |
| 2170 | a = b; |
| 2171 | b = temp; |
| 2172 | } |
| 2173 | |
| 2174 | void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds) |
| 2175 | { |
| 2176 | MUInt15 pixels[16][4]; |
| 2177 | MFloat floatPixels[16][4]; |
| 2178 | |
| 2179 | for (int px = 0; px < 16; px++) |
| 2180 | { |
| 2181 | for (int ch = 0; ch < 4; ch++) |
| 2182 | ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); |
| 2183 | } |
| 2184 | |
| 2185 | for (int px = 0; px < 16; px++) |
| 2186 | { |
| 2187 | for (int ch = 0; ch < 4; ch++) |
| 2188 | floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); |
| 2189 | } |
| 2190 | |
| 2191 | BC67::WorkInfo work; |
| 2192 | memset(&work, 0, sizeof(work)); |
| 2193 | |
| 2194 | work.m_error = ParallelMath::MakeFloat(FLT_MAX); |
| 2195 | |
| 2196 | { |
| 2197 | ParallelMath::RoundTowardNearestForScope rtn; |
| 2198 | TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); |
| 2199 | TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); |
| 2200 | } |
| 2201 | |
| 2202 | for (int block = 0; block < ParallelMath::ParallelSize; block++) |
| 2203 | { |
| 2204 | PackingVector pv; |
| 2205 | pv.Init(); |
| 2206 | |
| 2207 | ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block); |
| 2208 | ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block); |
| 2209 | ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block); |
| 2210 | |
| 2211 | const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode]; |
| 2212 | |
| 2213 | ParallelMath::ScalarUInt16 indexes[16]; |
| 2214 | ParallelMath::ScalarUInt16 indexes2[16]; |
| 2215 | ParallelMath::ScalarUInt16 endPoints[3][2][4]; |
| 2216 | |
| 2217 | for (int i = 0; i < 16; i++) |
| 2218 | { |
| 2219 | indexes[i] = ParallelMath::Extract(work.m_indexes[i], block); |
| 2220 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2221 | indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block); |
| 2222 | } |
| 2223 | |
| 2224 | for (int subset = 0; subset < 3; subset++) |
| 2225 | { |
| 2226 | for (int ep = 0; ep < 2; ep++) |
| 2227 | { |
| 2228 | for (int ch = 0; ch < 4; ch++) |
| 2229 | endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block); |
| 2230 | } |
| 2231 | } |
| 2232 | |
| 2233 | int fixups[3] = { 0, 0, 0 }; |
| 2234 | |
| 2235 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2236 | { |
| 2237 | bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0); |
| 2238 | bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0); |
| 2239 | |
| 2240 | if (flipRGB) |
| 2241 | { |
| 2242 | uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; |
| 2243 | for (int px = 0; px < 16; px++) |
| 2244 | indexes[px] = highIndex - indexes[px]; |
| 2245 | } |
| 2246 | |
| 2247 | if (flipAlpha) |
| 2248 | { |
| 2249 | uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1; |
| 2250 | for (int px = 0; px < 16; px++) |
| 2251 | indexes2[px] = highIndex - indexes2[px]; |
| 2252 | } |
| 2253 | |
| 2254 | if (indexSelector) |
| 2255 | Swap(flipRGB, flipAlpha); |
| 2256 | |
| 2257 | if (flipRGB) |
| 2258 | { |
| 2259 | for (int ch = 0; ch < 3; ch++) |
| 2260 | Swap(endPoints[0][0][ch], endPoints[0][1][ch]); |
| 2261 | } |
| 2262 | if (flipAlpha) |
| 2263 | Swap(endPoints[0][0][3], endPoints[0][1][3]); |
| 2264 | |
| 2265 | } |
| 2266 | else |
| 2267 | { |
| 2268 | if (modeInfo.m_numSubsets == 2) |
| 2269 | fixups[1] = BC7Data::g_fixupIndexes2[partition]; |
| 2270 | else if (modeInfo.m_numSubsets == 3) |
| 2271 | { |
| 2272 | fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; |
| 2273 | fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; |
| 2274 | } |
| 2275 | |
| 2276 | bool flip[3] = { false, false, false }; |
| 2277 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2278 | flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0); |
| 2279 | |
| 2280 | if (flip[0] || flip[1] || flip[2]) |
| 2281 | { |
| 2282 | uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; |
| 2283 | for (int px = 0; px < 16; px++) |
| 2284 | { |
| 2285 | int subset = 0; |
| 2286 | if (modeInfo.m_numSubsets == 2) |
| 2287 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
| 2288 | else if (modeInfo.m_numSubsets == 3) |
| 2289 | subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; |
| 2290 | |
| 2291 | if (flip[subset]) |
| 2292 | indexes[px] = highIndex - indexes[px]; |
| 2293 | } |
| 2294 | |
| 2295 | int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3; |
| 2296 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2297 | { |
| 2298 | if (flip[subset]) |
| 2299 | for (int ch = 0; ch < maxCH; ch++) |
| 2300 | Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]); |
| 2301 | } |
| 2302 | } |
| 2303 | } |
| 2304 | |
| 2305 | pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1); |
| 2306 | |
| 2307 | if (modeInfo.m_partitionBits) |
| 2308 | pv.Pack(partition, modeInfo.m_partitionBits); |
| 2309 | |
| 2310 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2311 | { |
| 2312 | ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block); |
| 2313 | pv.Pack(rotation, 2); |
| 2314 | } |
| 2315 | |
| 2316 | if (modeInfo.m_hasIndexSelector) |
| 2317 | pv.Pack(indexSelector, 1); |
| 2318 | |
| 2319 | // Encode RGB |
| 2320 | for (int ch = 0; ch < 3; ch++) |
| 2321 | { |
| 2322 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2323 | { |
| 2324 | for (int ep = 0; ep < 2; ep++) |
| 2325 | { |
| 2326 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch]; |
| 2327 | epPart >>= (8 - modeInfo.m_rgbBits); |
| 2328 | |
| 2329 | pv.Pack(epPart, modeInfo.m_rgbBits); |
| 2330 | } |
| 2331 | } |
| 2332 | } |
| 2333 | |
| 2334 | // Encode alpha |
| 2335 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2336 | { |
| 2337 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2338 | { |
| 2339 | for (int ep = 0; ep < 2; ep++) |
| 2340 | { |
| 2341 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3]; |
| 2342 | epPart >>= (8 - modeInfo.m_alphaBits); |
| 2343 | |
| 2344 | pv.Pack(epPart, modeInfo.m_alphaBits); |
| 2345 | } |
| 2346 | } |
| 2347 | } |
| 2348 | |
| 2349 | // Encode parity bits |
| 2350 | if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) |
| 2351 | { |
| 2352 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2353 | { |
| 2354 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0]; |
| 2355 | epPart >>= (7 - modeInfo.m_rgbBits); |
| 2356 | epPart &= 1; |
| 2357 | |
| 2358 | pv.Pack(epPart, 1); |
| 2359 | } |
| 2360 | } |
| 2361 | else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
| 2362 | { |
| 2363 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2364 | { |
| 2365 | for (int ep = 0; ep < 2; ep++) |
| 2366 | { |
| 2367 | ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0]; |
| 2368 | epPart >>= (7 - modeInfo.m_rgbBits); |
| 2369 | epPart &= 1; |
| 2370 | |
| 2371 | pv.Pack(epPart, 1); |
| 2372 | } |
| 2373 | } |
| 2374 | } |
| 2375 | |
| 2376 | // Encode indexes |
| 2377 | for (int px = 0; px < 16; px++) |
| 2378 | { |
| 2379 | int bits = modeInfo.m_indexBits; |
| 2380 | if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) |
| 2381 | bits--; |
| 2382 | |
| 2383 | pv.Pack(indexes[px], bits); |
| 2384 | } |
| 2385 | |
| 2386 | // Encode secondary indexes |
| 2387 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2388 | { |
| 2389 | for (int px = 0; px < 16; px++) |
| 2390 | { |
| 2391 | int bits = modeInfo.m_alphaIndexBits; |
| 2392 | if (px == 0) |
| 2393 | bits--; |
| 2394 | |
| 2395 | pv.Pack(indexes2[px], bits); |
| 2396 | } |
| 2397 | } |
| 2398 | |
| 2399 | pv.Flush(packedBlocks); |
| 2400 | |
| 2401 | packedBlocks += 16; |
| 2402 | } |
| 2403 | } |
| 2404 | |
| 2405 | void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock) |
| 2406 | { |
| 2407 | UnpackingVector pv; |
| 2408 | pv.Init(packedBlock); |
| 2409 | |
| 2410 | int mode = 8; |
| 2411 | for (int i = 0; i < 8; i++) |
| 2412 | { |
| 2413 | if (pv.Unpack(1) == 1) |
| 2414 | { |
| 2415 | mode = i; |
| 2416 | break; |
| 2417 | } |
| 2418 | } |
| 2419 | |
| 2420 | if (mode > 7) |
| 2421 | { |
| 2422 | for (int px = 0; px < 16; px++) |
| 2423 | for (int ch = 0; ch < 4; ch++) |
| 2424 | output.m_pixels[px][ch] = 0; |
| 2425 | |
| 2426 | return; |
| 2427 | } |
| 2428 | |
| 2429 | const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode]; |
| 2430 | |
| 2431 | int partition = 0; |
| 2432 | if (modeInfo.m_partitionBits) |
| 2433 | partition = pv.Unpack(modeInfo.m_partitionBits); |
| 2434 | |
| 2435 | int rotation = 0; |
| 2436 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2437 | rotation = pv.Unpack(2); |
| 2438 | |
| 2439 | int indexSelector = 0; |
| 2440 | if (modeInfo.m_hasIndexSelector) |
| 2441 | indexSelector = pv.Unpack(1); |
| 2442 | |
| 2443 | // Resolve fixups |
| 2444 | int fixups[3] = { 0, 0, 0 }; |
| 2445 | |
| 2446 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate) |
| 2447 | { |
| 2448 | if (modeInfo.m_numSubsets == 2) |
| 2449 | fixups[1] = BC7Data::g_fixupIndexes2[partition]; |
| 2450 | else if (modeInfo.m_numSubsets == 3) |
| 2451 | { |
| 2452 | fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; |
| 2453 | fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; |
| 2454 | } |
| 2455 | } |
| 2456 | |
| 2457 | int endPoints[3][2][4]; |
| 2458 | |
| 2459 | // Decode RGB |
| 2460 | for (int ch = 0; ch < 3; ch++) |
| 2461 | { |
| 2462 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2463 | { |
| 2464 | for (int ep = 0; ep < 2; ep++) |
| 2465 | endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits)); |
| 2466 | } |
| 2467 | } |
| 2468 | |
| 2469 | // Decode alpha |
| 2470 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2471 | { |
| 2472 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2473 | { |
| 2474 | for (int ep = 0; ep < 2; ep++) |
| 2475 | endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits)); |
| 2476 | } |
| 2477 | } |
| 2478 | else |
| 2479 | { |
| 2480 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2481 | { |
| 2482 | for (int ep = 0; ep < 2; ep++) |
| 2483 | endPoints[subset][ep][3] = 255; |
| 2484 | } |
| 2485 | } |
| 2486 | |
| 2487 | int parityBits = 0; |
| 2488 | |
| 2489 | // Decode parity bits |
| 2490 | if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) |
| 2491 | { |
| 2492 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2493 | { |
| 2494 | int p = pv.Unpack(1); |
| 2495 | |
| 2496 | for (int ep = 0; ep < 2; ep++) |
| 2497 | { |
| 2498 | for (int ch = 0; ch < 3; ch++) |
| 2499 | endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); |
| 2500 | |
| 2501 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2502 | endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); |
| 2503 | } |
| 2504 | } |
| 2505 | |
| 2506 | parityBits = 1; |
| 2507 | } |
| 2508 | else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) |
| 2509 | { |
| 2510 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2511 | { |
| 2512 | for (int ep = 0; ep < 2; ep++) |
| 2513 | { |
| 2514 | int p = pv.Unpack(1); |
| 2515 | |
| 2516 | for (int ch = 0; ch < 3; ch++) |
| 2517 | endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); |
| 2518 | |
| 2519 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2520 | endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); |
| 2521 | } |
| 2522 | } |
| 2523 | |
| 2524 | parityBits = 1; |
| 2525 | } |
| 2526 | |
| 2527 | // Fill endpoint bits |
| 2528 | for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) |
| 2529 | { |
| 2530 | for (int ep = 0; ep < 2; ep++) |
| 2531 | { |
| 2532 | for (int ch = 0; ch < 3; ch++) |
| 2533 | endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits)); |
| 2534 | |
| 2535 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2536 | endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits)); |
| 2537 | } |
| 2538 | } |
| 2539 | |
| 2540 | int indexes[16]; |
| 2541 | int indexes2[16]; |
| 2542 | |
| 2543 | // Decode indexes |
| 2544 | for (int px = 0; px < 16; px++) |
| 2545 | { |
| 2546 | int bits = modeInfo.m_indexBits; |
| 2547 | if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) |
| 2548 | bits--; |
| 2549 | |
| 2550 | indexes[px] = pv.Unpack(bits); |
| 2551 | } |
| 2552 | |
| 2553 | // Decode secondary indexes |
| 2554 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2555 | { |
| 2556 | for (int px = 0; px < 16; px++) |
| 2557 | { |
| 2558 | int bits = modeInfo.m_alphaIndexBits; |
| 2559 | if (px == 0) |
| 2560 | bits--; |
| 2561 | |
| 2562 | indexes2[px] = pv.Unpack(bits); |
| 2563 | } |
| 2564 | } |
| 2565 | else |
| 2566 | { |
| 2567 | for (int px = 0; px < 16; px++) |
| 2568 | indexes2[px] = 0; |
| 2569 | } |
| 2570 | |
| 2571 | const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits]; |
| 2572 | const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits]; |
| 2573 | |
| 2574 | // Decode each pixel |
| 2575 | for (int px = 0; px < 16; px++) |
| 2576 | { |
| 2577 | int rgbWeight = 0; |
| 2578 | int alphaWeight = 0; |
| 2579 | |
| 2580 | int rgbIndex = indexes[px]; |
| 2581 | |
| 2582 | rgbWeight = rgbWeights[indexes[px]]; |
| 2583 | |
| 2584 | if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) |
| 2585 | alphaWeight = rgbWeight; |
| 2586 | else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) |
| 2587 | alphaWeight = alphaWeights[indexes2[px]]; |
| 2588 | |
| 2589 | if (indexSelector == 1) |
| 2590 | { |
| 2591 | int temp = rgbWeight; |
| 2592 | rgbWeight = alphaWeight; |
| 2593 | alphaWeight = temp; |
| 2594 | } |
| 2595 | |
| 2596 | int pixel[4] = { 0, 0, 0, 255 }; |
| 2597 | |
| 2598 | int subset = 0; |
| 2599 | |
| 2600 | if (modeInfo.m_numSubsets == 2) |
| 2601 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
| 2602 | else if (modeInfo.m_numSubsets == 3) |
| 2603 | subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; |
| 2604 | |
| 2605 | for (int ch = 0; ch < 3; ch++) |
| 2606 | pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6; |
| 2607 | |
| 2608 | if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) |
| 2609 | pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6; |
| 2610 | |
| 2611 | if (rotation != 0) |
| 2612 | { |
| 2613 | int ch = rotation - 1; |
| 2614 | int temp = pixel[ch]; |
| 2615 | pixel[ch] = pixel[3]; |
| 2616 | pixel[3] = temp; |
| 2617 | } |
| 2618 | |
| 2619 | for (int ch = 0; ch < 4; ch++) |
| 2620 | output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]); |
| 2621 | } |
| 2622 | } |
| 2623 | |
| 2624 | cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru) |
| 2625 | { |
| 2626 | assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744)))); |
| 2627 | assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL))); |
| 2628 | |
| 2629 | // Expand to full range |
| 2630 | ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0)); |
| 2631 | MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL)); |
| 2632 | |
| 2633 | absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision); |
| 2634 | |
| 2635 | MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem); |
| 2636 | |
| 2637 | return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16); |
| 2638 | } |
| 2639 | |
| 2640 | cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru) |
| 2641 | { |
| 2642 | MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru); |
| 2643 | return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision)); |
| 2644 | } |
| 2645 | |
| 2646 | void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL) |
| 2647 | { |
| 2648 | MSInt16 zero = ParallelMath::MakeSInt16(0); |
| 2649 | |
| 2650 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero); |
| 2651 | MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp)); |
| 2652 | |
| 2653 | MSInt16 unq; |
| 2654 | MUInt15 absUnq; |
| 2655 | |
| 2656 | if (precision >= 16) |
| 2657 | { |
| 2658 | unq = comp; |
| 2659 | absUnq = absComp; |
| 2660 | } |
| 2661 | else |
| 2662 | { |
| 2663 | MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2)); |
| 2664 | ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); |
| 2665 | ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); |
| 2666 | |
| 2667 | absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1))); |
| 2668 | ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0)); |
| 2669 | ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff)); |
| 2670 | |
| 2671 | unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq)); |
| 2672 | } |
| 2673 | |
| 2674 | outUnquantized = unq; |
| 2675 | |
| 2676 | MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5)); |
| 2677 | |
| 2678 | outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq)); |
| 2679 | } |
| 2680 | |
| 2681 | void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished) |
| 2682 | { |
| 2683 | MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp); |
| 2684 | if (precision < 15) |
| 2685 | { |
| 2686 | MUInt15 zero = ParallelMath::MakeUInt15(0); |
| 2687 | MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2)); |
| 2688 | |
| 2689 | ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); |
| 2690 | ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); |
| 2691 | |
| 2692 | unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision)); |
| 2693 | |
| 2694 | ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0)); |
| 2695 | ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff)); |
| 2696 | } |
| 2697 | |
| 2698 | outUnquantized = unq; |
| 2699 | outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6)); |
| 2700 | } |
| 2701 | |
| 2702 | void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) |
| 2703 | { |
| 2704 | MSInt16 unquantizedEP[2][3]; |
| 2705 | MSInt16 finishedUnquantizedEP[2][3]; |
| 2706 | |
| 2707 | { |
| 2708 | ParallelMath::RoundUpForScope ru; |
| 2709 | |
| 2710 | for (int epi = 0; epi < 2; epi++) |
| 2711 | { |
| 2712 | for (int ch = 0; ch < 3; ch++) |
| 2713 | { |
| 2714 | MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru); |
| 2715 | UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); |
| 2716 | quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); |
| 2717 | } |
| 2718 | } |
| 2719 | } |
| 2720 | |
| 2721 | indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); |
| 2722 | indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights); |
| 2723 | |
| 2724 | MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); |
| 2725 | |
| 2726 | MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); |
| 2727 | |
| 2728 | ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); |
| 2729 | |
| 2730 | if (ParallelMath::AnySet(invert)) |
| 2731 | { |
| 2732 | ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); |
| 2733 | |
| 2734 | indexSelector.ConditionalInvert(invert); |
| 2735 | |
| 2736 | for (int ch = 0; ch < 3; ch++) |
| 2737 | { |
| 2738 | MAInt16 firstEP = quantizedEndPoints[0][ch]; |
| 2739 | MAInt16 secondEP = quantizedEndPoints[1][ch]; |
| 2740 | |
| 2741 | quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); |
| 2742 | quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); |
| 2743 | } |
| 2744 | } |
| 2745 | |
| 2746 | indexes[fixupIndex] = index; |
| 2747 | } |
| 2748 | |
| 2749 | void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) |
| 2750 | { |
| 2751 | MUInt16 unquantizedEP[2][3]; |
| 2752 | MUInt16 finishedUnquantizedEP[2][3]; |
| 2753 | |
| 2754 | { |
| 2755 | ParallelMath::RoundUpForScope ru; |
| 2756 | |
| 2757 | for (int epi = 0; epi < 2; epi++) |
| 2758 | { |
| 2759 | for (int ch = 0; ch < 3; ch++) |
| 2760 | { |
| 2761 | MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru); |
| 2762 | UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); |
| 2763 | quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); |
| 2764 | } |
| 2765 | } |
| 2766 | } |
| 2767 | |
| 2768 | indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); |
| 2769 | indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights); |
| 2770 | |
| 2771 | MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); |
| 2772 | |
| 2773 | MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); |
| 2774 | |
| 2775 | ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); |
| 2776 | |
| 2777 | if (ParallelMath::AnySet(invert)) |
| 2778 | { |
| 2779 | ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); |
| 2780 | |
| 2781 | indexSelector.ConditionalInvert(invert); |
| 2782 | |
| 2783 | for (int ch = 0; ch < 3; ch++) |
| 2784 | { |
| 2785 | MAInt16 firstEP = quantizedEndPoints[0][ch]; |
| 2786 | MAInt16 secondEP = quantizedEndPoints[1][ch]; |
| 2787 | |
| 2788 | quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); |
| 2789 | quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); |
| 2790 | } |
| 2791 | } |
| 2792 | |
| 2793 | indexes[fixupIndex] = index; |
| 2794 | } |
| 2795 | |
| 2796 | void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal) |
| 2797 | { |
| 2798 | ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); |
| 2799 | |
| 2800 | MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); |
| 2801 | |
| 2802 | for (int ch = 0; ch < 3; ch++) |
| 2803 | { |
| 2804 | outEncodedEPs[0][0][ch] = ep0[0][ch]; |
| 2805 | outEncodedEPs[0][1][ch] = ep0[1][ch]; |
| 2806 | outEncodedEPs[1][0][ch] = ep1[0][ch]; |
| 2807 | outEncodedEPs[1][1][ch] = ep1[1][ch]; |
| 2808 | |
| 2809 | if (isTransformed) |
| 2810 | { |
| 2811 | for (int subset = 0; subset < 2; subset++) |
| 2812 | { |
| 2813 | for (int epi = 0; epi < 2; epi++) |
| 2814 | { |
| 2815 | if (epi == 0 && subset == 0) |
| 2816 | continue; |
| 2817 | |
| 2818 | MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask); |
| 2819 | |
| 2820 | MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]); |
| 2821 | |
| 2822 | outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); |
| 2823 | |
| 2824 | MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask); |
| 2825 | allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); |
| 2826 | } |
| 2827 | } |
| 2828 | } |
| 2829 | |
| 2830 | if (!ParallelMath::AnySet(allLegal)) |
| 2831 | break; |
| 2832 | } |
| 2833 | |
| 2834 | outIsLegal = allLegal; |
| 2835 | } |
| 2836 | |
| 2837 | void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal) |
| 2838 | { |
| 2839 | ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); |
| 2840 | |
| 2841 | MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); |
| 2842 | |
| 2843 | for (int ch = 0; ch < 3; ch++) |
| 2844 | { |
| 2845 | outEncodedEPs[0][ch] = ep[0][ch]; |
| 2846 | outEncodedEPs[1][ch] = ep[1][ch]; |
| 2847 | |
| 2848 | if (isTransformed) |
| 2849 | { |
| 2850 | MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask); |
| 2851 | |
| 2852 | MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]); |
| 2853 | |
| 2854 | outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); |
| 2855 | |
| 2856 | MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask); |
| 2857 | allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); |
| 2858 | } |
| 2859 | } |
| 2860 | |
| 2861 | outIsLegal = allLegal; |
| 2862 | } |
| 2863 | |
| 2864 | void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds) |
| 2865 | { |
| 2866 | if (numTweakRounds < 1) |
| 2867 | numTweakRounds = 1; |
| 2868 | else if (numTweakRounds > MaxTweakRounds) |
| 2869 | numTweakRounds = MaxTweakRounds; |
| 2870 | |
| 2871 | if (numRefineRounds < 1) |
| 2872 | numRefineRounds = 1; |
| 2873 | else if (numRefineRounds > MaxRefineRounds) |
| 2874 | numRefineRounds = MaxRefineRounds; |
| 2875 | |
| 2876 | bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0); |
| 2877 | float channelWeightsSq[3]; |
| 2878 | |
| 2879 | ParallelMath::RoundTowardNearestForScope rtn; |
| 2880 | |
| 2881 | MSInt16 pixels[16][3]; |
| 2882 | MFloat floatPixels2CL[16][3]; |
| 2883 | MFloat floatPixelsLinearWeighted[16][3]; |
| 2884 | |
| 2885 | MSInt16 low15Bits = ParallelMath::MakeSInt16(32767); |
| 2886 | |
| 2887 | for (int ch = 0; ch < 3; ch++) |
| 2888 | channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; |
| 2889 | |
| 2890 | for (int px = 0; px < 16; px++) |
| 2891 | { |
| 2892 | for (int ch = 0; ch < 3; ch++) |
| 2893 | { |
| 2894 | MSInt16 pixelValue; |
| 2895 | ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue); |
| 2896 | |
| 2897 | // Convert from sign+magnitude to 2CL |
| 2898 | if (isSigned) |
| 2899 | { |
| 2900 | ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0)); |
| 2901 | MSInt16 magnitude = (pixelValue & low15Bits); |
| 2902 | ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude); |
| 2903 | pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743)); |
| 2904 | } |
| 2905 | else |
| 2906 | pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0)); |
| 2907 | |
| 2908 | pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743)); |
| 2909 | |
| 2910 | pixels[px][ch] = pixelValue; |
| 2911 | floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue); |
| 2912 | floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch]; |
| 2913 | } |
| 2914 | } |
| 2915 | |
| 2916 | MFloat preWeightedPixels[16][3]; |
| 2917 | |
| 2918 | BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights); |
| 2919 | |
| 2920 | MAInt16 bestEndPoints[2][2][3]; |
| 2921 | MUInt15 bestIndexes[16]; |
| 2922 | MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); |
| 2923 | MUInt15 bestMode = ParallelMath::MakeUInt15(0); |
| 2924 | MUInt15 bestPartition = ParallelMath::MakeUInt15(0); |
| 2925 | |
| 2926 | for (int px = 0; px < 16; px++) |
| 2927 | bestIndexes[px] = ParallelMath::MakeUInt15(0); |
| 2928 | |
| 2929 | for (int subset = 0; subset < 2; subset++) |
| 2930 | for (int epi = 0; epi < 2; epi++) |
| 2931 | for (int ch = 0; ch < 3; ch++) |
| 2932 | bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0); |
| 2933 | |
| 2934 | UnfinishedEndpoints<3> partitionedUFEP[32][2]; |
| 2935 | UnfinishedEndpoints<3> singleUFEP; |
| 2936 | |
| 2937 | // Generate UFEP for partitions |
| 2938 | for (int p = 0; p < 32; p++) |
| 2939 | { |
| 2940 | int partitionMask = BC7Data::g_partitionMap[p]; |
| 2941 | |
| 2942 | EndpointSelector<3, 8> epSelectors[2]; |
| 2943 | |
| 2944 | for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) |
| 2945 | { |
| 2946 | for (int px = 0; px < 16; px++) |
| 2947 | { |
| 2948 | int subset = (partitionMask >> px) & 1; |
| 2949 | epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); |
| 2950 | } |
| 2951 | |
| 2952 | for (int subset = 0; subset < 2; subset++) |
| 2953 | epSelectors[subset].FinishPass(pass); |
| 2954 | } |
| 2955 | |
| 2956 | for (int subset = 0; subset < 2; subset++) |
| 2957 | partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights); |
| 2958 | } |
| 2959 | |
| 2960 | // Generate UFEP for single |
| 2961 | { |
| 2962 | EndpointSelector<3, 8> epSelector; |
| 2963 | |
| 2964 | for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) |
| 2965 | { |
| 2966 | for (int px = 0; px < 16; px++) |
| 2967 | epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); |
| 2968 | |
| 2969 | epSelector.FinishPass(pass); |
| 2970 | } |
| 2971 | |
| 2972 | singleUFEP = epSelector.GetEndpoints(channelWeights); |
| 2973 | } |
| 2974 | |
| 2975 | for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++) |
| 2976 | { |
| 2977 | bool partitioned = (partitionedInt == 1); |
| 2978 | |
| 2979 | for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--) |
| 2980 | { |
| 2981 | if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec]) |
| 2982 | continue; |
| 2983 | |
| 2984 | int numPartitions = partitioned ? 32 : 1; |
| 2985 | int numSubsets = partitioned ? 2 : 1; |
| 2986 | int indexBits = partitioned ? 3 : 4; |
| 2987 | int indexRange = (1 << indexBits); |
| 2988 | |
| 2989 | for (int p = 0; p < numPartitions; p++) |
| 2990 | { |
| 2991 | int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0; |
| 2992 | |
| 2993 | const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds; |
| 2994 | |
| 2995 | MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3]; |
| 2996 | MUInt15 metaIndexes[MaxMetaRounds][16]; |
| 2997 | MFloat metaError[MaxMetaRounds][2]; |
| 2998 | |
| 2999 | bool roundValid[MaxMetaRounds][2]; |
| 3000 | |
| 3001 | for (int r = 0; r < MaxMetaRounds; r++) |
| 3002 | for (int subset = 0; subset < 2; subset++) |
| 3003 | roundValid[r][subset] = true; |
| 3004 | |
| 3005 | for (int subset = 0; subset < numSubsets; subset++) |
| 3006 | { |
| 3007 | for (int tweak = 0; tweak < MaxTweakRounds; tweak++) |
| 3008 | { |
| 3009 | EndpointRefiner<3> refiners[2]; |
| 3010 | |
| 3011 | bool abortRemainingRefines = false; |
| 3012 | for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++) |
| 3013 | { |
| 3014 | int metaRound = tweak * MaxRefineRounds + refinePass; |
| 3015 | |
| 3016 | if (tweak >= numTweakRounds || refinePass >= numRefineRounds) |
| 3017 | abortRemainingRefines = true; |
| 3018 | |
| 3019 | if (abortRemainingRefines) |
| 3020 | { |
| 3021 | roundValid[metaRound][subset] = false; |
| 3022 | continue; |
| 3023 | } |
| 3024 | |
| 3025 | MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound]; |
| 3026 | MUInt15(&mrIndexes)[16] = metaIndexes[metaRound]; |
| 3027 | |
| 3028 | MSInt16 endPointsColorSpace[2][3]; |
| 3029 | |
| 3030 | if (refinePass == 0) |
| 3031 | { |
| 3032 | UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP; |
| 3033 | |
| 3034 | if (isSigned) |
| 3035 | ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); |
| 3036 | else |
| 3037 | ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); |
| 3038 | } |
| 3039 | else |
| 3040 | refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn); |
| 3041 | |
| 3042 | refiners[subset].Init(indexRange, channelWeights); |
| 3043 | |
| 3044 | int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p]; |
| 3045 | |
| 3046 | IndexSelectorHDR<3> indexSelector; |
| 3047 | if (isSigned) |
| 3048 | QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); |
| 3049 | else |
| 3050 | QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); |
| 3051 | |
| 3052 | if (metaRound > 0) |
| 3053 | { |
| 3054 | ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false); |
| 3055 | |
| 3056 | for (int prevRound = 0; prevRound < metaRound; prevRound++) |
| 3057 | { |
| 3058 | MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset]; |
| 3059 | |
| 3060 | ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true); |
| 3061 | |
| 3062 | for (int epi = 0; epi < 2; epi++) |
| 3063 | for (int ch = 0; ch < 3; ch++) |
| 3064 | same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch])); |
| 3065 | |
| 3066 | anySame = (anySame | same); |
| 3067 | if (ParallelMath::AllSet(anySame)) |
| 3068 | break; |
| 3069 | } |
| 3070 | |
| 3071 | if (ParallelMath::AllSet(anySame)) |
| 3072 | { |
| 3073 | roundValid[metaRound][subset] = false; |
| 3074 | continue; |
| 3075 | } |
| 3076 | } |
| 3077 | |
| 3078 | MFloat subsetError = ParallelMath::MakeFloatZero(); |
| 3079 | |
| 3080 | { |
| 3081 | for (int px = 0; px < 16; px++) |
| 3082 | { |
| 3083 | if (subset != ((partitionMask >> px) & 1)) |
| 3084 | continue; |
| 3085 | |
| 3086 | MUInt15 index; |
| 3087 | if (px == fixupIndex) |
| 3088 | index = mrIndexes[px]; |
| 3089 | else |
| 3090 | { |
| 3091 | index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn); |
| 3092 | mrIndexes[px] = index; |
| 3093 | } |
| 3094 | |
| 3095 | MSInt16 reconstructed[3]; |
| 3096 | if (isSigned) |
| 3097 | indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed); |
| 3098 | else |
| 3099 | indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed); |
| 3100 | |
| 3101 | subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq)); |
| 3102 | |
| 3103 | if (refinePass != numRefineRounds - 1) |
| 3104 | refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index); |
| 3105 | } |
| 3106 | } |
| 3107 | |
| 3108 | metaError[metaRound][subset] = subsetError; |
| 3109 | } |
| 3110 | } |
| 3111 | } |
| 3112 | |
| 3113 | // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme |
| 3114 | int numMeta1 = partitioned ? MaxMetaRounds : 1; |
| 3115 | for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++) |
| 3116 | { |
| 3117 | if (!roundValid[meta0][0]) |
| 3118 | continue; |
| 3119 | |
| 3120 | for (int meta1 = 0; meta1 < numMeta1; meta1++) |
| 3121 | { |
| 3122 | MFloat combinedError = metaError[meta0][0]; |
| 3123 | if (partitioned) |
| 3124 | { |
| 3125 | if (!roundValid[meta1][1]) |
| 3126 | continue; |
| 3127 | |
| 3128 | combinedError = combinedError + metaError[meta1][1]; |
| 3129 | } |
| 3130 | |
| 3131 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError); |
| 3132 | if (!ParallelMath::AnySet(errorBetter)) |
| 3133 | continue; |
| 3134 | |
| 3135 | ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter); |
| 3136 | |
| 3137 | // Figure out if this is encodable |
| 3138 | for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++) |
| 3139 | { |
| 3140 | const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode]; |
| 3141 | |
| 3142 | if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec) |
| 3143 | continue; |
| 3144 | |
| 3145 | MAInt16 encodedEPs[2][2][3]; |
| 3146 | ParallelMath::Int16CompFlag isLegal; |
| 3147 | if (partitioned) |
| 3148 | EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal); |
| 3149 | else |
| 3150 | EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal); |
| 3151 | |
| 3152 | ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal); |
| 3153 | if (!ParallelMath::AnySet(isLegalAndBetter)) |
| 3154 | continue; |
| 3155 | |
| 3156 | ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter); |
| 3157 | |
| 3158 | ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError); |
| 3159 | ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode))); |
| 3160 | ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p))); |
| 3161 | |
| 3162 | for (int subset = 0; subset < numSubsets; subset++) |
| 3163 | { |
| 3164 | for (int epi = 0; epi < 2; epi++) |
| 3165 | { |
| 3166 | for (int ch = 0; ch < 3; ch++) |
| 3167 | ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]); |
| 3168 | } |
| 3169 | } |
| 3170 | |
| 3171 | for (int px = 0; px < 16; px++) |
| 3172 | { |
| 3173 | int subset = ((partitionMask >> px) & 1); |
| 3174 | if (subset == 0) |
| 3175 | ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]); |
| 3176 | else |
| 3177 | ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]); |
| 3178 | } |
| 3179 | |
| 3180 | needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter); |
| 3181 | if (!ParallelMath::AnySet(needsCommit)) |
| 3182 | break; |
| 3183 | } |
| 3184 | } |
| 3185 | } |
| 3186 | } |
| 3187 | } |
| 3188 | } |
| 3189 | |
| 3190 | // At this point, everything should be set |
| 3191 | for (int block = 0; block < ParallelMath::ParallelSize; block++) |
| 3192 | { |
| 3193 | ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block); |
| 3194 | ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block); |
| 3195 | int32_t eps[2][2][3]; |
| 3196 | ParallelMath::ScalarUInt16 indexes[16]; |
| 3197 | |
| 3198 | const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; |
| 3199 | |
| 3200 | const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode]; |
| 3201 | |
| 3202 | const size_t = modeInfo.m_partitioned ? 82 : 65; |
| 3203 | |
| 3204 | for (int subset = 0; subset < 2; subset++) |
| 3205 | { |
| 3206 | for (int epi = 0; epi < 2; epi++) |
| 3207 | { |
| 3208 | for (int ch = 0; ch < 3; ch++) |
| 3209 | eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block); |
| 3210 | } |
| 3211 | } |
| 3212 | |
| 3213 | for (int px = 0; px < 16; px++) |
| 3214 | indexes[px] = ParallelMath::Extract(bestIndexes[px], block); |
| 3215 | |
| 3216 | uint16_t modeID = modeInfo.m_modeID; |
| 3217 | |
| 3218 | PackingVector pv; |
| 3219 | pv.Init(); |
| 3220 | |
| 3221 | for (size_t i = 0; i < headerBits; i++) { |
| 3222 | int32_t codedValue = 0; |
| 3223 | switch (desc[i].m_eField) { |
| 3224 | case BC6HData::M: |
| 3225 | codedValue = modeID; |
| 3226 | break; |
| 3227 | case BC6HData::D: |
| 3228 | codedValue = partition; |
| 3229 | break; |
| 3230 | case BC6HData::RW: |
| 3231 | codedValue = eps[0][0][0]; |
| 3232 | break; |
| 3233 | case BC6HData::RX: |
| 3234 | codedValue = eps[0][1][0]; |
| 3235 | break; |
| 3236 | case BC6HData::RY: |
| 3237 | codedValue = eps[1][0][0]; |
| 3238 | break; |
| 3239 | case BC6HData::RZ: |
| 3240 | codedValue = eps[1][1][0]; |
| 3241 | break; |
| 3242 | case BC6HData::GW: |
| 3243 | codedValue = eps[0][0][1]; |
| 3244 | break; |
| 3245 | case BC6HData::GX: |
| 3246 | codedValue = eps[0][1][1]; |
| 3247 | break; |
| 3248 | case BC6HData::GY: |
| 3249 | codedValue = eps[1][0][1]; |
| 3250 | break; |
| 3251 | case BC6HData::GZ: |
| 3252 | codedValue = eps[1][1][1]; |
| 3253 | break; |
| 3254 | case BC6HData::BW: |
| 3255 | codedValue = eps[0][0][2]; |
| 3256 | break; |
| 3257 | case BC6HData::BX: |
| 3258 | codedValue = eps[0][1][2]; |
| 3259 | break; |
| 3260 | case BC6HData::BY: |
| 3261 | codedValue = eps[1][0][2]; |
| 3262 | break; |
| 3263 | case BC6HData::BZ: |
| 3264 | codedValue = eps[1][1][2]; |
| 3265 | break; |
| 3266 | default: |
| 3267 | assert(false); |
| 3268 | break; |
| 3269 | } |
| 3270 | pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1); |
| 3271 | } |
| 3272 | |
| 3273 | int fixupIndex1 = 0; |
| 3274 | int indexBits = 4; |
| 3275 | if (modeInfo.m_partitioned) |
| 3276 | { |
| 3277 | fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; |
| 3278 | indexBits = 3; |
| 3279 | } |
| 3280 | |
| 3281 | for (int px = 0; px < 16; px++) |
| 3282 | { |
| 3283 | ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block); |
| 3284 | if (px == 0 || px == fixupIndex1) |
| 3285 | pv.Pack(index, indexBits - 1); |
| 3286 | else |
| 3287 | pv.Pack(index, indexBits); |
| 3288 | } |
| 3289 | |
| 3290 | pv.Flush(packedBlocks + 16 * block); |
| 3291 | } |
| 3292 | } |
| 3293 | |
| 3294 | void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits) |
| 3295 | { |
| 3296 | if (v & (1 << (bits - 1))) |
| 3297 | v |= -(1 << bits); |
| 3298 | } |
| 3299 | |
| 3300 | void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned) |
| 3301 | { |
| 3302 | UnpackingVector pv; |
| 3303 | pv.Init(pBC); |
| 3304 | |
| 3305 | int numModeBits = 2; |
| 3306 | int modeBits = pv.Unpack(2); |
| 3307 | if (modeBits != 0 && modeBits != 1) |
| 3308 | { |
| 3309 | modeBits |= pv.Unpack(3) << 2; |
| 3310 | numModeBits += 3; |
| 3311 | } |
| 3312 | |
| 3313 | int mode = -1; |
| 3314 | for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++) |
| 3315 | { |
| 3316 | if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits) |
| 3317 | { |
| 3318 | mode = possibleMode; |
| 3319 | break; |
| 3320 | } |
| 3321 | } |
| 3322 | |
| 3323 | if (mode < 0) |
| 3324 | { |
| 3325 | for (int px = 0; px < 16; px++) |
| 3326 | { |
| 3327 | for (int ch = 0; ch < 3; ch++) |
| 3328 | output.m_pixels[px][ch] = 0; |
| 3329 | output.m_pixels[px][3] = 0x3c00; // 1.0 |
| 3330 | } |
| 3331 | return; |
| 3332 | } |
| 3333 | |
| 3334 | const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; |
| 3335 | const size_t = modeInfo.m_partitioned ? 82 : 65; |
| 3336 | const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode]; |
| 3337 | |
| 3338 | int32_t partition = 0; |
| 3339 | int32_t eps[2][2][3]; |
| 3340 | |
| 3341 | for (int subset = 0; subset < 2; subset++) |
| 3342 | for (int epi = 0; epi < 2; epi++) |
| 3343 | for (int ch = 0; ch < 3; ch++) |
| 3344 | eps[subset][epi][ch] = 0; |
| 3345 | |
| 3346 | for (size_t i = numModeBits; i < headerBits; i++) { |
| 3347 | int32_t *pCodedValue = NULL; |
| 3348 | |
| 3349 | switch (desc[i].m_eField) { |
| 3350 | case BC6HData::D: |
| 3351 | pCodedValue = &partition; |
| 3352 | break; |
| 3353 | case BC6HData::RW: |
| 3354 | pCodedValue = &eps[0][0][0]; |
| 3355 | break; |
| 3356 | case BC6HData::RX: |
| 3357 | pCodedValue = &eps[0][1][0]; |
| 3358 | break; |
| 3359 | case BC6HData::RY: |
| 3360 | pCodedValue = &eps[1][0][0]; |
| 3361 | break; |
| 3362 | case BC6HData::RZ: |
| 3363 | pCodedValue = &eps[1][1][0]; |
| 3364 | break; |
| 3365 | case BC6HData::GW: |
| 3366 | pCodedValue = &eps[0][0][1]; |
| 3367 | break; |
| 3368 | case BC6HData::GX: |
| 3369 | pCodedValue = &eps[0][1][1]; |
| 3370 | break; |
| 3371 | case BC6HData::GY: |
| 3372 | pCodedValue = &eps[1][0][1]; |
| 3373 | break; |
| 3374 | case BC6HData::GZ: |
| 3375 | pCodedValue = &eps[1][1][1]; |
| 3376 | break; |
| 3377 | case BC6HData::BW: |
| 3378 | pCodedValue = &eps[0][0][2]; |
| 3379 | break; |
| 3380 | case BC6HData::BX: |
| 3381 | pCodedValue = &eps[0][1][2]; |
| 3382 | break; |
| 3383 | case BC6HData::BY: |
| 3384 | pCodedValue = &eps[1][0][2]; |
| 3385 | break; |
| 3386 | case BC6HData::BZ: |
| 3387 | pCodedValue = &eps[1][1][2]; |
| 3388 | break; |
| 3389 | default: |
| 3390 | assert(false); |
| 3391 | break; |
| 3392 | } |
| 3393 | |
| 3394 | (*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit; |
| 3395 | } |
| 3396 | |
| 3397 | uint16_t modeID = modeInfo.m_modeID; |
| 3398 | |
| 3399 | int fixupIndex1 = 0; |
| 3400 | int indexBits = 4; |
| 3401 | int numSubsets = 1; |
| 3402 | if (modeInfo.m_partitioned) |
| 3403 | { |
| 3404 | fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; |
| 3405 | indexBits = 3; |
| 3406 | numSubsets = 2; |
| 3407 | } |
| 3408 | |
| 3409 | int indexes[16]; |
| 3410 | for (int px = 0; px < 16; px++) |
| 3411 | { |
| 3412 | if (px == 0 || px == fixupIndex1) |
| 3413 | indexes[px] = pv.Unpack(indexBits - 1); |
| 3414 | else |
| 3415 | indexes[px] = pv.Unpack(indexBits); |
| 3416 | } |
| 3417 | |
| 3418 | if (modeInfo.m_partitioned) |
| 3419 | { |
| 3420 | for (int ch = 0; ch < 3; ch++) |
| 3421 | { |
| 3422 | if (isSigned) |
| 3423 | SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); |
| 3424 | if (modeInfo.m_transformed || isSigned) |
| 3425 | { |
| 3426 | SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); |
| 3427 | SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]); |
| 3428 | SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]); |
| 3429 | } |
| 3430 | } |
| 3431 | } |
| 3432 | else |
| 3433 | { |
| 3434 | for (int ch = 0; ch < 3; ch++) |
| 3435 | { |
| 3436 | if (isSigned) |
| 3437 | SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); |
| 3438 | if (modeInfo.m_transformed || isSigned) |
| 3439 | SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); |
| 3440 | } |
| 3441 | } |
| 3442 | |
| 3443 | int aPrec = modeInfo.m_aPrec; |
| 3444 | |
| 3445 | if (modeInfo.m_transformed) |
| 3446 | { |
| 3447 | for (int ch = 0; ch < 3; ch++) |
| 3448 | { |
| 3449 | int wrapMask = (1 << aPrec) - 1; |
| 3450 | |
| 3451 | eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask); |
| 3452 | if (isSigned) |
| 3453 | SignExtendSingle(eps[0][1][ch], aPrec); |
| 3454 | |
| 3455 | if (modeInfo.m_partitioned) |
| 3456 | { |
| 3457 | eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask); |
| 3458 | eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask); |
| 3459 | |
| 3460 | if (isSigned) |
| 3461 | { |
| 3462 | SignExtendSingle(eps[1][0][ch], aPrec); |
| 3463 | SignExtendSingle(eps[1][1][ch], aPrec); |
| 3464 | } |
| 3465 | } |
| 3466 | } |
| 3467 | } |
| 3468 | |
| 3469 | // Unquantize endpoints |
| 3470 | for (int subset = 0; subset < numSubsets; subset++) |
| 3471 | { |
| 3472 | for (int epi = 0; epi < 2; epi++) |
| 3473 | { |
| 3474 | for (int ch = 0; ch < 3; ch++) |
| 3475 | { |
| 3476 | int &v = eps[subset][epi][ch]; |
| 3477 | |
| 3478 | if (isSigned) |
| 3479 | { |
| 3480 | if (aPrec >= 16) |
| 3481 | { |
| 3482 | // Nothing |
| 3483 | } |
| 3484 | else |
| 3485 | { |
| 3486 | bool s = false; |
| 3487 | int comp = v; |
| 3488 | if (v < 0) |
| 3489 | { |
| 3490 | s = true; |
| 3491 | comp = -comp; |
| 3492 | } |
| 3493 | |
| 3494 | int unq = 0; |
| 3495 | if (comp == 0) |
| 3496 | unq = 0; |
| 3497 | else if (comp >= ((1 << (aPrec - 1)) - 1)) |
| 3498 | unq = 0x7fff; |
| 3499 | else |
| 3500 | unq = ((comp << 15) + 0x4000) >> (aPrec - 1); |
| 3501 | |
| 3502 | if (s) |
| 3503 | unq = -unq; |
| 3504 | |
| 3505 | v = unq; |
| 3506 | } |
| 3507 | } |
| 3508 | else |
| 3509 | { |
| 3510 | if (aPrec >= 15) |
| 3511 | { |
| 3512 | // Nothing |
| 3513 | } |
| 3514 | else if (v == 0) |
| 3515 | { |
| 3516 | // Nothing |
| 3517 | } |
| 3518 | else if (v == ((1 << aPrec) - 1)) |
| 3519 | v = 0xffff; |
| 3520 | else |
| 3521 | v = ((v << 16) + 0x8000) >> aPrec; |
| 3522 | } |
| 3523 | } |
| 3524 | } |
| 3525 | } |
| 3526 | |
| 3527 | const int *weights = BC7Data::g_weightTables[indexBits]; |
| 3528 | |
| 3529 | for (int px = 0; px < 16; px++) |
| 3530 | { |
| 3531 | int subset = 0; |
| 3532 | if (modeInfo.m_partitioned) |
| 3533 | subset = (BC7Data::g_partitionMap[partition] >> px) & 1; |
| 3534 | |
| 3535 | int w = weights[indexes[px]]; |
| 3536 | for (int ch = 0; ch < 3; ch++) |
| 3537 | { |
| 3538 | int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6; |
| 3539 | |
| 3540 | if (isSigned) |
| 3541 | { |
| 3542 | if (comp < 0) |
| 3543 | comp = -(((-comp) * 31) >> 5); |
| 3544 | else |
| 3545 | comp = (comp * 31) >> 5; |
| 3546 | |
| 3547 | int s = 0; |
| 3548 | if (comp < 0) |
| 3549 | { |
| 3550 | s = 0x8000; |
| 3551 | comp = -comp; |
| 3552 | } |
| 3553 | |
| 3554 | output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp); |
| 3555 | } |
| 3556 | else |
| 3557 | { |
| 3558 | comp = (comp * 31) >> 6; |
| 3559 | output.m_pixels[px][ch] = static_cast<uint16_t>(comp); |
| 3560 | } |
| 3561 | } |
| 3562 | output.m_pixels[px][3] = 0x3c00; // 1.0 |
| 3563 | } |
| 3564 | } |
| 3565 | |
| 3566 | void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality) |
| 3567 | { |
| 3568 | static const int kMaxQuality = 100; |
| 3569 | |
| 3570 | if (quality < 1) |
| 3571 | quality = 1; |
| 3572 | else if (quality > kMaxQuality) |
| 3573 | quality = kMaxQuality; |
| 3574 | |
| 3575 | const int numRGBModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality; |
| 3576 | const int numRGBAModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality; |
| 3577 | |
| 3578 | const uint16_t *prioLists[] = { cvtt::Tables::BC7Prio::g_bc7PrioCodesRGB, cvtt::Tables::BC7Prio::g_bc7PrioCodesRGBA }; |
| 3579 | const int prioListSizes[] = { numRGBModes, numRGBAModes }; |
| 3580 | |
| 3581 | BC7FineTuningParams ftParams; |
| 3582 | memset(&ftParams, 0, sizeof(ftParams)); |
| 3583 | |
| 3584 | for (int listIndex = 0; listIndex < 2; listIndex++) |
| 3585 | { |
| 3586 | int prioListSize = prioListSizes[listIndex]; |
| 3587 | const uint16_t *prioList = prioLists[listIndex]; |
| 3588 | |
| 3589 | for (int prioIndex = 0; prioIndex < prioListSize; prioIndex++) |
| 3590 | { |
| 3591 | const uint16_t packedMode = prioList[prioIndex]; |
| 3592 | |
| 3593 | uint8_t seedPoints = static_cast<uint8_t>(cvtt::Tables::BC7Prio::UnpackSeedPointCount(packedMode)); |
| 3594 | int mode = cvtt::Tables::BC7Prio::UnpackMode(packedMode); |
| 3595 | |
| 3596 | switch (mode) |
| 3597 | { |
| 3598 | case 0: |
| 3599 | ftParams.mode0SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
| 3600 | break; |
| 3601 | case 1: |
| 3602 | ftParams.mode1SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
| 3603 | break; |
| 3604 | case 2: |
| 3605 | ftParams.mode2SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
| 3606 | break; |
| 3607 | case 3: |
| 3608 | ftParams.mode3SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
| 3609 | break; |
| 3610 | case 4: |
| 3611 | ftParams.mode4SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)][cvtt::Tables::BC7Prio::UnpackIndexSelector(packedMode)] = seedPoints; |
| 3612 | break; |
| 3613 | case 5: |
| 3614 | ftParams.mode5SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)] = seedPoints; |
| 3615 | break; |
| 3616 | case 6: |
| 3617 | ftParams.mode6SP = seedPoints; |
| 3618 | break; |
| 3619 | case 7: |
| 3620 | ftParams.mode7SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; |
| 3621 | break; |
| 3622 | } |
| 3623 | } |
| 3624 | } |
| 3625 | |
| 3626 | ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, ftParams); |
| 3627 | } |
| 3628 | |
| 3629 | // Generates a BC7 encoding plan from fine-tuning parameters. |
| 3630 | bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams ¶ms) |
| 3631 | { |
| 3632 | memset(&encodingPlan, 0, sizeof(encodingPlan)); |
| 3633 | |
| 3634 | // Mode 0 |
| 3635 | for (int partition = 0; partition < 16; partition++) |
| 3636 | { |
| 3637 | uint8_t sp = params.mode0SP[partition]; |
| 3638 | if (sp == 0) |
| 3639 | continue; |
| 3640 | |
| 3641 | encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition; |
| 3642 | |
| 3643 | for (int subset = 0; subset < 3; subset++) |
| 3644 | { |
| 3645 | int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; |
| 3646 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
| 3647 | } |
| 3648 | } |
| 3649 | |
| 3650 | // Mode 1 |
| 3651 | for (int partition = 0; partition < 64; partition++) |
| 3652 | { |
| 3653 | uint8_t sp = params.mode1SP[partition]; |
| 3654 | if (sp == 0) |
| 3655 | continue; |
| 3656 | |
| 3657 | encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
| 3658 | |
| 3659 | for (int subset = 0; subset < 2; subset++) |
| 3660 | { |
| 3661 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
| 3662 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
| 3663 | } |
| 3664 | } |
| 3665 | |
| 3666 | // Mode 2 |
| 3667 | for (int partition = 0; partition < 64; partition++) |
| 3668 | { |
| 3669 | uint8_t sp = params.mode2SP[partition]; |
| 3670 | if (sp == 0) |
| 3671 | continue; |
| 3672 | |
| 3673 | encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
| 3674 | |
| 3675 | for (int subset = 0; subset < 3; subset++) |
| 3676 | { |
| 3677 | int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; |
| 3678 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
| 3679 | } |
| 3680 | } |
| 3681 | |
| 3682 | // Mode 3 |
| 3683 | for (int partition = 0; partition < 64; partition++) |
| 3684 | { |
| 3685 | uint8_t sp = params.mode3SP[partition]; |
| 3686 | if (sp == 0) |
| 3687 | continue; |
| 3688 | |
| 3689 | encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition; |
| 3690 | |
| 3691 | for (int subset = 0; subset < 2; subset++) |
| 3692 | { |
| 3693 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
| 3694 | encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); |
| 3695 | } |
| 3696 | } |
| 3697 | |
| 3698 | // Mode 4 |
| 3699 | for (int rotation = 0; rotation < 4; rotation++) |
| 3700 | { |
| 3701 | for (int indexMode = 0; indexMode < 2; indexMode++) |
| 3702 | encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode]; |
| 3703 | } |
| 3704 | |
| 3705 | // Mode 5 |
| 3706 | for (int rotation = 0; rotation < 4; rotation++) |
| 3707 | encodingPlan.mode5SP[rotation] = params.mode5SP[rotation]; |
| 3708 | |
| 3709 | // Mode 6 |
| 3710 | { |
| 3711 | uint8_t sp = params.mode6SP; |
| 3712 | if (sp != 0) |
| 3713 | { |
| 3714 | encodingPlan.mode6Enabled = true; |
| 3715 | |
| 3716 | int shape = cvtt::Internal::BC7Data::g_shapes1[0][0]; |
| 3717 | encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); |
| 3718 | } |
| 3719 | } |
| 3720 | |
| 3721 | // Mode 7 |
| 3722 | for (int partition = 0; partition < 64; partition++) |
| 3723 | { |
| 3724 | uint8_t sp = params.mode7SP[partition]; |
| 3725 | if (sp == 0) |
| 3726 | continue; |
| 3727 | |
| 3728 | encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition; |
| 3729 | |
| 3730 | for (int subset = 0; subset < 2; subset++) |
| 3731 | { |
| 3732 | int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; |
| 3733 | encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); |
| 3734 | } |
| 3735 | } |
| 3736 | |
| 3737 | for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++) |
| 3738 | { |
| 3739 | if (encodingPlan.seedPointsForShapeRGB[i] > 0) |
| 3740 | { |
| 3741 | encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i; |
| 3742 | encodingPlan.rgbNumShapesToEvaluate++; |
| 3743 | } |
| 3744 | } |
| 3745 | |
| 3746 | for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++) |
| 3747 | { |
| 3748 | if (encodingPlan.seedPointsForShapeRGBA[i] > 0) |
| 3749 | { |
| 3750 | encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i; |
| 3751 | encodingPlan.rgbaNumShapesToEvaluate++; |
| 3752 | } |
| 3753 | } |
| 3754 | |
| 3755 | encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled); |
| 3756 | |
| 3757 | return true; |
| 3758 | } |
| 3759 | |
| 3760 | #endif |
| 3761 | |