| 1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 4 | // you may not use this file except in compliance with the License. | 
|---|
| 5 | // You may obtain a copy of the License at | 
|---|
| 6 | // | 
|---|
| 7 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 8 | // | 
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 12 | // See the License for the specific language governing permissions and | 
|---|
| 13 | // limitations under the License. | 
|---|
| 14 |  | 
|---|
| 15 | #include "VertexRoutine.hpp" | 
|---|
| 16 |  | 
|---|
| 17 | #include "Constants.hpp" | 
|---|
| 18 | #include "SpirvShader.hpp" | 
|---|
| 19 | #include "Device/Vertex.hpp" | 
|---|
| 20 | #include "Device/Renderer.hpp" | 
|---|
| 21 | #include "Vulkan/VkDebug.hpp" | 
|---|
| 22 | #include "System/Half.hpp" | 
|---|
| 23 |  | 
|---|
| 24 | namespace sw | 
|---|
| 25 | { | 
|---|
| 26 | VertexRoutine::VertexRoutine( | 
|---|
| 27 | const VertexProcessor::State &state, | 
|---|
| 28 | vk::PipelineLayout const *pipelineLayout, | 
|---|
| 29 | SpirvShader const *spirvShader) | 
|---|
| 30 | : routine(pipelineLayout), | 
|---|
| 31 | state(state), | 
|---|
| 32 | spirvShader(spirvShader) | 
|---|
| 33 | { | 
|---|
| 34 | spirvShader->emitProlog(&routine); | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
| 37 | VertexRoutine::~VertexRoutine() | 
|---|
| 38 | { | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
| 41 | void VertexRoutine::generate() | 
|---|
| 42 | { | 
|---|
| 43 | Pointer<Byte> cache = task + OFFSET(VertexTask,vertexCache); | 
|---|
| 44 | Pointer<Byte> vertexCache = cache + OFFSET(VertexCache,vertex); | 
|---|
| 45 | Pointer<UInt> tagCache = Pointer<UInt>(cache + OFFSET(VertexCache,tag)); | 
|---|
| 46 |  | 
|---|
| 47 | UInt vertexCount = *Pointer<UInt>(task + OFFSET(VertexTask,vertexCount)); | 
|---|
| 48 |  | 
|---|
| 49 | constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); | 
|---|
| 50 |  | 
|---|
| 51 | // Check the cache one vertex index at a time. If a hit occurs, copy from the cache to the 'vertex' output buffer. | 
|---|
| 52 | // On a cache miss, process a SIMD width of consecutive indices from the input batch. They're written to the cache | 
|---|
| 53 | // in reverse order to guarantee that the first one doesn't get evicted and can be written out. | 
|---|
| 54 |  | 
|---|
| 55 | Do | 
|---|
| 56 | { | 
|---|
| 57 | UInt index = *batch; | 
|---|
| 58 | UInt cacheIndex = index & VertexCache::TAG_MASK; | 
|---|
| 59 |  | 
|---|
| 60 | If(tagCache[cacheIndex] != index) | 
|---|
| 61 | { | 
|---|
| 62 | readInput(batch); | 
|---|
| 63 | program(batch, vertexCount); | 
|---|
| 64 | computeClipFlags(); | 
|---|
| 65 |  | 
|---|
| 66 | writeCache(vertexCache, tagCache, batch); | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | Pointer<Byte> cacheEntry = vertexCache + cacheIndex * UInt((int)sizeof(Vertex)); | 
|---|
| 70 |  | 
|---|
| 71 | // For points, vertexCount is 1 per primitive, so duplicate vertex for all 3 vertices of the primitive | 
|---|
| 72 | for(int i = 0; i < (state.isPoint ? 3 : 1); i++) | 
|---|
| 73 | { | 
|---|
| 74 | writeVertex(vertex, cacheEntry); | 
|---|
| 75 | vertex += sizeof(Vertex); | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 | batch = Pointer<UInt>(Pointer<Byte>(batch) + sizeof(uint32_t)); | 
|---|
| 79 | vertexCount--; | 
|---|
| 80 | } | 
|---|
| 81 | Until(vertexCount == 0) | 
|---|
| 82 |  | 
|---|
| 83 | Return(); | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | void VertexRoutine::readInput(Pointer<UInt> &batch) | 
|---|
| 87 | { | 
|---|
| 88 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) | 
|---|
| 89 | { | 
|---|
| 90 | if(spirvShader->inputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 91 | spirvShader->inputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 92 | spirvShader->inputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 93 | spirvShader->inputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) | 
|---|
| 94 | { | 
|---|
| 95 | Pointer<Byte> input = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, input) + sizeof(void*) * (i / 4)); | 
|---|
| 96 | UInt stride = *Pointer<UInt>(data + OFFSET(DrawData, stride) + sizeof(uint32_t) * (i / 4)); | 
|---|
| 97 | Int baseVertex = *Pointer<Int>(data + OFFSET(DrawData, baseVertex)); | 
|---|
| 98 | UInt robustnessSize(0); | 
|---|
| 99 | if(state.robustBufferAccess) | 
|---|
| 100 | { | 
|---|
| 101 | robustnessSize = *Pointer<UInt>(data + OFFSET(DrawData, robustnessSize) + sizeof(uint32_t) * (i / 4)); | 
|---|
| 102 | } | 
|---|
| 103 |  | 
|---|
| 104 | auto value = readStream(input, stride, state.input[i / 4], batch, state.robustBufferAccess, robustnessSize, baseVertex); | 
|---|
| 105 | routine.inputs[i + 0] = value.x; | 
|---|
| 106 | routine.inputs[i + 1] = value.y; | 
|---|
| 107 | routine.inputs[i + 2] = value.z; | 
|---|
| 108 | routine.inputs[i + 3] = value.w; | 
|---|
| 109 | } | 
|---|
| 110 | } | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | void VertexRoutine::computeClipFlags() | 
|---|
| 114 | { | 
|---|
| 115 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); | 
|---|
| 116 | assert(it != spirvShader->outputBuiltins.end()); | 
|---|
| 117 | assert(it->second.SizeInComponents == 4); | 
|---|
| 118 | auto &pos = routine.getVariable(it->second.Id); | 
|---|
| 119 | auto posX = pos[it->second.FirstComponent + 0]; | 
|---|
| 120 | auto posY = pos[it->second.FirstComponent + 1]; | 
|---|
| 121 | auto posZ = pos[it->second.FirstComponent + 2]; | 
|---|
| 122 | auto posW = pos[it->second.FirstComponent + 3]; | 
|---|
| 123 |  | 
|---|
| 124 | Int4 maxX = CmpLT(posW, posX); | 
|---|
| 125 | Int4 maxY = CmpLT(posW, posY); | 
|---|
| 126 | Int4 maxZ = CmpLT(posW, posZ); | 
|---|
| 127 | Int4 minX = CmpNLE(-posW, posX); | 
|---|
| 128 | Int4 minY = CmpNLE(-posW, posY); | 
|---|
| 129 | Int4 minZ = CmpNLE(Float4(0.0f), posZ); | 
|---|
| 130 |  | 
|---|
| 131 | clipFlags =  Pointer<Int>(constants + OFFSET(Constants,maxX))[SignMask(maxX)]; | 
|---|
| 132 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxY))[SignMask(maxY)]; | 
|---|
| 133 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,maxZ))[SignMask(maxZ)]; | 
|---|
| 134 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minX))[SignMask(minX)]; | 
|---|
| 135 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minY))[SignMask(minY)]; | 
|---|
| 136 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,minZ))[SignMask(minZ)]; | 
|---|
| 137 |  | 
|---|
| 138 | Int4 finiteX = CmpLE(Abs(posX), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); | 
|---|
| 139 | Int4 finiteY = CmpLE(Abs(posY), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); | 
|---|
| 140 | Int4 finiteZ = CmpLE(Abs(posZ), *Pointer<Float4>(constants + OFFSET(Constants,maxPos))); | 
|---|
| 141 |  | 
|---|
| 142 | Int4 finiteXYZ = finiteX & finiteY & finiteZ; | 
|---|
| 143 | clipFlags |= Pointer<Int>(constants + OFFSET(Constants,fini))[SignMask(finiteXYZ)]; | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | Vector4f VertexRoutine::readStream(Pointer<Byte> &buffer, UInt &stride, const Stream &stream, Pointer<UInt> &batch, | 
|---|
| 147 | bool robustBufferAccess, UInt & robustnessSize, Int baseVertex) | 
|---|
| 148 | { | 
|---|
| 149 | Vector4f v; | 
|---|
| 150 | // Because of the following rule in the Vulkan spec, we do not care if a very large negative | 
|---|
| 151 | // baseVertex would overflow all the way back into a valid region of the index buffer: | 
|---|
| 152 | // "Out-of-bounds buffer loads will return any of the following values : | 
|---|
| 153 | //  - Values from anywhere within the memory range(s) bound to the buffer (possibly including | 
|---|
| 154 | //    bytes of memory past the end of the buffer, up to the end of the bound range)." | 
|---|
| 155 | UInt4 offsets = (*Pointer<UInt4>(As<Pointer<UInt4>>(batch)) + As<UInt4>(Int4(baseVertex))) * UInt4(stride); | 
|---|
| 156 |  | 
|---|
| 157 | Pointer<Byte> source0 = buffer + offsets.x; | 
|---|
| 158 | Pointer<Byte> source1 = buffer + offsets.y; | 
|---|
| 159 | Pointer<Byte> source2 = buffer + offsets.z; | 
|---|
| 160 | Pointer<Byte> source3 = buffer + offsets.w; | 
|---|
| 161 |  | 
|---|
| 162 | UInt4 zero(0); | 
|---|
| 163 | if (robustBufferAccess) | 
|---|
| 164 | { | 
|---|
| 165 | // TODO(b/141124876): Optimize for wide-vector gather operations. | 
|---|
| 166 | UInt4 limits = offsets + UInt4(stream.bytesPerAttrib()); | 
|---|
| 167 | Pointer<Byte> zeroSource = As<Pointer<Byte>>(&zero); | 
|---|
| 168 | source0 = IfThenElse(limits.x <= robustnessSize, source0, zeroSource); | 
|---|
| 169 | source1 = IfThenElse(limits.y <= robustnessSize, source1, zeroSource); | 
|---|
| 170 | source2 = IfThenElse(limits.z <= robustnessSize, source2, zeroSource); | 
|---|
| 171 | source3 = IfThenElse(limits.w <= robustnessSize, source3, zeroSource); | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|
| 174 | bool isNativeFloatAttrib = (stream.attribType == SpirvShader::ATTRIBTYPE_FLOAT) || stream.normalized; | 
|---|
| 175 |  | 
|---|
| 176 | switch(stream.type) | 
|---|
| 177 | { | 
|---|
| 178 | case STREAMTYPE_FLOAT: | 
|---|
| 179 | { | 
|---|
| 180 | if(stream.count == 0) | 
|---|
| 181 | { | 
|---|
| 182 | // Null stream, all default components | 
|---|
| 183 | } | 
|---|
| 184 | else | 
|---|
| 185 | { | 
|---|
| 186 | if(stream.count == 1) | 
|---|
| 187 | { | 
|---|
| 188 | v.x.x = *Pointer<Float>(source0); | 
|---|
| 189 | v.x.y = *Pointer<Float>(source1); | 
|---|
| 190 | v.x.z = *Pointer<Float>(source2); | 
|---|
| 191 | v.x.w = *Pointer<Float>(source3); | 
|---|
| 192 | } | 
|---|
| 193 | else | 
|---|
| 194 | { | 
|---|
| 195 | v.x = *Pointer<Float4>(source0); | 
|---|
| 196 | v.y = *Pointer<Float4>(source1); | 
|---|
| 197 | v.z = *Pointer<Float4>(source2); | 
|---|
| 198 | v.w = *Pointer<Float4>(source3); | 
|---|
| 199 |  | 
|---|
| 200 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | switch(stream.attribType) | 
|---|
| 204 | { | 
|---|
| 205 | case SpirvShader::ATTRIBTYPE_INT: | 
|---|
| 206 | if(stream.count >= 1) v.x = As<Float4>(Int4(v.x)); | 
|---|
| 207 | if(stream.count >= 2) v.x = As<Float4>(Int4(v.y)); | 
|---|
| 208 | if(stream.count >= 3) v.x = As<Float4>(Int4(v.z)); | 
|---|
| 209 | if(stream.count >= 4) v.x = As<Float4>(Int4(v.w)); | 
|---|
| 210 | break; | 
|---|
| 211 | case SpirvShader::ATTRIBTYPE_UINT: | 
|---|
| 212 | if(stream.count >= 1) v.x = As<Float4>(UInt4(v.x)); | 
|---|
| 213 | if(stream.count >= 2) v.x = As<Float4>(UInt4(v.y)); | 
|---|
| 214 | if(stream.count >= 3) v.x = As<Float4>(UInt4(v.z)); | 
|---|
| 215 | if(stream.count >= 4) v.x = As<Float4>(UInt4(v.w)); | 
|---|
| 216 | break; | 
|---|
| 217 | default: | 
|---|
| 218 | break; | 
|---|
| 219 | } | 
|---|
| 220 | } | 
|---|
| 221 | } | 
|---|
| 222 | break; | 
|---|
| 223 | case STREAMTYPE_BYTE: | 
|---|
| 224 | if(isNativeFloatAttrib) // Stream: UByte, Shader attrib: Float | 
|---|
| 225 | { | 
|---|
| 226 | v.x = Float4(*Pointer<Byte4>(source0)); | 
|---|
| 227 | v.y = Float4(*Pointer<Byte4>(source1)); | 
|---|
| 228 | v.z = Float4(*Pointer<Byte4>(source2)); | 
|---|
| 229 | v.w = Float4(*Pointer<Byte4>(source3)); | 
|---|
| 230 |  | 
|---|
| 231 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 232 |  | 
|---|
| 233 | if(stream.normalized) | 
|---|
| 234 | { | 
|---|
| 235 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 236 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 237 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 238 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 239 | } | 
|---|
| 240 | } | 
|---|
| 241 | else // Stream: UByte, Shader attrib: Int / UInt | 
|---|
| 242 | { | 
|---|
| 243 | v.x = As<Float4>(Int4(*Pointer<Byte4>(source0))); | 
|---|
| 244 | v.y = As<Float4>(Int4(*Pointer<Byte4>(source1))); | 
|---|
| 245 | v.z = As<Float4>(Int4(*Pointer<Byte4>(source2))); | 
|---|
| 246 | v.w = As<Float4>(Int4(*Pointer<Byte4>(source3))); | 
|---|
| 247 |  | 
|---|
| 248 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 249 | } | 
|---|
| 250 | break; | 
|---|
| 251 | case STREAMTYPE_SBYTE: | 
|---|
| 252 | if(isNativeFloatAttrib) // Stream: SByte, Shader attrib: Float | 
|---|
| 253 | { | 
|---|
| 254 | v.x = Float4(*Pointer<SByte4>(source0)); | 
|---|
| 255 | v.y = Float4(*Pointer<SByte4>(source1)); | 
|---|
| 256 | v.z = Float4(*Pointer<SByte4>(source2)); | 
|---|
| 257 | v.w = Float4(*Pointer<SByte4>(source3)); | 
|---|
| 258 |  | 
|---|
| 259 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 260 |  | 
|---|
| 261 | if(stream.normalized) | 
|---|
| 262 | { | 
|---|
| 263 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); | 
|---|
| 264 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); | 
|---|
| 265 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); | 
|---|
| 266 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleSByte)); | 
|---|
| 267 | } | 
|---|
| 268 | } | 
|---|
| 269 | else // Stream: SByte, Shader attrib: Int / UInt | 
|---|
| 270 | { | 
|---|
| 271 | v.x = As<Float4>(Int4(*Pointer<SByte4>(source0))); | 
|---|
| 272 | v.y = As<Float4>(Int4(*Pointer<SByte4>(source1))); | 
|---|
| 273 | v.z = As<Float4>(Int4(*Pointer<SByte4>(source2))); | 
|---|
| 274 | v.w = As<Float4>(Int4(*Pointer<SByte4>(source3))); | 
|---|
| 275 |  | 
|---|
| 276 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 277 | } | 
|---|
| 278 | break; | 
|---|
| 279 | case STREAMTYPE_COLOR: | 
|---|
| 280 | { | 
|---|
| 281 | v.x = Float4(*Pointer<Byte4>(source0)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 282 | v.y = Float4(*Pointer<Byte4>(source1)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 283 | v.z = Float4(*Pointer<Byte4>(source2)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 284 | v.w = Float4(*Pointer<Byte4>(source3)) * *Pointer<Float4>(constants + OFFSET(Constants,unscaleByte)); | 
|---|
| 285 |  | 
|---|
| 286 | transpose4x4(v.x, v.y, v.z, v.w); | 
|---|
| 287 |  | 
|---|
| 288 | // Swap red and blue | 
|---|
| 289 | Float4 t = v.x; | 
|---|
| 290 | v.x = v.z; | 
|---|
| 291 | v.z = t; | 
|---|
| 292 | } | 
|---|
| 293 | break; | 
|---|
| 294 | case STREAMTYPE_SHORT: | 
|---|
| 295 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float | 
|---|
| 296 | { | 
|---|
| 297 | v.x = Float4(*Pointer<Short4>(source0)); | 
|---|
| 298 | v.y = Float4(*Pointer<Short4>(source1)); | 
|---|
| 299 | v.z = Float4(*Pointer<Short4>(source2)); | 
|---|
| 300 | v.w = Float4(*Pointer<Short4>(source3)); | 
|---|
| 301 |  | 
|---|
| 302 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 303 |  | 
|---|
| 304 | if(stream.normalized) | 
|---|
| 305 | { | 
|---|
| 306 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); | 
|---|
| 307 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); | 
|---|
| 308 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); | 
|---|
| 309 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleShort)); | 
|---|
| 310 | } | 
|---|
| 311 | } | 
|---|
| 312 | else // Stream: Short, Shader attrib: Int/UInt, no type conversion | 
|---|
| 313 | { | 
|---|
| 314 | v.x = As<Float4>(Int4(*Pointer<Short4>(source0))); | 
|---|
| 315 | v.y = As<Float4>(Int4(*Pointer<Short4>(source1))); | 
|---|
| 316 | v.z = As<Float4>(Int4(*Pointer<Short4>(source2))); | 
|---|
| 317 | v.w = As<Float4>(Int4(*Pointer<Short4>(source3))); | 
|---|
| 318 |  | 
|---|
| 319 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 320 | } | 
|---|
| 321 | break; | 
|---|
| 322 | case STREAMTYPE_USHORT: | 
|---|
| 323 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float | 
|---|
| 324 | { | 
|---|
| 325 | v.x = Float4(*Pointer<UShort4>(source0)); | 
|---|
| 326 | v.y = Float4(*Pointer<UShort4>(source1)); | 
|---|
| 327 | v.z = Float4(*Pointer<UShort4>(source2)); | 
|---|
| 328 | v.w = Float4(*Pointer<UShort4>(source3)); | 
|---|
| 329 |  | 
|---|
| 330 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 331 |  | 
|---|
| 332 | if(stream.normalized) | 
|---|
| 333 | { | 
|---|
| 334 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); | 
|---|
| 335 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); | 
|---|
| 336 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); | 
|---|
| 337 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants,unscaleUShort)); | 
|---|
| 338 | } | 
|---|
| 339 | } | 
|---|
| 340 | else // Stream: UShort, Shader attrib: Int/UInt, no type conversion | 
|---|
| 341 | { | 
|---|
| 342 | v.x = As<Float4>(Int4(*Pointer<UShort4>(source0))); | 
|---|
| 343 | v.y = As<Float4>(Int4(*Pointer<UShort4>(source1))); | 
|---|
| 344 | v.z = As<Float4>(Int4(*Pointer<UShort4>(source2))); | 
|---|
| 345 | v.w = As<Float4>(Int4(*Pointer<UShort4>(source3))); | 
|---|
| 346 |  | 
|---|
| 347 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 348 | } | 
|---|
| 349 | break; | 
|---|
| 350 | case STREAMTYPE_INT: | 
|---|
| 351 | if(isNativeFloatAttrib) // Stream: Int, Shader attrib: Float | 
|---|
| 352 | { | 
|---|
| 353 | v.x = Float4(*Pointer<Int4>(source0)); | 
|---|
| 354 | v.y = Float4(*Pointer<Int4>(source1)); | 
|---|
| 355 | v.z = Float4(*Pointer<Int4>(source2)); | 
|---|
| 356 | v.w = Float4(*Pointer<Int4>(source3)); | 
|---|
| 357 |  | 
|---|
| 358 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 359 |  | 
|---|
| 360 | if(stream.normalized) | 
|---|
| 361 | { | 
|---|
| 362 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); | 
|---|
| 363 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); | 
|---|
| 364 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); | 
|---|
| 365 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleInt)); | 
|---|
| 366 | } | 
|---|
| 367 | } | 
|---|
| 368 | else // Stream: Int, Shader attrib: Int/UInt, no type conversion | 
|---|
| 369 | { | 
|---|
| 370 | v.x = *Pointer<Float4>(source0); | 
|---|
| 371 | v.y = *Pointer<Float4>(source1); | 
|---|
| 372 | v.z = *Pointer<Float4>(source2); | 
|---|
| 373 | v.w = *Pointer<Float4>(source3); | 
|---|
| 374 |  | 
|---|
| 375 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 376 | } | 
|---|
| 377 | break; | 
|---|
| 378 | case STREAMTYPE_UINT: | 
|---|
| 379 | if(isNativeFloatAttrib) // Stream: UInt, Shader attrib: Float | 
|---|
| 380 | { | 
|---|
| 381 | v.x = Float4(*Pointer<UInt4>(source0)); | 
|---|
| 382 | v.y = Float4(*Pointer<UInt4>(source1)); | 
|---|
| 383 | v.z = Float4(*Pointer<UInt4>(source2)); | 
|---|
| 384 | v.w = Float4(*Pointer<UInt4>(source3)); | 
|---|
| 385 |  | 
|---|
| 386 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 387 |  | 
|---|
| 388 | if(stream.normalized) | 
|---|
| 389 | { | 
|---|
| 390 | if(stream.count >= 1) v.x *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); | 
|---|
| 391 | if(stream.count >= 2) v.y *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); | 
|---|
| 392 | if(stream.count >= 3) v.z *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); | 
|---|
| 393 | if(stream.count >= 4) v.w *= *Pointer<Float4>(constants + OFFSET(Constants, unscaleUInt)); | 
|---|
| 394 | } | 
|---|
| 395 | } | 
|---|
| 396 | else // Stream: UInt, Shader attrib: Int/UInt, no type conversion | 
|---|
| 397 | { | 
|---|
| 398 | v.x = *Pointer<Float4>(source0); | 
|---|
| 399 | v.y = *Pointer<Float4>(source1); | 
|---|
| 400 | v.z = *Pointer<Float4>(source2); | 
|---|
| 401 | v.w = *Pointer<Float4>(source3); | 
|---|
| 402 |  | 
|---|
| 403 | transpose4xN(v.x, v.y, v.z, v.w, stream.count); | 
|---|
| 404 | } | 
|---|
| 405 | break; | 
|---|
| 406 | case STREAMTYPE_HALF: | 
|---|
| 407 | { | 
|---|
| 408 | if(stream.count >= 1) | 
|---|
| 409 | { | 
|---|
| 410 | UShort x0 = *Pointer<UShort>(source0 + 0); | 
|---|
| 411 | UShort x1 = *Pointer<UShort>(source1 + 0); | 
|---|
| 412 | UShort x2 = *Pointer<UShort>(source2 + 0); | 
|---|
| 413 | UShort x3 = *Pointer<UShort>(source3 + 0); | 
|---|
| 414 |  | 
|---|
| 415 | v.x.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x0) * 4); | 
|---|
| 416 | v.x.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x1) * 4); | 
|---|
| 417 | v.x.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x2) * 4); | 
|---|
| 418 | v.x.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(x3) * 4); | 
|---|
| 419 | } | 
|---|
| 420 |  | 
|---|
| 421 | if(stream.count >= 2) | 
|---|
| 422 | { | 
|---|
| 423 | UShort y0 = *Pointer<UShort>(source0 + 2); | 
|---|
| 424 | UShort y1 = *Pointer<UShort>(source1 + 2); | 
|---|
| 425 | UShort y2 = *Pointer<UShort>(source2 + 2); | 
|---|
| 426 | UShort y3 = *Pointer<UShort>(source3 + 2); | 
|---|
| 427 |  | 
|---|
| 428 | v.y.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y0) * 4); | 
|---|
| 429 | v.y.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y1) * 4); | 
|---|
| 430 | v.y.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y2) * 4); | 
|---|
| 431 | v.y.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(y3) * 4); | 
|---|
| 432 | } | 
|---|
| 433 |  | 
|---|
| 434 | if(stream.count >= 3) | 
|---|
| 435 | { | 
|---|
| 436 | UShort z0 = *Pointer<UShort>(source0 + 4); | 
|---|
| 437 | UShort z1 = *Pointer<UShort>(source1 + 4); | 
|---|
| 438 | UShort z2 = *Pointer<UShort>(source2 + 4); | 
|---|
| 439 | UShort z3 = *Pointer<UShort>(source3 + 4); | 
|---|
| 440 |  | 
|---|
| 441 | v.z.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z0) * 4); | 
|---|
| 442 | v.z.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z1) * 4); | 
|---|
| 443 | v.z.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z2) * 4); | 
|---|
| 444 | v.z.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(z3) * 4); | 
|---|
| 445 | } | 
|---|
| 446 |  | 
|---|
| 447 | if(stream.count >= 4) | 
|---|
| 448 | { | 
|---|
| 449 | UShort w0 = *Pointer<UShort>(source0 + 6); | 
|---|
| 450 | UShort w1 = *Pointer<UShort>(source1 + 6); | 
|---|
| 451 | UShort w2 = *Pointer<UShort>(source2 + 6); | 
|---|
| 452 | UShort w3 = *Pointer<UShort>(source3 + 6); | 
|---|
| 453 |  | 
|---|
| 454 | v.w.x = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w0) * 4); | 
|---|
| 455 | v.w.y = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w1) * 4); | 
|---|
| 456 | v.w.z = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w2) * 4); | 
|---|
| 457 | v.w.w = *Pointer<Float>(constants + OFFSET(Constants,half2float) + Int(w3) * 4); | 
|---|
| 458 | } | 
|---|
| 459 | } | 
|---|
| 460 | break; | 
|---|
| 461 | case STREAMTYPE_2_10_10_10_INT: | 
|---|
| 462 | { | 
|---|
| 463 | Int4 src; | 
|---|
| 464 | src = Insert(src, *Pointer<Int>(source0), 0); | 
|---|
| 465 | src = Insert(src, *Pointer<Int>(source1), 1); | 
|---|
| 466 | src = Insert(src, *Pointer<Int>(source2), 2); | 
|---|
| 467 | src = Insert(src, *Pointer<Int>(source3), 3); | 
|---|
| 468 |  | 
|---|
| 469 | v.x = Float4((src << 22) >> 22); | 
|---|
| 470 | v.y = Float4((src << 12) >> 22); | 
|---|
| 471 | v.z = Float4((src << 02) >> 22); | 
|---|
| 472 | v.w = Float4(src >> 30); | 
|---|
| 473 |  | 
|---|
| 474 | if(stream.normalized) | 
|---|
| 475 | { | 
|---|
| 476 | v.x = Max(v.x * Float4(1.0f / 0x1FF), Float4(-1.0f)); | 
|---|
| 477 | v.y = Max(v.y * Float4(1.0f / 0x1FF), Float4(-1.0f)); | 
|---|
| 478 | v.z = Max(v.z * Float4(1.0f / 0x1FF), Float4(-1.0f)); | 
|---|
| 479 | v.w = Max(v.w, Float4(-1.0f)); | 
|---|
| 480 | } | 
|---|
| 481 | } | 
|---|
| 482 | break; | 
|---|
| 483 | case STREAMTYPE_2_10_10_10_UINT: | 
|---|
| 484 | { | 
|---|
| 485 | Int4 src; | 
|---|
| 486 | src = Insert(src, *Pointer<Int>(source0), 0); | 
|---|
| 487 | src = Insert(src, *Pointer<Int>(source1), 1); | 
|---|
| 488 | src = Insert(src, *Pointer<Int>(source2), 2); | 
|---|
| 489 | src = Insert(src, *Pointer<Int>(source3), 3); | 
|---|
| 490 |  | 
|---|
| 491 | v.x = Float4(src & Int4(0x3FF)); | 
|---|
| 492 | v.y = Float4((src >> 10) & Int4(0x3FF)); | 
|---|
| 493 | v.z = Float4((src >> 20) & Int4(0x3FF)); | 
|---|
| 494 | v.w = Float4((src >> 30) & Int4(0x3)); | 
|---|
| 495 |  | 
|---|
| 496 | if(stream.normalized) | 
|---|
| 497 | { | 
|---|
| 498 | v.x *= Float4(1.0f / 0x3FF); | 
|---|
| 499 | v.y *= Float4(1.0f / 0x3FF); | 
|---|
| 500 | v.z *= Float4(1.0f / 0x3FF); | 
|---|
| 501 | v.w *= Float4(1.0f / 0x3); | 
|---|
| 502 | } | 
|---|
| 503 | } | 
|---|
| 504 | break; | 
|---|
| 505 | default: | 
|---|
| 506 | UNSUPPORTED( "stream.type %d", int(stream.type)); | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|
| 509 | if(stream.count < 1) v.x = Float4(0.0f); | 
|---|
| 510 | if(stream.count < 2) v.y = Float4(0.0f); | 
|---|
| 511 | if(stream.count < 3) v.z = Float4(0.0f); | 
|---|
| 512 | if(stream.count < 4) v.w = isNativeFloatAttrib ? As<Float4>(Float4(1.0f)) : As<Float4>(Int4(1)); | 
|---|
| 513 |  | 
|---|
| 514 | return v; | 
|---|
| 515 | } | 
|---|
| 516 |  | 
|---|
| 517 | void VertexRoutine::writeCache(Pointer<Byte> &vertexCache, Pointer<UInt> &tagCache, Pointer<UInt> &batch) | 
|---|
| 518 | { | 
|---|
| 519 | UInt index0 = batch[0]; | 
|---|
| 520 | UInt index1 = batch[1]; | 
|---|
| 521 | UInt index2 = batch[2]; | 
|---|
| 522 | UInt index3 = batch[3]; | 
|---|
| 523 |  | 
|---|
| 524 | UInt cacheIndex0 = index0 & VertexCache::TAG_MASK; | 
|---|
| 525 | UInt cacheIndex1 = index1 & VertexCache::TAG_MASK; | 
|---|
| 526 | UInt cacheIndex2 = index2 & VertexCache::TAG_MASK; | 
|---|
| 527 | UInt cacheIndex3 = index3 & VertexCache::TAG_MASK; | 
|---|
| 528 |  | 
|---|
| 529 | // We processed a SIMD group of vertices, with the first one being the one that missed the cache tag check. | 
|---|
| 530 | // Write them out in reverse order here and below to ensure the first one is now guaranteed to be in the cache. | 
|---|
| 531 | tagCache[cacheIndex3] = index3; | 
|---|
| 532 | tagCache[cacheIndex2] = index2; | 
|---|
| 533 | tagCache[cacheIndex1] = index1; | 
|---|
| 534 | tagCache[cacheIndex0] = index0; | 
|---|
| 535 |  | 
|---|
| 536 | auto it = spirvShader->outputBuiltins.find(spv::BuiltInPosition); | 
|---|
| 537 | assert(it != spirvShader->outputBuiltins.end()); | 
|---|
| 538 | assert(it->second.SizeInComponents == 4); | 
|---|
| 539 | auto &position = routine.getVariable(it->second.Id); | 
|---|
| 540 |  | 
|---|
| 541 | Vector4f pos; | 
|---|
| 542 | pos.x = position[it->second.FirstComponent + 0]; | 
|---|
| 543 | pos.y = position[it->second.FirstComponent + 1]; | 
|---|
| 544 | pos.z = position[it->second.FirstComponent + 2]; | 
|---|
| 545 | pos.w = position[it->second.FirstComponent + 3]; | 
|---|
| 546 |  | 
|---|
| 547 | // Projection and viewport transform. | 
|---|
| 548 | Float4 w = As<Float4>(As<Int4>(pos.w) | (As<Int4>(CmpEQ(pos.w, Float4(0.0f))) & As<Int4>(Float4(1.0f)))); | 
|---|
| 549 | Float4 rhw = Float4(1.0f) / w; | 
|---|
| 550 |  | 
|---|
| 551 | Vector4f proj; | 
|---|
| 552 | proj.x = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,X0xF)) + pos.x * rhw * *Pointer<Float4>(data + OFFSET(DrawData,WxF)))); | 
|---|
| 553 | proj.y = As<Float4>(RoundInt(*Pointer<Float4>(data + OFFSET(DrawData,Y0xF)) + pos.y * rhw * *Pointer<Float4>(data + OFFSET(DrawData,HxF)))); | 
|---|
| 554 | proj.z = pos.z * rhw; | 
|---|
| 555 | proj.w = rhw; | 
|---|
| 556 |  | 
|---|
| 557 | transpose4x4(pos.x, pos.y, pos.z, pos.w); | 
|---|
| 558 |  | 
|---|
| 559 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,position), 16) = pos.w; | 
|---|
| 560 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,position), 16) = pos.z; | 
|---|
| 561 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,position), 16) = pos.y; | 
|---|
| 562 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,position), 16) = pos.x; | 
|---|
| 563 |  | 
|---|
| 564 | it = spirvShader->outputBuiltins.find(spv::BuiltInPointSize); | 
|---|
| 565 | if(it != spirvShader->outputBuiltins.end()) | 
|---|
| 566 | { | 
|---|
| 567 | assert(it->second.SizeInComponents == 1); | 
|---|
| 568 | auto psize = routine.getVariable(it->second.Id)[it->second.FirstComponent]; | 
|---|
| 569 |  | 
|---|
| 570 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,pointSize)) = Extract(psize, 3); | 
|---|
| 571 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,pointSize)) = Extract(psize, 2); | 
|---|
| 572 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,pointSize)) = Extract(psize, 1); | 
|---|
| 573 | *Pointer<Float>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,pointSize)) = Extract(psize, 0); | 
|---|
| 574 | } | 
|---|
| 575 |  | 
|---|
| 576 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 24) & 0x0000000FF; | 
|---|
| 577 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 16) & 0x0000000FF; | 
|---|
| 578 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 8)  & 0x0000000FF; | 
|---|
| 579 | *Pointer<Int>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,clipFlags)) = (clipFlags >> 0)  & 0x0000000FF; | 
|---|
| 580 |  | 
|---|
| 581 | transpose4x4(proj.x, proj.y, proj.z, proj.w); | 
|---|
| 582 |  | 
|---|
| 583 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,projected), 16) = proj.w; | 
|---|
| 584 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,projected), 16) = proj.z; | 
|---|
| 585 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,projected), 16) = proj.y; | 
|---|
| 586 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,projected), 16) = proj.x; | 
|---|
| 587 |  | 
|---|
| 588 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i += 4) | 
|---|
| 589 | { | 
|---|
| 590 | if(spirvShader->outputs[i + 0].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 591 | spirvShader->outputs[i + 1].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 592 | spirvShader->outputs[i + 2].Type != SpirvShader::ATTRIBTYPE_UNUSED || | 
|---|
| 593 | spirvShader->outputs[i + 3].Type != SpirvShader::ATTRIBTYPE_UNUSED) | 
|---|
| 594 | { | 
|---|
| 595 | Vector4f v; | 
|---|
| 596 | v.x = routine.outputs[i + 0]; | 
|---|
| 597 | v.y = routine.outputs[i + 1]; | 
|---|
| 598 | v.z = routine.outputs[i + 2]; | 
|---|
| 599 | v.w = routine.outputs[i + 3]; | 
|---|
| 600 |  | 
|---|
| 601 | transpose4x4(v.x, v.y, v.z, v.w); | 
|---|
| 602 |  | 
|---|
| 603 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex3 + OFFSET(Vertex,v[i]), 16) = v.w; | 
|---|
| 604 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex2 + OFFSET(Vertex,v[i]), 16) = v.z; | 
|---|
| 605 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex1 + OFFSET(Vertex,v[i]), 16) = v.y; | 
|---|
| 606 | *Pointer<Float4>(vertexCache + sizeof(Vertex) * cacheIndex0 + OFFSET(Vertex,v[i]), 16) = v.x; | 
|---|
| 607 | } | 
|---|
| 608 | } | 
|---|
| 609 | } | 
|---|
| 610 |  | 
|---|
| 611 | void VertexRoutine::writeVertex(const Pointer<Byte> &vertex, Pointer<Byte> &cacheEntry) | 
|---|
| 612 | { | 
|---|
| 613 | *Pointer<Int4>(vertex + OFFSET(Vertex,position)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,position)); | 
|---|
| 614 | *Pointer<Int>(vertex + OFFSET(Vertex,pointSize)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,pointSize)); | 
|---|
| 615 |  | 
|---|
| 616 | *Pointer<Int>(vertex + OFFSET(Vertex,clipFlags)) = *Pointer<Int>(cacheEntry + OFFSET(Vertex,clipFlags)); | 
|---|
| 617 | *Pointer<Int4>(vertex + OFFSET(Vertex,projected)) = *Pointer<Int4>(cacheEntry + OFFSET(Vertex,projected)); | 
|---|
| 618 |  | 
|---|
| 619 | for(int i = 0; i < MAX_INTERFACE_COMPONENTS; i++) | 
|---|
| 620 | { | 
|---|
| 621 | if(spirvShader->outputs[i].Type != SpirvShader::ATTRIBTYPE_UNUSED) | 
|---|
| 622 | { | 
|---|
| 623 | *Pointer<Int>(vertex + OFFSET(Vertex, v[i]), 4) = *Pointer<Int>(cacheEntry + OFFSET(Vertex, v[i]), 4); | 
|---|
| 624 | } | 
|---|
| 625 | } | 
|---|
| 626 | } | 
|---|
| 627 | } | 
|---|
| 628 |  | 
|---|