1 | // |
2 | // Copyright (C) 2018 Google, Inc. |
3 | // |
4 | // All rights reserved. |
5 | // |
6 | // Redistribution and use in source and binary forms, with or without |
7 | // modification, are permitted provided that the following conditions |
8 | // are met: |
9 | // |
10 | // Redistributions of source code must retain the above copyright |
11 | // notice, this list of conditions and the following disclaimer. |
12 | // |
13 | // Redistributions in binary form must reproduce the above |
14 | // copyright notice, this list of conditions and the following |
15 | // disclaimer in the documentation and/or other materials provided |
16 | // with the distribution. |
17 | // |
18 | // Neither the name of 3Dlabs Inc. Ltd. nor the names of its |
19 | // contributors may be used to endorse or promote products derived |
20 | // from this software without specific prior written permission. |
21 | // |
22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
25 | // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
26 | // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
27 | // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
28 | // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
29 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
30 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
32 | // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | // POSSIBILITY OF SUCH DAMAGE. |
34 | |
35 | // |
36 | // Post-processing for SPIR-V IR, in internal form, not standard binary form. |
37 | // |
38 | |
39 | #include <cassert> |
40 | #include <cstdlib> |
41 | |
42 | #include <unordered_map> |
43 | #include <unordered_set> |
44 | #include <algorithm> |
45 | |
46 | #include "SpvBuilder.h" |
47 | #include "spirv.hpp" |
48 | |
49 | namespace spv { |
50 | #include "GLSL.std.450.h" |
51 | #include "GLSL.ext.KHR.h" |
52 | #include "GLSL.ext.EXT.h" |
53 | #include "GLSL.ext.AMD.h" |
54 | #include "GLSL.ext.NV.h" |
55 | #include "GLSL.ext.ARM.h" |
56 | } |
57 | |
58 | namespace spv { |
59 | |
60 | #ifndef GLSLANG_WEB |
61 | // Hook to visit each operand type and result type of an instruction. |
62 | // Will be called multiple times for one instruction, once for each typed |
63 | // operand and the result. |
64 | void Builder::postProcessType(const Instruction& inst, Id typeId) |
65 | { |
66 | // Characterize the type being questioned |
67 | Id basicTypeOp = getMostBasicTypeClass(typeId); |
68 | int width = 0; |
69 | if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt) |
70 | width = getScalarTypeWidth(typeId); |
71 | |
72 | // Do opcode-specific checks |
73 | switch (inst.getOpCode()) { |
74 | case OpLoad: |
75 | case OpStore: |
76 | if (basicTypeOp == OpTypeStruct) { |
77 | if (containsType(typeId, OpTypeInt, 8)) |
78 | addCapability(CapabilityInt8); |
79 | if (containsType(typeId, OpTypeInt, 16)) |
80 | addCapability(CapabilityInt16); |
81 | if (containsType(typeId, OpTypeFloat, 16)) |
82 | addCapability(CapabilityFloat16); |
83 | } else { |
84 | StorageClass storageClass = getStorageClass(inst.getIdOperand(0)); |
85 | if (width == 8) { |
86 | switch (storageClass) { |
87 | case StorageClassPhysicalStorageBufferEXT: |
88 | case StorageClassUniform: |
89 | case StorageClassStorageBuffer: |
90 | case StorageClassPushConstant: |
91 | break; |
92 | default: |
93 | addCapability(CapabilityInt8); |
94 | break; |
95 | } |
96 | } else if (width == 16) { |
97 | switch (storageClass) { |
98 | case StorageClassPhysicalStorageBufferEXT: |
99 | case StorageClassUniform: |
100 | case StorageClassStorageBuffer: |
101 | case StorageClassPushConstant: |
102 | case StorageClassInput: |
103 | case StorageClassOutput: |
104 | break; |
105 | default: |
106 | if (basicTypeOp == OpTypeInt) |
107 | addCapability(CapabilityInt16); |
108 | if (basicTypeOp == OpTypeFloat) |
109 | addCapability(CapabilityFloat16); |
110 | break; |
111 | } |
112 | } |
113 | } |
114 | break; |
115 | case OpCopyObject: |
116 | break; |
117 | case OpFConvert: |
118 | case OpSConvert: |
119 | case OpUConvert: |
120 | // Look for any 8/16-bit storage capabilities. If there are none, assume that |
121 | // the convert instruction requires the Float16/Int8/16 capability. |
122 | if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) { |
123 | bool foundStorage = false; |
124 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
125 | spv::Capability cap = *it; |
126 | if (cap == spv::CapabilityStorageInputOutput16 || |
127 | cap == spv::CapabilityStoragePushConstant16 || |
128 | cap == spv::CapabilityStorageUniformBufferBlock16 || |
129 | cap == spv::CapabilityStorageUniform16) { |
130 | foundStorage = true; |
131 | break; |
132 | } |
133 | } |
134 | if (!foundStorage) { |
135 | if (containsType(typeId, OpTypeFloat, 16)) |
136 | addCapability(CapabilityFloat16); |
137 | if (containsType(typeId, OpTypeInt, 16)) |
138 | addCapability(CapabilityInt16); |
139 | } |
140 | } |
141 | if (containsType(typeId, OpTypeInt, 8)) { |
142 | bool foundStorage = false; |
143 | for (auto it = capabilities.begin(); it != capabilities.end(); ++it) { |
144 | spv::Capability cap = *it; |
145 | if (cap == spv::CapabilityStoragePushConstant8 || |
146 | cap == spv::CapabilityUniformAndStorageBuffer8BitAccess || |
147 | cap == spv::CapabilityStorageBuffer8BitAccess) { |
148 | foundStorage = true; |
149 | break; |
150 | } |
151 | } |
152 | if (!foundStorage) { |
153 | addCapability(CapabilityInt8); |
154 | } |
155 | } |
156 | break; |
157 | case OpExtInst: |
158 | switch (inst.getImmediateOperand(1)) { |
159 | case GLSLstd450Frexp: |
160 | case GLSLstd450FrexpStruct: |
161 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeInt, 16)) |
162 | addExtension(spv::E_SPV_AMD_gpu_shader_int16); |
163 | break; |
164 | case GLSLstd450InterpolateAtCentroid: |
165 | case GLSLstd450InterpolateAtSample: |
166 | case GLSLstd450InterpolateAtOffset: |
167 | if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeFloat, 16)) |
168 | addExtension(spv::E_SPV_AMD_gpu_shader_half_float); |
169 | break; |
170 | default: |
171 | break; |
172 | } |
173 | break; |
174 | case OpAccessChain: |
175 | case OpPtrAccessChain: |
176 | if (isPointerType(typeId)) |
177 | break; |
178 | if (basicTypeOp == OpTypeInt) { |
179 | if (width == 16) |
180 | addCapability(CapabilityInt16); |
181 | else if (width == 8) |
182 | addCapability(CapabilityInt8); |
183 | } |
184 | default: |
185 | if (basicTypeOp == OpTypeInt) { |
186 | if (width == 16) |
187 | addCapability(CapabilityInt16); |
188 | else if (width == 8) |
189 | addCapability(CapabilityInt8); |
190 | else if (width == 64) |
191 | addCapability(CapabilityInt64); |
192 | } else if (basicTypeOp == OpTypeFloat) { |
193 | if (width == 16) |
194 | addCapability(CapabilityFloat16); |
195 | else if (width == 64) |
196 | addCapability(CapabilityFloat64); |
197 | } |
198 | break; |
199 | } |
200 | } |
201 | |
202 | // Called for each instruction that resides in a block. |
203 | void Builder::postProcess(Instruction& inst) |
204 | { |
205 | // Add capabilities based simply on the opcode. |
206 | switch (inst.getOpCode()) { |
207 | case OpExtInst: |
208 | switch (inst.getImmediateOperand(1)) { |
209 | case GLSLstd450InterpolateAtCentroid: |
210 | case GLSLstd450InterpolateAtSample: |
211 | case GLSLstd450InterpolateAtOffset: |
212 | addCapability(CapabilityInterpolationFunction); |
213 | break; |
214 | default: |
215 | break; |
216 | } |
217 | break; |
218 | case OpDPdxFine: |
219 | case OpDPdyFine: |
220 | case OpFwidthFine: |
221 | case OpDPdxCoarse: |
222 | case OpDPdyCoarse: |
223 | case OpFwidthCoarse: |
224 | addCapability(CapabilityDerivativeControl); |
225 | break; |
226 | |
227 | case OpImageQueryLod: |
228 | case OpImageQuerySize: |
229 | case OpImageQuerySizeLod: |
230 | case OpImageQuerySamples: |
231 | case OpImageQueryLevels: |
232 | addCapability(CapabilityImageQuery); |
233 | break; |
234 | |
235 | case OpGroupNonUniformPartitionNV: |
236 | addExtension(E_SPV_NV_shader_subgroup_partitioned); |
237 | addCapability(CapabilityGroupNonUniformPartitionedNV); |
238 | break; |
239 | |
240 | case OpLoad: |
241 | case OpStore: |
242 | { |
243 | // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain |
244 | // index list to compute the misalignment. The pre-existing alignment value |
245 | // (set via Builder::AccessChain::alignment) only accounts for the base of |
246 | // the reference type and any scalar component selection in the accesschain, |
247 | // and this function computes the rest from the SPIR-V Offset decorations. |
248 | Instruction *accessChain = module.getInstruction(inst.getIdOperand(0)); |
249 | if (accessChain->getOpCode() == OpAccessChain) { |
250 | Instruction *base = module.getInstruction(accessChain->getIdOperand(0)); |
251 | // Get the type of the base of the access chain. It must be a pointer type. |
252 | Id typeId = base->getTypeId(); |
253 | Instruction *type = module.getInstruction(typeId); |
254 | assert(type->getOpCode() == OpTypePointer); |
255 | if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) { |
256 | break; |
257 | } |
258 | // Get the pointee type. |
259 | typeId = type->getIdOperand(1); |
260 | type = module.getInstruction(typeId); |
261 | // Walk the index list for the access chain. For each index, find any |
262 | // misalignment that can apply when accessing the member/element via |
263 | // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all |
264 | // together. |
265 | int alignment = 0; |
266 | for (int i = 1; i < accessChain->getNumOperands(); ++i) { |
267 | Instruction *idx = module.getInstruction(accessChain->getIdOperand(i)); |
268 | if (type->getOpCode() == OpTypeStruct) { |
269 | assert(idx->getOpCode() == OpConstant); |
270 | unsigned int c = idx->getImmediateOperand(0); |
271 | |
272 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
273 | if (decoration.get()->getOpCode() == OpMemberDecorate && |
274 | decoration.get()->getIdOperand(0) == typeId && |
275 | decoration.get()->getImmediateOperand(1) == c && |
276 | (decoration.get()->getImmediateOperand(2) == DecorationOffset || |
277 | decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) { |
278 | alignment |= decoration.get()->getImmediateOperand(3); |
279 | } |
280 | }; |
281 | std::for_each(decorations.begin(), decorations.end(), function); |
282 | // get the next member type |
283 | typeId = type->getIdOperand(c); |
284 | type = module.getInstruction(typeId); |
285 | } else if (type->getOpCode() == OpTypeArray || |
286 | type->getOpCode() == OpTypeRuntimeArray) { |
287 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
288 | if (decoration.get()->getOpCode() == OpDecorate && |
289 | decoration.get()->getIdOperand(0) == typeId && |
290 | decoration.get()->getImmediateOperand(1) == DecorationArrayStride) { |
291 | alignment |= decoration.get()->getImmediateOperand(2); |
292 | } |
293 | }; |
294 | std::for_each(decorations.begin(), decorations.end(), function); |
295 | // Get the element type |
296 | typeId = type->getIdOperand(0); |
297 | type = module.getInstruction(typeId); |
298 | } else { |
299 | // Once we get to any non-aggregate type, we're done. |
300 | break; |
301 | } |
302 | } |
303 | assert(inst.getNumOperands() >= 3); |
304 | unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1); |
305 | assert(memoryAccess & MemoryAccessAlignedMask); |
306 | static_cast<void>(memoryAccess); |
307 | // Compute the index of the alignment operand. |
308 | int alignmentIdx = 2; |
309 | if (inst.getOpCode() == OpStore) |
310 | alignmentIdx++; |
311 | // Merge new and old (mis)alignment |
312 | alignment |= inst.getImmediateOperand(alignmentIdx); |
313 | // Pick the LSB |
314 | alignment = alignment & ~(alignment & (alignment-1)); |
315 | // update the Aligned operand |
316 | inst.setImmediateOperand(alignmentIdx, alignment); |
317 | } |
318 | break; |
319 | } |
320 | |
321 | default: |
322 | break; |
323 | } |
324 | |
325 | // Checks based on type |
326 | if (inst.getTypeId() != NoType) |
327 | postProcessType(inst, inst.getTypeId()); |
328 | for (int op = 0; op < inst.getNumOperands(); ++op) { |
329 | if (inst.isIdOperand(op)) { |
330 | // In blocks, these are always result ids, but we are relying on |
331 | // getTypeId() to return NoType for things like OpLabel. |
332 | if (getTypeId(inst.getIdOperand(op)) != NoType) |
333 | postProcessType(inst, getTypeId(inst.getIdOperand(op))); |
334 | } |
335 | } |
336 | } |
337 | #endif |
338 | |
339 | // comment in header |
340 | void Builder::postProcessCFG() |
341 | { |
342 | // reachableBlocks is the set of blockss reached via control flow, or which are |
343 | // unreachable continue targert or unreachable merge. |
344 | std::unordered_set<const Block*> reachableBlocks; |
345 | std::unordered_map<Block*, Block*> ; |
346 | std::unordered_set<Block*> unreachableMerges; |
347 | std::unordered_set<Id> unreachableDefinitions; |
348 | // Collect IDs defined in unreachable blocks. For each function, label the |
349 | // reachable blocks first. Then for each unreachable block, collect the |
350 | // result IDs of the instructions in it. |
351 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
352 | Function* f = *fi; |
353 | Block* entry = f->getEntryBlock(); |
354 | inReadableOrder(entry, |
355 | [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue] |
356 | (Block* b, ReachReason why, Block* ) { |
357 | reachableBlocks.insert(b); |
358 | if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header; |
359 | if (why == ReachDeadMerge) unreachableMerges.insert(b); |
360 | }); |
361 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
362 | Block* b = *bi; |
363 | if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) { |
364 | auto ii = b->getInstructions().cbegin(); |
365 | ++ii; // Keep potential decorations on the label. |
366 | for (; ii != b->getInstructions().cend(); ++ii) |
367 | unreachableDefinitions.insert(ii->get()->getResultId()); |
368 | } else if (reachableBlocks.count(b) == 0) { |
369 | // The normal case for unreachable code. All definitions are considered dead. |
370 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii) |
371 | unreachableDefinitions.insert(ii->get()->getResultId()); |
372 | } |
373 | } |
374 | } |
375 | |
376 | // Modify unreachable merge blocks and unreachable continue targets. |
377 | // Delete their contents. |
378 | for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) { |
379 | (*mergeIter)->rewriteAsCanonicalUnreachableMerge(); |
380 | } |
381 | for (auto continueIter = headerForUnreachableContinue.begin(); |
382 | continueIter != headerForUnreachableContinue.end(); |
383 | ++continueIter) { |
384 | Block* continue_target = continueIter->first; |
385 | Block* = continueIter->second; |
386 | continue_target->rewriteAsCanonicalUnreachableContinue(header); |
387 | } |
388 | |
389 | // Remove unneeded decorations, for unreachable instructions |
390 | decorations.erase(std::remove_if(decorations.begin(), decorations.end(), |
391 | [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool { |
392 | Id decoration_id = I.get()->getIdOperand(0); |
393 | return unreachableDefinitions.count(decoration_id) != 0; |
394 | }), |
395 | decorations.end()); |
396 | } |
397 | |
398 | #ifndef GLSLANG_WEB |
399 | // comment in header |
400 | void Builder::postProcessFeatures() { |
401 | // Add per-instruction capabilities, extensions, etc., |
402 | |
403 | // Look for any 8/16 bit type in physical storage buffer class, and set the |
404 | // appropriate capability. This happens in createSpvVariable for other storage |
405 | // classes, but there isn't always a variable for physical storage buffer. |
406 | for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) { |
407 | Instruction* type = groupedTypes[OpTypePointer][t]; |
408 | if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) { |
409 | if (containsType(type->getIdOperand(1), OpTypeInt, 8)) { |
410 | addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5); |
411 | addCapability(spv::CapabilityStorageBuffer8BitAccess); |
412 | } |
413 | if (containsType(type->getIdOperand(1), OpTypeInt, 16) || |
414 | containsType(type->getIdOperand(1), OpTypeFloat, 16)) { |
415 | addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3); |
416 | addCapability(spv::CapabilityStorageBuffer16BitAccess); |
417 | } |
418 | } |
419 | } |
420 | |
421 | // process all block-contained instructions |
422 | for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) { |
423 | Function* f = *fi; |
424 | for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) { |
425 | Block* b = *bi; |
426 | for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++) |
427 | postProcess(*ii->get()); |
428 | |
429 | // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether |
430 | // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the |
431 | // default. |
432 | for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) { |
433 | const Instruction& inst = *vi->get(); |
434 | Id resultId = inst.getResultId(); |
435 | if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) { |
436 | bool foundDecoration = false; |
437 | const auto function = [&](const std::unique_ptr<Instruction>& decoration) { |
438 | if (decoration.get()->getIdOperand(0) == resultId && |
439 | decoration.get()->getOpCode() == OpDecorate && |
440 | (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT || |
441 | decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) { |
442 | foundDecoration = true; |
443 | } |
444 | }; |
445 | std::for_each(decorations.begin(), decorations.end(), function); |
446 | if (!foundDecoration) { |
447 | addDecoration(resultId, spv::DecorationAliasedPointerEXT); |
448 | } |
449 | } |
450 | } |
451 | } |
452 | } |
453 | |
454 | // If any Vulkan memory model-specific functionality is used, update the |
455 | // OpMemoryModel to match. |
456 | if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) { |
457 | memoryModel = spv::MemoryModelVulkanKHR; |
458 | addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5); |
459 | } |
460 | |
461 | // Add Aliased decoration if there's more than one Workgroup Block variable. |
462 | if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) { |
463 | assert(entryPoints.size() == 1); |
464 | auto &ep = entryPoints[0]; |
465 | |
466 | std::vector<Id> workgroup_variables; |
467 | for (int i = 0; i < (int)ep->getNumOperands(); i++) { |
468 | if (!ep->isIdOperand(i)) |
469 | continue; |
470 | |
471 | const Id id = ep->getIdOperand(i); |
472 | const Instruction *instr = module.getInstruction(id); |
473 | if (instr->getOpCode() != spv::OpVariable) |
474 | continue; |
475 | |
476 | if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup) |
477 | workgroup_variables.push_back(id); |
478 | } |
479 | |
480 | if (workgroup_variables.size() > 1) { |
481 | for (size_t i = 0; i < workgroup_variables.size(); i++) |
482 | addDecoration(workgroup_variables[i], spv::DecorationAliased); |
483 | } |
484 | } |
485 | } |
486 | #endif |
487 | |
488 | // comment in header |
489 | void Builder::postProcess() { |
490 | postProcessCFG(); |
491 | #ifndef GLSLANG_WEB |
492 | postProcessFeatures(); |
493 | #endif |
494 | } |
495 | |
496 | }; // end spv namespace |
497 | |