1 | // Copyright (c) 2016 Google Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_ |
16 | #define INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_ |
17 | |
18 | #include <memory> |
19 | #include <ostream> |
20 | #include <string> |
21 | #include <unordered_map> |
22 | #include <vector> |
23 | |
24 | #include "libspirv.hpp" |
25 | |
26 | namespace spvtools { |
27 | |
namespace opt {
// Forward declaration only. Optimizer::PassToken has a constructor taking a
// std::unique_ptr<opt::Pass>, and declaring that constructor does not require
// the complete type.
class Pass;
}  // namespace opt
31 | |
// C++ interface for SPIR-V optimization functionalities. It wraps the context
// (including target environment and the corresponding SPIR-V grammar) and
// provides methods for registering optimization passes and optimizing.
//
// Instances of this class provide a basic thread-safety guarantee.
class Optimizer {
 public:
  // The token for an optimization pass. It is returned via one of the
  // Create*Pass() standalone functions at the end of this header file and
  // consumed by the RegisterPass() method. Tokens are one-time objects that
  // only support move; copying is not allowed.
  struct PassToken {
    struct Impl;  // Opaque struct for holding internal data.

    // Constructs a token that takes ownership of the given internal data.
    PassToken(std::unique_ptr<Impl>);

    // Tokens for built-in passes should be created using Create*Pass functions
    // below; for out-of-tree passes, use this constructor instead.
    // Note that this API isn't guaranteed to be stable and may change without
    // preserving source or binary compatibility in the future.
    PassToken(std::unique_ptr<opt::Pass>&& pass);

    // Tokens can only be moved. Copying is disabled.
    PassToken(const PassToken&) = delete;
    PassToken(PassToken&&);
    PassToken& operator=(const PassToken&) = delete;
    PassToken& operator=(PassToken&&);

    ~PassToken();

    std::unique_ptr<Impl> impl_;  // Unique pointer to internal data.
  };

  // Constructs an instance with the given target |env|, which is used to decode
  // the binaries to be optimized later.
  //
  // The instance will have an empty message consumer, which ignores all
  // messages from the library. Use SetMessageConsumer() to supply a consumer
  // if messages are of concern.
  //
  // For collections of passes that are meant to transform the input into
  // another execution environment, the source environment should be
  // supplied, e.g. for VulkanToWebGPUPasses the environment should be
  // SPV_ENV_VULKAN_1_1, not SPV_ENV_WEBGPU_0.
  explicit Optimizer(spv_target_env env);

  // Disables copy/move constructor/assignment operations.
  Optimizer(const Optimizer&) = delete;
  Optimizer(Optimizer&&) = delete;
  Optimizer& operator=(const Optimizer&) = delete;
  Optimizer& operator=(Optimizer&&) = delete;

  // Destructs this instance.
  ~Optimizer();

  // Sets the message consumer to the given |consumer|. The |consumer| will be
  // invoked once for each message communicated from the library.
  void SetMessageConsumer(MessageConsumer consumer);

  // Returns a reference to the registered message consumer.
  const MessageConsumer& consumer() const;

  // Registers the given |pass| to this optimizer. Passes will be run in the
  // exact order of registration. The token passed in will be consumed by this
  // method. Returns a reference to an Optimizer (presumably this instance, so
  // that calls can be chained — confirm against the implementation).
  Optimizer& RegisterPass(PassToken&& pass);

  // Registers passes that attempt to improve performance of generated code.
  // This sequence of passes is subject to constant review and will change
  // from time to time.
  Optimizer& RegisterPerformancePasses();

  // Registers passes that attempt to improve the size of generated code.
  // This sequence of passes is subject to constant review and will change
  // from time to time.
  Optimizer& RegisterSizePasses();

  // Registers passes that have been prescribed for converting from Vulkan to
  // WebGPU. This sequence of passes is subject to constant review and will
  // change from time to time.
  Optimizer& RegisterVulkanToWebGPUPasses();

  // Registers passes that have been prescribed for converting from WebGPU to
  // Vulkan. This sequence of passes is subject to constant review and will
  // change from time to time.
  Optimizer& RegisterWebGPUToVulkanPasses();

  // Registers passes that attempt to legalize the generated code.
  //
  // Note: this recipe is specially designed for legalizing SPIR-V. It should be
  // used by compilers after translating HLSL source code literally. It should
  // *not* be used by general workloads for performance or size improvement.
  //
  // This sequence of passes is subject to constant review and will change
  // from time to time.
  Optimizer& RegisterLegalizationPasses();

  // Registers the passes specified in the list of |flags|. Each flag must be a
  // string of a form accepted by Optimizer::FlagHasValidForm().
  //
  // If the list of flags contains an invalid entry, it returns false and an
  // error message is emitted to the MessageConsumer object (use
  // Optimizer::SetMessageConsumer to define a message consumer, if needed).
  //
  // If all the passes are registered successfully, it returns true.
  bool RegisterPassesFromFlags(const std::vector<std::string>& flags);

  // Registers the optimization pass associated with |flag|. This only accepts
  // |flag| values of the form "--pass_name[=pass_args]". If no such pass
  // exists, it returns false. Otherwise, the pass is registered and it returns
  // true.
  //
  // The following flags have special meaning:
  //
  // -O: Registers all performance optimization passes
  //     (Optimizer::RegisterPerformancePasses).
  //
  // -Os: Registers all size optimization passes
  //      (Optimizer::RegisterSizePasses).
  //
  // --legalize-hlsl: Registers all passes that legalize SPIR-V generated by an
  //                  HLSL front-end.
  bool RegisterPassFromFlag(const std::string& flag);

  // Validates that |flag| has a valid format. Strings accepted:
  //
  // --pass_name[=pass_args]
  // -O
  // -Os
  //
  // If |flag| takes one of the forms above, it returns true. Otherwise, it
  // returns false.
  bool FlagHasValidForm(const std::string& flag) const;

  // Allows changing, after creation time, the target environment to be
  // optimized for and validated. Should be called before calling Run().
  void SetTargetEnv(const spv_target_env env);

  // Optimizes the given SPIR-V module |original_binary| and writes the
  // optimized binary into |optimized_binary|. The optimized binary uses
  // the same SPIR-V version as the original binary.
  //
  // Returns true on successful optimization, whether or not the module is
  // modified. Returns false if |original_binary| fails to validate or if errors
  // occur when processing |original_binary| using any of the registered passes.
  // In that case, no further passes are executed and the contents in
  // |optimized_binary| may be invalid.
  //
  // By default, the binary is validated before any transforms are performed,
  // and optionally after each transform. Validation uses SPIR-V spec rules
  // for the SPIR-V version named in the binary's header (at word offset 1).
  // Additionally, if the target environment is a client API (such as
  // Vulkan 1.1), then validate for that client API version, to the extent
  // that it is verifiable from data in the binary itself.
  //
  // It's allowed to alias |original_binary| to the start of |optimized_binary|.
  bool Run(const uint32_t* original_binary, size_t original_binary_size,
           std::vector<uint32_t>* optimized_binary) const;

  // DEPRECATED: Same as above, except passes |options| to the validator when
  // trying to validate the binary. If |skip_validation| is true, then the
  // caller is guaranteeing that |original_binary| is valid, and the validator
  // will not be run. The |max_id_bound| is the limit on the max id in the
  // module.
  // NOTE(review): this overload declares no |max_id_bound| parameter, so the
  // sentence above looks stale — confirm and remove or relocate it.
  bool Run(const uint32_t* original_binary, const size_t original_binary_size,
           std::vector<uint32_t>* optimized_binary,
           const ValidatorOptions& options, bool skip_validation) const;

  // Same as above, except it takes an options object. See the documentation
  // for |OptimizerOptions| to see which options can be set.
  //
  // By default, the binary is validated before any transforms are performed,
  // and optionally after each transform. Validation uses SPIR-V spec rules
  // for the SPIR-V version named in the binary's header (at word offset 1).
  // Additionally, if the target environment is a client API (such as
  // Vulkan 1.1), then validate for that client API version, to the extent
  // that it is verifiable from data in the binary itself, or from the
  // validator options set on the optimizer options.
  bool Run(const uint32_t* original_binary, const size_t original_binary_size,
           std::vector<uint32_t>* optimized_binary,
           const spv_optimizer_options opt_options) const;

  // Returns a vector of strings with all the pass names added to this
  // optimizer's pass manager. These strings are valid until the associated
  // pass manager is destroyed.
  std::vector<const char*> GetPassNames() const;

  // Sets the option to print the disassembly before each pass and after the
  // last pass. If |out| is null, then no output is generated. Otherwise,
  // output is sent to the |out| output stream.
  Optimizer& SetPrintAll(std::ostream* out);

  // Sets the option to print the resource utilization of each pass. If |out|
  // is null, then no output is generated. Otherwise, output is sent to the
  // |out| output stream.
  Optimizer& SetTimeReport(std::ostream* out);

  // Sets the option to validate the module after each pass.
  Optimizer& SetValidateAfterAll(bool validate);

 private:
  struct Impl;                  // Opaque struct for holding internal data.
  std::unique_ptr<Impl> impl_;  // Unique pointer to internal data.
};
236 | |
// Creates a null pass.
// A null pass does nothing to the SPIR-V module to be optimized.
// The returned token is meant to be handed to Optimizer::RegisterPass().
Optimizer::PassToken CreateNullPass();
240 | |
// Creates a strip-atomic-counter-memory pass.
// A strip-atomic-counter-memory pass removes all usages of the
// AtomicCounterMemory bit in Memory Semantics bitmasks. This bit is a no-op in
// Vulkan, so it isn't needed in that environment. The related capability is
// not allowed in WebGPU, so the bit is not allowed in that environment either.
Optimizer::PassToken CreateStripAtomicCounterMemoryPass();
247 | |
// Creates a strip-debug-info pass.
// A strip-debug-info pass removes all debug instructions (as documented in
// Section 3.32.2 of the SPIR-V spec) from the SPIR-V module to be optimized.
Optimizer::PassToken CreateStripDebugInfoPass();
252 | |
// Creates a strip-reflect-info pass.
// A strip-reflect-info pass removes all reflection instructions.
// For now, this is limited to removing decorations defined in
// SPV_GOOGLE_hlsl_functionality1. The coverage may expand in
// the future.
Optimizer::PassToken CreateStripReflectInfoPass();
259 | |
// Creates an eliminate-dead-functions pass.
// An eliminate-dead-functions pass removes all functions that are not in
// the call trees rooted at entry points and exported functions. These
// functions are not needed because they will never be called.
Optimizer::PassToken CreateEliminateDeadFunctionsPass();
265 | |
// Creates an eliminate-dead-members pass.
// An eliminate-dead-members pass removes all unused members of structures.
// This does not affect the data layout of the remaining members.
Optimizer::PassToken CreateEliminateDeadMembersPass();
270 | |
// Creates a set-spec-constant-default-value pass from |id_value_map|, a
// mapping from spec-ids to the default values in the form of strings.
// A set-spec-constant-default-value pass sets the default values for the
// spec constants that have SpecId decorations (i.e., those defined by
// OpSpecConstant{|True|False} instructions).
Optimizer::PassToken CreateSetSpecConstantDefaultValuePass(
    const std::unordered_map<uint32_t, std::string>& id_value_map);
278 | |
// Creates a set-spec-constant-default-value pass from |id_value_map|, a
// mapping from spec-ids to the default values in the form of bit patterns
// (each value is given as a vector of 32-bit words).
// A set-spec-constant-default-value pass sets the default values for the
// spec constants that have SpecId decorations (i.e., those defined by
// OpSpecConstant{|True|False} instructions).
Optimizer::PassToken CreateSetSpecConstantDefaultValuePass(
    const std::unordered_map<uint32_t, std::vector<uint32_t>>& id_value_map);
286 | |
// Creates a flatten-decoration pass.
// A flatten-decoration pass replaces grouped decorations with equivalent
// ungrouped decorations. That is, it replaces each OpDecorationGroup
// instruction and its associated OpGroupDecorate and OpGroupMemberDecorate
// instructions with equivalent OpDecorate and OpMemberDecorate instructions.
// The pass does not attempt to preserve debug information for the
// instructions it removes.
Optimizer::PassToken CreateFlattenDecorationPass();
295 | |
// Creates a freeze-spec-constant-value pass.
// A freeze-spec-constant-value pass specializes the value of spec constants
// to their default values. This pass only processes the spec constants that
// have SpecId decorations (defined by OpSpecConstant, OpSpecConstantTrue, or
// OpSpecConstantFalse instructions) and replaces them with their normal
// counterparts (OpConstant, OpConstantTrue, or OpConstantFalse). The
// corresponding SpecId annotation instructions are also removed. This
// pass does not fold the newly added normal constants and does not process
// other spec constants defined by OpSpecConstantComposite or
// OpSpecConstantOp.
Optimizer::PassToken CreateFreezeSpecConstantValuePass();
307 | |
// Creates a fold-spec-constant-op-and-composite pass.
// A fold-spec-constant-op-and-composite pass folds spec constants defined by
// OpSpecConstantOp or OpSpecConstantComposite instructions into normal
// constants defined by OpConstantTrue, OpConstantFalse, OpConstant,
// OpConstantNull, or OpConstantComposite instructions. Note that spec
// constants defined with OpSpecConstant, OpSpecConstantTrue, or
// OpSpecConstantFalse instructions are not handled, as these instructions
// indicate that their values are not determined and can be changed in the
// future. A spec constant is foldable if all of its value(s) can be determined
// from the module. E.g., an integer spec constant defined with an
// OpSpecConstantOp instruction can be folded if its value won't change later.
// This pass will replace the original OpSpecConstantOp instruction with an
// OpConstant instruction. When folding composite spec constants, new
// instructions may be inserted to define the components of the composite
// constant first, then the original spec constants will be replaced by
// OpConstantComposite instructions.
//
// There are some operations not supported yet:
//   OpSConvert, OpFConvert, OpQuantizeToF16 and
//   all the operations under Kernel capability.
// TODO(qining): Add support for the operations listed above.
Optimizer::PassToken CreateFoldSpecConstantOpAndCompositePass();
329 | |
// Creates a unify-constant pass.
// A unify-constant pass de-duplicates the constants. Constants with the exact
// same value and identical form will be unified and only one constant will
// be kept for each unique pair of type and value.
// There are several cases not handled by this pass:
//  1) Constants defined by OpConstantNull instructions (null constants) and
//  constants defined by OpConstantFalse, OpConstant or OpConstantComposite
//  with value 0 (zero-valued normal constants) are not considered equivalent.
//  So null constants won't be used to replace zero-valued normal constants,
//  and vice versa.
//  2) Whenever there are decorations on the constant's result id, the
//  constant won't be handled, which means it won't be used to replace any
//  other constants, nor can other constants replace it.
//  3) NaNs in floating-point format with different bit patterns are not
//  unified.
Optimizer::PassToken CreateUnifyConstantPass();
345 | |
// Creates an eliminate-dead-constant pass.
// An eliminate-dead-constant pass removes dead constants, including normal
// constants defined by OpConstant, OpConstantComposite, OpConstantTrue, or
// OpConstantFalse and spec constants defined by OpSpecConstant,
// OpSpecConstantComposite, OpSpecConstantTrue, OpSpecConstantFalse or
// OpSpecConstantOp.
Optimizer::PassToken CreateEliminateDeadConstantPass();
353 | |
// Creates a strength-reduction pass.
// A strength-reduction pass looks for opportunities to replace an
// instruction with an equivalent and less expensive one. For example,
// multiplying by a power of 2 can be replaced by a bit shift.
Optimizer::PassToken CreateStrengthReductionPass();
359 | |
// Creates a block merge pass.
// This pass searches for blocks with a single Branch to a block with no
// other predecessors and merges the two blocks into a single block. Continue
// blocks and Merge blocks are not candidates for the second block.
//
// The pass is most useful after Dead Branch Elimination, which can leave
// such sequences of blocks. Merging them makes subsequent passes more
// effective, such as single block local store-load elimination.
//
// While this pass reduces the number of occurrences of this sequence, at
// this time it does not guarantee that all such sequences are eliminated.
//
// The presence of phi instructions can inhibit this optimization. Handling
// these is left for future improvements.
Optimizer::PassToken CreateBlockMergePass();
375 | |
// Creates an exhaustive inline pass.
// An exhaustive inline pass attempts to exhaustively inline all function
// calls in all functions in an entry point call tree. The intent is to enable,
// albeit through brute force, analysis and optimization across function
// calls by subsequent optimization passes. As the inlining is exhaustive,
// there is no attempt to optimize for size or runtime performance. Functions
// that are not in the call tree of an entry point are not changed.
Optimizer::PassToken CreateInlineExhaustivePass();
384 | |
// Creates an opaque inline pass.
// An opaque inline pass inlines all function calls in all functions in all
// entry point call trees where the called function contains an opaque type
// in either its parameter types or return type. An opaque type is currently
// defined as Image, Sampler or SampledImage. The intent is to enable, albeit
// through brute force, analysis and optimization across these function calls
// by subsequent passes in order to remove the storing of opaque types, which
// is not legal in Vulkan. Functions that are not in the call tree of an entry
// point are not changed.
Optimizer::PassToken CreateInlineOpaquePass();
395 | |
// Creates a single-block local variable load/store elimination pass.
// For every entry point function, this pass does single block memory
// optimization of function variables referenced only with non-access-chain
// loads and stores. For each targeted variable load, if there is a previous
// store to that variable in the block, the load's result id is replaced with
// the value id of the store. If there is a previous load within the block, the
// current load's result id is replaced with the previous load's result id. In
// either case, the current load is deleted. Finally, the pass checks whether
// any remaining stores are useless, and deletes the store and variable if
// possible.
//
// The presence of access chain references and function calls can inhibit
// the above optimization.
//
// Only modules with relaxed logical addressing (see opt/instruction.h) are
// currently processed.
//
// This pass is most effective if preceded by Inlining and
// LocalAccessChainConvert. This pass will reduce the work needed to be done
// by LocalSingleStoreElim and LocalMultiStoreElim.
//
// Only functions in the call tree of an entry point are processed.
Optimizer::PassToken CreateLocalSingleBlockLoadStoreElimPass();
418 | |
// Creates a dead branch elimination pass.
// For each entry point function, this pass looks for SelectionMerge
// BranchConditionals with a constant condition and converts them to a Branch
// to the indicated label. It deletes the resulting dead blocks.
//
// For all phi functions in the merge block, all uses are replaced with the id
// corresponding to the living predecessor.
//
// Note that some branches and blocks may be left to avoid creating invalid
// control flow. Improving this is left to future work.
//
// This pass is most effective when preceded by passes which eliminate
// local loads and stores, effectively propagating constant values where
// possible.
Optimizer::PassToken CreateDeadBranchElimPass();
434 | |
// Creates an SSA local variable load/store elimination pass.
// For every entry point function, this pass eliminates all loads and stores
// of function scope variables only referenced with non-access-chain loads and
// stores. It eliminates the variables as well.
//
// The presence of access chain references and function calls can inhibit
// the above optimization.
//
// Only shader modules with relaxed logical addressing (see opt/instruction.h)
// are currently processed. Currently modules with any extensions enabled are
// not processed. This is left for future work.
//
// This pass is most effective if preceded by Inlining and
// LocalAccessChainConvert. LocalSingleStoreElim and LocalSingleBlockElim
// will reduce the work that this pass has to do.
Optimizer::PassToken CreateLocalMultiStoreElimPass();
451 | |
// Creates a local access chain conversion pass.
// A local access chain conversion pass identifies all function scope
// variables which are accessed only with loads, stores and access chains
// with constant indices. It then converts all loads and stores of such
// variables into equivalent sequences of loads, stores, extracts and inserts.
//
// This pass only processes entry point functions. It currently only converts
// non-nested, non-ptr access chains. It does not process modules with
// non-32-bit integer types present. Optional memory access options on loads
// and stores are ignored, as only function scope variables are processed.
//
// This pass unifies access to these variables to a single mode and simplifies
// subsequent analysis and elimination of these variables along with their
// loads and stores, allowing values to propagate to their points of use where
// possible.
Optimizer::PassToken CreateLocalAccessChainConvertPass();
468 | |
// Creates a local single store elimination pass.
// For each entry point function, this pass eliminates loads and stores for
// function scope variables that are stored to only once, where possible. Only
// whole variable loads and stores are eliminated; access-chain references are
// not optimized. All loads of such variables are replaced with the value that
// is stored, and any resulting dead code is eliminated.
//
// Currently, the presence of access chains and function calls can inhibit this
// pass; however, the Inlining and LocalAccessChainConvert passes can make it
// more effective. In addition, many non-load/store memory operations are
// not supported and will prohibit optimization of a function. Support for
// these operations is future work.
//
// Only shader modules with relaxed logical addressing (see opt/instruction.h)
// are currently processed.
//
// This pass will reduce the work needed to be done by LocalSingleBlockElim
// and LocalMultiStoreElim and can improve the effectiveness of other passes
// such as DeadBranchElimination which depend on values for their analysis.
Optimizer::PassToken CreateLocalSingleStoreElimPass();
489 | |
490 | // Creates an insert/extract elimination pass. |
491 | // This pass processes each entry point function in the module, searching for |
492 | // extracts on a sequence of inserts. It further searches the sequence for an |
493 | // insert with indices identical to the extract. If such an insert can be |
494 | // found before hitting a conflicting insert, the extract's result id is |
495 | // replaced with the id of the values from the insert. |
496 | // |
497 | // Besides removing extracts this pass enables subsequent dead code elimination |
498 | // passes to delete the inserts. This pass performs best after access chains are |
499 | // converted to inserts and extracts and local loads and stores are eliminated. |
500 | Optimizer::PassToken (); |
501 | |
// Creates a dead insert elimination pass.
// This pass processes each entry point function in the module, searching for
// unreferenced inserts into composite types. These are most often unused
// stores to vector components. They are unused because they are never
// referenced, or because there is another insert to the same component between
// the insert and the reference. After removing the inserts, dead code
// elimination is attempted on the inserted values.
//
// This pass performs best after access chains are converted to inserts and
// extracts and local loads and stores are eliminated. While executing this
// pass can be advantageous on its own, it is also advantageous to execute
// this pass after the insert/extract elimination pass, as it will remove any
// unused inserts created by that pass.
Optimizer::PassToken CreateDeadInsertElimPass();
516 | |
// Creates an aggressive dead code elimination pass.
// This pass eliminates unused code from the module. In addition,
// it detects and eliminates code which may have spurious uses but which does
// not contribute to the output of the function. The most common cause of
// such code sequences is summations in loops whose result is no longer used
// due to dead code elimination. This optimization has additional compile
// time cost over standard dead code elimination.
//
// This pass only processes entry point functions. It also only processes
// shaders with relaxed logical addressing (see opt/instruction.h). It
// currently will not process functions with function calls. Unreachable
// functions are deleted.
//
// This pass will be made more effective by first running passes that remove
// dead control flow and inline function calls.
//
// This pass can be especially useful after running Local Access Chain
// Conversion, which tends to cause cycles of dead code to be left after
// Store/Load elimination passes are completed. These cycles cannot be
// eliminated with standard dead code elimination.
Optimizer::PassToken CreateAggressiveDCEPass();
538 | |
// Creates a line propagation pass.
// This pass propagates line information based on the rules for OpLine and
// OpNoline and clones an appropriate line instruction into every instruction
// which does not already have debug line instructions.
//
// This pass is intended to maximize preservation of source line information
// through passes which delete, move and clone instructions. Ideally it should
// be run before any such pass. It is a bookend pass with EliminateDeadLines,
// which can be used to remove redundant line instructions at the end of a
// run of such passes and reduce final output file size.
Optimizer::PassToken CreatePropagateLineInfoPass();
550 | |
// Creates a dead line elimination pass.
// This pass eliminates redundant line instructions based on the rules for
// OpLine and OpNoline. Its main purpose is to reduce the size of the file
// needed to store the SPIR-V without losing line information.
//
// This is a bookend pass with PropagateLines, which attaches line instructions
// to every instruction to preserve line information during passes which
// delete, move and clone instructions. DeadLineElim should be run after
// PropagateLines and all such subsequent passes. Normally it would be one
// of the last passes to be run.
Optimizer::PassToken CreateRedundantLineInfoElimPass();
562 | |
// Creates a compact ids pass.
// The pass remaps result ids to a compact and gapless range starting from %1.
// The returned token is meant to be handed to Optimizer::RegisterPass().
Optimizer::PassToken CreateCompactIdsPass();
566 | |
// Creates a remove-duplicates pass.
// This pass removes various duplicates:
// * duplicate capabilities;
// * duplicate extended instruction imports;
// * duplicate types;
// * duplicate decorations.
Optimizer::PassToken CreateRemoveDuplicatesPass();
574 | |
575 | // Creates a CFG cleanup pass. |
576 | // This pass removes cruft from the control flow graph of functions that are |
577 | // reachable from entry points and exported functions. It currently includes the |
578 | // following functionality: |
579 | // |
580 | // - Removal of unreachable basic blocks. |
581 | Optimizer::PassToken CreateCFGCleanupPass(); |
582 | |
583 | // Create dead variable elimination pass. |
584 | // This pass will delete module scope variables, along with their decorations, |
585 | // that are not referenced. |
586 | Optimizer::PassToken CreateDeadVariableEliminationPass(); |
587 | |
588 | // Create merge return pass. |
589 | // Changes functions that have multiple return statements so they have a single |
590 | // return statement. |
591 | // |
592 | // For structured control flow it is assumed that the only unreachable blocks in |
593 | // the function are trivial merge and continue blocks. |
594 | // |
595 | // A trivial merge block contains the label and an OpUnreachable instruction, |
596 | // nothing else. A trivial continue block contains a label and an OpBranch to |
597 | // the header, nothing else. |
598 | // |
599 | // These conditions are guaranteed to be met after running dead-branch |
600 | // elimination. |
601 | Optimizer::PassToken CreateMergeReturnPass(); |
602 | |
603 | // Create value numbering pass. |
604 | // This pass will look for instructions in the same basic block that compute the |
605 | // same value, and remove the redundant ones. |
606 | Optimizer::PassToken CreateLocalRedundancyEliminationPass(); |
607 | |
608 | // Create LICM pass. |
609 | // This pass will look for invariant instructions inside loops and hoist them to |
610 | // the loop's preheader. |
611 | Optimizer::PassToken CreateLoopInvariantCodeMotionPass(); |
612 | |
613 | // Creates a loop fission pass. |
614 | // This pass will split all top level loops whose register pressure exceeds the |
615 | // given |threshold|. |
616 | Optimizer::PassToken CreateLoopFissionPass(size_t threshold); |
617 | |
618 | // Creates a loop fusion pass. |
619 | // This pass will look for adjacent loops that are compatible and legal to be |
620 | // fused. It fuses all such loops as long as the register usage for the fused |
621 | // loop stays under the threshold defined by |max_registers_per_loop|. |
622 | Optimizer::PassToken CreateLoopFusionPass(size_t max_registers_per_loop); |
623 | |
624 | // Creates a loop peeling pass. |
625 | // This pass will look for conditions inside a loop that are true or false only |
626 | // for the first or last N iterations. For loops with such a condition, those N |
627 | // iterations of the loop will be executed outside of the main loop. |
628 | // To limit code size explosion, the loop peeling can only happen if the code |
629 | // size growth for each loop is under |code_growth_threshold|. |
630 | Optimizer::PassToken CreateLoopPeelingPass(); |
631 | |
632 | // Creates a loop unswitch pass. |
633 | // This pass will look for loop independent branch conditions and move the |
634 | // condition out of the loop and version the loop based on the taken branch. |
635 | // Works best after LICM and local multi store elimination pass. |
636 | Optimizer::PassToken CreateLoopUnswitchPass(); |
637 | |
638 | // Create global value numbering pass. |
639 | // This pass will look for instructions where the same value is computed on all |
640 | // paths leading to the instruction. Those instructions are deleted. |
641 | Optimizer::PassToken CreateRedundancyEliminationPass(); |
642 | |
643 | // Create scalar replacement pass. |
644 | // This pass replaces composite function scope variables with variables for each |
645 | // element if those elements are accessed individually. The parameter is a |
646 | // limit on the number of members in the composite variable that the pass will |
647 | // consider replacing. |
648 | Optimizer::PassToken CreateScalarReplacementPass(uint32_t size_limit = 100); |
649 | |
650 | // Create a private to local pass. |
651 | // This pass looks for variables declared in the private storage class that are |
652 | // used in only one function. Those variables are moved to the function storage |
653 | // class in the function that they are used. |
654 | Optimizer::PassToken CreatePrivateToLocalPass(); |
655 | |
656 | // Creates a conditional constant propagation (CCP) pass. |
657 | // This pass implements the SSA-CCP algorithm in |
658 | // |
659 | // Constant propagation with conditional branches, |
660 | // Wegman and Zadeck, ACM TOPLAS 13(2):181-210. |
661 | // |
662 | // Constant values in expressions and conditional jumps are folded and |
663 | // simplified. This may reduce code size by removing never executed jump targets |
664 | // and computations with constant operands. |
665 | Optimizer::PassToken CreateCCPPass(); |
666 | |
667 | // Creates a workaround driver bugs pass. This pass attempts to work around |
668 | // a known driver bug (issue #1209) by identifying the bad code sequences and |
669 | // rewriting them. |
670 | // |
671 | // Current workaround: Avoid OpUnreachable instructions in loops. |
672 | Optimizer::PassToken CreateWorkaround1209Pass(); |
673 | |
674 | // Creates a pass that converts if-then-else like assignments into OpSelect. |
675 | Optimizer::PassToken CreateIfConversionPass(); |
676 | |
677 | // Creates a pass that will replace instructions that are not valid for the |
678 | // current shader stage by constants. Has no effect on non-shader modules. |
679 | Optimizer::PassToken CreateReplaceInvalidOpcodePass(); |
680 | |
681 | // Creates a pass that simplifies instructions using the instruction folder. |
682 | Optimizer::PassToken CreateSimplificationPass(); |
683 | |
684 | // Create loop unroller pass. |
685 | // Creates a pass to unroll loops which have the "Unroll" loop control |
686 | // mask set. The loops must meet specific criteria in order to be unrolled |
687 | // safely; these criteria are checked before doing the unroll by the |
688 | // LoopUtils::CanPerformUnroll method. Any loop that does not meet the criteria |
689 | // won't be unrolled. See CanPerformUnroll in LoopUtils.h for more information. |
690 | Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor = 0); |
691 | |
692 | // Create the SSA rewrite pass. |
693 | // This pass converts load/store operations on function local variables into |
694 | // operations on SSA IDs. This allows SSA optimizers to act on these variables. |
695 | // Only variables that are local to the function and of supported types are |
696 | // processed (see IsSSATargetVar for details). |
697 | Optimizer::PassToken CreateSSARewritePass(); |
698 | |
699 | // Create pass to convert relaxed precision instructions to half precision. |
700 | // This pass converts as many relaxed float32 arithmetic operations to half as |
701 | // possible. It converts any float32 operands to half if needed. It converts |
702 | // any resulting half precision values back to float32 as needed. No variables |
703 | // are changed. No image operations are changed. |
704 | // |
705 | // Best if run after function scope store/load and composite operation |
706 | // eliminations are run. Also best if followed by instruction simplification, |
707 | // redundancy elimination and DCE. |
708 | Optimizer::PassToken CreateConvertRelaxedToHalfPass(); |
709 | |
710 | // Create relax float ops pass. |
711 | // This pass decorates all float32 result instructions with RelaxedPrecision |
712 | // if not already so decorated. |
713 | Optimizer::PassToken CreateRelaxFloatOpsPass(); |
714 | |
715 | // Create copy propagate arrays pass. |
716 | // This pass looks to copy propagate memory references for arrays. It looks |
717 | // for specific code patterns to recognize array copies. |
718 | Optimizer::PassToken CreateCopyPropagateArraysPass(); |
719 | |
720 | // Create a vector dce pass. |
721 | // This pass looks for components of vectors that are unused, and removes them |
722 | // from the vector. Note this would still leave around lots of dead code that |
723 | // a pass of ADCE will be able to remove. |
724 | Optimizer::PassToken CreateVectorDCEPass(); |
725 | |
726 | // Create a pass to reduce the size of loads. |
727 | // This pass looks for loads of structures where only a few of its members are |
728 | // used. It replaces the loads feeding an OpExtract with an OpAccessChain and |
729 | // a load of the specific elements. |
730 | Optimizer::PassToken CreateReduceLoadSizePass(); |
731 | |
732 | // Create a pass to combine chained access chains. |
733 | // This pass looks for access chains fed by other access chains and combines |
734 | // them into a single instruction where possible. |
735 | Optimizer::PassToken CreateCombineAccessChainsPass(); |
736 | |
737 | // Create a pass to instrument bindless descriptor checking |
738 | // This pass instruments all bindless references to check that descriptor |
739 | // array indices are inbounds, and if the descriptor indexing extension is |
740 | // enabled, that the descriptor has been initialized. If the reference is |
741 | // invalid, a record is written to the debug output buffer (if space allows) |
742 | // and a null value is returned. This pass is designed to support bindless |
743 | // validation in the Vulkan validation layers. |
744 | // |
745 | // TODO(greg-lunarg): Add support for buffer references. Currently only does |
746 | // checking for image references. |
747 | // |
748 | // Dead code elimination should be run after this pass as the original, |
749 | // potentially invalid code is not removed and could cause undefined behavior, |
750 | // including crashes. It may also be beneficial to run Simplification |
751 | // (ie Constant Propagation), DeadBranchElim and BlockMerge after this pass to |
752 | // optimize instrument code involving the testing of compile-time constants. |
753 | // It is also generally recommended that this pass (and all |
754 | // instrumentation passes) be run after any legalization and optimization |
755 | // passes. This will give better analysis for the instrumentation and avoid |
756 | // potentially de-optimizing the instrument code, for example, inlining |
757 | // the debug record output function throughout the module. |
758 | // |
759 | // The instrumentation will read and write buffers in debug |
760 | // descriptor set |desc_set|. It will write |shader_id| in each output record |
761 | // to identify the shader module which generated the record. |
762 | // |input_length_enable| controls instrumentation of runtime descriptor array |
763 | // references, and |input_init_enable| controls instrumentation of descriptor |
764 | // initialization checking, both of which require input buffer support. |
765 | // |version| specifies the buffer record format. |
766 | Optimizer::PassToken CreateInstBindlessCheckPass( |
767 | uint32_t desc_set, uint32_t shader_id, bool input_length_enable = false, |
768 | bool input_init_enable = false, uint32_t version = 2); |
769 | |
770 | // Create a pass to instrument physical buffer address checking |
771 | // This pass instruments all physical buffer address references to check that |
772 | // all referenced bytes fall in a valid buffer. If the reference is |
773 | // invalid, a record is written to the debug output buffer (if space allows) |
774 | // and a null value is returned. This pass is designed to support buffer |
775 | // address validation in the Vulkan validation layers. |
776 | // |
777 | // Dead code elimination should be run after this pass as the original, |
778 | // potentially invalid code is not removed and could cause undefined behavior, |
779 | // including crashes. Instruction simplification would likely also be |
780 | // beneficial. It is also generally recommended that this pass (and all |
781 | // instrumentation passes) be run after any legalization and optimization |
782 | // passes. This will give better analysis for the instrumentation and avoid |
783 | // potentially de-optimizing the instrument code, for example, inlining |
784 | // the debug record output function throughout the module. |
785 | // |
786 | // The instrumentation will read and write buffers in debug |
787 | // descriptor set |desc_set|. It will write |shader_id| in each output record |
788 | // to identify the shader module which generated the record. |
789 | // |version| specifies the output buffer record format. |
790 | Optimizer::PassToken CreateInstBuffAddrCheckPass(uint32_t desc_set, |
791 | uint32_t shader_id, |
792 | uint32_t version = 2); |
793 | |
794 | // Create a pass to instrument OpDebugPrintf instructions. |
795 | // This pass replaces all OpDebugPrintf instructions with instructions to write |
796 | // a record containing the string id and all the specified values into a special |
797 | // printf output buffer (if space allows). This pass is designed to support |
798 | // the printf validation in the Vulkan validation layers. |
799 | // |
800 | // The instrumentation will write buffers in debug descriptor set |desc_set|. |
801 | // It will write |shader_id| in each output record to identify the shader |
802 | // module which generated the record. |
803 | Optimizer::PassToken CreateInstDebugPrintfPass(uint32_t desc_set, |
804 | uint32_t shader_id); |
805 | |
806 | // Create a pass to upgrade to the VulkanKHR memory model. |
807 | // This pass upgrades the Logical GLSL450 memory model to Logical VulkanKHR. |
808 | // Additionally, it modifies memory, image, atomic and barrier operations to |
809 | // conform to that model's requirements. |
810 | Optimizer::PassToken CreateUpgradeMemoryModelPass(); |
811 | |
812 | // Create a pass to do code sinking. Code sinking is a transformation |
813 | // where an instruction is moved into a more deeply nested construct. |
814 | Optimizer::PassToken CreateCodeSinkingPass(); |
815 | |
816 | // Create a pass to add initializers for OpVariable calls that require them |
817 | // in WebGPU. Currently this pass naively initializes variables that are |
818 | // missing an initializer with a null value. In the future it may initialize |
819 | // variables to the first value stored in them, if that is a constant. |
820 | Optimizer::PassToken CreateGenerateWebGPUInitializersPass(); |
821 | |
822 | // Create a pass to fix incorrect storage classes. In order to make code |
823 | // generation simpler, DXC may generate code where the storage classes do not |
824 | // match up correctly. This pass will fix the errors that it can. |
825 | Optimizer::PassToken CreateFixStorageClassPass(); |
826 | |
827 | // Create a pass to legalize OpVectorShuffle operands going into WebGPU. WebGPU |
828 | // forbids using 0xFFFFFFFF, which indicates an undefined result, so this pass |
829 | // converts those literals to 0. |
830 | Optimizer::PassToken CreateLegalizeVectorShufflePass(); |
831 | |
832 | // Create a pass to decompose initialized variables into a separate variable |
833 | // declaration and an initial store. |
834 | Optimizer::PassToken CreateDecomposeInitializedVariablesPass(); |
835 | |
836 | // Create a pass to attempt to split up invalid unreachable merge-blocks and |
837 | // continue-targets to legalize for WebGPU. |
838 | Optimizer::PassToken CreateSplitInvalidUnreachablePass(); |
839 | |
840 | // Creates a graphics robust access pass. |
841 | // |
842 | // This pass injects code to clamp indexed accesses to buffers and internal |
843 | // arrays, providing guarantees satisfying Vulkan's robustBufferAccess rules. |
844 | // |
845 | // TODO(dneto): Clamps coordinates and sample index for pointer calculations |
846 | // into storage images (OpImageTexelPointer). For a cube array image, it |
847 | // assumes the maximum layer count times 6 is at most 0xffffffff. |
848 | // |
849 | // NOTE: This pass will fail with a message if: |
850 | // - The module is not a Shader module. |
851 | // - The module declares VariablePointers, VariablePointersStorageBuffer, or |
852 | // RuntimeDescriptorArrayEXT capabilities. |
853 | // - The module uses an addressing model other than Logical |
854 | // - Access chain indices are wider than 64 bits. |
855 | // - Access chain index for a struct is not an OpConstant integer or is out |
856 | // of range. (The module is already invalid if that is the case.) |
857 | // - TODO(dneto): The OpImageTexelPointer coordinate component is not 32-bits |
858 | // wide. |
859 | // |
860 | // NOTE: Access chain indices are always treated as signed integers. So |
861 | // if an array has a fixed size of more than 2^31 elements, then elements |
862 | // from 2^31 and above are never accessible with a 32-bit index, |
863 | // signed or unsigned. For this case, this pass will clamp the index |
864 | // between 0 and 2^31-1, inclusive. |
865 | // Similarly, if an array has more than 2^15 elements and is accessed with |
866 | // a 16-bit index, then elements from 2^15 and above are not accessible. |
867 | // In this case, the pass will clamp the index between 0 and 2^15-1 |
868 | // inclusive. |
869 | Optimizer::PassToken CreateGraphicsRobustAccessPass(); |
870 | |
871 | // Create descriptor scalar replacement pass. |
872 | // This pass replaces every array variable |desc| that has a DescriptorSet and |
873 | // Binding decorations with a new variable for each element of the array. |
874 | // Suppose |desc| was bound at binding |b|. Then the variable corresponding to |
875 | // |desc[i]| will have binding |b+i|. The descriptor set will be the same. It |
876 | // is assumed that no other variable already has a binding that will be used by |
877 | // one of the new variables. Otherwise, the pass will generate invalid SPIR-V. |
878 | // All accesses to |desc| must be OpAccessChain instructions with a literal |
879 | // index for the first index. |
880 | Optimizer::PassToken CreateDescriptorScalarReplacementPass(); |
881 | |
882 | // Create a pass to replace all OpKill instructions with a function call to a |
883 | // function that has a single OpKill. This allows more code to be inlined. |
884 | Optimizer::PassToken CreateWrapOpKillPass(); |
885 | |
886 | // Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader, and |
887 | // VK_AMD_shader_trinary_minmax with equivalent code using core instructions and |
888 | // capabilities. |
889 | Optimizer::PassToken CreateAmdExtToKhrPass(); |
890 | |
891 | } // namespace spvtools |
892 | |
893 | #endif // INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_ |
894 | |