1//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
2//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
3#pragma once
4
5#include "BsRenderBeastPrerequisites.h"
6#include "Utility/BsModule.h"
7#include "Renderer/BsParamBlocks.h"
8#include "Renderer/BsRendererMaterial.h"
9#include "RenderAPI/BsGpuPipelineParamInfo.h"
10
11namespace bs { namespace ct
12{
13 /** @addtogroup RenderBeast
14 * @{
15 */
16
17 BS_PARAM_BLOCK_BEGIN(RadixSortParamsDef)
18 BS_PARAM_BLOCK_ENTRY(int, gBitOffset)
19 BS_PARAM_BLOCK_ENTRY(int, gTilesPerGroup)
20 BS_PARAM_BLOCK_ENTRY(int, gNumGroups)
21 BS_PARAM_BLOCK_ENTRY(int, gNumExtraTiles)
22 BS_PARAM_BLOCK_ENTRY(int, gNumExtraKeys)
23 BS_PARAM_BLOCK_END
24
25 extern RadixSortParamsDef gRadixSortParamsDef;
26
27 /** Buffers to contain both inputs and outputs for the GpuSort algorithm. */
28 struct GpuSortBuffers
29 {
30 SPtr<GpuBuffer> keys[2];
31 SPtr<GpuBuffer> values[2];
32 };
33
34 /** Zeroes out the provided buffer. Should be called on the count output buffer before executing RadixSortCountMat. */
35 class RadixSortClearMat : public RendererMaterial<RadixSortClearMat>
36 {
37 RMAT_DEF_CUSTOMIZED("RadixSortClear.bsl");
38
39 public:
40 RadixSortClearMat();
41
42 /**
43 * Executes the material, running the compute kernel.
44 *
45 * @param[in] outputCounts Pre-allocated buffer to zero out.
46 */
47 void execute(const SPtr<GpuBuffer>& outputCounts);
48
49 GpuParamBuffer mOutputParam;
50 };
51
52 /**
53 * Counts the occurrences of each digit in the input key buffer. Keys are separated into chunks per group and generated
54 * counts only account for occurrences within that specific group. The resulting count can be provided to
55 * RadixSortPrefixScanMat to generate per-digit offsets.
56 */
57 class RadixSortCountMat : public RendererMaterial<RadixSortCountMat>
58 {
59 RMAT_DEF_CUSTOMIZED("RadixSortCount.bsl");
60
61 public:
62 RadixSortCountMat();
63
64 /**
65 * Executes the material, running the compute kernel.
66 *
67 * @param[in] numGroups Number of groups to separate the key inputs in.
68 * @param[in] params Allocated and initialized parameter buffer using RadixSortParamsDef definition.
69 * @param[in] inputKeys A set of keys to sort. @p params buffer needs to be initialized with data
70 * representing this buffer.
71 * @param[in] outputCounts Pre-allocated buffer to contain the resulting per-digit counts for each group.
72 */
73 void execute(UINT32 numGroups, const SPtr<GpuParamBlockBuffer>& params, const SPtr<GpuBuffer>& inputKeys,
74 const SPtr<GpuBuffer>& outputCounts);
75
76 GpuParamBuffer mInputKeysParam;
77 GpuParamBuffer mOutputCountsParam;
78 };
79
80 /**
81 * Calculates the offsets required for sorting a set of keys. The input is expected to be generated by
82 * RadixSortCountMat. The offsets are generated per-group and let RadixSortReorderMat know at which global location to
83 * place the keys in that group.
84 */
85 class RadixSortPrefixScanMat : public RendererMaterial<RadixSortPrefixScanMat>
86 {
87 RMAT_DEF_CUSTOMIZED("RadixSortPrefixScan.bsl");
88
89 public:
90 RadixSortPrefixScanMat();
91
92 /*
93 * Executes the material, running the compute kernel.
94 *
95 * @param[in] params Allocated and initialized parameter buffer using RadixSortParamsDef definition.
96 * @param[in] inputCounts Counts as output by the RadixSortCountMat material.
97 * @param[in] outputCounts Pre-allocated buffer to contain the resulting per-digit counts for each group.
98 */
99 void execute(const SPtr<GpuParamBlockBuffer>& params, const SPtr<GpuBuffer>& inputCounts,
100 const SPtr<GpuBuffer>& outputOffsets);
101
102 GpuParamBuffer mInputCountsParam;
103 GpuParamBuffer mOutputOffsetsParam;
104 };
105
106 /**
107 * Applies offsets calculated by RadixSortPrefixScanMat in order to reorder a set of keys and/or values. The reordered
108 * values are output into a second set of key/value buffers.
109 */
110 class RadixSortReorderMat : public RendererMaterial<RadixSortReorderMat>
111 {
112 RMAT_DEF_CUSTOMIZED("RadixSortReorder.bsl");
113
114 public:
115 RadixSortReorderMat();
116
117 /*
118 * Executes the material, running the compute kernel.
119 *
120 * @param[in] numGroups Number of groups to separate the key inputs in.
121 * @param[in] params Allocated and initialized parameter buffer using RadixSortParamsDef definition.
122 * @param[in] inputOffsets Offsets as output by the RadixSortPrefixScanMat material.
123 * @param[in] buffers A set of input and output buffers. Key buffers are required, while value buffers
124 * are optional.
125 * @param[in] inputBufferIdx Determines which of the two key/value buffers are considered to be inputs. The other
126 * buffers are considered outputs and will contain the output data.
127 * @return Index of the buffer that should be used as input for the next iteration of the
128 * algorithm. (In case the keys were only partially sorted)
129 */
130 void execute(UINT32 numGroups, const SPtr<GpuParamBlockBuffer>& params, const SPtr<GpuBuffer>& inputOffsets,
131 const GpuSortBuffers& buffers, UINT32 inputBufferIdx);
132
133 GpuParamBuffer mInputOffsetsBufferParam;
134 GpuParamBuffer mInputKeysBufferParam;
135 GpuParamBuffer mInputValuesBufferParam;
136 GpuParamBuffer mOutputKeysBufferParam;
137 GpuParamBuffer mOutputValuesBufferParam;
138 };
139
140 /**
141 * Performs a parallel radix sort using the GPU. Best used for sorting large amounts of data (5000+). (CPU is likely
142 * to be more performant for smaller sets).
143 */
144 class GpuSort : public Module<GpuSort>
145 {
146 public:
147 GpuSort();
148
149 /**
150 * Sorts the input keys and outputs them in the provided buffer. Optionally also moves a set of values along with
151 * the keys.
152 *
153 * @param[in] buffers Buffers that contain the keys and values to sort, as well as space to put sorted keys
154 * and values in. Both key buffers must be provided however value buffers are optional and
155 * only required if you need to move values along with keys. All buffers must be be
156 * standard buffers with 32-bit unsigned integer elements, with random writes enabled.
157 * All buffers must be of the same size. Buffers respecting these rules can be created
158 * easily be calling createSortBuffers().
159 *
160 * Buffers at index 0 will be used as input, and final output will be a buffer at the
161 * index as returned by this method.
162 * @param[in] numKeys Number of keys to sort. This must be smaller or equal to the number of elements in the
163 * provided buffers.
164 * @param[in] keyMask Mask that controls which portion of the keys are relevant for sorting. This serves as
165 * an optimization as less bits that need to be sorted, the faster can the algorithm run.
166 * (e.g. if you only need to sort the first 16 bits of the key, provide 0xFFFF as the
167 * mask).
168 * @return Index of the buffer in @p buffers that will contain the final sorted keys and/or values.
169 */
170 UINT32 sort(const GpuSortBuffers& buffers, UINT32 numKeys, UINT32 keyMask = 0xFFFFFFFF);
171
172 /**
173 * Creates a set of buffers buffer that can be used when calling the sort() method.
174 *
175 * @param[in] numElements Number of elements the buffers should contain.
176 * @param[in] values True if the value buffers should be created. If false only key buffers will be
177 * created.
178 * @return A set of buffers to store unsorted keys (and optionally values), as well as space
179 * for the resulting sorted keys (and optionally values).
180 */
181 static GpuSortBuffers createSortBuffers(UINT32 numElements, bool values = false);
182
183 private:
184 SPtr<GpuBuffer> mHelperBuffers[2];
185 };
186
187 /* @} */
188}}
189