1 | //************************************ bs::framework - Copyright 2018 Marko Pintera **************************************// |
2 | //*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********// |
3 | #include "BsGpuParticleSimulation.h" |
4 | #include "Renderer/BsParamBlocks.h" |
5 | #include "Renderer/BsRendererMaterial.h" |
6 | #include "Renderer/BsGpuResourcePool.h" |
7 | #include "RenderAPI/BsVertexBuffer.h" |
8 | #include "RenderAPI/BsIndexBuffer.h" |
9 | #include "RenderAPI/BsGpuBuffer.h" |
10 | #include "RenderAPI/BsVertexDataDesc.h" |
11 | #include "RenderAPI/BsGpuPipelineParamInfo.h" |
12 | #include "Particles/BsVectorField.h" |
13 | #include "Particles/BsParticleDistribution.h" |
14 | #include "Math/BsVector3.h" |
15 | #include "BsRendererParticles.h" |
16 | #include "BsRendererScene.h" |
17 | #include "BsRenderBeast.h" |
18 | #include "Utility/BsGpuSort.h" |
19 | |
20 | namespace bs { namespace ct |
21 | { |
22 | BS_PARAM_BLOCK_BEGIN(GpuParticleTileVertexParamsDef) |
23 | BS_PARAM_BLOCK_ENTRY(Vector4, gUVToNDC) |
24 | BS_PARAM_BLOCK_END |
25 | |
26 | GpuParticleTileVertexParamsDef gGpuParticleTileVertexParamsDef; |
27 | |
28 | /** Material used for clearing tiles in the texture used for particle GPU simulation. */ |
29 | class GpuParticleClearMat : public RendererMaterial<GpuParticleClearMat> |
30 | { |
31 | RMAT_DEF_CUSTOMIZED("GpuParticleClear.bsl" ); |
32 | |
33 | public: |
34 | GpuParticleClearMat(); |
35 | |
36 | /** Binds the material to the pipeline, along with the @p tileUVs buffer containing locations of tiles to clear. */ |
37 | void bind(const SPtr<GpuBuffer>& tileUVs); |
38 | |
39 | private: |
40 | GpuParamBuffer mTileUVParam; |
41 | }; |
42 | |
43 | /** Material used for adding new particles into the particle state textures. */ |
44 | class GpuParticleInjectMat : public RendererMaterial<GpuParticleInjectMat> |
45 | { |
46 | RMAT_DEF("GpuParticleInject.bsl" ); |
47 | |
48 | public: |
49 | GpuParticleInjectMat(); |
50 | }; |
51 | |
52 | /** Material used for adding new curves into the curve texture. */ |
53 | class GpuParticleCurveInjectMat : public RendererMaterial<GpuParticleCurveInjectMat> |
54 | { |
55 | RMAT_DEF("GpuParticleCurveInject.bsl" ); |
56 | |
57 | public: |
58 | GpuParticleCurveInjectMat(); |
59 | }; |
60 | |
61 | BS_PARAM_BLOCK_BEGIN(VectorFieldParamsDef) |
62 | BS_PARAM_BLOCK_ENTRY(Vector3, gFieldBounds) |
63 | BS_PARAM_BLOCK_ENTRY(float, gFieldIntensity) |
64 | BS_PARAM_BLOCK_ENTRY(Vector3, gFieldTiling) |
65 | BS_PARAM_BLOCK_ENTRY(float, gFieldTightness) |
66 | BS_PARAM_BLOCK_ENTRY(Matrix4, gWorldToField) |
67 | BS_PARAM_BLOCK_ENTRY(Matrix3, gFieldToWorld) |
68 | BS_PARAM_BLOCK_END |
69 | |
70 | VectorFieldParamsDef gVectorFieldParamsDef; |
71 | |
72 | BS_PARAM_BLOCK_BEGIN(GpuParticleDepthCollisionParamsDef) |
73 | BS_PARAM_BLOCK_ENTRY(float, gCollisionRange) |
74 | BS_PARAM_BLOCK_ENTRY(float, gRestitution) |
75 | BS_PARAM_BLOCK_ENTRY(float, gDampening) |
76 | BS_PARAM_BLOCK_ENTRY(float, gCollisionRadiusScale) |
77 | BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveOffset) |
78 | BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveScale) |
79 | BS_PARAM_BLOCK_END |
80 | |
81 | GpuParticleDepthCollisionParamsDef gGpuParticleDepthCollisionParamsDef; |
82 | |
83 | BS_PARAM_BLOCK_BEGIN(GpuParticleSimulateParamsDef) |
84 | BS_PARAM_BLOCK_ENTRY(INT32, gNumVectorFields) |
85 | BS_PARAM_BLOCK_ENTRY(INT32, gNumIterations) |
86 | BS_PARAM_BLOCK_ENTRY(float, gDT) |
87 | BS_PARAM_BLOCK_ENTRY(float, gDrag) |
88 | BS_PARAM_BLOCK_ENTRY(Vector3, gAcceleration) |
89 | BS_PARAM_BLOCK_END |
90 | |
91 | GpuParticleSimulateParamsDef gGpuParticleSimulateParamsDef; |
92 | |
93 | /** |
94 | * Material used for performing GPU particle simulation. State is read from the provided input textures and output |
95 | * into the output textures bound as render targets. |
96 | */ |
97 | class GpuParticleSimulateMat : public RendererMaterial<GpuParticleSimulateMat> |
98 | { |
99 | RMAT_DEF_CUSTOMIZED("GpuParticleSimulate.bsl" ); |
100 | |
101 | /** Helper method used for initializing variations of this material. */ |
102 | template<UINT32 DEPTH_COLLISIONS> |
103 | static const ShaderVariation& getVariation() |
104 | { |
105 | static ShaderVariation variation = ShaderVariation( |
106 | { |
107 | ShaderVariation::Param("DEPTH_COLLISIONS" , DEPTH_COLLISIONS) |
108 | }); |
109 | |
110 | return variation; |
111 | } |
112 | public: |
113 | GpuParticleSimulateMat(); |
114 | |
115 | /** Binds the material to the pipeline along with any frame-static parameters. */ |
116 | void bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams, |
117 | const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams); |
118 | |
119 | /** |
120 | * Binds parameters that change with every material dispatch. |
121 | * |
122 | * @param[in] tileUVs Sets the UV offsets of individual tiles for a particular particle system |
123 | * that's being rendered. |
124 | * @param[in] perObjectParams General purpose particle system parameters. |
125 | * @param[in] vectorFieldParams Information about the currently bound vector field, if any. |
126 | * @param[in] vectorFieldTexture 3D texture representing the vector field, or null if none. |
127 | * @param[in] depthCollisionParams Parameter buffer for controlling depth buffer collisions, if enabled. |
128 | * |
129 | */ |
130 | void bindPerCallParams(const SPtr<GpuBuffer>& tileUVs, const SPtr<GpuParamBlockBuffer>& perObjectParams, |
131 | const SPtr<GpuParamBlockBuffer>& vectorFieldParams, const SPtr<Texture>& vectorFieldTexture, |
132 | const SPtr<GpuParamBlockBuffer>& depthCollisionParams); |
133 | |
134 | /** Returns the material variation matching the provided parameters. */ |
135 | static GpuParticleSimulateMat* getVariation(bool depthCollisions, bool localSpace); |
136 | private: |
137 | GpuParamBuffer mTileUVParam; |
138 | GpuParamTexture mPosAndTimeTexParam; |
139 | GpuParamTexture mVelocityTexParam; |
140 | GpuParamTexture mSizeRotationTexParam; |
141 | GpuParamTexture mCurvesTexParam; |
142 | GpuParamTexture mDepthTexParam; |
143 | GpuParamTexture mNormalsTexParam; |
144 | GpuParamBinding mParamsBinding; |
145 | GpuParamBinding mPerCameraBinding; |
146 | GpuParamBinding mPerObjectBinding; |
147 | |
148 | GpuParamBinding mVectorFieldBinding; |
149 | GpuParamTexture mVectorFieldTexParam; |
150 | |
151 | GpuParamBinding mDepthCollisionBinding; |
152 | |
153 | bool mSupportsDepthCollisions; |
154 | }; |
155 | |
156 | BS_PARAM_BLOCK_BEGIN(GpuParticleBoundsParamsDef) |
157 | BS_PARAM_BLOCK_ENTRY(UINT32, gIterationsPerGroup) |
158 | BS_PARAM_BLOCK_ENTRY(UINT32, gNumExtraIterations) |
159 | BS_PARAM_BLOCK_ENTRY(UINT32, gNumParticles) |
160 | BS_PARAM_BLOCK_END |
161 | |
162 | GpuParticleBoundsParamsDef gGpuParticleBoundsParamsDef; |
163 | |
164 | /** Material used for calculating particle system bounds. */ |
165 | class GpuParticleBoundsMat : public RendererMaterial<GpuParticleBoundsMat> |
166 | { |
167 | static constexpr UINT32 NUM_THREADS = 64; |
168 | |
169 | RMAT_DEF_CUSTOMIZED("GpuParticleBounds.bsl" ); |
170 | |
171 | public: |
172 | GpuParticleBoundsMat(); |
173 | |
174 | /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */ |
175 | void bind(const SPtr<Texture>& positionAndTime); |
176 | |
177 | /** |
178 | * Executes the material, calculating the bounds. Note that this function reads back from the GPU and should not |
179 | * be called at runtime. |
180 | * |
181 | * @param[in] indices Buffer containing offsets into the position texture for each particle. |
182 | * @param[in] numParticles Number of particle in the provided indices buffer. |
183 | */ |
184 | AABox execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles); |
185 | |
186 | private: |
187 | GpuParamBuffer mParticleIndicesParam; |
188 | GpuParamBuffer mOutputParam; |
189 | GpuParamTexture mPosAndTimeTexParam; |
190 | SPtr<GpuParamBlockBuffer> mInputBuffer; |
191 | }; |
192 | |
193 | BS_PARAM_BLOCK_BEGIN(GpuParticleSortPrepareParamDef) |
194 | BS_PARAM_BLOCK_ENTRY(INT32, gIterationsPerGroup) |
195 | BS_PARAM_BLOCK_ENTRY(INT32, gNumExtraIterations) |
196 | BS_PARAM_BLOCK_ENTRY(INT32, gNumParticles) |
197 | BS_PARAM_BLOCK_ENTRY(INT32, gOutputOffset) |
198 | BS_PARAM_BLOCK_ENTRY(INT32, gSystemKey) |
199 | BS_PARAM_BLOCK_ENTRY(Vector3, gLocalViewOrigin) |
200 | BS_PARAM_BLOCK_END |
201 | |
202 | GpuParticleSortPrepareParamDef gGpuParticleSortPrepareParamDef; |
203 | |
204 | /** Material used for preparing key/values buffers used for particle sorting. */ |
205 | class GpuParticleSortPrepareMat : public RendererMaterial<GpuParticleSortPrepareMat> |
206 | { |
207 | static constexpr UINT32 NUM_THREADS = 64; |
208 | |
209 | RMAT_DEF_CUSTOMIZED("GpuParticleSortPrepare.bsl" ); |
210 | |
211 | public: |
212 | GpuParticleSortPrepareMat(); |
213 | |
214 | /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */ |
215 | void bind(const SPtr<Texture>& positionAndTime); |
216 | |
217 | /** |
218 | * Executes the material, generating sort data for a particular particle system and injecting it into the specified |
219 | * location in the key and index buffers. |
220 | * |
221 | * @param[in] system System whose particles to insert into the sort key/index buffers. |
222 | * @param[in] systemIdx Sequential index of the system to insert into the sort buffers. |
223 | * @param[in] offset Offset into the key/index buffer at which to insert the sort data. |
224 | * @param[in] viewOrigin View origin to use for determining sorting keys, in world space. |
225 | * @param[out] outKeys Pre-allocated buffer that will receive the keys used for sorting. The buffer must |
226 | * be GPU writable and use a 1x 32-bit integer format. |
227 | * @param[out] outIndices Pre-allocated buffer that will receive the indices to be sorted. The buffer must |
228 | * be GPU writable and use a 2x 16-bit integer format. Must have the same capacity |
229 | * as @p outKeys. |
230 | * @return Number of particle that were written to the buffers. |
231 | */ |
232 | UINT32 execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin, UINT32 offset, |
233 | const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices); |
234 | |
235 | private: |
236 | GpuParamBuffer mInputIndicesParam; |
237 | GpuParamBuffer mOutputKeysParam; |
238 | GpuParamBuffer mOutputIndicesParam; |
239 | GpuParamTexture mPosAndTimeTexParam; |
240 | SPtr<GpuParamBlockBuffer> mInputBuffer; |
241 | }; |
242 | |
243 | static constexpr UINT32 TILES_PER_INSTANCE = 8; |
244 | static constexpr UINT32 PARTICLES_PER_INSTANCE = TILES_PER_INSTANCE * GpuParticleResources::PARTICLES_PER_TILE; |
245 | |
246 | /** Contains a variety of helper buffers and declarations used for GPU particle simulation. */ |
247 | struct GpuParticleHelperBuffers |
248 | { |
249 | static constexpr UINT32 NUM_SCRATCH_TILES = 512; |
250 | static constexpr UINT32 NUM_SCRATCH_PARTICLES = 4096; |
251 | |
252 | GpuParticleHelperBuffers(); |
253 | |
254 | SPtr<VertexBuffer> tileUVs; |
255 | SPtr<VertexBuffer> particleUVs; |
256 | SPtr<IndexBuffer> spriteIndices; |
257 | SPtr<VertexDeclaration> tileVertexDecl; |
258 | SPtr<VertexDeclaration> injectVertexDecl; |
259 | SPtr<GpuBuffer> tileScratch; |
260 | SPtr<VertexBuffer> injectScratch; |
261 | }; |
262 | |
263 | GpuParticleResources::GpuParticleResources() |
264 | { |
265 | // Allocate textures |
266 | TEXTURE_DESC positionAndTimeDesc; |
267 | positionAndTimeDesc.format = PF_RGBA32F; |
268 | positionAndTimeDesc.width = TEX_SIZE; |
269 | positionAndTimeDesc.height = TEX_SIZE; |
270 | positionAndTimeDesc.usage = TU_RENDERTARGET; |
271 | |
272 | TEXTURE_DESC velocityDesc; |
273 | velocityDesc.format = PF_RGBA16F; |
274 | velocityDesc.width = TEX_SIZE; |
275 | velocityDesc.height = TEX_SIZE; |
276 | velocityDesc.usage = TU_RENDERTARGET; |
277 | |
278 | for (UINT32 i = 0; i < 2; i++) |
279 | { |
280 | mStateTextures[i].positionAndTimeTex = Texture::create(positionAndTimeDesc); |
281 | mStateTextures[i].velocityTex = Texture::create(velocityDesc); |
282 | } |
283 | |
284 | TEXTURE_DESC sizeAndRotationDesc; |
285 | sizeAndRotationDesc.format = PF_RGBA16F; |
286 | sizeAndRotationDesc.width = TEX_SIZE; |
287 | sizeAndRotationDesc.height = TEX_SIZE; |
288 | sizeAndRotationDesc.usage = TU_RENDERTARGET; |
289 | |
290 | mStaticTextures.sizeAndRotationTex = Texture::create(sizeAndRotationDesc); |
291 | |
292 | RENDER_TEXTURE_DESC staticRtDesc; |
293 | staticRtDesc.colorSurfaces[0].texture = mStaticTextures.sizeAndRotationTex; |
294 | |
295 | for (UINT32 i = 0; i < 2; i++) |
296 | { |
297 | RENDER_TEXTURE_DESC simulationRTDesc; |
298 | simulationRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex; |
299 | simulationRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex; |
300 | |
301 | mSimulateRT[i] = RenderTexture::create(simulationRTDesc); |
302 | |
303 | RENDER_TEXTURE_DESC injectRTDesc; |
304 | injectRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex; |
305 | injectRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex; |
306 | injectRTDesc.colorSurfaces[2].texture = mStaticTextures.sizeAndRotationTex; |
307 | mInjectRT[i] = RenderTexture::create(injectRTDesc); |
308 | } |
309 | |
310 | // Allocate the buffer containing keys used for sorting |
311 | GPU_BUFFER_DESC sortKeysBufferDesc; |
312 | sortKeysBufferDesc.type = GBT_STANDARD; |
313 | sortKeysBufferDesc.format = BF_32X1U; |
314 | sortKeysBufferDesc.elementCount = TEX_SIZE * TEX_SIZE; |
315 | sortKeysBufferDesc.usage = GBU_LOADSTORE; |
316 | |
317 | mSortBuffers.keys[0] = GpuBuffer::create(sortKeysBufferDesc); |
318 | mSortBuffers.keys[1] = GpuBuffer::create(sortKeysBufferDesc); |
319 | |
320 | // Allocate the buffer containing sorted particle indices |
321 | GPU_BUFFER_DESC sortedIndicesBufferDesc; |
322 | sortedIndicesBufferDesc.type = GBT_STANDARD; |
323 | sortedIndicesBufferDesc.format = BF_16X2U; |
324 | sortedIndicesBufferDesc.elementCount = TEX_SIZE * TEX_SIZE; |
325 | sortedIndicesBufferDesc.usage = GBU_LOADSTORE; |
326 | |
327 | mSortedIndices[0] = GpuBuffer::create(sortedIndicesBufferDesc); |
328 | mSortedIndices[1] = GpuBuffer::create(sortedIndicesBufferDesc); |
329 | |
330 | mSortBuffers.values[0] = mSortedIndices[0]->getView(GBT_STANDARD, BF_32X1U); |
331 | mSortBuffers.values[1] = mSortedIndices[1]->getView(GBT_STANDARD, BF_32X1U); |
332 | |
333 | // Clear the free tile linked list |
334 | for (UINT32 i = 0; i < TILE_COUNT; i++) |
335 | mFreeTiles[i] = TILE_COUNT - i - 1; |
336 | } |
337 | |
338 | UINT32 GpuParticleResources::allocTile() |
339 | { |
340 | if (mNumFreeTiles > 0) |
341 | { |
342 | mNumFreeTiles--; |
343 | return mFreeTiles[mNumFreeTiles]; |
344 | } |
345 | |
346 | return (UINT32)-1; |
347 | } |
348 | |
349 | void GpuParticleResources::freeTile(UINT32 tile) |
350 | { |
351 | assert(tile < TILE_COUNT); |
352 | assert(mNumFreeTiles < TILE_COUNT); |
353 | |
354 | mFreeTiles[mNumFreeTiles] = tile; |
355 | mNumFreeTiles++; |
356 | } |
357 | |
358 | Vector2I GpuParticleResources::getTileOffset(UINT32 tileId) |
359 | { |
360 | return Vector2I( |
361 | (tileId % TILE_COUNT_1D) * TILE_SIZE, |
362 | (tileId / TILE_COUNT_1D) * TILE_SIZE); |
363 | } |
364 | |
365 | Vector2 GpuParticleResources::getTileCoords(UINT32 tileId) |
366 | { |
367 | return Vector2( |
368 | Math::frac(tileId / (float)TILE_COUNT_1D), |
369 | (UINT32)(tileId / TILE_COUNT_1D) / (float)TILE_COUNT_1D); |
370 | } |
371 | |
372 | Vector2I GpuParticleResources::getParticleOffset(UINT32 subTileId) |
373 | { |
374 | return Vector2I( |
375 | subTileId % TILE_SIZE, |
376 | subTileId / TILE_SIZE); |
377 | } |
378 | |
379 | Vector2 GpuParticleResources::getParticleCoords(UINT32 subTileId) |
380 | { |
381 | const Vector2I tileOffset = getParticleOffset(subTileId); |
382 | return tileOffset / (float)TEX_SIZE; |
383 | } |
384 | |
385 | const SPtr<GpuBuffer>& GpuParticleResources::getSortedIndices() const |
386 | { |
387 | return mSortedIndices[mSortedIndicesBufferIdx]; |
388 | } |
389 | |
390 | GpuParticleHelperBuffers::GpuParticleHelperBuffers() |
391 | { |
392 | // Prepare vertex declaration for rendering tiles |
393 | SPtr<VertexDataDesc> tileVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
394 | tileVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD); |
395 | |
396 | tileVertexDecl = VertexDeclaration::create(tileVertexDesc); |
397 | |
398 | // Prepare vertex declaration for injecting new particles |
399 | SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
400 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Position & time, per instance |
401 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 1, 0, 1); // Velocity, per instance |
402 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 0, 1); // Size, per instance |
403 | injectVertexDesc->addVertElem(VET_FLOAT1, VES_TEXCOORD, 3, 0, 1); // Rotation, per instance |
404 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 4, 0, 1); // Data UV, per instance |
405 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 5, 1); // Sprite texture coordinates |
406 | |
407 | injectVertexDecl = VertexDeclaration::create(injectVertexDesc); |
408 | |
409 | // Prepare UV coordinates for rendering tiles |
410 | VERTEX_BUFFER_DESC tileUVBufferDesc; |
411 | tileUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4; |
412 | tileUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride(); |
413 | |
414 | tileUVs = VertexBuffer::create(tileUVBufferDesc); |
415 | |
416 | auto* const tileUVData = (Vector2*)tileUVs->lock(GBL_WRITE_ONLY_DISCARD); |
417 | const float tileUVScale = GpuParticleResources::TILE_SIZE / (float)GpuParticleResources::TEX_SIZE; |
418 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
419 | { |
420 | tileUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * tileUVScale; |
421 | tileUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * tileUVScale; |
422 | tileUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * tileUVScale; |
423 | tileUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * tileUVScale; |
424 | } |
425 | |
426 | tileUVs->unlock(); |
427 | |
428 | // Prepare UV coordinates for rendering particles |
429 | VERTEX_BUFFER_DESC particleUVBufferDesc; |
430 | particleUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4; |
431 | particleUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride(); |
432 | |
433 | particleUVs = VertexBuffer::create(particleUVBufferDesc); |
434 | |
435 | auto* const particleUVData = (Vector2*)particleUVs->lock(GBL_WRITE_ONLY_DISCARD); |
436 | const float particleUVScale = 1.0f / (float)GpuParticleResources::TEX_SIZE; |
437 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
438 | { |
439 | particleUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * particleUVScale; |
440 | particleUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * particleUVScale; |
441 | particleUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * particleUVScale; |
442 | particleUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * particleUVScale; |
443 | } |
444 | |
445 | particleUVs->unlock(); |
446 | |
447 | // Prepare indices for rendering tiles & particles |
448 | INDEX_BUFFER_DESC spriteIndexBufferDesc; |
449 | spriteIndexBufferDesc.indexType = IT_16BIT; |
450 | spriteIndexBufferDesc.numIndices = PARTICLES_PER_INSTANCE * 6; |
451 | |
452 | spriteIndices = IndexBuffer::create(spriteIndexBufferDesc); |
453 | |
454 | auto* const indices = (UINT16*)spriteIndices->lock(GBL_WRITE_ONLY_DISCARD); |
455 | |
456 | const Conventions& rapiConventions = gCaps().conventions; |
457 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
458 | { |
459 | // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't |
460 | // get culled. |
461 | if (rapiConventions.uvYAxis == Conventions::Axis::Up) |
462 | { |
463 | indices[i * 6 + 0] = i * 4 + 2; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 0; |
464 | indices[i * 6 + 3] = i * 4 + 3; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 0; |
465 | } |
466 | else |
467 | { |
468 | indices[i * 6 + 0] = i * 4 + 0; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 2; |
469 | indices[i * 6 + 3] = i * 4 + 0; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 3; |
470 | } |
471 | } |
472 | |
473 | spriteIndices->unlock(); |
474 | |
475 | // Prepare a scratch buffer we'll use to clear tiles |
476 | GPU_BUFFER_DESC tileScratchBufferDesc; |
477 | tileScratchBufferDesc.type = GBT_STANDARD; |
478 | tileScratchBufferDesc.format = BF_32X2F; |
479 | tileScratchBufferDesc.elementCount = NUM_SCRATCH_TILES; |
480 | tileScratchBufferDesc.usage = GBU_DYNAMIC; |
481 | |
482 | tileScratch = GpuBuffer::create(tileScratchBufferDesc); |
483 | |
484 | // Prepare a scratch buffer we'll use to inject new particles |
485 | VERTEX_BUFFER_DESC injectScratchBufferDesc; |
486 | injectScratchBufferDesc.numVerts = NUM_SCRATCH_PARTICLES; |
487 | injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0); |
488 | injectScratchBufferDesc.usage = GBU_DYNAMIC; |
489 | |
490 | injectScratch = VertexBuffer::create(injectScratchBufferDesc); |
491 | } |
492 | |
493 | GpuParticleSystem::GpuParticleSystem(ParticleSystem* parent) |
494 | :mParent(parent) |
495 | { |
496 | GpuParticleSimulation::instance().addSystem(this); |
497 | } |
498 | |
499 | GpuParticleSystem::~GpuParticleSystem() |
500 | { |
501 | GpuParticleSimulation::instance().removeSystem(this); |
502 | } |
503 | |
504 | bool GpuParticleSystem::allocateTiles(GpuParticleResources& resources, Vector<GpuParticle>& newParticles, |
505 | Vector<UINT32>& newTiles) |
506 | { |
507 | GpuParticleTile cachedTile = mLastAllocatedTile == (UINT32)-1 ? GpuParticleTile() : mTiles[mLastAllocatedTile]; |
508 | Vector2 tileUV = GpuParticleResources::getTileCoords(cachedTile.id); |
509 | |
510 | bool newTilesAdded = false; |
511 | for (UINT32 i = 0; i < (UINT32)newParticles.size(); i++) |
512 | { |
513 | UINT32 tileIdx; |
514 | |
515 | // Use the last allocated tile if there's room |
516 | if (cachedTile.numFreeParticles > 0) |
517 | tileIdx = mLastAllocatedTile; |
518 | else |
519 | { |
520 | // Otherwise try to find an inactive tile |
521 | if (mNumActiveTiles < (UINT32)mTiles.size()) |
522 | { |
523 | tileIdx = mActiveTiles.find(false); |
524 | mActiveTiles[tileIdx] = true; |
525 | } |
526 | // And finally just allocate a new tile if no room elsewhere |
527 | else |
528 | { |
529 | const UINT32 tileId = resources.allocTile(); |
530 | if (tileId == (UINT32)-1) |
531 | return newTilesAdded; // Out of space in the texture |
532 | |
533 | GpuParticleTile newTile; |
534 | newTile.id = tileId; |
535 | newTile.lifetime = 0.0f; |
536 | |
537 | tileIdx = (UINT32)mTiles.size(); |
538 | newTiles.push_back(newTile.id); |
539 | mTiles.push_back(newTile); |
540 | mActiveTiles.add(true); |
541 | |
542 | newTilesAdded = true; |
543 | } |
544 | |
545 | mLastAllocatedTile = tileIdx; |
546 | tileUV = GpuParticleResources::getTileCoords(mTiles[tileIdx].id); |
547 | mTiles[tileIdx].numFreeParticles = GpuParticleResources::PARTICLES_PER_TILE; |
548 | |
549 | cachedTile = mTiles[tileIdx]; |
550 | mNumActiveTiles++; |
551 | } |
552 | |
553 | GpuParticleTile& tile = mTiles[tileIdx]; |
554 | GpuParticle& particle = newParticles[i]; |
555 | |
556 | const UINT32 tileParticleIdx = GpuParticleResources::PARTICLES_PER_TILE - tile.numFreeParticles; |
557 | particle.dataUV = tileUV + GpuParticleResources::getParticleCoords(tileParticleIdx); |
558 | |
559 | tile.numFreeParticles--; |
560 | tile.lifetime = std::max(tile.lifetime, mTime + particle.lifetime); |
561 | |
562 | cachedTile.numFreeParticles--; |
563 | } |
564 | |
565 | return newTilesAdded; |
566 | } |
567 | |
568 | void GpuParticleSystem::detectInactiveTiles() |
569 | { |
570 | mNumActiveTiles = 0; |
571 | for (UINT32 i = 0; i < (UINT32)mTiles.size(); i++) |
572 | { |
573 | if (mTiles[i].lifetime >= mTime) |
574 | { |
575 | mNumActiveTiles++; |
576 | continue; |
577 | } |
578 | |
579 | mActiveTiles[i] = false; |
580 | |
581 | if (mLastAllocatedTile == i) |
582 | mLastAllocatedTile = (UINT32)-1; |
583 | } |
584 | } |
585 | |
586 | bool GpuParticleSystem::freeInactiveTiles(GpuParticleResources& resources) |
587 | { |
588 | const UINT32 numFreeTiles = (UINT32)mTiles.size() - mNumActiveTiles; |
589 | for(UINT32 i = 0; i < numFreeTiles; i++) |
590 | { |
591 | const UINT32 freeIdx = mActiveTiles.find(false); |
592 | assert(freeIdx != (UINT32)-1); |
593 | |
594 | const UINT32 lastIdx = (UINT32)mTiles.size() - 1; |
595 | |
596 | if (freeIdx != lastIdx) |
597 | { |
598 | std::swap(mTiles[freeIdx], mTiles[lastIdx]); |
599 | std::swap(mActiveTiles[freeIdx], mActiveTiles[lastIdx]); |
600 | } |
601 | |
602 | resources.freeTile(mTiles[lastIdx].id); |
603 | |
604 | mTiles.erase(mTiles.end() - 1); |
605 | mActiveTiles.remove(lastIdx); |
606 | } |
607 | |
608 | // Tile order changed so this might no longer be valid |
609 | if (numFreeTiles > 0) |
610 | mLastAllocatedTile = (UINT32)-1; |
611 | |
612 | return numFreeTiles > 0; |
613 | } |
614 | |
615 | void GpuParticleSystem::updateGpuBuffers() |
616 | { |
617 | const auto numTiles = (UINT32)mTiles.size(); |
618 | const UINT32 numTilesToAllocates = Math::divideAndRoundUp(numTiles, TILES_PER_INSTANCE) * TILES_PER_INSTANCE; |
619 | |
620 | // Tile offsets buffer |
621 | if(numTiles > 0) |
622 | { |
623 | GPU_BUFFER_DESC tilesBufferDesc; |
624 | tilesBufferDesc.type = GBT_STANDARD; |
625 | tilesBufferDesc.format = BF_32X2F; |
626 | tilesBufferDesc.elementCount = numTilesToAllocates; |
627 | tilesBufferDesc.usage = GBU_DYNAMIC; |
628 | |
629 | mTileUVs = GpuBuffer::create(tilesBufferDesc); |
630 | |
631 | auto* tileUVs = (Vector2*)mTileUVs->lock(GBL_WRITE_ONLY_NO_OVERWRITE); |
632 | for (UINT32 i = 0; i < numTiles; i++) |
633 | tileUVs[i] = GpuParticleResources::getTileCoords(mTiles[i].id); |
634 | |
635 | for (UINT32 i = numTiles; i < numTilesToAllocates; i++) |
636 | tileUVs[i] = Vector2(2.0f, 2.0f); // Out of range |
637 | |
638 | mTileUVs->unlock(); |
639 | } |
640 | |
641 | // Particle data offsets |
642 | const UINT32 numParticles = numTiles * GpuParticleResources::PARTICLES_PER_TILE; |
643 | |
644 | if(numParticles > 0) |
645 | { |
646 | GPU_BUFFER_DESC particleUVDesc; |
647 | particleUVDesc.type = GBT_STANDARD; |
648 | particleUVDesc.format = BF_16X2U; |
649 | particleUVDesc.elementCount = numParticles; |
650 | particleUVDesc.usage = GBU_DYNAMIC; |
651 | |
652 | mParticleIndices = GpuBuffer::create(particleUVDesc); |
653 | auto* particleIndices = (UINT32*)mParticleIndices->lock(GBL_WRITE_ONLY_NO_OVERWRITE); |
654 | |
655 | UINT32 idx = 0; |
656 | for (UINT32 i = 0; i < numTiles; i++) |
657 | { |
658 | const Vector2I tileOffset = GpuParticleResources::getTileOffset(mTiles[i].id); |
659 | for (UINT32 y = 0; y < GpuParticleResources::TILE_SIZE; y++) |
660 | { |
661 | for (UINT32 x = 0; x < GpuParticleResources::TILE_SIZE; x++) |
662 | { |
663 | const Vector2I offset = tileOffset + Vector2I(x, y); |
664 | particleIndices[idx++] = (offset.x & 0xFFFF) | (offset.y << 16); |
665 | } |
666 | } |
667 | } |
668 | |
669 | mParticleIndices->unlock(); |
670 | } |
671 | } |
672 | |
673 | void GpuParticleSystem::advanceTime(float dt) |
674 | { |
675 | const ParticleSystemSettings& settings = mParent->getSettings(); |
676 | |
677 | float timeStep; |
678 | mTime = bs::ParticleSystem::_advanceTime(mTime, dt, settings.duration, settings.isLooping, timeStep); |
679 | } |
680 | |
681 | AABox GpuParticleSystem::getBounds() const |
682 | { |
683 | const ParticleSystemSettings& settings = mParent->getSettings(); |
684 | |
685 | if(settings.useAutomaticBounds) |
686 | return AABox::INF_BOX; |
687 | |
688 | return settings.customBounds; |
689 | } |
690 | |
691 | struct GpuParticleSimulation::Pimpl |
692 | { |
693 | GpuParticleResources resources; |
694 | GpuParticleHelperBuffers helperBuffers; |
695 | SPtr<GpuParamBlockBuffer> vectorFieldParams; |
696 | SPtr<GpuParamBlockBuffer> depthCollisionParams; |
697 | SPtr<GpuParamBlockBuffer> simulationParams; |
698 | UnorderedSet<GpuParticleSystem*> systems; |
699 | }; |
700 | |
701 | GpuParticleSimulation::GpuParticleSimulation() |
702 | :m(bs_new<Pimpl>()) |
703 | { |
704 | m->vectorFieldParams = gVectorFieldParamsDef.createBuffer(); |
705 | m->depthCollisionParams = gGpuParticleDepthCollisionParamsDef.createBuffer(); |
706 | m->simulationParams = gGpuParticleSimulateParamsDef.createBuffer(); |
707 | } |
708 | |
709 | GpuParticleSimulation::~GpuParticleSimulation() |
710 | { |
711 | bs_delete(m); |
712 | } |
713 | |
714 | void GpuParticleSimulation::addSystem(GpuParticleSystem* system) |
715 | { |
716 | m->systems.insert(system); |
717 | } |
718 | |
719 | void GpuParticleSimulation::removeSystem(GpuParticleSystem* system) |
720 | { |
721 | m->systems.erase(system); |
722 | } |
723 | |
724 | void GpuParticleSimulation::simulate(const SceneInfo& sceneInfo, const ParticlePerFrameData* simData, |
725 | const SPtr<GpuParamBlockBuffer>& viewParams, const GBufferTextures& gbuffer, float dt) |
726 | { |
727 | m->resources.swap(); |
728 | m->resources.getCurveTexture().applyChanges(); |
729 | |
730 | Vector<UINT32> newTiles; |
731 | Vector<GpuParticle> allNewParticles; |
732 | for (auto& entry : m->systems) |
733 | { |
734 | entry->detectInactiveTiles(); |
735 | |
736 | bool tilesDirty = false; |
737 | const auto iterFind = simData->gpuData.find(entry->getParent()->getId()); |
738 | if(iterFind != simData->gpuData.end()) |
739 | { |
740 | Vector<GpuParticle>& newParticles = iterFind->second->particles; |
741 | tilesDirty = entry->allocateTiles(m->resources, newParticles, newTiles); |
742 | |
743 | allNewParticles.insert(allNewParticles.end(), newParticles.begin(), newParticles.end()); |
744 | } |
745 | |
746 | entry->advanceTime(dt); |
747 | tilesDirty |= entry->freeInactiveTiles(m->resources); |
748 | |
749 | if (tilesDirty) |
750 | entry->updateGpuBuffers(); |
751 | } |
752 | |
753 | RenderAPI& rapi = RenderAPI::instance(); |
754 | rapi.setRenderTarget(m->resources.getInjectTarget()); |
755 | |
756 | clearTiles(newTiles); |
757 | injectParticles(allNewParticles); |
758 | |
759 | // Simulate |
760 | // TODO - Run multiple iterations for more stable simulation at lower/erratic framerates |
761 | gGpuParticleSimulateParamsDef.gDT.set(m->simulationParams, dt); |
762 | gGpuParticleSimulateParamsDef.gNumIterations.set(m->simulationParams, 1); |
763 | |
764 | rapi.setRenderTarget(m->resources.getSimulationTarget()); |
765 | rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl); |
766 | |
767 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs }; |
768 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
769 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
770 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
771 | |
772 | enum class SimType { Normal, DepthCollisionsWorld, DepthCollisionsLocal, Count }; |
773 | |
774 | for(UINT32 i = 0; i < (UINT32)SimType::Count; i++) |
775 | { |
776 | const SimType type = (SimType)i; |
777 | const bool simulateDepthCollisions = type == SimType::DepthCollisionsWorld || |
778 | type == SimType::DepthCollisionsLocal; |
779 | const bool localSpace = type == SimType::DepthCollisionsLocal; |
780 | |
781 | GpuParticleSimulateMat* simulateMat = GpuParticleSimulateMat::getVariation(simulateDepthCollisions, localSpace); |
782 | simulateMat->bindGlobal(m->resources, viewParams, gbuffer.depth, gbuffer.normals, m->simulationParams); |
783 | |
784 | for (auto& entry : m->systems) |
785 | { |
786 | if (entry->getNumTiles() == 0) |
787 | continue; |
788 | |
789 | ParticleSystem* parentSystem = entry->getParent(); |
790 | |
791 | const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings(); |
792 | if(simSettings.depthCollision.enabled != simulateDepthCollisions) |
793 | continue; |
794 | |
795 | if(simulateDepthCollisions) |
796 | { |
797 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
798 | bool isLocal = settings.simulationSpace == ParticleSimulationSpace::Local; |
799 | if(isLocal != localSpace) |
800 | continue; |
801 | } |
802 | |
803 | const RendererParticles& rendererParticles = sceneInfo.particleSystems[parentSystem->getRendererId()]; |
804 | |
805 | prepareBuffers(entry, rendererParticles); |
806 | |
807 | SPtr<Texture> vfTexture; |
808 | if (simSettings.vectorField.vectorField) |
809 | vfTexture = simSettings.vectorField.vectorField->getTexture(); |
810 | |
811 | simulateMat->bindPerCallParams(entry->getTileUVs(), rendererParticles.perObjectParamBuffer, |
812 | m->vectorFieldParams, vfTexture, m->depthCollisionParams); |
813 | |
814 | const UINT32 tileCount = entry->getNumTiles(); |
815 | const UINT32 numInstances = Math::divideAndRoundUp(tileCount, TILES_PER_INSTANCE); |
816 | rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances); |
817 | } |
818 | } |
819 | } |
820 | |
821 | void GpuParticleSimulation::sort(const RendererView& view) |
822 | { |
823 | const bool supportsCompute = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop; |
824 | if(!supportsCompute) |
825 | return; |
826 | |
827 | // Make sure that the position texture isn't bound for rendering |
828 | RenderAPI& rapi = RenderAPI::instance(); |
829 | rapi.setRenderTarget(nullptr); |
830 | |
831 | const Vector3& viewOrigin = view.getProperties().viewOrigin; |
832 | |
833 | GpuParticleSortPrepareMat* prepareMat = GpuParticleSortPrepareMat::get(); |
834 | prepareMat->bind(m->resources.getCurrentState().positionAndTimeTex); |
835 | |
836 | UINT32 systemIdx = 0; |
837 | UINT32 offset = 0; |
838 | for (auto& entry : m->systems) |
839 | { |
840 | if (entry->getNumTiles() == 0) |
841 | { |
842 | entry->setSortInfo(false, 0); |
843 | continue; |
844 | } |
845 | |
846 | ParticleSystem* parentSystem = entry->getParent(); |
847 | |
848 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
849 | if (settings.sortMode != ParticleSortMode::Distance) |
850 | { |
851 | entry->setSortInfo(false, 0); |
852 | continue; |
853 | } |
854 | |
855 | entry->setSortInfo(true, offset); |
856 | |
857 | offset += prepareMat->execute(*entry, systemIdx, viewOrigin, offset, |
858 | m->resources.mSortBuffers.keys[0], |
859 | m->resources.mSortedIndices[0]); |
860 | |
861 | systemIdx++; |
862 | } |
863 | |
864 | const UINT32 numSystemsToSort = systemIdx; |
865 | if(numSystemsToSort == 0) |
866 | return; |
867 | |
868 | const UINT32 totalNumKeys = offset; |
869 | const UINT32 keyMask = 0xFFFF | (Math::ceilToInt(Math::log2((float)(numSystemsToSort + 1))) << 16); |
870 | const UINT32 outputBufferIdx = GpuSort::instance().sort(m->resources.mSortBuffers, totalNumKeys, keyMask); |
871 | |
872 | m->resources.mSortedIndicesBufferIdx = outputBufferIdx; |
873 | } |
874 | |
875 | void GpuParticleSimulation::prepareBuffers(const GpuParticleSystem* system, const RendererParticles& rendererInfo) |
876 | { |
877 | ParticleSystem* parentSystem = system->getParent(); |
878 | |
879 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
880 | const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings(); |
881 | |
882 | const Random& random = system->getRandom(); |
883 | const float time = system->getTime(); |
884 | const float nrmTime = time / settings.duration; |
885 | |
886 | gGpuParticleSimulateParamsDef.gDrag.set(m->simulationParams, simSettings.drag); |
887 | gGpuParticleSimulateParamsDef.gAcceleration.set(m->simulationParams, simSettings.acceleration); |
888 | |
889 | SPtr<Texture> vfTexture; |
890 | if(simSettings.vectorField.vectorField) |
891 | vfTexture = simSettings.vectorField.vectorField->getTexture(); |
892 | |
893 | if(vfTexture) |
894 | { |
895 | gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 1); |
896 | |
897 | const SPtr<VectorField>& vectorField = simSettings.vectorField.vectorField; |
898 | const VECTOR_FIELD_DESC& vfDesc = vectorField->getDesc(); |
899 | |
900 | const Vector3 tiling( |
901 | simSettings.vectorField.tilingX ? 0.0f : 1.0f, |
902 | simSettings.vectorField.tilingY ? 0.0f : 1.0f, |
903 | simSettings.vectorField.tilingZ ? 0.0f : 1.0f |
904 | ); |
905 | |
906 | gVectorFieldParamsDef.gFieldBounds.set(m->vectorFieldParams, vfDesc.bounds.getSize()); |
907 | gVectorFieldParamsDef.gFieldTightness.set(m->vectorFieldParams, simSettings.vectorField.tightness); |
908 | gVectorFieldParamsDef.gFieldTiling.set(m->vectorFieldParams, tiling); |
909 | gVectorFieldParamsDef.gFieldIntensity.set(m->vectorFieldParams, simSettings.vectorField.intensity); |
910 | |
911 | const Vector3 rotationRate = simSettings.vectorField.rotationRate.evaluate(nrmTime, random) * time; |
912 | const Quaternion addedRotation(Degree(rotationRate.x), Degree(rotationRate.y), Degree(rotationRate.z)); |
913 | |
914 | const Vector3 offset = vfDesc.bounds.getMin() + simSettings.vectorField.offset; |
915 | const Quaternion rotation = simSettings.vectorField.rotation * addedRotation; |
916 | const Vector3 scale = vfDesc.bounds.getSize() * simSettings.vectorField.scale; |
917 | |
918 | Matrix4 fieldToWorld = Matrix4::TRS(offset, rotation, scale); |
919 | fieldToWorld = rendererInfo.localToWorld * fieldToWorld; |
920 | |
921 | const Matrix3 fieldToWorld3x3 = fieldToWorld.get3x3(); |
922 | |
923 | gVectorFieldParamsDef.gFieldToWorld.set(m->vectorFieldParams, fieldToWorld3x3); |
924 | gVectorFieldParamsDef.gWorldToField.set(m->vectorFieldParams, fieldToWorld.inverseAffine()); |
925 | } |
926 | else |
927 | gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 0); |
928 | |
929 | const ParticleDepthCollisionSettings& depthCollisionSettings = simSettings.depthCollision; |
930 | if(depthCollisionSettings.enabled) |
931 | { |
932 | Vector3 scale3D = rendererInfo.particleSystem->getTransform().getScale(); |
933 | float uniformScale = std::max(std::max(scale3D.x, scale3D.y), scale3D.z); |
934 | |
935 | gGpuParticleDepthCollisionParamsDef.gCollisionRange.set(m->depthCollisionParams, 2.0f); |
936 | gGpuParticleDepthCollisionParamsDef.gCollisionRadiusScale.set(m->depthCollisionParams, |
937 | depthCollisionSettings.radiusScale * uniformScale); |
938 | gGpuParticleDepthCollisionParamsDef.gDampening.set(m->depthCollisionParams, |
939 | depthCollisionSettings.dampening); |
940 | gGpuParticleDepthCollisionParamsDef.gRestitution.set(m->depthCollisionParams, |
941 | depthCollisionSettings.restitution); |
942 | |
943 | const Vector2 sizeScaleUVOffset = |
944 | GpuParticleCurves::getUVOffset(rendererInfo.sizeScaleFrameIdxCurveAlloc); |
945 | const float sizeScaleUVScale = |
946 | GpuParticleCurves::getUVScale(rendererInfo.sizeScaleFrameIdxCurveAlloc); |
947 | |
948 | gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveOffset.set(m->depthCollisionParams, sizeScaleUVOffset); |
949 | gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveScale.set(m->depthCollisionParams, Vector2(sizeScaleUVScale, 0.0f)); |
950 | } |
951 | } |
952 | |
953 | void GpuParticleSimulation::clearTiles(const Vector<UINT32>& tiles) |
954 | { |
955 | const auto numTiles = (UINT32)tiles.size(); |
956 | if(numTiles == 0) |
957 | return; |
958 | |
959 | const UINT32 numIterations = Math::divideAndRoundUp(numTiles, GpuParticleHelperBuffers::NUM_SCRATCH_TILES); |
960 | |
961 | GpuParticleClearMat* clearMat = GpuParticleClearMat::get(); |
962 | clearMat->bind(m->helperBuffers.tileScratch); |
963 | |
964 | RenderAPI& rapi = RenderAPI::instance(); |
965 | rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl); |
966 | |
967 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs }; |
968 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
969 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
970 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
971 | |
972 | UINT32 tileStart = 0; |
973 | for (UINT32 i = 0; i < numIterations; i++) |
974 | { |
975 | static_assert(GpuParticleHelperBuffers::NUM_SCRATCH_TILES % TILES_PER_INSTANCE == 0, |
976 | "Tile scratch buffer size must be divisble with number of tiles per instance." ); |
977 | |
978 | const UINT32 tileEnd = std::min(numTiles, tileStart + GpuParticleHelperBuffers::NUM_SCRATCH_TILES); |
979 | |
980 | auto* tileUVs = (Vector2*)m->helperBuffers.tileScratch->lock(GBL_WRITE_ONLY_DISCARD); |
981 | for (UINT32 j = tileStart; j < tileEnd; j++) |
982 | tileUVs[j - tileStart] = GpuParticleResources::getTileCoords(tiles[j]); |
983 | |
984 | const UINT32 alignedTileEnd = Math::divideAndRoundUp(tileEnd, TILES_PER_INSTANCE) * TILES_PER_INSTANCE; |
985 | for (UINT32 j = tileEnd; j < alignedTileEnd; j++) |
986 | tileUVs[j - tileEnd] = Vector2(2.0f, 2.0f); // Out of bounds (we don't want to accidentaly clear used tiles) |
987 | |
988 | m->helperBuffers.tileScratch->unlock(); |
989 | |
990 | const UINT32 numInstances = (alignedTileEnd - tileStart) / TILES_PER_INSTANCE; |
991 | rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances); |
992 | |
993 | tileStart = alignedTileEnd; |
994 | } |
995 | } |
996 | |
997 | void GpuParticleSimulation::injectParticles(const Vector<GpuParticle>& particles) |
998 | { |
999 | const auto numParticles = (UINT32)particles.size(); |
1000 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES); |
1001 | |
1002 | GpuParticleInjectMat* injectMat = GpuParticleInjectMat::get(); |
1003 | injectMat->bind(); |
1004 | |
1005 | RenderAPI& rapi = RenderAPI::instance(); |
1006 | rapi.setVertexDeclaration(m->helperBuffers.injectVertexDecl); |
1007 | |
1008 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.injectScratch, m->helperBuffers.particleUVs }; |
1009 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
1010 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
1011 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
1012 | |
1013 | UINT32 particleStart = 0; |
1014 | for (UINT32 i = 0; i < numIterations; i++) |
1015 | { |
1016 | const UINT32 particleEnd = std::min(numParticles, particleStart + GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES); |
1017 | |
1018 | auto* particleData = (GpuParticleVertex*)m->helperBuffers.injectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
1019 | for (UINT32 j = particleStart; j < particleEnd; j++) |
1020 | particleData[j - particleStart] = particles[j].getVertex(); |
1021 | |
1022 | m->helperBuffers.injectScratch->unlock(); |
1023 | |
1024 | rapi.drawIndexed(0, 6, 0, 4, particleEnd - particleStart); |
1025 | particleStart = particleEnd; |
1026 | } |
1027 | } |
1028 | |
1029 | GpuParticleResources& GpuParticleSimulation::getResources() const |
1030 | { |
1031 | return m->resources; |
1032 | } |
1033 | |
1034 | SPtr<GpuParamBlockBuffer> createGpuParticleVertexInputBuffer() |
1035 | { |
1036 | SPtr<GpuParamBlockBuffer> inputBuffer = gGpuParticleTileVertexParamsDef.createBuffer(); |
1037 | |
1038 | // [0, 1] -> [-1, 1] and flip Y |
1039 | Vector4 uvToNdc(2.0f, -2.0f, -1.0f, 1.0f); |
1040 | |
1041 | const Conventions& rapiConventions = gCaps().conventions; |
1042 | |
1043 | // Either of these flips the Y axis, but if they're both true they cancel out |
1044 | if ((rapiConventions.uvYAxis == Conventions::Axis::Up) ^ (rapiConventions.ndcYAxis == Conventions::Axis::Down)) |
1045 | { |
1046 | uvToNdc.y = -uvToNdc.y; |
1047 | uvToNdc.w = -uvToNdc.w; |
1048 | } |
1049 | |
1050 | gGpuParticleTileVertexParamsDef.gUVToNDC.set(inputBuffer, uvToNdc); |
1051 | |
1052 | return inputBuffer; |
1053 | } |
1054 | |
1055 | GpuParticleClearMat::GpuParticleClearMat() |
1056 | { |
1057 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
1058 | |
1059 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
1060 | mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs" , mTileUVParam); |
1061 | } |
1062 | |
1063 | void GpuParticleClearMat::_initDefines(ShaderDefines& defines) |
1064 | { |
1065 | defines.set("TILES_PER_INSTANCE" , TILES_PER_INSTANCE); |
1066 | } |
1067 | |
1068 | void GpuParticleClearMat::bind(const SPtr<GpuBuffer>& tileUVs) |
1069 | { |
1070 | mTileUVParam.set(tileUVs); |
1071 | |
1072 | RendererMaterial::bind(); |
1073 | } |
1074 | |
1075 | GpuParticleInjectMat::GpuParticleInjectMat() |
1076 | { |
1077 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
1078 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
1079 | } |
1080 | |
1081 | GpuParticleCurveInjectMat::GpuParticleCurveInjectMat() |
1082 | { |
1083 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
1084 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
1085 | } |
1086 | |
1087 | GpuParticleSimulateMat::GpuParticleSimulateMat() |
1088 | { |
1089 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
1090 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
1091 | |
1092 | mParams->getParamInfo()->getBinding( |
1093 | GPT_FRAGMENT_PROGRAM, |
1094 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
1095 | "Params" , |
1096 | mParamsBinding |
1097 | ); |
1098 | |
1099 | mParams->getParamInfo()->getBinding( |
1100 | GPT_FRAGMENT_PROGRAM, |
1101 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
1102 | "VectorFieldParams" , |
1103 | mVectorFieldBinding |
1104 | ); |
1105 | |
1106 | mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs" , mTileUVParam); |
1107 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
1108 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVelocityTex" , mVelocityTexParam); |
1109 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVectorFieldTex" , mVectorFieldTexParam); |
1110 | |
1111 | mSupportsDepthCollisions = mVariation.getUInt("DEPTH_COLLISIONS" ) > 0; |
1112 | if(mSupportsDepthCollisions) |
1113 | { |
1114 | mParams->getParamInfo()->getBinding( |
1115 | GPT_FRAGMENT_PROGRAM, |
1116 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
1117 | "PerCamera" , |
1118 | mPerCameraBinding |
1119 | ); |
1120 | |
1121 | mParams->getParamInfo()->getBinding( |
1122 | GPT_FRAGMENT_PROGRAM, |
1123 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
1124 | "PerObject" , |
1125 | mPerObjectBinding |
1126 | ); |
1127 | |
1128 | mParams->getParamInfo()->getBinding( |
1129 | GPT_FRAGMENT_PROGRAM, |
1130 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
1131 | "DepthCollisionParams" , |
1132 | mDepthCollisionBinding |
1133 | ); |
1134 | |
1135 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSizeRotationTex" , mSizeRotationTexParam); |
1136 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gCurvesTex" , mCurvesTexParam); |
1137 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gDepthTex" , mDepthTexParam); |
1138 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gNormalsTex" , mNormalsTexParam); |
1139 | } |
1140 | } |
1141 | |
1142 | void GpuParticleSimulateMat::_initDefines(ShaderDefines& defines) |
1143 | { |
1144 | defines.set("TILES_PER_INSTANCE" , TILES_PER_INSTANCE); |
1145 | } |
1146 | |
1147 | void GpuParticleSimulateMat::bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams, |
1148 | const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams) |
1149 | { |
1150 | GpuParticleStateTextures& prevState = resources.getPreviousState(); |
1151 | const GpuParticleStaticTextures& staticTextures = resources.getStaticTextures(); |
1152 | GpuParticleCurves& curveTexture = resources.getCurveTexture(); |
1153 | |
1154 | mParams->setParamBlockBuffer(mParamsBinding.set, mParamsBinding.slot, simulationParams); |
1155 | |
1156 | mPosAndTimeTexParam.set(prevState.positionAndTimeTex); |
1157 | mVelocityTexParam.set(prevState.velocityTex); |
1158 | |
1159 | if(mSupportsDepthCollisions) |
1160 | { |
1161 | mParams->setParamBlockBuffer(mPerCameraBinding.set, mPerCameraBinding.slot, viewParams); |
1162 | |
1163 | mSizeRotationTexParam.set(staticTextures.sizeAndRotationTex); |
1164 | mCurvesTexParam.set(curveTexture.getTexture()); |
1165 | mDepthTexParam.set(depth); |
1166 | mNormalsTexParam.set(normals); |
1167 | } |
1168 | |
1169 | RendererMaterial::bind(false); |
1170 | } |
1171 | |
1172 | void GpuParticleSimulateMat::bindPerCallParams(const SPtr<GpuBuffer>& tileUVs, |
1173 | const SPtr<GpuParamBlockBuffer>& perObjectParams, const SPtr<GpuParamBlockBuffer>& vectorFieldParams, |
1174 | const SPtr<Texture>& vectorFieldTexture, const SPtr<GpuParamBlockBuffer>& depthCollisionParams) |
1175 | { |
1176 | mTileUVParam.set(tileUVs); |
1177 | mParams->setParamBlockBuffer(mVectorFieldBinding.set, mVectorFieldBinding.slot, vectorFieldParams); |
1178 | mVectorFieldTexParam.set(vectorFieldTexture); |
1179 | |
1180 | if(mSupportsDepthCollisions) |
1181 | { |
1182 | mParams->setParamBlockBuffer(mPerObjectBinding.set, mPerObjectBinding.slot, perObjectParams); |
1183 | mParams->setParamBlockBuffer(mDepthCollisionBinding.set, mDepthCollisionBinding.slot, depthCollisionParams); |
1184 | } |
1185 | |
1186 | bindParams(); |
1187 | } |
1188 | |
1189 | GpuParticleSimulateMat* GpuParticleSimulateMat::getVariation(bool depthCollisions, bool localSpace) |
1190 | { |
1191 | if(depthCollisions) |
1192 | { |
1193 | if(localSpace) |
1194 | return get(getVariation<2>()); |
1195 | |
1196 | return get(getVariation<1>()); |
1197 | } |
1198 | |
1199 | return get(getVariation<0>()); |
1200 | } |
1201 | |
1202 | GpuParticleBoundsMat::GpuParticleBoundsMat() |
1203 | { |
1204 | mInputBuffer = gGpuParticleBoundsParamsDef.createBuffer(); |
1205 | mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input" , mInputBuffer); |
1206 | |
1207 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gParticleIndices" , mParticleIndicesParam); |
1208 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput" , mOutputParam); |
1209 | mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
1210 | } |
1211 | |
1212 | void GpuParticleBoundsMat::_initDefines(ShaderDefines& defines) |
1213 | { |
1214 | defines.set("NUM_THREADS" , NUM_THREADS); |
1215 | } |
1216 | |
1217 | void GpuParticleBoundsMat::bind(const SPtr<Texture>& positionAndTime) |
1218 | { |
1219 | mPosAndTimeTexParam.set(positionAndTime); |
1220 | |
1221 | RendererMaterial::bind(); |
1222 | } |
1223 | |
1224 | AABox GpuParticleBoundsMat::execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles) |
1225 | { |
1226 | static constexpr UINT32 MAX_NUM_GROUPS = 128; |
1227 | |
1228 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS); |
1229 | const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS); |
1230 | |
1231 | const UINT32 iterationsPerGroup = numIterations / numGroups; |
1232 | const UINT32 = numIterations % numGroups; |
1233 | |
1234 | gGpuParticleBoundsParamsDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup); |
1235 | gGpuParticleBoundsParamsDef.gNumExtraIterations.set(mInputBuffer, extraIterations); |
1236 | gGpuParticleBoundsParamsDef.gNumParticles.set(mInputBuffer, numParticles); |
1237 | |
1238 | GPU_BUFFER_DESC outputDesc; |
1239 | outputDesc.type = GBT_STANDARD; |
1240 | outputDesc.format = BF_32X2U; |
1241 | outputDesc.elementCount = numGroups * 2; |
1242 | outputDesc.usage = GBU_DYNAMIC; |
1243 | |
1244 | SPtr<GpuBuffer> output = GpuBuffer::create(outputDesc); |
1245 | |
1246 | mParticleIndicesParam.set(indices); |
1247 | mOutputParam.set(output); |
1248 | |
1249 | RenderAPI::instance().dispatchCompute(numGroups); |
1250 | |
1251 | Vector3 min = Vector3::INF; |
1252 | Vector3 max = -Vector3::INF; |
1253 | |
1254 | const Vector3* data = (Vector3*)output->lock(GBL_READ_ONLY); |
1255 | for(UINT32 i = 0; i < numGroups; i++) |
1256 | { |
1257 | min = Vector3::min(min, data[i * 2 + 0]); |
1258 | max = Vector3::min(max, data[i * 2 + 1]); |
1259 | } |
1260 | |
1261 | output->unlock(); |
1262 | |
1263 | return AABox(min, max); |
1264 | } |
1265 | |
1266 | GpuParticleSortPrepareMat::GpuParticleSortPrepareMat() |
1267 | { |
1268 | mInputBuffer = gGpuParticleSortPrepareParamDef.createBuffer(); |
1269 | mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input" , mInputBuffer); |
1270 | |
1271 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gInputIndices" , mInputIndicesParam); |
1272 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputKeys" , mOutputKeysParam); |
1273 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputIndices" , mOutputIndicesParam); |
1274 | mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
1275 | } |
1276 | |
1277 | void GpuParticleSortPrepareMat::_initDefines(ShaderDefines& defines) |
1278 | { |
1279 | defines.set("NUM_THREADS" , NUM_THREADS); |
1280 | } |
1281 | |
1282 | void GpuParticleSortPrepareMat::bind(const SPtr<Texture>& positionAndTime) |
1283 | { |
1284 | mPosAndTimeTexParam.set(positionAndTime); |
1285 | |
1286 | RendererMaterial::bind(false); |
1287 | } |
1288 | |
1289 | UINT32 GpuParticleSortPrepareMat::execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin, |
1290 | UINT32 offset, const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices) |
1291 | { |
1292 | static constexpr UINT32 MAX_NUM_GROUPS = 128; |
1293 | |
1294 | assert(systemIdx < std::pow(2, 16)); |
1295 | |
1296 | const UINT32 numParticles = system.getNumTiles() * GpuParticleResources::PARTICLES_PER_TILE; |
1297 | |
1298 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS); |
1299 | const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS); |
1300 | |
1301 | const UINT32 iterationsPerGroup = numIterations / numGroups; |
1302 | const UINT32 = numIterations % numGroups; |
1303 | |
1304 | Vector3 localViewOrigin; |
1305 | ParticleSystem* parentSystem = system.getParent(); |
1306 | if(parentSystem->getSettings().simulationSpace == ParticleSimulationSpace::Local) |
1307 | { |
1308 | const Matrix4& worldToLocal = parentSystem->getTransform().getInvMatrix(); |
1309 | localViewOrigin = worldToLocal.multiplyAffine(viewOrigin); |
1310 | } |
1311 | else |
1312 | localViewOrigin = viewOrigin; |
1313 | |
1314 | gGpuParticleSortPrepareParamDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup); |
1315 | gGpuParticleSortPrepareParamDef.gNumExtraIterations.set(mInputBuffer, extraIterations); |
1316 | gGpuParticleSortPrepareParamDef.gNumParticles.set(mInputBuffer, numParticles); |
1317 | gGpuParticleSortPrepareParamDef.gOutputOffset.set(mInputBuffer, offset); |
1318 | gGpuParticleSortPrepareParamDef.gSystemKey.set(mInputBuffer, systemIdx << 16); |
1319 | gGpuParticleSortPrepareParamDef.gLocalViewOrigin.set(mInputBuffer, localViewOrigin); |
1320 | |
1321 | mInputIndicesParam.set(system.getParticleIndices()); |
1322 | mOutputKeysParam.set(outKeys); |
1323 | mOutputIndicesParam.set(outIndices); |
1324 | |
1325 | bindParams(); |
1326 | RenderAPI::instance().dispatchCompute(numGroups); |
1327 | return numParticles; |
1328 | } |
1329 | |
1330 | struct GpuParticleCurveInject |
1331 | { |
1332 | Color color; |
1333 | Vector2 dataUV; |
1334 | }; |
1335 | |
1336 | GpuParticleCurves::GpuParticleCurves() |
1337 | { |
1338 | TEXTURE_DESC textureDesc; |
1339 | textureDesc.format = PF_RGBA16F; |
1340 | textureDesc.width = TEX_SIZE; |
1341 | textureDesc.height = TEX_SIZE; |
1342 | textureDesc.usage = TU_RENDERTARGET; |
1343 | |
1344 | mCurveTexture = Texture::create(textureDesc); |
1345 | |
1346 | RENDER_TEXTURE_DESC rtDesc; |
1347 | rtDesc.colorSurfaces[0].texture = mCurveTexture; |
1348 | |
1349 | mRT = RenderTexture::create(rtDesc); |
1350 | |
1351 | // Prepare vertex declaration for injecting new curves |
1352 | SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
1353 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Color, per instance |
1354 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 1, 0, 1); // Data UV, per instance |
1355 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 1); // Pixel texture coordinates |
1356 | |
1357 | mInjectVertexDecl = VertexDeclaration::create(injectVertexDesc); |
1358 | |
1359 | // Prepare UV coordinates for injecting curves |
1360 | VERTEX_BUFFER_DESC injectUVBufferDesc; |
1361 | injectUVBufferDesc.numVerts = 4; |
1362 | injectUVBufferDesc.vertexSize = injectVertexDesc->getVertexStride(1); |
1363 | |
1364 | mInjectUV = VertexBuffer::create(injectUVBufferDesc); |
1365 | |
1366 | auto* const tileUVData = (Vector2*)mInjectUV->lock(GBL_WRITE_ONLY_DISCARD); |
1367 | const float tileUVScale = 1.0f / (float)TEX_SIZE; |
1368 | tileUVData[0] = Vector2(0.0f, 0.0f) * tileUVScale; |
1369 | tileUVData[1] = Vector2(1.0f, 0.0f) * tileUVScale; |
1370 | tileUVData[2] = Vector2(1.0f, 1.0f) * tileUVScale; |
1371 | tileUVData[3] = Vector2(0.0f, 1.0f) * tileUVScale; |
1372 | |
1373 | mInjectUV->unlock(); |
1374 | |
1375 | // Prepare indices for injecting curves |
1376 | INDEX_BUFFER_DESC injectIndexBufferDesc; |
1377 | injectIndexBufferDesc.indexType = IT_16BIT; |
1378 | injectIndexBufferDesc.numIndices = 6; |
1379 | |
1380 | mInjectIndices = IndexBuffer::create(injectIndexBufferDesc); |
1381 | |
1382 | const Conventions& rapiConventions = gCaps().conventions; |
1383 | |
1384 | auto* const indices = (UINT16*)mInjectIndices->lock(GBL_WRITE_ONLY_DISCARD); |
1385 | |
1386 | // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't |
1387 | // get culled. |
1388 | if (rapiConventions.uvYAxis == Conventions::Axis::Up) |
1389 | { |
1390 | indices[0] = 2; indices[1] = 1; indices[2] = 0; |
1391 | indices[3] = 3; indices[4] = 2; indices[5] = 0; |
1392 | } |
1393 | else |
1394 | { |
1395 | indices[0] = 0; indices[1] = 1; indices[2] = 2; |
1396 | indices[3] = 0; indices[4] = 2; indices[5] = 3; |
1397 | } |
1398 | |
1399 | mInjectIndices->unlock(); |
1400 | |
1401 | // Prepare a scratch buffer we'll use to inject new curves |
1402 | VERTEX_BUFFER_DESC injectScratchBufferDesc; |
1403 | injectScratchBufferDesc.numVerts = SCRATCH_NUM_VERTICES; |
1404 | injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0); |
1405 | injectScratchBufferDesc.usage = GBU_DYNAMIC; |
1406 | |
1407 | mInjectScratch = VertexBuffer::create(injectScratchBufferDesc); |
1408 | } |
1409 | |
1410 | GpuParticleCurves::~GpuParticleCurves() |
1411 | { |
1412 | for(auto& entry : mPendingAllocations) |
1413 | mPendingAllocator.free(entry.pixels); |
1414 | |
1415 | mPendingAllocator.clear(); |
1416 | } |
1417 | |
1418 | TextureRowAllocation GpuParticleCurves::alloc(Color* pixels, uint32_t count) |
1419 | { |
1420 | PendingAllocation pendingAlloc; |
1421 | pendingAlloc.allocation = mRowAllocator.alloc(count); |
1422 | |
1423 | if(pendingAlloc.allocation.length == 0) |
1424 | return pendingAlloc.allocation; |
1425 | |
1426 | pendingAlloc.pixels = (Color*)mPendingAllocator.alloc(sizeof(Color) * count); |
1427 | memcpy(pendingAlloc.pixels, pixels, sizeof(Color) * count); |
1428 | |
1429 | mPendingAllocations.push_back(pendingAlloc); |
1430 | return pendingAlloc.allocation; |
1431 | } |
1432 | |
1433 | void GpuParticleCurves::free(const TextureRowAllocation& alloc) |
1434 | { |
1435 | mRowAllocator.free(alloc); |
1436 | } |
1437 | |
1438 | void GpuParticleCurves::applyChanges() |
1439 | { |
1440 | const auto numCurves = (UINT32)mPendingAllocations.size(); |
1441 | if(numCurves == 0) |
1442 | return; |
1443 | |
1444 | GpuParticleCurveInjectMat* injectMat = GpuParticleCurveInjectMat::get(); |
1445 | injectMat->bind(); |
1446 | |
1447 | RenderAPI& rapi = RenderAPI::instance(); |
1448 | rapi.setRenderTarget(mRT); |
1449 | rapi.setVertexDeclaration(mInjectVertexDecl); |
1450 | |
1451 | SPtr<VertexBuffer> buffers[] = { mInjectScratch, mInjectUV }; |
1452 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
1453 | rapi.setIndexBuffer(mInjectIndices); |
1454 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
1455 | |
1456 | UINT32 curveIdx = 0; |
1457 | |
1458 | auto* data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
1459 | while(curveIdx < numCurves) |
1460 | { |
1461 | UINT32 count = 0; |
1462 | for(; curveIdx < numCurves; curveIdx++) |
1463 | { |
1464 | const PendingAllocation& pendingAlloc = mPendingAllocations[curveIdx]; |
1465 | |
1466 | const UINT32 entryCount = pendingAlloc.allocation.length; |
1467 | if((count + entryCount) > SCRATCH_NUM_VERTICES) |
1468 | break; |
1469 | |
1470 | for(UINT32 i = 0; i < entryCount; i++) |
1471 | { |
1472 | data[count].color = pendingAlloc.pixels[i]; |
1473 | data[count].dataUV = Vector2( |
1474 | (pendingAlloc.allocation.x + i) / (float)TEX_SIZE, |
1475 | pendingAlloc.allocation.y / (float)TEX_SIZE); |
1476 | |
1477 | count++; |
1478 | } |
1479 | } |
1480 | |
1481 | mInjectScratch->unlock(); |
1482 | rapi.drawIndexed(0, 6, 0, 4, count); |
1483 | |
1484 | data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
1485 | } |
1486 | |
1487 | mInjectScratch->unlock(); |
1488 | |
1489 | for(auto& entry : mPendingAllocations) |
1490 | mPendingAllocator.free(entry.pixels); |
1491 | |
1492 | mPendingAllocations.clear(); |
1493 | mPendingAllocator.clear(); |
1494 | } |
1495 | |
1496 | Vector2 GpuParticleCurves::getUVOffset(const TextureRowAllocation& alloc) |
1497 | { |
1498 | return Vector2( |
1499 | ((float)alloc.x + 0.5f) / TEX_SIZE, |
1500 | ((float)alloc.y + 0.5f) / TEX_SIZE |
1501 | ); |
1502 | } |
1503 | |
1504 | float GpuParticleCurves::getUVScale(const TextureRowAllocation& alloc) |
1505 | { |
1506 | if(alloc.length == 0) |
1507 | return 0.0f; |
1508 | |
1509 | return (alloc.length - 1) / (float)TEX_SIZE; |
1510 | } |
1511 | }} |
1512 | |