1//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
2//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
3#include "BsGpuParticleSimulation.h"
4#include "Renderer/BsParamBlocks.h"
5#include "Renderer/BsRendererMaterial.h"
6#include "Renderer/BsGpuResourcePool.h"
7#include "RenderAPI/BsVertexBuffer.h"
8#include "RenderAPI/BsIndexBuffer.h"
9#include "RenderAPI/BsGpuBuffer.h"
10#include "RenderAPI/BsVertexDataDesc.h"
11#include "RenderAPI/BsGpuPipelineParamInfo.h"
12#include "Particles/BsVectorField.h"
13#include "Particles/BsParticleDistribution.h"
14#include "Math/BsVector3.h"
15#include "BsRendererParticles.h"
16#include "BsRendererScene.h"
17#include "BsRenderBeast.h"
18#include "Utility/BsGpuSort.h"
19
20namespace bs { namespace ct
21{
22 BS_PARAM_BLOCK_BEGIN(GpuParticleTileVertexParamsDef)
23 BS_PARAM_BLOCK_ENTRY(Vector4, gUVToNDC)
24 BS_PARAM_BLOCK_END
25
26 GpuParticleTileVertexParamsDef gGpuParticleTileVertexParamsDef;
27
28 /** Material used for clearing tiles in the texture used for particle GPU simulation. */
29 class GpuParticleClearMat : public RendererMaterial<GpuParticleClearMat>
30 {
31 RMAT_DEF_CUSTOMIZED("GpuParticleClear.bsl");
32
33 public:
34 GpuParticleClearMat();
35
36 /** Binds the material to the pipeline, along with the @p tileUVs buffer containing locations of tiles to clear. */
37 void bind(const SPtr<GpuBuffer>& tileUVs);
38
39 private:
40 GpuParamBuffer mTileUVParam;
41 };
42
43 /** Material used for adding new particles into the particle state textures. */
44 class GpuParticleInjectMat : public RendererMaterial<GpuParticleInjectMat>
45 {
46 RMAT_DEF("GpuParticleInject.bsl");
47
48 public:
49 GpuParticleInjectMat();
50 };
51
52 /** Material used for adding new curves into the curve texture. */
53 class GpuParticleCurveInjectMat : public RendererMaterial<GpuParticleCurveInjectMat>
54 {
55 RMAT_DEF("GpuParticleCurveInject.bsl");
56
57 public:
58 GpuParticleCurveInjectMat();
59 };
60
61 BS_PARAM_BLOCK_BEGIN(VectorFieldParamsDef)
62 BS_PARAM_BLOCK_ENTRY(Vector3, gFieldBounds)
63 BS_PARAM_BLOCK_ENTRY(float, gFieldIntensity)
64 BS_PARAM_BLOCK_ENTRY(Vector3, gFieldTiling)
65 BS_PARAM_BLOCK_ENTRY(float, gFieldTightness)
66 BS_PARAM_BLOCK_ENTRY(Matrix4, gWorldToField)
67 BS_PARAM_BLOCK_ENTRY(Matrix3, gFieldToWorld)
68 BS_PARAM_BLOCK_END
69
70 VectorFieldParamsDef gVectorFieldParamsDef;
71
72 BS_PARAM_BLOCK_BEGIN(GpuParticleDepthCollisionParamsDef)
73 BS_PARAM_BLOCK_ENTRY(float, gCollisionRange)
74 BS_PARAM_BLOCK_ENTRY(float, gRestitution)
75 BS_PARAM_BLOCK_ENTRY(float, gDampening)
76 BS_PARAM_BLOCK_ENTRY(float, gCollisionRadiusScale)
77 BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveOffset)
78 BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveScale)
79 BS_PARAM_BLOCK_END
80
81 GpuParticleDepthCollisionParamsDef gGpuParticleDepthCollisionParamsDef;
82
83 BS_PARAM_BLOCK_BEGIN(GpuParticleSimulateParamsDef)
84 BS_PARAM_BLOCK_ENTRY(INT32, gNumVectorFields)
85 BS_PARAM_BLOCK_ENTRY(INT32, gNumIterations)
86 BS_PARAM_BLOCK_ENTRY(float, gDT)
87 BS_PARAM_BLOCK_ENTRY(float, gDrag)
88 BS_PARAM_BLOCK_ENTRY(Vector3, gAcceleration)
89 BS_PARAM_BLOCK_END
90
91 GpuParticleSimulateParamsDef gGpuParticleSimulateParamsDef;
92
93 /**
94 * Material used for performing GPU particle simulation. State is read from the provided input textures and output
95 * into the output textures bound as render targets.
96 */
97 class GpuParticleSimulateMat : public RendererMaterial<GpuParticleSimulateMat>
98 {
99 RMAT_DEF_CUSTOMIZED("GpuParticleSimulate.bsl");
100
101 /** Helper method used for initializing variations of this material. */
102 template<UINT32 DEPTH_COLLISIONS>
103 static const ShaderVariation& getVariation()
104 {
105 static ShaderVariation variation = ShaderVariation(
106 {
107 ShaderVariation::Param("DEPTH_COLLISIONS", DEPTH_COLLISIONS)
108 });
109
110 return variation;
111 }
112 public:
113 GpuParticleSimulateMat();
114
115 /** Binds the material to the pipeline along with any frame-static parameters. */
116 void bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams,
117 const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams);
118
119 /**
120 * Binds parameters that change with every material dispatch.
121 *
122 * @param[in] tileUVs Sets the UV offsets of individual tiles for a particular particle system
123 * that's being rendered.
124 * @param[in] perObjectParams General purpose particle system parameters.
125 * @param[in] vectorFieldParams Information about the currently bound vector field, if any.
126 * @param[in] vectorFieldTexture 3D texture representing the vector field, or null if none.
127 * @param[in] depthCollisionParams Parameter buffer for controlling depth buffer collisions, if enabled.
128 *
129 */
130 void bindPerCallParams(const SPtr<GpuBuffer>& tileUVs, const SPtr<GpuParamBlockBuffer>& perObjectParams,
131 const SPtr<GpuParamBlockBuffer>& vectorFieldParams, const SPtr<Texture>& vectorFieldTexture,
132 const SPtr<GpuParamBlockBuffer>& depthCollisionParams);
133
134 /** Returns the material variation matching the provided parameters. */
135 static GpuParticleSimulateMat* getVariation(bool depthCollisions, bool localSpace);
136 private:
137 GpuParamBuffer mTileUVParam;
138 GpuParamTexture mPosAndTimeTexParam;
139 GpuParamTexture mVelocityTexParam;
140 GpuParamTexture mSizeRotationTexParam;
141 GpuParamTexture mCurvesTexParam;
142 GpuParamTexture mDepthTexParam;
143 GpuParamTexture mNormalsTexParam;
144 GpuParamBinding mParamsBinding;
145 GpuParamBinding mPerCameraBinding;
146 GpuParamBinding mPerObjectBinding;
147
148 GpuParamBinding mVectorFieldBinding;
149 GpuParamTexture mVectorFieldTexParam;
150
151 GpuParamBinding mDepthCollisionBinding;
152
153 bool mSupportsDepthCollisions;
154 };
155
156 BS_PARAM_BLOCK_BEGIN(GpuParticleBoundsParamsDef)
157 BS_PARAM_BLOCK_ENTRY(UINT32, gIterationsPerGroup)
158 BS_PARAM_BLOCK_ENTRY(UINT32, gNumExtraIterations)
159 BS_PARAM_BLOCK_ENTRY(UINT32, gNumParticles)
160 BS_PARAM_BLOCK_END
161
162 GpuParticleBoundsParamsDef gGpuParticleBoundsParamsDef;
163
164 /** Material used for calculating particle system bounds. */
165 class GpuParticleBoundsMat : public RendererMaterial<GpuParticleBoundsMat>
166 {
167 static constexpr UINT32 NUM_THREADS = 64;
168
169 RMAT_DEF_CUSTOMIZED("GpuParticleBounds.bsl");
170
171 public:
172 GpuParticleBoundsMat();
173
174 /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */
175 void bind(const SPtr<Texture>& positionAndTime);
176
177 /**
178 * Executes the material, calculating the bounds. Note that this function reads back from the GPU and should not
179 * be called at runtime.
180 *
181 * @param[in] indices Buffer containing offsets into the position texture for each particle.
182 * @param[in] numParticles Number of particle in the provided indices buffer.
183 */
184 AABox execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles);
185
186 private:
187 GpuParamBuffer mParticleIndicesParam;
188 GpuParamBuffer mOutputParam;
189 GpuParamTexture mPosAndTimeTexParam;
190 SPtr<GpuParamBlockBuffer> mInputBuffer;
191 };
192
193 BS_PARAM_BLOCK_BEGIN(GpuParticleSortPrepareParamDef)
194 BS_PARAM_BLOCK_ENTRY(INT32, gIterationsPerGroup)
195 BS_PARAM_BLOCK_ENTRY(INT32, gNumExtraIterations)
196 BS_PARAM_BLOCK_ENTRY(INT32, gNumParticles)
197 BS_PARAM_BLOCK_ENTRY(INT32, gOutputOffset)
198 BS_PARAM_BLOCK_ENTRY(INT32, gSystemKey)
199 BS_PARAM_BLOCK_ENTRY(Vector3, gLocalViewOrigin)
200 BS_PARAM_BLOCK_END
201
202 GpuParticleSortPrepareParamDef gGpuParticleSortPrepareParamDef;
203
204 /** Material used for preparing key/values buffers used for particle sorting. */
205 class GpuParticleSortPrepareMat : public RendererMaterial<GpuParticleSortPrepareMat>
206 {
207 static constexpr UINT32 NUM_THREADS = 64;
208
209 RMAT_DEF_CUSTOMIZED("GpuParticleSortPrepare.bsl");
210
211 public:
212 GpuParticleSortPrepareMat();
213
214 /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */
215 void bind(const SPtr<Texture>& positionAndTime);
216
217 /**
218 * Executes the material, generating sort data for a particular particle system and injecting it into the specified
219 * location in the key and index buffers.
220 *
221 * @param[in] system System whose particles to insert into the sort key/index buffers.
222 * @param[in] systemIdx Sequential index of the system to insert into the sort buffers.
223 * @param[in] offset Offset into the key/index buffer at which to insert the sort data.
224 * @param[in] viewOrigin View origin to use for determining sorting keys, in world space.
225 * @param[out] outKeys Pre-allocated buffer that will receive the keys used for sorting. The buffer must
226 * be GPU writable and use a 1x 32-bit integer format.
227 * @param[out] outIndices Pre-allocated buffer that will receive the indices to be sorted. The buffer must
228 * be GPU writable and use a 2x 16-bit integer format. Must have the same capacity
229 * as @p outKeys.
230 * @return Number of particle that were written to the buffers.
231 */
232 UINT32 execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin, UINT32 offset,
233 const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices);
234
235 private:
236 GpuParamBuffer mInputIndicesParam;
237 GpuParamBuffer mOutputKeysParam;
238 GpuParamBuffer mOutputIndicesParam;
239 GpuParamTexture mPosAndTimeTexParam;
240 SPtr<GpuParamBlockBuffer> mInputBuffer;
241 };
242
243 static constexpr UINT32 TILES_PER_INSTANCE = 8;
244 static constexpr UINT32 PARTICLES_PER_INSTANCE = TILES_PER_INSTANCE * GpuParticleResources::PARTICLES_PER_TILE;
245
246 /** Contains a variety of helper buffers and declarations used for GPU particle simulation. */
247 struct GpuParticleHelperBuffers
248 {
249 static constexpr UINT32 NUM_SCRATCH_TILES = 512;
250 static constexpr UINT32 NUM_SCRATCH_PARTICLES = 4096;
251
252 GpuParticleHelperBuffers();
253
254 SPtr<VertexBuffer> tileUVs;
255 SPtr<VertexBuffer> particleUVs;
256 SPtr<IndexBuffer> spriteIndices;
257 SPtr<VertexDeclaration> tileVertexDecl;
258 SPtr<VertexDeclaration> injectVertexDecl;
259 SPtr<GpuBuffer> tileScratch;
260 SPtr<VertexBuffer> injectScratch;
261 };
262
263 GpuParticleResources::GpuParticleResources()
264 {
265 // Allocate textures
266 TEXTURE_DESC positionAndTimeDesc;
267 positionAndTimeDesc.format = PF_RGBA32F;
268 positionAndTimeDesc.width = TEX_SIZE;
269 positionAndTimeDesc.height = TEX_SIZE;
270 positionAndTimeDesc.usage = TU_RENDERTARGET;
271
272 TEXTURE_DESC velocityDesc;
273 velocityDesc.format = PF_RGBA16F;
274 velocityDesc.width = TEX_SIZE;
275 velocityDesc.height = TEX_SIZE;
276 velocityDesc.usage = TU_RENDERTARGET;
277
278 for (UINT32 i = 0; i < 2; i++)
279 {
280 mStateTextures[i].positionAndTimeTex = Texture::create(positionAndTimeDesc);
281 mStateTextures[i].velocityTex = Texture::create(velocityDesc);
282 }
283
284 TEXTURE_DESC sizeAndRotationDesc;
285 sizeAndRotationDesc.format = PF_RGBA16F;
286 sizeAndRotationDesc.width = TEX_SIZE;
287 sizeAndRotationDesc.height = TEX_SIZE;
288 sizeAndRotationDesc.usage = TU_RENDERTARGET;
289
290 mStaticTextures.sizeAndRotationTex = Texture::create(sizeAndRotationDesc);
291
292 RENDER_TEXTURE_DESC staticRtDesc;
293 staticRtDesc.colorSurfaces[0].texture = mStaticTextures.sizeAndRotationTex;
294
295 for (UINT32 i = 0; i < 2; i++)
296 {
297 RENDER_TEXTURE_DESC simulationRTDesc;
298 simulationRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex;
299 simulationRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex;
300
301 mSimulateRT[i] = RenderTexture::create(simulationRTDesc);
302
303 RENDER_TEXTURE_DESC injectRTDesc;
304 injectRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex;
305 injectRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex;
306 injectRTDesc.colorSurfaces[2].texture = mStaticTextures.sizeAndRotationTex;
307 mInjectRT[i] = RenderTexture::create(injectRTDesc);
308 }
309
310 // Allocate the buffer containing keys used for sorting
311 GPU_BUFFER_DESC sortKeysBufferDesc;
312 sortKeysBufferDesc.type = GBT_STANDARD;
313 sortKeysBufferDesc.format = BF_32X1U;
314 sortKeysBufferDesc.elementCount = TEX_SIZE * TEX_SIZE;
315 sortKeysBufferDesc.usage = GBU_LOADSTORE;
316
317 mSortBuffers.keys[0] = GpuBuffer::create(sortKeysBufferDesc);
318 mSortBuffers.keys[1] = GpuBuffer::create(sortKeysBufferDesc);
319
320 // Allocate the buffer containing sorted particle indices
321 GPU_BUFFER_DESC sortedIndicesBufferDesc;
322 sortedIndicesBufferDesc.type = GBT_STANDARD;
323 sortedIndicesBufferDesc.format = BF_16X2U;
324 sortedIndicesBufferDesc.elementCount = TEX_SIZE * TEX_SIZE;
325 sortedIndicesBufferDesc.usage = GBU_LOADSTORE;
326
327 mSortedIndices[0] = GpuBuffer::create(sortedIndicesBufferDesc);
328 mSortedIndices[1] = GpuBuffer::create(sortedIndicesBufferDesc);
329
330 mSortBuffers.values[0] = mSortedIndices[0]->getView(GBT_STANDARD, BF_32X1U);
331 mSortBuffers.values[1] = mSortedIndices[1]->getView(GBT_STANDARD, BF_32X1U);
332
333 // Clear the free tile linked list
334 for (UINT32 i = 0; i < TILE_COUNT; i++)
335 mFreeTiles[i] = TILE_COUNT - i - 1;
336 }
337
338 UINT32 GpuParticleResources::allocTile()
339 {
340 if (mNumFreeTiles > 0)
341 {
342 mNumFreeTiles--;
343 return mFreeTiles[mNumFreeTiles];
344 }
345
346 return (UINT32)-1;
347 }
348
349 void GpuParticleResources::freeTile(UINT32 tile)
350 {
351 assert(tile < TILE_COUNT);
352 assert(mNumFreeTiles < TILE_COUNT);
353
354 mFreeTiles[mNumFreeTiles] = tile;
355 mNumFreeTiles++;
356 }
357
358 Vector2I GpuParticleResources::getTileOffset(UINT32 tileId)
359 {
360 return Vector2I(
361 (tileId % TILE_COUNT_1D) * TILE_SIZE,
362 (tileId / TILE_COUNT_1D) * TILE_SIZE);
363 }
364
365 Vector2 GpuParticleResources::getTileCoords(UINT32 tileId)
366 {
367 return Vector2(
368 Math::frac(tileId / (float)TILE_COUNT_1D),
369 (UINT32)(tileId / TILE_COUNT_1D) / (float)TILE_COUNT_1D);
370 }
371
372 Vector2I GpuParticleResources::getParticleOffset(UINT32 subTileId)
373 {
374 return Vector2I(
375 subTileId % TILE_SIZE,
376 subTileId / TILE_SIZE);
377 }
378
379 Vector2 GpuParticleResources::getParticleCoords(UINT32 subTileId)
380 {
381 const Vector2I tileOffset = getParticleOffset(subTileId);
382 return tileOffset / (float)TEX_SIZE;
383 }
384
385 const SPtr<GpuBuffer>& GpuParticleResources::getSortedIndices() const
386 {
387 return mSortedIndices[mSortedIndicesBufferIdx];
388 }
389
390 GpuParticleHelperBuffers::GpuParticleHelperBuffers()
391 {
392 // Prepare vertex declaration for rendering tiles
393 SPtr<VertexDataDesc> tileVertexDesc = bs_shared_ptr_new<VertexDataDesc>();
394 tileVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD);
395
396 tileVertexDecl = VertexDeclaration::create(tileVertexDesc);
397
398 // Prepare vertex declaration for injecting new particles
399 SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>();
400 injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Position & time, per instance
401 injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 1, 0, 1); // Velocity, per instance
402 injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 0, 1); // Size, per instance
403 injectVertexDesc->addVertElem(VET_FLOAT1, VES_TEXCOORD, 3, 0, 1); // Rotation, per instance
404 injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 4, 0, 1); // Data UV, per instance
405 injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 5, 1); // Sprite texture coordinates
406
407 injectVertexDecl = VertexDeclaration::create(injectVertexDesc);
408
409 // Prepare UV coordinates for rendering tiles
410 VERTEX_BUFFER_DESC tileUVBufferDesc;
411 tileUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4;
412 tileUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride();
413
414 tileUVs = VertexBuffer::create(tileUVBufferDesc);
415
416 auto* const tileUVData = (Vector2*)tileUVs->lock(GBL_WRITE_ONLY_DISCARD);
417 const float tileUVScale = GpuParticleResources::TILE_SIZE / (float)GpuParticleResources::TEX_SIZE;
418 for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++)
419 {
420 tileUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * tileUVScale;
421 tileUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * tileUVScale;
422 tileUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * tileUVScale;
423 tileUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * tileUVScale;
424 }
425
426 tileUVs->unlock();
427
428 // Prepare UV coordinates for rendering particles
429 VERTEX_BUFFER_DESC particleUVBufferDesc;
430 particleUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4;
431 particleUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride();
432
433 particleUVs = VertexBuffer::create(particleUVBufferDesc);
434
435 auto* const particleUVData = (Vector2*)particleUVs->lock(GBL_WRITE_ONLY_DISCARD);
436 const float particleUVScale = 1.0f / (float)GpuParticleResources::TEX_SIZE;
437 for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++)
438 {
439 particleUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * particleUVScale;
440 particleUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * particleUVScale;
441 particleUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * particleUVScale;
442 particleUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * particleUVScale;
443 }
444
445 particleUVs->unlock();
446
447 // Prepare indices for rendering tiles & particles
448 INDEX_BUFFER_DESC spriteIndexBufferDesc;
449 spriteIndexBufferDesc.indexType = IT_16BIT;
450 spriteIndexBufferDesc.numIndices = PARTICLES_PER_INSTANCE * 6;
451
452 spriteIndices = IndexBuffer::create(spriteIndexBufferDesc);
453
454 auto* const indices = (UINT16*)spriteIndices->lock(GBL_WRITE_ONLY_DISCARD);
455
456 const Conventions& rapiConventions = gCaps().conventions;
457 for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++)
458 {
459 // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't
460 // get culled.
461 if (rapiConventions.uvYAxis == Conventions::Axis::Up)
462 {
463 indices[i * 6 + 0] = i * 4 + 2; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 0;
464 indices[i * 6 + 3] = i * 4 + 3; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 0;
465 }
466 else
467 {
468 indices[i * 6 + 0] = i * 4 + 0; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 2;
469 indices[i * 6 + 3] = i * 4 + 0; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 3;
470 }
471 }
472
473 spriteIndices->unlock();
474
475 // Prepare a scratch buffer we'll use to clear tiles
476 GPU_BUFFER_DESC tileScratchBufferDesc;
477 tileScratchBufferDesc.type = GBT_STANDARD;
478 tileScratchBufferDesc.format = BF_32X2F;
479 tileScratchBufferDesc.elementCount = NUM_SCRATCH_TILES;
480 tileScratchBufferDesc.usage = GBU_DYNAMIC;
481
482 tileScratch = GpuBuffer::create(tileScratchBufferDesc);
483
484 // Prepare a scratch buffer we'll use to inject new particles
485 VERTEX_BUFFER_DESC injectScratchBufferDesc;
486 injectScratchBufferDesc.numVerts = NUM_SCRATCH_PARTICLES;
487 injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0);
488 injectScratchBufferDesc.usage = GBU_DYNAMIC;
489
490 injectScratch = VertexBuffer::create(injectScratchBufferDesc);
491 }
492
493 GpuParticleSystem::GpuParticleSystem(ParticleSystem* parent)
494 :mParent(parent)
495 {
496 GpuParticleSimulation::instance().addSystem(this);
497 }
498
499 GpuParticleSystem::~GpuParticleSystem()
500 {
501 GpuParticleSimulation::instance().removeSystem(this);
502 }
503
504 bool GpuParticleSystem::allocateTiles(GpuParticleResources& resources, Vector<GpuParticle>& newParticles,
505 Vector<UINT32>& newTiles)
506 {
507 GpuParticleTile cachedTile = mLastAllocatedTile == (UINT32)-1 ? GpuParticleTile() : mTiles[mLastAllocatedTile];
508 Vector2 tileUV = GpuParticleResources::getTileCoords(cachedTile.id);
509
510 bool newTilesAdded = false;
511 for (UINT32 i = 0; i < (UINT32)newParticles.size(); i++)
512 {
513 UINT32 tileIdx;
514
515 // Use the last allocated tile if there's room
516 if (cachedTile.numFreeParticles > 0)
517 tileIdx = mLastAllocatedTile;
518 else
519 {
520 // Otherwise try to find an inactive tile
521 if (mNumActiveTiles < (UINT32)mTiles.size())
522 {
523 tileIdx = mActiveTiles.find(false);
524 mActiveTiles[tileIdx] = true;
525 }
526 // And finally just allocate a new tile if no room elsewhere
527 else
528 {
529 const UINT32 tileId = resources.allocTile();
530 if (tileId == (UINT32)-1)
531 return newTilesAdded; // Out of space in the texture
532
533 GpuParticleTile newTile;
534 newTile.id = tileId;
535 newTile.lifetime = 0.0f;
536
537 tileIdx = (UINT32)mTiles.size();
538 newTiles.push_back(newTile.id);
539 mTiles.push_back(newTile);
540 mActiveTiles.add(true);
541
542 newTilesAdded = true;
543 }
544
545 mLastAllocatedTile = tileIdx;
546 tileUV = GpuParticleResources::getTileCoords(mTiles[tileIdx].id);
547 mTiles[tileIdx].numFreeParticles = GpuParticleResources::PARTICLES_PER_TILE;
548
549 cachedTile = mTiles[tileIdx];
550 mNumActiveTiles++;
551 }
552
553 GpuParticleTile& tile = mTiles[tileIdx];
554 GpuParticle& particle = newParticles[i];
555
556 const UINT32 tileParticleIdx = GpuParticleResources::PARTICLES_PER_TILE - tile.numFreeParticles;
557 particle.dataUV = tileUV + GpuParticleResources::getParticleCoords(tileParticleIdx);
558
559 tile.numFreeParticles--;
560 tile.lifetime = std::max(tile.lifetime, mTime + particle.lifetime);
561
562 cachedTile.numFreeParticles--;
563 }
564
565 return newTilesAdded;
566 }
567
568 void GpuParticleSystem::detectInactiveTiles()
569 {
570 mNumActiveTiles = 0;
571 for (UINT32 i = 0; i < (UINT32)mTiles.size(); i++)
572 {
573 if (mTiles[i].lifetime >= mTime)
574 {
575 mNumActiveTiles++;
576 continue;
577 }
578
579 mActiveTiles[i] = false;
580
581 if (mLastAllocatedTile == i)
582 mLastAllocatedTile = (UINT32)-1;
583 }
584 }
585
586 bool GpuParticleSystem::freeInactiveTiles(GpuParticleResources& resources)
587 {
588 const UINT32 numFreeTiles = (UINT32)mTiles.size() - mNumActiveTiles;
589 for(UINT32 i = 0; i < numFreeTiles; i++)
590 {
591 const UINT32 freeIdx = mActiveTiles.find(false);
592 assert(freeIdx != (UINT32)-1);
593
594 const UINT32 lastIdx = (UINT32)mTiles.size() - 1;
595
596 if (freeIdx != lastIdx)
597 {
598 std::swap(mTiles[freeIdx], mTiles[lastIdx]);
599 std::swap(mActiveTiles[freeIdx], mActiveTiles[lastIdx]);
600 }
601
602 resources.freeTile(mTiles[lastIdx].id);
603
604 mTiles.erase(mTiles.end() - 1);
605 mActiveTiles.remove(lastIdx);
606 }
607
608 // Tile order changed so this might no longer be valid
609 if (numFreeTiles > 0)
610 mLastAllocatedTile = (UINT32)-1;
611
612 return numFreeTiles > 0;
613 }
614
615 void GpuParticleSystem::updateGpuBuffers()
616 {
617 const auto numTiles = (UINT32)mTiles.size();
618 const UINT32 numTilesToAllocates = Math::divideAndRoundUp(numTiles, TILES_PER_INSTANCE) * TILES_PER_INSTANCE;
619
620 // Tile offsets buffer
621 if(numTiles > 0)
622 {
623 GPU_BUFFER_DESC tilesBufferDesc;
624 tilesBufferDesc.type = GBT_STANDARD;
625 tilesBufferDesc.format = BF_32X2F;
626 tilesBufferDesc.elementCount = numTilesToAllocates;
627 tilesBufferDesc.usage = GBU_DYNAMIC;
628
629 mTileUVs = GpuBuffer::create(tilesBufferDesc);
630
631 auto* tileUVs = (Vector2*)mTileUVs->lock(GBL_WRITE_ONLY_NO_OVERWRITE);
632 for (UINT32 i = 0; i < numTiles; i++)
633 tileUVs[i] = GpuParticleResources::getTileCoords(mTiles[i].id);
634
635 for (UINT32 i = numTiles; i < numTilesToAllocates; i++)
636 tileUVs[i] = Vector2(2.0f, 2.0f); // Out of range
637
638 mTileUVs->unlock();
639 }
640
641 // Particle data offsets
642 const UINT32 numParticles = numTiles * GpuParticleResources::PARTICLES_PER_TILE;
643
644 if(numParticles > 0)
645 {
646 GPU_BUFFER_DESC particleUVDesc;
647 particleUVDesc.type = GBT_STANDARD;
648 particleUVDesc.format = BF_16X2U;
649 particleUVDesc.elementCount = numParticles;
650 particleUVDesc.usage = GBU_DYNAMIC;
651
652 mParticleIndices = GpuBuffer::create(particleUVDesc);
653 auto* particleIndices = (UINT32*)mParticleIndices->lock(GBL_WRITE_ONLY_NO_OVERWRITE);
654
655 UINT32 idx = 0;
656 for (UINT32 i = 0; i < numTiles; i++)
657 {
658 const Vector2I tileOffset = GpuParticleResources::getTileOffset(mTiles[i].id);
659 for (UINT32 y = 0; y < GpuParticleResources::TILE_SIZE; y++)
660 {
661 for (UINT32 x = 0; x < GpuParticleResources::TILE_SIZE; x++)
662 {
663 const Vector2I offset = tileOffset + Vector2I(x, y);
664 particleIndices[idx++] = (offset.x & 0xFFFF) | (offset.y << 16);
665 }
666 }
667 }
668
669 mParticleIndices->unlock();
670 }
671 }
672
673 void GpuParticleSystem::advanceTime(float dt)
674 {
675 const ParticleSystemSettings& settings = mParent->getSettings();
676
677 float timeStep;
678 mTime = bs::ParticleSystem::_advanceTime(mTime, dt, settings.duration, settings.isLooping, timeStep);
679 }
680
681 AABox GpuParticleSystem::getBounds() const
682 {
683 const ParticleSystemSettings& settings = mParent->getSettings();
684
685 if(settings.useAutomaticBounds)
686 return AABox::INF_BOX;
687
688 return settings.customBounds;
689 }
690
691 struct GpuParticleSimulation::Pimpl
692 {
693 GpuParticleResources resources;
694 GpuParticleHelperBuffers helperBuffers;
695 SPtr<GpuParamBlockBuffer> vectorFieldParams;
696 SPtr<GpuParamBlockBuffer> depthCollisionParams;
697 SPtr<GpuParamBlockBuffer> simulationParams;
698 UnorderedSet<GpuParticleSystem*> systems;
699 };
700
701 GpuParticleSimulation::GpuParticleSimulation()
702 :m(bs_new<Pimpl>())
703 {
704 m->vectorFieldParams = gVectorFieldParamsDef.createBuffer();
705 m->depthCollisionParams = gGpuParticleDepthCollisionParamsDef.createBuffer();
706 m->simulationParams = gGpuParticleSimulateParamsDef.createBuffer();
707 }
708
709 GpuParticleSimulation::~GpuParticleSimulation()
710 {
711 bs_delete(m);
712 }
713
714 void GpuParticleSimulation::addSystem(GpuParticleSystem* system)
715 {
716 m->systems.insert(system);
717 }
718
719 void GpuParticleSimulation::removeSystem(GpuParticleSystem* system)
720 {
721 m->systems.erase(system);
722 }
723
724 void GpuParticleSimulation::simulate(const SceneInfo& sceneInfo, const ParticlePerFrameData* simData,
725 const SPtr<GpuParamBlockBuffer>& viewParams, const GBufferTextures& gbuffer, float dt)
726 {
727 m->resources.swap();
728 m->resources.getCurveTexture().applyChanges();
729
730 Vector<UINT32> newTiles;
731 Vector<GpuParticle> allNewParticles;
732 for (auto& entry : m->systems)
733 {
734 entry->detectInactiveTiles();
735
736 bool tilesDirty = false;
737 const auto iterFind = simData->gpuData.find(entry->getParent()->getId());
738 if(iterFind != simData->gpuData.end())
739 {
740 Vector<GpuParticle>& newParticles = iterFind->second->particles;
741 tilesDirty = entry->allocateTiles(m->resources, newParticles, newTiles);
742
743 allNewParticles.insert(allNewParticles.end(), newParticles.begin(), newParticles.end());
744 }
745
746 entry->advanceTime(dt);
747 tilesDirty |= entry->freeInactiveTiles(m->resources);
748
749 if (tilesDirty)
750 entry->updateGpuBuffers();
751 }
752
753 RenderAPI& rapi = RenderAPI::instance();
754 rapi.setRenderTarget(m->resources.getInjectTarget());
755
756 clearTiles(newTiles);
757 injectParticles(allNewParticles);
758
759 // Simulate
760 // TODO - Run multiple iterations for more stable simulation at lower/erratic framerates
761 gGpuParticleSimulateParamsDef.gDT.set(m->simulationParams, dt);
762 gGpuParticleSimulateParamsDef.gNumIterations.set(m->simulationParams, 1);
763
764 rapi.setRenderTarget(m->resources.getSimulationTarget());
765 rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl);
766
767 SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs };
768 rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers));
769 rapi.setIndexBuffer(m->helperBuffers.spriteIndices);
770 rapi.setDrawOperation(DOT_TRIANGLE_LIST);
771
772 enum class SimType { Normal, DepthCollisionsWorld, DepthCollisionsLocal, Count };
773
774 for(UINT32 i = 0; i < (UINT32)SimType::Count; i++)
775 {
776 const SimType type = (SimType)i;
777 const bool simulateDepthCollisions = type == SimType::DepthCollisionsWorld ||
778 type == SimType::DepthCollisionsLocal;
779 const bool localSpace = type == SimType::DepthCollisionsLocal;
780
781 GpuParticleSimulateMat* simulateMat = GpuParticleSimulateMat::getVariation(simulateDepthCollisions, localSpace);
782 simulateMat->bindGlobal(m->resources, viewParams, gbuffer.depth, gbuffer.normals, m->simulationParams);
783
784 for (auto& entry : m->systems)
785 {
786 if (entry->getNumTiles() == 0)
787 continue;
788
789 ParticleSystem* parentSystem = entry->getParent();
790
791 const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings();
792 if(simSettings.depthCollision.enabled != simulateDepthCollisions)
793 continue;
794
795 if(simulateDepthCollisions)
796 {
797 const ParticleSystemSettings& settings = parentSystem->getSettings();
798 bool isLocal = settings.simulationSpace == ParticleSimulationSpace::Local;
799 if(isLocal != localSpace)
800 continue;
801 }
802
803 const RendererParticles& rendererParticles = sceneInfo.particleSystems[parentSystem->getRendererId()];
804
805 prepareBuffers(entry, rendererParticles);
806
807 SPtr<Texture> vfTexture;
808 if (simSettings.vectorField.vectorField)
809 vfTexture = simSettings.vectorField.vectorField->getTexture();
810
811 simulateMat->bindPerCallParams(entry->getTileUVs(), rendererParticles.perObjectParamBuffer,
812 m->vectorFieldParams, vfTexture, m->depthCollisionParams);
813
814 const UINT32 tileCount = entry->getNumTiles();
815 const UINT32 numInstances = Math::divideAndRoundUp(tileCount, TILES_PER_INSTANCE);
816 rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances);
817 }
818 }
819 }
820
821 void GpuParticleSimulation::sort(const RendererView& view)
822 {
823 const bool supportsCompute = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop;
824 if(!supportsCompute)
825 return;
826
827 // Make sure that the position texture isn't bound for rendering
828 RenderAPI& rapi = RenderAPI::instance();
829 rapi.setRenderTarget(nullptr);
830
831 const Vector3& viewOrigin = view.getProperties().viewOrigin;
832
833 GpuParticleSortPrepareMat* prepareMat = GpuParticleSortPrepareMat::get();
834 prepareMat->bind(m->resources.getCurrentState().positionAndTimeTex);
835
836 UINT32 systemIdx = 0;
837 UINT32 offset = 0;
838 for (auto& entry : m->systems)
839 {
840 if (entry->getNumTiles() == 0)
841 {
842 entry->setSortInfo(false, 0);
843 continue;
844 }
845
846 ParticleSystem* parentSystem = entry->getParent();
847
848 const ParticleSystemSettings& settings = parentSystem->getSettings();
849 if (settings.sortMode != ParticleSortMode::Distance)
850 {
851 entry->setSortInfo(false, 0);
852 continue;
853 }
854
855 entry->setSortInfo(true, offset);
856
857 offset += prepareMat->execute(*entry, systemIdx, viewOrigin, offset,
858 m->resources.mSortBuffers.keys[0],
859 m->resources.mSortedIndices[0]);
860
861 systemIdx++;
862 }
863
864 const UINT32 numSystemsToSort = systemIdx;
865 if(numSystemsToSort == 0)
866 return;
867
868 const UINT32 totalNumKeys = offset;
869 const UINT32 keyMask = 0xFFFF | (Math::ceilToInt(Math::log2((float)(numSystemsToSort + 1))) << 16);
870 const UINT32 outputBufferIdx = GpuSort::instance().sort(m->resources.mSortBuffers, totalNumKeys, keyMask);
871
872 m->resources.mSortedIndicesBufferIdx = outputBufferIdx;
873 }
874
875 void GpuParticleSimulation::prepareBuffers(const GpuParticleSystem* system, const RendererParticles& rendererInfo)
876 {
877 ParticleSystem* parentSystem = system->getParent();
878
879 const ParticleSystemSettings& settings = parentSystem->getSettings();
880 const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings();
881
882 const Random& random = system->getRandom();
883 const float time = system->getTime();
884 const float nrmTime = time / settings.duration;
885
886 gGpuParticleSimulateParamsDef.gDrag.set(m->simulationParams, simSettings.drag);
887 gGpuParticleSimulateParamsDef.gAcceleration.set(m->simulationParams, simSettings.acceleration);
888
889 SPtr<Texture> vfTexture;
890 if(simSettings.vectorField.vectorField)
891 vfTexture = simSettings.vectorField.vectorField->getTexture();
892
893 if(vfTexture)
894 {
895 gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 1);
896
897 const SPtr<VectorField>& vectorField = simSettings.vectorField.vectorField;
898 const VECTOR_FIELD_DESC& vfDesc = vectorField->getDesc();
899
900 const Vector3 tiling(
901 simSettings.vectorField.tilingX ? 0.0f : 1.0f,
902 simSettings.vectorField.tilingY ? 0.0f : 1.0f,
903 simSettings.vectorField.tilingZ ? 0.0f : 1.0f
904 );
905
906 gVectorFieldParamsDef.gFieldBounds.set(m->vectorFieldParams, vfDesc.bounds.getSize());
907 gVectorFieldParamsDef.gFieldTightness.set(m->vectorFieldParams, simSettings.vectorField.tightness);
908 gVectorFieldParamsDef.gFieldTiling.set(m->vectorFieldParams, tiling);
909 gVectorFieldParamsDef.gFieldIntensity.set(m->vectorFieldParams, simSettings.vectorField.intensity);
910
911 const Vector3 rotationRate = simSettings.vectorField.rotationRate.evaluate(nrmTime, random) * time;
912 const Quaternion addedRotation(Degree(rotationRate.x), Degree(rotationRate.y), Degree(rotationRate.z));
913
914 const Vector3 offset = vfDesc.bounds.getMin() + simSettings.vectorField.offset;
915 const Quaternion rotation = simSettings.vectorField.rotation * addedRotation;
916 const Vector3 scale = vfDesc.bounds.getSize() * simSettings.vectorField.scale;
917
918 Matrix4 fieldToWorld = Matrix4::TRS(offset, rotation, scale);
919 fieldToWorld = rendererInfo.localToWorld * fieldToWorld;
920
921 const Matrix3 fieldToWorld3x3 = fieldToWorld.get3x3();
922
923 gVectorFieldParamsDef.gFieldToWorld.set(m->vectorFieldParams, fieldToWorld3x3);
924 gVectorFieldParamsDef.gWorldToField.set(m->vectorFieldParams, fieldToWorld.inverseAffine());
925 }
926 else
927 gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 0);
928
929 const ParticleDepthCollisionSettings& depthCollisionSettings = simSettings.depthCollision;
930 if(depthCollisionSettings.enabled)
931 {
932 Vector3 scale3D = rendererInfo.particleSystem->getTransform().getScale();
933 float uniformScale = std::max(std::max(scale3D.x, scale3D.y), scale3D.z);
934
935 gGpuParticleDepthCollisionParamsDef.gCollisionRange.set(m->depthCollisionParams, 2.0f);
936 gGpuParticleDepthCollisionParamsDef.gCollisionRadiusScale.set(m->depthCollisionParams,
937 depthCollisionSettings.radiusScale * uniformScale);
938 gGpuParticleDepthCollisionParamsDef.gDampening.set(m->depthCollisionParams,
939 depthCollisionSettings.dampening);
940 gGpuParticleDepthCollisionParamsDef.gRestitution.set(m->depthCollisionParams,
941 depthCollisionSettings.restitution);
942
943 const Vector2 sizeScaleUVOffset =
944 GpuParticleCurves::getUVOffset(rendererInfo.sizeScaleFrameIdxCurveAlloc);
945 const float sizeScaleUVScale =
946 GpuParticleCurves::getUVScale(rendererInfo.sizeScaleFrameIdxCurveAlloc);
947
948 gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveOffset.set(m->depthCollisionParams, sizeScaleUVOffset);
949 gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveScale.set(m->depthCollisionParams, Vector2(sizeScaleUVScale, 0.0f));
950 }
951 }
952
953 void GpuParticleSimulation::clearTiles(const Vector<UINT32>& tiles)
954 {
955 const auto numTiles = (UINT32)tiles.size();
956 if(numTiles == 0)
957 return;
958
959 const UINT32 numIterations = Math::divideAndRoundUp(numTiles, GpuParticleHelperBuffers::NUM_SCRATCH_TILES);
960
961 GpuParticleClearMat* clearMat = GpuParticleClearMat::get();
962 clearMat->bind(m->helperBuffers.tileScratch);
963
964 RenderAPI& rapi = RenderAPI::instance();
965 rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl);
966
967 SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs };
968 rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers));
969 rapi.setIndexBuffer(m->helperBuffers.spriteIndices);
970 rapi.setDrawOperation(DOT_TRIANGLE_LIST);
971
972 UINT32 tileStart = 0;
973 for (UINT32 i = 0; i < numIterations; i++)
974 {
975 static_assert(GpuParticleHelperBuffers::NUM_SCRATCH_TILES % TILES_PER_INSTANCE == 0,
976 "Tile scratch buffer size must be divisble with number of tiles per instance.");
977
978 const UINT32 tileEnd = std::min(numTiles, tileStart + GpuParticleHelperBuffers::NUM_SCRATCH_TILES);
979
980 auto* tileUVs = (Vector2*)m->helperBuffers.tileScratch->lock(GBL_WRITE_ONLY_DISCARD);
981 for (UINT32 j = tileStart; j < tileEnd; j++)
982 tileUVs[j - tileStart] = GpuParticleResources::getTileCoords(tiles[j]);
983
984 const UINT32 alignedTileEnd = Math::divideAndRoundUp(tileEnd, TILES_PER_INSTANCE) * TILES_PER_INSTANCE;
985 for (UINT32 j = tileEnd; j < alignedTileEnd; j++)
986 tileUVs[j - tileEnd] = Vector2(2.0f, 2.0f); // Out of bounds (we don't want to accidentaly clear used tiles)
987
988 m->helperBuffers.tileScratch->unlock();
989
990 const UINT32 numInstances = (alignedTileEnd - tileStart) / TILES_PER_INSTANCE;
991 rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances);
992
993 tileStart = alignedTileEnd;
994 }
995 }
996
997 void GpuParticleSimulation::injectParticles(const Vector<GpuParticle>& particles)
998 {
999 const auto numParticles = (UINT32)particles.size();
1000 const UINT32 numIterations = Math::divideAndRoundUp(numParticles, GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES);
1001
1002 GpuParticleInjectMat* injectMat = GpuParticleInjectMat::get();
1003 injectMat->bind();
1004
1005 RenderAPI& rapi = RenderAPI::instance();
1006 rapi.setVertexDeclaration(m->helperBuffers.injectVertexDecl);
1007
1008 SPtr<VertexBuffer> buffers[] = { m->helperBuffers.injectScratch, m->helperBuffers.particleUVs };
1009 rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers));
1010 rapi.setIndexBuffer(m->helperBuffers.spriteIndices);
1011 rapi.setDrawOperation(DOT_TRIANGLE_LIST);
1012
1013 UINT32 particleStart = 0;
1014 for (UINT32 i = 0; i < numIterations; i++)
1015 {
1016 const UINT32 particleEnd = std::min(numParticles, particleStart + GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES);
1017
1018 auto* particleData = (GpuParticleVertex*)m->helperBuffers.injectScratch->lock(GBL_WRITE_ONLY_DISCARD);
1019 for (UINT32 j = particleStart; j < particleEnd; j++)
1020 particleData[j - particleStart] = particles[j].getVertex();
1021
1022 m->helperBuffers.injectScratch->unlock();
1023
1024 rapi.drawIndexed(0, 6, 0, 4, particleEnd - particleStart);
1025 particleStart = particleEnd;
1026 }
1027 }
1028
/** Returns the GPU particle resources held by this simulation's internal data. */
GpuParticleResources& GpuParticleSimulation::getResources() const
{
	return m->resources;
}
1033
1034 SPtr<GpuParamBlockBuffer> createGpuParticleVertexInputBuffer()
1035 {
1036 SPtr<GpuParamBlockBuffer> inputBuffer = gGpuParticleTileVertexParamsDef.createBuffer();
1037
1038 // [0, 1] -> [-1, 1] and flip Y
1039 Vector4 uvToNdc(2.0f, -2.0f, -1.0f, 1.0f);
1040
1041 const Conventions& rapiConventions = gCaps().conventions;
1042
1043 // Either of these flips the Y axis, but if they're both true they cancel out
1044 if ((rapiConventions.uvYAxis == Conventions::Axis::Up) ^ (rapiConventions.ndcYAxis == Conventions::Axis::Down))
1045 {
1046 uvToNdc.y = -uvToNdc.y;
1047 uvToNdc.w = -uvToNdc.w;
1048 }
1049
1050 gGpuParticleTileVertexParamsDef.gUVToNDC.set(inputBuffer, uvToNdc);
1051
1052 return inputBuffer;
1053 }
1054
1055 GpuParticleClearMat::GpuParticleClearMat()
1056 {
1057 const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer();
1058
1059 mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input", inputBuffer);
1060 mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs", mTileUVParam);
1061 }
1062
/**
 * Sets the TILES_PER_INSTANCE shader define to the value of the TILES_PER_INSTANCE constant.
 *
 * @param[in, out]	defines		Set of defines the entry is added to.
 */
void GpuParticleClearMat::_initDefines(ShaderDefines& defines)
{
	defines.set("TILES_PER_INSTANCE", TILES_PER_INSTANCE);
}
1067
/**
 * Assigns the tile UV buffer to the material and binds the material.
 *
 * @param[in]	tileUVs		Buffer assigned to the "gTileUVs" parameter.
 */
void GpuParticleClearMat::bind(const SPtr<GpuBuffer>& tileUVs)
{
	// The parameter must be assigned before the material (and with it its parameters) gets bound
	mTileUVParam.set(tileUVs);

	RendererMaterial::bind();
}
1074
1075 GpuParticleInjectMat::GpuParticleInjectMat()
1076 {
1077 const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer();
1078 mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input", inputBuffer);
1079 }
1080
1081 GpuParticleCurveInjectMat::GpuParticleCurveInjectMat()
1082 {
1083 const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer();
1084 mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input", inputBuffer);
1085 }
1086
1087 GpuParticleSimulateMat::GpuParticleSimulateMat()
1088 {
1089 const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer();
1090 mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input", inputBuffer);
1091
1092 mParams->getParamInfo()->getBinding(
1093 GPT_FRAGMENT_PROGRAM,
1094 GpuPipelineParamInfoBase::ParamType::ParamBlock,
1095 "Params",
1096 mParamsBinding
1097 );
1098
1099 mParams->getParamInfo()->getBinding(
1100 GPT_FRAGMENT_PROGRAM,
1101 GpuPipelineParamInfoBase::ParamType::ParamBlock,
1102 "VectorFieldParams",
1103 mVectorFieldBinding
1104 );
1105
1106 mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs", mTileUVParam);
1107 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gPosAndTimeTex", mPosAndTimeTexParam);
1108 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVelocityTex", mVelocityTexParam);
1109 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVectorFieldTex", mVectorFieldTexParam);
1110
1111 mSupportsDepthCollisions = mVariation.getUInt("DEPTH_COLLISIONS") > 0;
1112 if(mSupportsDepthCollisions)
1113 {
1114 mParams->getParamInfo()->getBinding(
1115 GPT_FRAGMENT_PROGRAM,
1116 GpuPipelineParamInfoBase::ParamType::ParamBlock,
1117 "PerCamera",
1118 mPerCameraBinding
1119 );
1120
1121 mParams->getParamInfo()->getBinding(
1122 GPT_FRAGMENT_PROGRAM,
1123 GpuPipelineParamInfoBase::ParamType::ParamBlock,
1124 "PerObject",
1125 mPerObjectBinding
1126 );
1127
1128 mParams->getParamInfo()->getBinding(
1129 GPT_FRAGMENT_PROGRAM,
1130 GpuPipelineParamInfoBase::ParamType::ParamBlock,
1131 "DepthCollisionParams",
1132 mDepthCollisionBinding
1133 );
1134
1135 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSizeRotationTex", mSizeRotationTexParam);
1136 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gCurvesTex", mCurvesTexParam);
1137 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gDepthTex", mDepthTexParam);
1138 mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gNormalsTex", mNormalsTexParam);
1139 }
1140 }
1141
/**
 * Sets the TILES_PER_INSTANCE shader define to the value of the TILES_PER_INSTANCE constant.
 *
 * @param[in, out]	defines		Set of defines the entry is added to.
 */
void GpuParticleSimulateMat::_initDefines(ShaderDefines& defines)
{
	defines.set("TILES_PER_INSTANCE", TILES_PER_INSTANCE);
}
1146
1147 void GpuParticleSimulateMat::bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams,
1148 const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams)
1149 {
1150 GpuParticleStateTextures& prevState = resources.getPreviousState();
1151 const GpuParticleStaticTextures& staticTextures = resources.getStaticTextures();
1152 GpuParticleCurves& curveTexture = resources.getCurveTexture();
1153
1154 mParams->setParamBlockBuffer(mParamsBinding.set, mParamsBinding.slot, simulationParams);
1155
1156 mPosAndTimeTexParam.set(prevState.positionAndTimeTex);
1157 mVelocityTexParam.set(prevState.velocityTex);
1158
1159 if(mSupportsDepthCollisions)
1160 {
1161 mParams->setParamBlockBuffer(mPerCameraBinding.set, mPerCameraBinding.slot, viewParams);
1162
1163 mSizeRotationTexParam.set(staticTextures.sizeAndRotationTex);
1164 mCurvesTexParam.set(curveTexture.getTexture());
1165 mDepthTexParam.set(depth);
1166 mNormalsTexParam.set(normals);
1167 }
1168
1169 RendererMaterial::bind(false);
1170 }
1171
/**
 * Assigns the per-call parameters and binds the material's parameters by calling bindParams().
 *
 * @param[in]	tileUVs					Buffer assigned to the "gTileUVs" parameter.
 * @param[in]	perObjectParams			Buffer assigned to the "PerObject" block. Only used with depth collisions.
 * @param[in]	vectorFieldParams		Buffer assigned to the "VectorFieldParams" block.
 * @param[in]	vectorFieldTexture		Texture assigned to "gVectorFieldTex".
 * @param[in]	depthCollisionParams	Buffer assigned to the "DepthCollisionParams" block. Only used with depth
 *										collisions.
 */
void GpuParticleSimulateMat::bindPerCallParams(const SPtr<GpuBuffer>& tileUVs,
	const SPtr<GpuParamBlockBuffer>& perObjectParams, const SPtr<GpuParamBlockBuffer>& vectorFieldParams,
	const SPtr<Texture>& vectorFieldTexture, const SPtr<GpuParamBlockBuffer>& depthCollisionParams)
{
	mTileUVParam.set(tileUVs);
	mParams->setParamBlockBuffer(mVectorFieldBinding.set, mVectorFieldBinding.slot, vectorFieldParams);
	mVectorFieldTexParam.set(vectorFieldTexture);

	// These bindings are only looked up by the constructor when the variation supports depth collisions
	if(mSupportsDepthCollisions)
	{
		mParams->setParamBlockBuffer(mPerObjectBinding.set, mPerObjectBinding.slot, perObjectParams);
		mParams->setParamBlockBuffer(mDepthCollisionBinding.set, mDepthCollisionBinding.slot, depthCollisionParams);
	}

	bindParams();
}
1188
1189 GpuParticleSimulateMat* GpuParticleSimulateMat::getVariation(bool depthCollisions, bool localSpace)
1190 {
1191 if(depthCollisions)
1192 {
1193 if(localSpace)
1194 return get(getVariation<2>());
1195
1196 return get(getVariation<1>());
1197 }
1198
1199 return get(getVariation<0>());
1200 }
1201
/**
 * Creates the parameter buffer assigned to the compute program's "Input" block and looks up the buffer and texture
 * parameters used by the bounds calculation.
 */
GpuParticleBoundsMat::GpuParticleBoundsMat()
{
	// Kept as a member because execute() writes new values to it on every call
	mInputBuffer = gGpuParticleBoundsParamsDef.createBuffer();
	mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input", mInputBuffer);

	mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gParticleIndices", mParticleIndicesParam);
	mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput", mOutputParam);
	mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex", mPosAndTimeTexParam);
}
1211
/**
 * Sets the NUM_THREADS shader define to the value of the NUM_THREADS constant.
 *
 * @param[in, out]	defines		Set of defines the entry is added to.
 */
void GpuParticleBoundsMat::_initDefines(ShaderDefines& defines)
{
	defines.set("NUM_THREADS", NUM_THREADS);
}
1216
/**
 * Assigns the particle position texture to the material and binds the material.
 *
 * @param[in]	positionAndTime		Texture assigned to the "gPosAndTimeTex" parameter.
 */
void GpuParticleBoundsMat::bind(const SPtr<Texture>& positionAndTime)
{
	mPosAndTimeTexParam.set(positionAndTime);

	RendererMaterial::bind();
}
1223
1224 AABox GpuParticleBoundsMat::execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles)
1225 {
1226 static constexpr UINT32 MAX_NUM_GROUPS = 128;
1227
1228 const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS);
1229 const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS);
1230
1231 const UINT32 iterationsPerGroup = numIterations / numGroups;
1232 const UINT32 extraIterations = numIterations % numGroups;
1233
1234 gGpuParticleBoundsParamsDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup);
1235 gGpuParticleBoundsParamsDef.gNumExtraIterations.set(mInputBuffer, extraIterations);
1236 gGpuParticleBoundsParamsDef.gNumParticles.set(mInputBuffer, numParticles);
1237
1238 GPU_BUFFER_DESC outputDesc;
1239 outputDesc.type = GBT_STANDARD;
1240 outputDesc.format = BF_32X2U;
1241 outputDesc.elementCount = numGroups * 2;
1242 outputDesc.usage = GBU_DYNAMIC;
1243
1244 SPtr<GpuBuffer> output = GpuBuffer::create(outputDesc);
1245
1246 mParticleIndicesParam.set(indices);
1247 mOutputParam.set(output);
1248
1249 RenderAPI::instance().dispatchCompute(numGroups);
1250
1251 Vector3 min = Vector3::INF;
1252 Vector3 max = -Vector3::INF;
1253
1254 const Vector3* data = (Vector3*)output->lock(GBL_READ_ONLY);
1255 for(UINT32 i = 0; i < numGroups; i++)
1256 {
1257 min = Vector3::min(min, data[i * 2 + 0]);
1258 max = Vector3::min(max, data[i * 2 + 1]);
1259 }
1260
1261 output->unlock();
1262
1263 return AABox(min, max);
1264 }
1265
/**
 * Creates the parameter buffer assigned to the compute program's "Input" block and looks up the buffer and texture
 * parameters used when preparing sort keys.
 */
GpuParticleSortPrepareMat::GpuParticleSortPrepareMat()
{
	// Kept as a member because execute() writes new values to it on every call
	mInputBuffer = gGpuParticleSortPrepareParamDef.createBuffer();
	mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input", mInputBuffer);

	mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gInputIndices", mInputIndicesParam);
	mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputKeys", mOutputKeysParam);
	mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputIndices", mOutputIndicesParam);
	mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex", mPosAndTimeTexParam);
}
1276
/**
 * Sets the NUM_THREADS shader define to the value of the NUM_THREADS constant.
 *
 * @param[in, out]	defines		Set of defines the entry is added to.
 */
void GpuParticleSortPrepareMat::_initDefines(ShaderDefines& defines)
{
	defines.set("NUM_THREADS", NUM_THREADS);
}
1281
/**
 * Assigns the particle position texture to the material and binds the material.
 *
 * @param[in]	positionAndTime		Texture assigned to the "gPosAndTimeTex" parameter.
 */
void GpuParticleSortPrepareMat::bind(const SPtr<Texture>& positionAndTime)
{
	mPosAndTimeTexParam.set(positionAndTime);

	// NOTE(review): 'false' presumably skips binding of parameters, as execute() calls bindParams() once its
	// per-system parameters are assigned - confirm against RendererMaterial::bind().
	RendererMaterial::bind(false);
}
1288
1289 UINT32 GpuParticleSortPrepareMat::execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin,
1290 UINT32 offset, const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices)
1291 {
1292 static constexpr UINT32 MAX_NUM_GROUPS = 128;
1293
1294 assert(systemIdx < std::pow(2, 16));
1295
1296 const UINT32 numParticles = system.getNumTiles() * GpuParticleResources::PARTICLES_PER_TILE;
1297
1298 const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS);
1299 const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS);
1300
1301 const UINT32 iterationsPerGroup = numIterations / numGroups;
1302 const UINT32 extraIterations = numIterations % numGroups;
1303
1304 Vector3 localViewOrigin;
1305 ParticleSystem* parentSystem = system.getParent();
1306 if(parentSystem->getSettings().simulationSpace == ParticleSimulationSpace::Local)
1307 {
1308 const Matrix4& worldToLocal = parentSystem->getTransform().getInvMatrix();
1309 localViewOrigin = worldToLocal.multiplyAffine(viewOrigin);
1310 }
1311 else
1312 localViewOrigin = viewOrigin;
1313
1314 gGpuParticleSortPrepareParamDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup);
1315 gGpuParticleSortPrepareParamDef.gNumExtraIterations.set(mInputBuffer, extraIterations);
1316 gGpuParticleSortPrepareParamDef.gNumParticles.set(mInputBuffer, numParticles);
1317 gGpuParticleSortPrepareParamDef.gOutputOffset.set(mInputBuffer, offset);
1318 gGpuParticleSortPrepareParamDef.gSystemKey.set(mInputBuffer, systemIdx << 16);
1319 gGpuParticleSortPrepareParamDef.gLocalViewOrigin.set(mInputBuffer, localViewOrigin);
1320
1321 mInputIndicesParam.set(system.getParticleIndices());
1322 mOutputKeysParam.set(outKeys);
1323 mOutputIndicesParam.set(outIndices);
1324
1325 bindParams();
1326 RenderAPI::instance().dispatchCompute(numGroups);
1327 return numParticles;
1328 }
1329
/**
 * Data for a single curve pixel to inject into the curve texture. GpuParticleCurves::applyChanges() writes entries
 * of this type directly into the locked inject scratch vertex buffer, one per drawn instance.
 *
 * NOTE(review): the layout presumably has to match the per-instance elements of the inject vertex declaration
 * (FLOAT4 followed by FLOAT2) - confirm that Color is four floats.
 */
struct GpuParticleCurveInject
{
	Color color; // Value of the pixel
	Vector2 dataUV; // Location of the pixel in the curve texture, as pixel coordinates divided by the texture size
};
1335
1336 GpuParticleCurves::GpuParticleCurves()
1337 {
1338 TEXTURE_DESC textureDesc;
1339 textureDesc.format = PF_RGBA16F;
1340 textureDesc.width = TEX_SIZE;
1341 textureDesc.height = TEX_SIZE;
1342 textureDesc.usage = TU_RENDERTARGET;
1343
1344 mCurveTexture = Texture::create(textureDesc);
1345
1346 RENDER_TEXTURE_DESC rtDesc;
1347 rtDesc.colorSurfaces[0].texture = mCurveTexture;
1348
1349 mRT = RenderTexture::create(rtDesc);
1350
1351 // Prepare vertex declaration for injecting new curves
1352 SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>();
1353 injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Color, per instance
1354 injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 1, 0, 1); // Data UV, per instance
1355 injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 1); // Pixel texture coordinates
1356
1357 mInjectVertexDecl = VertexDeclaration::create(injectVertexDesc);
1358
1359 // Prepare UV coordinates for injecting curves
1360 VERTEX_BUFFER_DESC injectUVBufferDesc;
1361 injectUVBufferDesc.numVerts = 4;
1362 injectUVBufferDesc.vertexSize = injectVertexDesc->getVertexStride(1);
1363
1364 mInjectUV = VertexBuffer::create(injectUVBufferDesc);
1365
1366 auto* const tileUVData = (Vector2*)mInjectUV->lock(GBL_WRITE_ONLY_DISCARD);
1367 const float tileUVScale = 1.0f / (float)TEX_SIZE;
1368 tileUVData[0] = Vector2(0.0f, 0.0f) * tileUVScale;
1369 tileUVData[1] = Vector2(1.0f, 0.0f) * tileUVScale;
1370 tileUVData[2] = Vector2(1.0f, 1.0f) * tileUVScale;
1371 tileUVData[3] = Vector2(0.0f, 1.0f) * tileUVScale;
1372
1373 mInjectUV->unlock();
1374
1375 // Prepare indices for injecting curves
1376 INDEX_BUFFER_DESC injectIndexBufferDesc;
1377 injectIndexBufferDesc.indexType = IT_16BIT;
1378 injectIndexBufferDesc.numIndices = 6;
1379
1380 mInjectIndices = IndexBuffer::create(injectIndexBufferDesc);
1381
1382 const Conventions& rapiConventions = gCaps().conventions;
1383
1384 auto* const indices = (UINT16*)mInjectIndices->lock(GBL_WRITE_ONLY_DISCARD);
1385
1386 // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't
1387 // get culled.
1388 if (rapiConventions.uvYAxis == Conventions::Axis::Up)
1389 {
1390 indices[0] = 2; indices[1] = 1; indices[2] = 0;
1391 indices[3] = 3; indices[4] = 2; indices[5] = 0;
1392 }
1393 else
1394 {
1395 indices[0] = 0; indices[1] = 1; indices[2] = 2;
1396 indices[3] = 0; indices[4] = 2; indices[5] = 3;
1397 }
1398
1399 mInjectIndices->unlock();
1400
1401 // Prepare a scratch buffer we'll use to inject new curves
1402 VERTEX_BUFFER_DESC injectScratchBufferDesc;
1403 injectScratchBufferDesc.numVerts = SCRATCH_NUM_VERTICES;
1404 injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0);
1405 injectScratchBufferDesc.usage = GBU_DYNAMIC;
1406
1407 mInjectScratch = VertexBuffer::create(injectScratchBufferDesc);
1408 }
1409
/** Releases the pixel copies of any allocations that were never applied through applyChanges(). */
GpuParticleCurves::~GpuParticleCurves()
{
	// Each pending entry holds a pixel copy made by alloc() using the pending allocator
	for(auto& entry : mPendingAllocations)
		mPendingAllocator.free(entry.pixels);

	mPendingAllocator.clear();
}
1417
1418 TextureRowAllocation GpuParticleCurves::alloc(Color* pixels, uint32_t count)
1419 {
1420 PendingAllocation pendingAlloc;
1421 pendingAlloc.allocation = mRowAllocator.alloc(count);
1422
1423 if(pendingAlloc.allocation.length == 0)
1424 return pendingAlloc.allocation;
1425
1426 pendingAlloc.pixels = (Color*)mPendingAllocator.alloc(sizeof(Color) * count);
1427 memcpy(pendingAlloc.pixels, pixels, sizeof(Color) * count);
1428
1429 mPendingAllocations.push_back(pendingAlloc);
1430 return pendingAlloc.allocation;
1431 }
1432
/**
 * Returns a row allocation to the row allocator. Entries queued in the pending allocation list are not touched.
 *
 * @param[in]	alloc	Allocation to release.
 */
void GpuParticleCurves::free(const TextureRowAllocation& alloc)
{
	mRowAllocator.free(alloc);
}
1437
1438 void GpuParticleCurves::applyChanges()
1439 {
1440 const auto numCurves = (UINT32)mPendingAllocations.size();
1441 if(numCurves == 0)
1442 return;
1443
1444 GpuParticleCurveInjectMat* injectMat = GpuParticleCurveInjectMat::get();
1445 injectMat->bind();
1446
1447 RenderAPI& rapi = RenderAPI::instance();
1448 rapi.setRenderTarget(mRT);
1449 rapi.setVertexDeclaration(mInjectVertexDecl);
1450
1451 SPtr<VertexBuffer> buffers[] = { mInjectScratch, mInjectUV };
1452 rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers));
1453 rapi.setIndexBuffer(mInjectIndices);
1454 rapi.setDrawOperation(DOT_TRIANGLE_LIST);
1455
1456 UINT32 curveIdx = 0;
1457
1458 auto* data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD);
1459 while(curveIdx < numCurves)
1460 {
1461 UINT32 count = 0;
1462 for(; curveIdx < numCurves; curveIdx++)
1463 {
1464 const PendingAllocation& pendingAlloc = mPendingAllocations[curveIdx];
1465
1466 const UINT32 entryCount = pendingAlloc.allocation.length;
1467 if((count + entryCount) > SCRATCH_NUM_VERTICES)
1468 break;
1469
1470 for(UINT32 i = 0; i < entryCount; i++)
1471 {
1472 data[count].color = pendingAlloc.pixels[i];
1473 data[count].dataUV = Vector2(
1474 (pendingAlloc.allocation.x + i) / (float)TEX_SIZE,
1475 pendingAlloc.allocation.y / (float)TEX_SIZE);
1476
1477 count++;
1478 }
1479 }
1480
1481 mInjectScratch->unlock();
1482 rapi.drawIndexed(0, 6, 0, 4, count);
1483
1484 data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD);
1485 }
1486
1487 mInjectScratch->unlock();
1488
1489 for(auto& entry : mPendingAllocations)
1490 mPendingAllocator.free(entry.pixels);
1491
1492 mPendingAllocations.clear();
1493 mPendingAllocator.clear();
1494 }
1495
1496 Vector2 GpuParticleCurves::getUVOffset(const TextureRowAllocation& alloc)
1497 {
1498 return Vector2(
1499 ((float)alloc.x + 0.5f) / TEX_SIZE,
1500 ((float)alloc.y + 0.5f) / TEX_SIZE
1501 );
1502 }
1503
1504 float GpuParticleCurves::getUVScale(const TextureRowAllocation& alloc)
1505 {
1506 if(alloc.length == 0)
1507 return 0.0f;
1508
1509 return (alloc.length - 1) / (float)TEX_SIZE;
1510 }
1511}}
1512