| 1 | //************************************ bs::framework - Copyright 2018 Marko Pintera **************************************// |
| 2 | //*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********// |
| 3 | #include "BsGpuParticleSimulation.h" |
| 4 | #include "Renderer/BsParamBlocks.h" |
| 5 | #include "Renderer/BsRendererMaterial.h" |
| 6 | #include "Renderer/BsGpuResourcePool.h" |
| 7 | #include "RenderAPI/BsVertexBuffer.h" |
| 8 | #include "RenderAPI/BsIndexBuffer.h" |
| 9 | #include "RenderAPI/BsGpuBuffer.h" |
| 10 | #include "RenderAPI/BsVertexDataDesc.h" |
| 11 | #include "RenderAPI/BsGpuPipelineParamInfo.h" |
| 12 | #include "Particles/BsVectorField.h" |
| 13 | #include "Particles/BsParticleDistribution.h" |
| 14 | #include "Math/BsVector3.h" |
| 15 | #include "BsRendererParticles.h" |
| 16 | #include "BsRendererScene.h" |
| 17 | #include "BsRenderBeast.h" |
| 18 | #include "Utility/BsGpuSort.h" |
| 19 | |
| 20 | namespace bs { namespace ct |
| 21 | { |
/**
 * Parameter block with a single Vector4 entry, gUVToNDC.
 * NOTE(review): judging by the name it carries the transform from tile UV space to NDC used by the tile vertex
 * programs - confirm against the shaders.
 */
BS_PARAM_BLOCK_BEGIN(GpuParticleTileVertexParamsDef)
BS_PARAM_BLOCK_ENTRY(Vector4, gUVToNDC)
BS_PARAM_BLOCK_END

/** Global instance of the parameter block definition above. */
GpuParticleTileVertexParamsDef gGpuParticleTileVertexParamsDef;
| 27 | |
| 28 | /** Material used for clearing tiles in the texture used for particle GPU simulation. */ |
| 29 | class GpuParticleClearMat : public RendererMaterial<GpuParticleClearMat> |
| 30 | { |
| 31 | RMAT_DEF_CUSTOMIZED("GpuParticleClear.bsl" ); |
| 32 | |
| 33 | public: |
| 34 | GpuParticleClearMat(); |
| 35 | |
| 36 | /** Binds the material to the pipeline, along with the @p tileUVs buffer containing locations of tiles to clear. */ |
| 37 | void bind(const SPtr<GpuBuffer>& tileUVs); |
| 38 | |
| 39 | private: |
| 40 | GpuParamBuffer mTileUVParam; |
| 41 | }; |
| 42 | |
| 43 | /** Material used for adding new particles into the particle state textures. */ |
| 44 | class GpuParticleInjectMat : public RendererMaterial<GpuParticleInjectMat> |
| 45 | { |
| 46 | RMAT_DEF("GpuParticleInject.bsl" ); |
| 47 | |
| 48 | public: |
| 49 | GpuParticleInjectMat(); |
| 50 | }; |
| 51 | |
| 52 | /** Material used for adding new curves into the curve texture. */ |
| 53 | class GpuParticleCurveInjectMat : public RendererMaterial<GpuParticleCurveInjectMat> |
| 54 | { |
| 55 | RMAT_DEF("GpuParticleCurveInject.bsl" ); |
| 56 | |
| 57 | public: |
| 58 | GpuParticleCurveInjectMat(); |
| 59 | }; |
| 60 | |
/**
 * Parameter block describing a vector field: bounds, intensity, tiling and tightness values, plus a world-to-field
 * 4x4 matrix and a field-to-world 3x3 matrix.
 * NOTE(review): exact units and semantics of each entry are defined by the shader consuming the block - confirm
 * there.
 */
BS_PARAM_BLOCK_BEGIN(VectorFieldParamsDef)
BS_PARAM_BLOCK_ENTRY(Vector3, gFieldBounds)
BS_PARAM_BLOCK_ENTRY(float, gFieldIntensity)
BS_PARAM_BLOCK_ENTRY(Vector3, gFieldTiling)
BS_PARAM_BLOCK_ENTRY(float, gFieldTightness)
BS_PARAM_BLOCK_ENTRY(Matrix4, gWorldToField)
BS_PARAM_BLOCK_ENTRY(Matrix3, gFieldToWorld)
BS_PARAM_BLOCK_END

/** Global instance of the vector field parameter block definition. */
VectorFieldParamsDef gVectorFieldParamsDef;
| 71 | |
/**
 * Parameter block holding the settings for depth buffer collisions: four scalar values (range, restitution,
 * dampening, radius scale) and the offset/scale pair of the size scale curve.
 * NOTE(review): the offset/scale pair presumably addresses the curve inside the curve texture - confirm.
 */
BS_PARAM_BLOCK_BEGIN(GpuParticleDepthCollisionParamsDef)
BS_PARAM_BLOCK_ENTRY(float, gCollisionRange)
BS_PARAM_BLOCK_ENTRY(float, gRestitution)
BS_PARAM_BLOCK_ENTRY(float, gDampening)
BS_PARAM_BLOCK_ENTRY(float, gCollisionRadiusScale)
BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveOffset)
BS_PARAM_BLOCK_ENTRY(Vector2, gSizeScaleCurveScale)
BS_PARAM_BLOCK_END

/** Global instance of the depth collision parameter block definition. */
GpuParticleDepthCollisionParamsDef gGpuParticleDepthCollisionParamsDef;
| 82 | |
/**
 * Parameter block with the general simulation settings: number of vector fields, number of iterations, time step
 * (gDT), drag and acceleration.
 */
BS_PARAM_BLOCK_BEGIN(GpuParticleSimulateParamsDef)
BS_PARAM_BLOCK_ENTRY(INT32, gNumVectorFields)
BS_PARAM_BLOCK_ENTRY(INT32, gNumIterations)
BS_PARAM_BLOCK_ENTRY(float, gDT)
BS_PARAM_BLOCK_ENTRY(float, gDrag)
BS_PARAM_BLOCK_ENTRY(Vector3, gAcceleration)
BS_PARAM_BLOCK_END

/** Global instance of the simulation parameter block definition. */
GpuParticleSimulateParamsDef gGpuParticleSimulateParamsDef;
| 92 | |
| 93 | /** |
| 94 | * Material used for performing GPU particle simulation. State is read from the provided input textures and output |
| 95 | * into the output textures bound as render targets. |
| 96 | */ |
| 97 | class GpuParticleSimulateMat : public RendererMaterial<GpuParticleSimulateMat> |
| 98 | { |
| 99 | RMAT_DEF_CUSTOMIZED("GpuParticleSimulate.bsl" ); |
| 100 | |
| 101 | /** Helper method used for initializing variations of this material. */ |
| 102 | template<UINT32 DEPTH_COLLISIONS> |
| 103 | static const ShaderVariation& getVariation() |
| 104 | { |
| 105 | static ShaderVariation variation = ShaderVariation( |
| 106 | { |
| 107 | ShaderVariation::Param("DEPTH_COLLISIONS" , DEPTH_COLLISIONS) |
| 108 | }); |
| 109 | |
| 110 | return variation; |
| 111 | } |
| 112 | public: |
| 113 | GpuParticleSimulateMat(); |
| 114 | |
| 115 | /** Binds the material to the pipeline along with any frame-static parameters. */ |
| 116 | void bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams, |
| 117 | const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams); |
| 118 | |
| 119 | /** |
| 120 | * Binds parameters that change with every material dispatch. |
| 121 | * |
| 122 | * @param[in] tileUVs Sets the UV offsets of individual tiles for a particular particle system |
| 123 | * that's being rendered. |
| 124 | * @param[in] perObjectParams General purpose particle system parameters. |
| 125 | * @param[in] vectorFieldParams Information about the currently bound vector field, if any. |
| 126 | * @param[in] vectorFieldTexture 3D texture representing the vector field, or null if none. |
| 127 | * @param[in] depthCollisionParams Parameter buffer for controlling depth buffer collisions, if enabled. |
| 128 | * |
| 129 | */ |
| 130 | void bindPerCallParams(const SPtr<GpuBuffer>& tileUVs, const SPtr<GpuParamBlockBuffer>& perObjectParams, |
| 131 | const SPtr<GpuParamBlockBuffer>& vectorFieldParams, const SPtr<Texture>& vectorFieldTexture, |
| 132 | const SPtr<GpuParamBlockBuffer>& depthCollisionParams); |
| 133 | |
| 134 | /** Returns the material variation matching the provided parameters. */ |
| 135 | static GpuParticleSimulateMat* getVariation(bool depthCollisions, bool localSpace); |
| 136 | private: |
| 137 | GpuParamBuffer mTileUVParam; |
| 138 | GpuParamTexture mPosAndTimeTexParam; |
| 139 | GpuParamTexture mVelocityTexParam; |
| 140 | GpuParamTexture mSizeRotationTexParam; |
| 141 | GpuParamTexture mCurvesTexParam; |
| 142 | GpuParamTexture mDepthTexParam; |
| 143 | GpuParamTexture mNormalsTexParam; |
| 144 | GpuParamBinding mParamsBinding; |
| 145 | GpuParamBinding mPerCameraBinding; |
| 146 | GpuParamBinding mPerObjectBinding; |
| 147 | |
| 148 | GpuParamBinding mVectorFieldBinding; |
| 149 | GpuParamTexture mVectorFieldTexParam; |
| 150 | |
| 151 | GpuParamBinding mDepthCollisionBinding; |
| 152 | |
| 153 | bool mSupportsDepthCollisions; |
| 154 | }; |
| 155 | |
/**
 * Parameter block for the bounds calculation material: iterations per thread group, number of extra iterations and
 * total particle count.
 * NOTE(review): how the iteration values split the work between groups is defined by the shader - confirm there.
 */
BS_PARAM_BLOCK_BEGIN(GpuParticleBoundsParamsDef)
BS_PARAM_BLOCK_ENTRY(UINT32, gIterationsPerGroup)
BS_PARAM_BLOCK_ENTRY(UINT32, gNumExtraIterations)
BS_PARAM_BLOCK_ENTRY(UINT32, gNumParticles)
BS_PARAM_BLOCK_END

/** Global instance of the bounds parameter block definition. */
GpuParticleBoundsParamsDef gGpuParticleBoundsParamsDef;
| 163 | |
| 164 | /** Material used for calculating particle system bounds. */ |
| 165 | class GpuParticleBoundsMat : public RendererMaterial<GpuParticleBoundsMat> |
| 166 | { |
| 167 | static constexpr UINT32 NUM_THREADS = 64; |
| 168 | |
| 169 | RMAT_DEF_CUSTOMIZED("GpuParticleBounds.bsl" ); |
| 170 | |
| 171 | public: |
| 172 | GpuParticleBoundsMat(); |
| 173 | |
| 174 | /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */ |
| 175 | void bind(const SPtr<Texture>& positionAndTime); |
| 176 | |
| 177 | /** |
| 178 | * Executes the material, calculating the bounds. Note that this function reads back from the GPU and should not |
| 179 | * be called at runtime. |
| 180 | * |
| 181 | * @param[in] indices Buffer containing offsets into the position texture for each particle. |
| 182 | * @param[in] numParticles Number of particle in the provided indices buffer. |
| 183 | */ |
| 184 | AABox execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles); |
| 185 | |
| 186 | private: |
| 187 | GpuParamBuffer mParticleIndicesParam; |
| 188 | GpuParamBuffer mOutputParam; |
| 189 | GpuParamTexture mPosAndTimeTexParam; |
| 190 | SPtr<GpuParamBlockBuffer> mInputBuffer; |
| 191 | }; |
| 192 | |
/**
 * Parameter block for the sort preparation material: iteration counts, particle count, offset into the output
 * buffers, key of the system and the view origin in the system's local space.
 * NOTE(review): the exact way gSystemKey is combined into the sort key is defined by the shader - confirm there.
 */
BS_PARAM_BLOCK_BEGIN(GpuParticleSortPrepareParamDef)
BS_PARAM_BLOCK_ENTRY(INT32, gIterationsPerGroup)
BS_PARAM_BLOCK_ENTRY(INT32, gNumExtraIterations)
BS_PARAM_BLOCK_ENTRY(INT32, gNumParticles)
BS_PARAM_BLOCK_ENTRY(INT32, gOutputOffset)
BS_PARAM_BLOCK_ENTRY(INT32, gSystemKey)
BS_PARAM_BLOCK_ENTRY(Vector3, gLocalViewOrigin)
BS_PARAM_BLOCK_END

/** Global instance of the sort preparation parameter block definition. */
GpuParticleSortPrepareParamDef gGpuParticleSortPrepareParamDef;
| 203 | |
| 204 | /** Material used for preparing key/values buffers used for particle sorting. */ |
| 205 | class GpuParticleSortPrepareMat : public RendererMaterial<GpuParticleSortPrepareMat> |
| 206 | { |
| 207 | static constexpr UINT32 NUM_THREADS = 64; |
| 208 | |
| 209 | RMAT_DEF_CUSTOMIZED("GpuParticleSortPrepare.bsl" ); |
| 210 | |
| 211 | public: |
| 212 | GpuParticleSortPrepareMat(); |
| 213 | |
| 214 | /** Binds the material to the pipeline along with the global input texture containing particle positions and times. */ |
| 215 | void bind(const SPtr<Texture>& positionAndTime); |
| 216 | |
| 217 | /** |
| 218 | * Executes the material, generating sort data for a particular particle system and injecting it into the specified |
| 219 | * location in the key and index buffers. |
| 220 | * |
| 221 | * @param[in] system System whose particles to insert into the sort key/index buffers. |
| 222 | * @param[in] systemIdx Sequential index of the system to insert into the sort buffers. |
| 223 | * @param[in] offset Offset into the key/index buffer at which to insert the sort data. |
| 224 | * @param[in] viewOrigin View origin to use for determining sorting keys, in world space. |
| 225 | * @param[out] outKeys Pre-allocated buffer that will receive the keys used for sorting. The buffer must |
| 226 | * be GPU writable and use a 1x 32-bit integer format. |
| 227 | * @param[out] outIndices Pre-allocated buffer that will receive the indices to be sorted. The buffer must |
| 228 | * be GPU writable and use a 2x 16-bit integer format. Must have the same capacity |
| 229 | * as @p outKeys. |
| 230 | * @return Number of particle that were written to the buffers. |
| 231 | */ |
| 232 | UINT32 execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin, UINT32 offset, |
| 233 | const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices); |
| 234 | |
| 235 | private: |
| 236 | GpuParamBuffer mInputIndicesParam; |
| 237 | GpuParamBuffer mOutputKeysParam; |
| 238 | GpuParamBuffer mOutputIndicesParam; |
| 239 | GpuParamTexture mPosAndTimeTexParam; |
| 240 | SPtr<GpuParamBlockBuffer> mInputBuffer; |
| 241 | }; |
| 242 | |
/** Number of tiles grouped into one instance; per-system tile buffers are padded to a multiple of this value. */
static constexpr UINT32 TILES_PER_INSTANCE = 8;

/** Number of particles covered by one instance, i.e. all particles of TILES_PER_INSTANCE tiles. */
static constexpr UINT32 PARTICLES_PER_INSTANCE = TILES_PER_INSTANCE * GpuParticleResources::PARTICLES_PER_TILE;
| 245 | |
| 246 | /** Contains a variety of helper buffers and declarations used for GPU particle simulation. */ |
| 247 | struct GpuParticleHelperBuffers |
| 248 | { |
| 249 | static constexpr UINT32 NUM_SCRATCH_TILES = 512; |
| 250 | static constexpr UINT32 NUM_SCRATCH_PARTICLES = 4096; |
| 251 | |
| 252 | GpuParticleHelperBuffers(); |
| 253 | |
| 254 | SPtr<VertexBuffer> tileUVs; |
| 255 | SPtr<VertexBuffer> particleUVs; |
| 256 | SPtr<IndexBuffer> spriteIndices; |
| 257 | SPtr<VertexDeclaration> tileVertexDecl; |
| 258 | SPtr<VertexDeclaration> injectVertexDecl; |
| 259 | SPtr<GpuBuffer> tileScratch; |
| 260 | SPtr<VertexBuffer> injectScratch; |
| 261 | }; |
| 262 | |
| 263 | GpuParticleResources::GpuParticleResources() |
| 264 | { |
| 265 | // Allocate textures |
| 266 | TEXTURE_DESC positionAndTimeDesc; |
| 267 | positionAndTimeDesc.format = PF_RGBA32F; |
| 268 | positionAndTimeDesc.width = TEX_SIZE; |
| 269 | positionAndTimeDesc.height = TEX_SIZE; |
| 270 | positionAndTimeDesc.usage = TU_RENDERTARGET; |
| 271 | |
| 272 | TEXTURE_DESC velocityDesc; |
| 273 | velocityDesc.format = PF_RGBA16F; |
| 274 | velocityDesc.width = TEX_SIZE; |
| 275 | velocityDesc.height = TEX_SIZE; |
| 276 | velocityDesc.usage = TU_RENDERTARGET; |
| 277 | |
| 278 | for (UINT32 i = 0; i < 2; i++) |
| 279 | { |
| 280 | mStateTextures[i].positionAndTimeTex = Texture::create(positionAndTimeDesc); |
| 281 | mStateTextures[i].velocityTex = Texture::create(velocityDesc); |
| 282 | } |
| 283 | |
| 284 | TEXTURE_DESC sizeAndRotationDesc; |
| 285 | sizeAndRotationDesc.format = PF_RGBA16F; |
| 286 | sizeAndRotationDesc.width = TEX_SIZE; |
| 287 | sizeAndRotationDesc.height = TEX_SIZE; |
| 288 | sizeAndRotationDesc.usage = TU_RENDERTARGET; |
| 289 | |
| 290 | mStaticTextures.sizeAndRotationTex = Texture::create(sizeAndRotationDesc); |
| 291 | |
| 292 | RENDER_TEXTURE_DESC staticRtDesc; |
| 293 | staticRtDesc.colorSurfaces[0].texture = mStaticTextures.sizeAndRotationTex; |
| 294 | |
| 295 | for (UINT32 i = 0; i < 2; i++) |
| 296 | { |
| 297 | RENDER_TEXTURE_DESC simulationRTDesc; |
| 298 | simulationRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex; |
| 299 | simulationRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex; |
| 300 | |
| 301 | mSimulateRT[i] = RenderTexture::create(simulationRTDesc); |
| 302 | |
| 303 | RENDER_TEXTURE_DESC injectRTDesc; |
| 304 | injectRTDesc.colorSurfaces[0].texture = mStateTextures[i].positionAndTimeTex; |
| 305 | injectRTDesc.colorSurfaces[1].texture = mStateTextures[i].velocityTex; |
| 306 | injectRTDesc.colorSurfaces[2].texture = mStaticTextures.sizeAndRotationTex; |
| 307 | mInjectRT[i] = RenderTexture::create(injectRTDesc); |
| 308 | } |
| 309 | |
| 310 | // Allocate the buffer containing keys used for sorting |
| 311 | GPU_BUFFER_DESC sortKeysBufferDesc; |
| 312 | sortKeysBufferDesc.type = GBT_STANDARD; |
| 313 | sortKeysBufferDesc.format = BF_32X1U; |
| 314 | sortKeysBufferDesc.elementCount = TEX_SIZE * TEX_SIZE; |
| 315 | sortKeysBufferDesc.usage = GBU_LOADSTORE; |
| 316 | |
| 317 | mSortBuffers.keys[0] = GpuBuffer::create(sortKeysBufferDesc); |
| 318 | mSortBuffers.keys[1] = GpuBuffer::create(sortKeysBufferDesc); |
| 319 | |
| 320 | // Allocate the buffer containing sorted particle indices |
| 321 | GPU_BUFFER_DESC sortedIndicesBufferDesc; |
| 322 | sortedIndicesBufferDesc.type = GBT_STANDARD; |
| 323 | sortedIndicesBufferDesc.format = BF_16X2U; |
| 324 | sortedIndicesBufferDesc.elementCount = TEX_SIZE * TEX_SIZE; |
| 325 | sortedIndicesBufferDesc.usage = GBU_LOADSTORE; |
| 326 | |
| 327 | mSortedIndices[0] = GpuBuffer::create(sortedIndicesBufferDesc); |
| 328 | mSortedIndices[1] = GpuBuffer::create(sortedIndicesBufferDesc); |
| 329 | |
| 330 | mSortBuffers.values[0] = mSortedIndices[0]->getView(GBT_STANDARD, BF_32X1U); |
| 331 | mSortBuffers.values[1] = mSortedIndices[1]->getView(GBT_STANDARD, BF_32X1U); |
| 332 | |
| 333 | // Clear the free tile linked list |
| 334 | for (UINT32 i = 0; i < TILE_COUNT; i++) |
| 335 | mFreeTiles[i] = TILE_COUNT - i - 1; |
| 336 | } |
| 337 | |
| 338 | UINT32 GpuParticleResources::allocTile() |
| 339 | { |
| 340 | if (mNumFreeTiles > 0) |
| 341 | { |
| 342 | mNumFreeTiles--; |
| 343 | return mFreeTiles[mNumFreeTiles]; |
| 344 | } |
| 345 | |
| 346 | return (UINT32)-1; |
| 347 | } |
| 348 | |
| 349 | void GpuParticleResources::freeTile(UINT32 tile) |
| 350 | { |
| 351 | assert(tile < TILE_COUNT); |
| 352 | assert(mNumFreeTiles < TILE_COUNT); |
| 353 | |
| 354 | mFreeTiles[mNumFreeTiles] = tile; |
| 355 | mNumFreeTiles++; |
| 356 | } |
| 357 | |
| 358 | Vector2I GpuParticleResources::getTileOffset(UINT32 tileId) |
| 359 | { |
| 360 | return Vector2I( |
| 361 | (tileId % TILE_COUNT_1D) * TILE_SIZE, |
| 362 | (tileId / TILE_COUNT_1D) * TILE_SIZE); |
| 363 | } |
| 364 | |
| 365 | Vector2 GpuParticleResources::getTileCoords(UINT32 tileId) |
| 366 | { |
| 367 | return Vector2( |
| 368 | Math::frac(tileId / (float)TILE_COUNT_1D), |
| 369 | (UINT32)(tileId / TILE_COUNT_1D) / (float)TILE_COUNT_1D); |
| 370 | } |
| 371 | |
| 372 | Vector2I GpuParticleResources::getParticleOffset(UINT32 subTileId) |
| 373 | { |
| 374 | return Vector2I( |
| 375 | subTileId % TILE_SIZE, |
| 376 | subTileId / TILE_SIZE); |
| 377 | } |
| 378 | |
| 379 | Vector2 GpuParticleResources::getParticleCoords(UINT32 subTileId) |
| 380 | { |
| 381 | const Vector2I tileOffset = getParticleOffset(subTileId); |
| 382 | return tileOffset / (float)TEX_SIZE; |
| 383 | } |
| 384 | |
| 385 | const SPtr<GpuBuffer>& GpuParticleResources::getSortedIndices() const |
| 386 | { |
| 387 | return mSortedIndices[mSortedIndicesBufferIdx]; |
| 388 | } |
| 389 | |
| 390 | GpuParticleHelperBuffers::GpuParticleHelperBuffers() |
| 391 | { |
| 392 | // Prepare vertex declaration for rendering tiles |
| 393 | SPtr<VertexDataDesc> tileVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
| 394 | tileVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD); |
| 395 | |
| 396 | tileVertexDecl = VertexDeclaration::create(tileVertexDesc); |
| 397 | |
| 398 | // Prepare vertex declaration for injecting new particles |
| 399 | SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
| 400 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Position & time, per instance |
| 401 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 1, 0, 1); // Velocity, per instance |
| 402 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 0, 1); // Size, per instance |
| 403 | injectVertexDesc->addVertElem(VET_FLOAT1, VES_TEXCOORD, 3, 0, 1); // Rotation, per instance |
| 404 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 4, 0, 1); // Data UV, per instance |
| 405 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 5, 1); // Sprite texture coordinates |
| 406 | |
| 407 | injectVertexDecl = VertexDeclaration::create(injectVertexDesc); |
| 408 | |
| 409 | // Prepare UV coordinates for rendering tiles |
| 410 | VERTEX_BUFFER_DESC tileUVBufferDesc; |
| 411 | tileUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4; |
| 412 | tileUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride(); |
| 413 | |
| 414 | tileUVs = VertexBuffer::create(tileUVBufferDesc); |
| 415 | |
| 416 | auto* const tileUVData = (Vector2*)tileUVs->lock(GBL_WRITE_ONLY_DISCARD); |
| 417 | const float tileUVScale = GpuParticleResources::TILE_SIZE / (float)GpuParticleResources::TEX_SIZE; |
| 418 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
| 419 | { |
| 420 | tileUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * tileUVScale; |
| 421 | tileUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * tileUVScale; |
| 422 | tileUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * tileUVScale; |
| 423 | tileUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * tileUVScale; |
| 424 | } |
| 425 | |
| 426 | tileUVs->unlock(); |
| 427 | |
| 428 | // Prepare UV coordinates for rendering particles |
| 429 | VERTEX_BUFFER_DESC particleUVBufferDesc; |
| 430 | particleUVBufferDesc.numVerts = PARTICLES_PER_INSTANCE * 4; |
| 431 | particleUVBufferDesc.vertexSize = tileVertexDesc->getVertexStride(); |
| 432 | |
| 433 | particleUVs = VertexBuffer::create(particleUVBufferDesc); |
| 434 | |
| 435 | auto* const particleUVData = (Vector2*)particleUVs->lock(GBL_WRITE_ONLY_DISCARD); |
| 436 | const float particleUVScale = 1.0f / (float)GpuParticleResources::TEX_SIZE; |
| 437 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
| 438 | { |
| 439 | particleUVData[i * 4 + 0] = Vector2(0.0f, 0.0f) * particleUVScale; |
| 440 | particleUVData[i * 4 + 1] = Vector2(1.0f, 0.0f) * particleUVScale; |
| 441 | particleUVData[i * 4 + 2] = Vector2(1.0f, 1.0f) * particleUVScale; |
| 442 | particleUVData[i * 4 + 3] = Vector2(0.0f, 1.0f) * particleUVScale; |
| 443 | } |
| 444 | |
| 445 | particleUVs->unlock(); |
| 446 | |
| 447 | // Prepare indices for rendering tiles & particles |
| 448 | INDEX_BUFFER_DESC spriteIndexBufferDesc; |
| 449 | spriteIndexBufferDesc.indexType = IT_16BIT; |
| 450 | spriteIndexBufferDesc.numIndices = PARTICLES_PER_INSTANCE * 6; |
| 451 | |
| 452 | spriteIndices = IndexBuffer::create(spriteIndexBufferDesc); |
| 453 | |
| 454 | auto* const indices = (UINT16*)spriteIndices->lock(GBL_WRITE_ONLY_DISCARD); |
| 455 | |
| 456 | const Conventions& rapiConventions = gCaps().conventions; |
| 457 | for (UINT32 i = 0; i < PARTICLES_PER_INSTANCE; i++) |
| 458 | { |
| 459 | // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't |
| 460 | // get culled. |
| 461 | if (rapiConventions.uvYAxis == Conventions::Axis::Up) |
| 462 | { |
| 463 | indices[i * 6 + 0] = i * 4 + 2; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 0; |
| 464 | indices[i * 6 + 3] = i * 4 + 3; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 0; |
| 465 | } |
| 466 | else |
| 467 | { |
| 468 | indices[i * 6 + 0] = i * 4 + 0; indices[i * 6 + 1] = i * 4 + 1; indices[i * 6 + 2] = i * 4 + 2; |
| 469 | indices[i * 6 + 3] = i * 4 + 0; indices[i * 6 + 4] = i * 4 + 2; indices[i * 6 + 5] = i * 4 + 3; |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | spriteIndices->unlock(); |
| 474 | |
| 475 | // Prepare a scratch buffer we'll use to clear tiles |
| 476 | GPU_BUFFER_DESC tileScratchBufferDesc; |
| 477 | tileScratchBufferDesc.type = GBT_STANDARD; |
| 478 | tileScratchBufferDesc.format = BF_32X2F; |
| 479 | tileScratchBufferDesc.elementCount = NUM_SCRATCH_TILES; |
| 480 | tileScratchBufferDesc.usage = GBU_DYNAMIC; |
| 481 | |
| 482 | tileScratch = GpuBuffer::create(tileScratchBufferDesc); |
| 483 | |
| 484 | // Prepare a scratch buffer we'll use to inject new particles |
| 485 | VERTEX_BUFFER_DESC injectScratchBufferDesc; |
| 486 | injectScratchBufferDesc.numVerts = NUM_SCRATCH_PARTICLES; |
| 487 | injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0); |
| 488 | injectScratchBufferDesc.usage = GBU_DYNAMIC; |
| 489 | |
| 490 | injectScratch = VertexBuffer::create(injectScratchBufferDesc); |
| 491 | } |
| 492 | |
| 493 | GpuParticleSystem::GpuParticleSystem(ParticleSystem* parent) |
| 494 | :mParent(parent) |
| 495 | { |
| 496 | GpuParticleSimulation::instance().addSystem(this); |
| 497 | } |
| 498 | |
| 499 | GpuParticleSystem::~GpuParticleSystem() |
| 500 | { |
| 501 | GpuParticleSimulation::instance().removeSystem(this); |
| 502 | } |
| 503 | |
| 504 | bool GpuParticleSystem::allocateTiles(GpuParticleResources& resources, Vector<GpuParticle>& newParticles, |
| 505 | Vector<UINT32>& newTiles) |
| 506 | { |
| 507 | GpuParticleTile cachedTile = mLastAllocatedTile == (UINT32)-1 ? GpuParticleTile() : mTiles[mLastAllocatedTile]; |
| 508 | Vector2 tileUV = GpuParticleResources::getTileCoords(cachedTile.id); |
| 509 | |
| 510 | bool newTilesAdded = false; |
| 511 | for (UINT32 i = 0; i < (UINT32)newParticles.size(); i++) |
| 512 | { |
| 513 | UINT32 tileIdx; |
| 514 | |
| 515 | // Use the last allocated tile if there's room |
| 516 | if (cachedTile.numFreeParticles > 0) |
| 517 | tileIdx = mLastAllocatedTile; |
| 518 | else |
| 519 | { |
| 520 | // Otherwise try to find an inactive tile |
| 521 | if (mNumActiveTiles < (UINT32)mTiles.size()) |
| 522 | { |
| 523 | tileIdx = mActiveTiles.find(false); |
| 524 | mActiveTiles[tileIdx] = true; |
| 525 | } |
| 526 | // And finally just allocate a new tile if no room elsewhere |
| 527 | else |
| 528 | { |
| 529 | const UINT32 tileId = resources.allocTile(); |
| 530 | if (tileId == (UINT32)-1) |
| 531 | return newTilesAdded; // Out of space in the texture |
| 532 | |
| 533 | GpuParticleTile newTile; |
| 534 | newTile.id = tileId; |
| 535 | newTile.lifetime = 0.0f; |
| 536 | |
| 537 | tileIdx = (UINT32)mTiles.size(); |
| 538 | newTiles.push_back(newTile.id); |
| 539 | mTiles.push_back(newTile); |
| 540 | mActiveTiles.add(true); |
| 541 | |
| 542 | newTilesAdded = true; |
| 543 | } |
| 544 | |
| 545 | mLastAllocatedTile = tileIdx; |
| 546 | tileUV = GpuParticleResources::getTileCoords(mTiles[tileIdx].id); |
| 547 | mTiles[tileIdx].numFreeParticles = GpuParticleResources::PARTICLES_PER_TILE; |
| 548 | |
| 549 | cachedTile = mTiles[tileIdx]; |
| 550 | mNumActiveTiles++; |
| 551 | } |
| 552 | |
| 553 | GpuParticleTile& tile = mTiles[tileIdx]; |
| 554 | GpuParticle& particle = newParticles[i]; |
| 555 | |
| 556 | const UINT32 tileParticleIdx = GpuParticleResources::PARTICLES_PER_TILE - tile.numFreeParticles; |
| 557 | particle.dataUV = tileUV + GpuParticleResources::getParticleCoords(tileParticleIdx); |
| 558 | |
| 559 | tile.numFreeParticles--; |
| 560 | tile.lifetime = std::max(tile.lifetime, mTime + particle.lifetime); |
| 561 | |
| 562 | cachedTile.numFreeParticles--; |
| 563 | } |
| 564 | |
| 565 | return newTilesAdded; |
| 566 | } |
| 567 | |
| 568 | void GpuParticleSystem::detectInactiveTiles() |
| 569 | { |
| 570 | mNumActiveTiles = 0; |
| 571 | for (UINT32 i = 0; i < (UINT32)mTiles.size(); i++) |
| 572 | { |
| 573 | if (mTiles[i].lifetime >= mTime) |
| 574 | { |
| 575 | mNumActiveTiles++; |
| 576 | continue; |
| 577 | } |
| 578 | |
| 579 | mActiveTiles[i] = false; |
| 580 | |
| 581 | if (mLastAllocatedTile == i) |
| 582 | mLastAllocatedTile = (UINT32)-1; |
| 583 | } |
| 584 | } |
| 585 | |
| 586 | bool GpuParticleSystem::freeInactiveTiles(GpuParticleResources& resources) |
| 587 | { |
| 588 | const UINT32 numFreeTiles = (UINT32)mTiles.size() - mNumActiveTiles; |
| 589 | for(UINT32 i = 0; i < numFreeTiles; i++) |
| 590 | { |
| 591 | const UINT32 freeIdx = mActiveTiles.find(false); |
| 592 | assert(freeIdx != (UINT32)-1); |
| 593 | |
| 594 | const UINT32 lastIdx = (UINT32)mTiles.size() - 1; |
| 595 | |
| 596 | if (freeIdx != lastIdx) |
| 597 | { |
| 598 | std::swap(mTiles[freeIdx], mTiles[lastIdx]); |
| 599 | std::swap(mActiveTiles[freeIdx], mActiveTiles[lastIdx]); |
| 600 | } |
| 601 | |
| 602 | resources.freeTile(mTiles[lastIdx].id); |
| 603 | |
| 604 | mTiles.erase(mTiles.end() - 1); |
| 605 | mActiveTiles.remove(lastIdx); |
| 606 | } |
| 607 | |
| 608 | // Tile order changed so this might no longer be valid |
| 609 | if (numFreeTiles > 0) |
| 610 | mLastAllocatedTile = (UINT32)-1; |
| 611 | |
| 612 | return numFreeTiles > 0; |
| 613 | } |
| 614 | |
| 615 | void GpuParticleSystem::updateGpuBuffers() |
| 616 | { |
| 617 | const auto numTiles = (UINT32)mTiles.size(); |
| 618 | const UINT32 numTilesToAllocates = Math::divideAndRoundUp(numTiles, TILES_PER_INSTANCE) * TILES_PER_INSTANCE; |
| 619 | |
| 620 | // Tile offsets buffer |
| 621 | if(numTiles > 0) |
| 622 | { |
| 623 | GPU_BUFFER_DESC tilesBufferDesc; |
| 624 | tilesBufferDesc.type = GBT_STANDARD; |
| 625 | tilesBufferDesc.format = BF_32X2F; |
| 626 | tilesBufferDesc.elementCount = numTilesToAllocates; |
| 627 | tilesBufferDesc.usage = GBU_DYNAMIC; |
| 628 | |
| 629 | mTileUVs = GpuBuffer::create(tilesBufferDesc); |
| 630 | |
| 631 | auto* tileUVs = (Vector2*)mTileUVs->lock(GBL_WRITE_ONLY_NO_OVERWRITE); |
| 632 | for (UINT32 i = 0; i < numTiles; i++) |
| 633 | tileUVs[i] = GpuParticleResources::getTileCoords(mTiles[i].id); |
| 634 | |
| 635 | for (UINT32 i = numTiles; i < numTilesToAllocates; i++) |
| 636 | tileUVs[i] = Vector2(2.0f, 2.0f); // Out of range |
| 637 | |
| 638 | mTileUVs->unlock(); |
| 639 | } |
| 640 | |
| 641 | // Particle data offsets |
| 642 | const UINT32 numParticles = numTiles * GpuParticleResources::PARTICLES_PER_TILE; |
| 643 | |
| 644 | if(numParticles > 0) |
| 645 | { |
| 646 | GPU_BUFFER_DESC particleUVDesc; |
| 647 | particleUVDesc.type = GBT_STANDARD; |
| 648 | particleUVDesc.format = BF_16X2U; |
| 649 | particleUVDesc.elementCount = numParticles; |
| 650 | particleUVDesc.usage = GBU_DYNAMIC; |
| 651 | |
| 652 | mParticleIndices = GpuBuffer::create(particleUVDesc); |
| 653 | auto* particleIndices = (UINT32*)mParticleIndices->lock(GBL_WRITE_ONLY_NO_OVERWRITE); |
| 654 | |
| 655 | UINT32 idx = 0; |
| 656 | for (UINT32 i = 0; i < numTiles; i++) |
| 657 | { |
| 658 | const Vector2I tileOffset = GpuParticleResources::getTileOffset(mTiles[i].id); |
| 659 | for (UINT32 y = 0; y < GpuParticleResources::TILE_SIZE; y++) |
| 660 | { |
| 661 | for (UINT32 x = 0; x < GpuParticleResources::TILE_SIZE; x++) |
| 662 | { |
| 663 | const Vector2I offset = tileOffset + Vector2I(x, y); |
| 664 | particleIndices[idx++] = (offset.x & 0xFFFF) | (offset.y << 16); |
| 665 | } |
| 666 | } |
| 667 | } |
| 668 | |
| 669 | mParticleIndices->unlock(); |
| 670 | } |
| 671 | } |
| 672 | |
| 673 | void GpuParticleSystem::advanceTime(float dt) |
| 674 | { |
| 675 | const ParticleSystemSettings& settings = mParent->getSettings(); |
| 676 | |
| 677 | float timeStep; |
| 678 | mTime = bs::ParticleSystem::_advanceTime(mTime, dt, settings.duration, settings.isLooping, timeStep); |
| 679 | } |
| 680 | |
| 681 | AABox GpuParticleSystem::getBounds() const |
| 682 | { |
| 683 | const ParticleSystemSettings& settings = mParent->getSettings(); |
| 684 | |
| 685 | if(settings.useAutomaticBounds) |
| 686 | return AABox::INF_BOX; |
| 687 | |
| 688 | return settings.customBounds; |
| 689 | } |
| 690 | |
| 691 | struct GpuParticleSimulation::Pimpl |
| 692 | { |
| 693 | GpuParticleResources resources; |
| 694 | GpuParticleHelperBuffers helperBuffers; |
| 695 | SPtr<GpuParamBlockBuffer> vectorFieldParams; |
| 696 | SPtr<GpuParamBlockBuffer> depthCollisionParams; |
| 697 | SPtr<GpuParamBlockBuffer> simulationParams; |
| 698 | UnorderedSet<GpuParticleSystem*> systems; |
| 699 | }; |
| 700 | |
| 701 | GpuParticleSimulation::GpuParticleSimulation() |
| 702 | :m(bs_new<Pimpl>()) |
| 703 | { |
| 704 | m->vectorFieldParams = gVectorFieldParamsDef.createBuffer(); |
| 705 | m->depthCollisionParams = gGpuParticleDepthCollisionParamsDef.createBuffer(); |
| 706 | m->simulationParams = gGpuParticleSimulateParamsDef.createBuffer(); |
| 707 | } |
| 708 | |
| 709 | GpuParticleSimulation::~GpuParticleSimulation() |
| 710 | { |
| 711 | bs_delete(m); |
| 712 | } |
| 713 | |
| 714 | void GpuParticleSimulation::addSystem(GpuParticleSystem* system) |
| 715 | { |
| 716 | m->systems.insert(system); |
| 717 | } |
| 718 | |
| 719 | void GpuParticleSimulation::removeSystem(GpuParticleSystem* system) |
| 720 | { |
| 721 | m->systems.erase(system); |
| 722 | } |
| 723 | |
| 724 | void GpuParticleSimulation::simulate(const SceneInfo& sceneInfo, const ParticlePerFrameData* simData, |
| 725 | const SPtr<GpuParamBlockBuffer>& viewParams, const GBufferTextures& gbuffer, float dt) |
| 726 | { |
| 727 | m->resources.swap(); |
| 728 | m->resources.getCurveTexture().applyChanges(); |
| 729 | |
| 730 | Vector<UINT32> newTiles; |
| 731 | Vector<GpuParticle> allNewParticles; |
| 732 | for (auto& entry : m->systems) |
| 733 | { |
| 734 | entry->detectInactiveTiles(); |
| 735 | |
| 736 | bool tilesDirty = false; |
| 737 | const auto iterFind = simData->gpuData.find(entry->getParent()->getId()); |
| 738 | if(iterFind != simData->gpuData.end()) |
| 739 | { |
| 740 | Vector<GpuParticle>& newParticles = iterFind->second->particles; |
| 741 | tilesDirty = entry->allocateTiles(m->resources, newParticles, newTiles); |
| 742 | |
| 743 | allNewParticles.insert(allNewParticles.end(), newParticles.begin(), newParticles.end()); |
| 744 | } |
| 745 | |
| 746 | entry->advanceTime(dt); |
| 747 | tilesDirty |= entry->freeInactiveTiles(m->resources); |
| 748 | |
| 749 | if (tilesDirty) |
| 750 | entry->updateGpuBuffers(); |
| 751 | } |
| 752 | |
| 753 | RenderAPI& rapi = RenderAPI::instance(); |
| 754 | rapi.setRenderTarget(m->resources.getInjectTarget()); |
| 755 | |
| 756 | clearTiles(newTiles); |
| 757 | injectParticles(allNewParticles); |
| 758 | |
| 759 | // Simulate |
| 760 | // TODO - Run multiple iterations for more stable simulation at lower/erratic framerates |
| 761 | gGpuParticleSimulateParamsDef.gDT.set(m->simulationParams, dt); |
| 762 | gGpuParticleSimulateParamsDef.gNumIterations.set(m->simulationParams, 1); |
| 763 | |
| 764 | rapi.setRenderTarget(m->resources.getSimulationTarget()); |
| 765 | rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl); |
| 766 | |
| 767 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs }; |
| 768 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
| 769 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
| 770 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
| 771 | |
| 772 | enum class SimType { Normal, DepthCollisionsWorld, DepthCollisionsLocal, Count }; |
| 773 | |
| 774 | for(UINT32 i = 0; i < (UINT32)SimType::Count; i++) |
| 775 | { |
| 776 | const SimType type = (SimType)i; |
| 777 | const bool simulateDepthCollisions = type == SimType::DepthCollisionsWorld || |
| 778 | type == SimType::DepthCollisionsLocal; |
| 779 | const bool localSpace = type == SimType::DepthCollisionsLocal; |
| 780 | |
| 781 | GpuParticleSimulateMat* simulateMat = GpuParticleSimulateMat::getVariation(simulateDepthCollisions, localSpace); |
| 782 | simulateMat->bindGlobal(m->resources, viewParams, gbuffer.depth, gbuffer.normals, m->simulationParams); |
| 783 | |
| 784 | for (auto& entry : m->systems) |
| 785 | { |
| 786 | if (entry->getNumTiles() == 0) |
| 787 | continue; |
| 788 | |
| 789 | ParticleSystem* parentSystem = entry->getParent(); |
| 790 | |
| 791 | const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings(); |
| 792 | if(simSettings.depthCollision.enabled != simulateDepthCollisions) |
| 793 | continue; |
| 794 | |
| 795 | if(simulateDepthCollisions) |
| 796 | { |
| 797 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
| 798 | bool isLocal = settings.simulationSpace == ParticleSimulationSpace::Local; |
| 799 | if(isLocal != localSpace) |
| 800 | continue; |
| 801 | } |
| 802 | |
| 803 | const RendererParticles& rendererParticles = sceneInfo.particleSystems[parentSystem->getRendererId()]; |
| 804 | |
| 805 | prepareBuffers(entry, rendererParticles); |
| 806 | |
| 807 | SPtr<Texture> vfTexture; |
| 808 | if (simSettings.vectorField.vectorField) |
| 809 | vfTexture = simSettings.vectorField.vectorField->getTexture(); |
| 810 | |
| 811 | simulateMat->bindPerCallParams(entry->getTileUVs(), rendererParticles.perObjectParamBuffer, |
| 812 | m->vectorFieldParams, vfTexture, m->depthCollisionParams); |
| 813 | |
| 814 | const UINT32 tileCount = entry->getNumTiles(); |
| 815 | const UINT32 numInstances = Math::divideAndRoundUp(tileCount, TILES_PER_INSTANCE); |
| 816 | rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances); |
| 817 | } |
| 818 | } |
| 819 | } |
| 820 | |
| 821 | void GpuParticleSimulation::sort(const RendererView& view) |
| 822 | { |
| 823 | const bool supportsCompute = gRenderBeast()->getFeatureSet() == RenderBeastFeatureSet::Desktop; |
| 824 | if(!supportsCompute) |
| 825 | return; |
| 826 | |
| 827 | // Make sure that the position texture isn't bound for rendering |
| 828 | RenderAPI& rapi = RenderAPI::instance(); |
| 829 | rapi.setRenderTarget(nullptr); |
| 830 | |
| 831 | const Vector3& viewOrigin = view.getProperties().viewOrigin; |
| 832 | |
| 833 | GpuParticleSortPrepareMat* prepareMat = GpuParticleSortPrepareMat::get(); |
| 834 | prepareMat->bind(m->resources.getCurrentState().positionAndTimeTex); |
| 835 | |
| 836 | UINT32 systemIdx = 0; |
| 837 | UINT32 offset = 0; |
| 838 | for (auto& entry : m->systems) |
| 839 | { |
| 840 | if (entry->getNumTiles() == 0) |
| 841 | { |
| 842 | entry->setSortInfo(false, 0); |
| 843 | continue; |
| 844 | } |
| 845 | |
| 846 | ParticleSystem* parentSystem = entry->getParent(); |
| 847 | |
| 848 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
| 849 | if (settings.sortMode != ParticleSortMode::Distance) |
| 850 | { |
| 851 | entry->setSortInfo(false, 0); |
| 852 | continue; |
| 853 | } |
| 854 | |
| 855 | entry->setSortInfo(true, offset); |
| 856 | |
| 857 | offset += prepareMat->execute(*entry, systemIdx, viewOrigin, offset, |
| 858 | m->resources.mSortBuffers.keys[0], |
| 859 | m->resources.mSortedIndices[0]); |
| 860 | |
| 861 | systemIdx++; |
| 862 | } |
| 863 | |
| 864 | const UINT32 numSystemsToSort = systemIdx; |
| 865 | if(numSystemsToSort == 0) |
| 866 | return; |
| 867 | |
| 868 | const UINT32 totalNumKeys = offset; |
| 869 | const UINT32 keyMask = 0xFFFF | (Math::ceilToInt(Math::log2((float)(numSystemsToSort + 1))) << 16); |
| 870 | const UINT32 outputBufferIdx = GpuSort::instance().sort(m->resources.mSortBuffers, totalNumKeys, keyMask); |
| 871 | |
| 872 | m->resources.mSortedIndicesBufferIdx = outputBufferIdx; |
| 873 | } |
| 874 | |
| 875 | void GpuParticleSimulation::prepareBuffers(const GpuParticleSystem* system, const RendererParticles& rendererInfo) |
| 876 | { |
| 877 | ParticleSystem* parentSystem = system->getParent(); |
| 878 | |
| 879 | const ParticleSystemSettings& settings = parentSystem->getSettings(); |
| 880 | const ParticleGpuSimulationSettings& simSettings = parentSystem->getGpuSimulationSettings(); |
| 881 | |
| 882 | const Random& random = system->getRandom(); |
| 883 | const float time = system->getTime(); |
| 884 | const float nrmTime = time / settings.duration; |
| 885 | |
| 886 | gGpuParticleSimulateParamsDef.gDrag.set(m->simulationParams, simSettings.drag); |
| 887 | gGpuParticleSimulateParamsDef.gAcceleration.set(m->simulationParams, simSettings.acceleration); |
| 888 | |
| 889 | SPtr<Texture> vfTexture; |
| 890 | if(simSettings.vectorField.vectorField) |
| 891 | vfTexture = simSettings.vectorField.vectorField->getTexture(); |
| 892 | |
| 893 | if(vfTexture) |
| 894 | { |
| 895 | gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 1); |
| 896 | |
| 897 | const SPtr<VectorField>& vectorField = simSettings.vectorField.vectorField; |
| 898 | const VECTOR_FIELD_DESC& vfDesc = vectorField->getDesc(); |
| 899 | |
| 900 | const Vector3 tiling( |
| 901 | simSettings.vectorField.tilingX ? 0.0f : 1.0f, |
| 902 | simSettings.vectorField.tilingY ? 0.0f : 1.0f, |
| 903 | simSettings.vectorField.tilingZ ? 0.0f : 1.0f |
| 904 | ); |
| 905 | |
| 906 | gVectorFieldParamsDef.gFieldBounds.set(m->vectorFieldParams, vfDesc.bounds.getSize()); |
| 907 | gVectorFieldParamsDef.gFieldTightness.set(m->vectorFieldParams, simSettings.vectorField.tightness); |
| 908 | gVectorFieldParamsDef.gFieldTiling.set(m->vectorFieldParams, tiling); |
| 909 | gVectorFieldParamsDef.gFieldIntensity.set(m->vectorFieldParams, simSettings.vectorField.intensity); |
| 910 | |
| 911 | const Vector3 rotationRate = simSettings.vectorField.rotationRate.evaluate(nrmTime, random) * time; |
| 912 | const Quaternion addedRotation(Degree(rotationRate.x), Degree(rotationRate.y), Degree(rotationRate.z)); |
| 913 | |
| 914 | const Vector3 offset = vfDesc.bounds.getMin() + simSettings.vectorField.offset; |
| 915 | const Quaternion rotation = simSettings.vectorField.rotation * addedRotation; |
| 916 | const Vector3 scale = vfDesc.bounds.getSize() * simSettings.vectorField.scale; |
| 917 | |
| 918 | Matrix4 fieldToWorld = Matrix4::TRS(offset, rotation, scale); |
| 919 | fieldToWorld = rendererInfo.localToWorld * fieldToWorld; |
| 920 | |
| 921 | const Matrix3 fieldToWorld3x3 = fieldToWorld.get3x3(); |
| 922 | |
| 923 | gVectorFieldParamsDef.gFieldToWorld.set(m->vectorFieldParams, fieldToWorld3x3); |
| 924 | gVectorFieldParamsDef.gWorldToField.set(m->vectorFieldParams, fieldToWorld.inverseAffine()); |
| 925 | } |
| 926 | else |
| 927 | gGpuParticleSimulateParamsDef.gNumVectorFields.set(m->simulationParams, 0); |
| 928 | |
| 929 | const ParticleDepthCollisionSettings& depthCollisionSettings = simSettings.depthCollision; |
| 930 | if(depthCollisionSettings.enabled) |
| 931 | { |
| 932 | Vector3 scale3D = rendererInfo.particleSystem->getTransform().getScale(); |
| 933 | float uniformScale = std::max(std::max(scale3D.x, scale3D.y), scale3D.z); |
| 934 | |
| 935 | gGpuParticleDepthCollisionParamsDef.gCollisionRange.set(m->depthCollisionParams, 2.0f); |
| 936 | gGpuParticleDepthCollisionParamsDef.gCollisionRadiusScale.set(m->depthCollisionParams, |
| 937 | depthCollisionSettings.radiusScale * uniformScale); |
| 938 | gGpuParticleDepthCollisionParamsDef.gDampening.set(m->depthCollisionParams, |
| 939 | depthCollisionSettings.dampening); |
| 940 | gGpuParticleDepthCollisionParamsDef.gRestitution.set(m->depthCollisionParams, |
| 941 | depthCollisionSettings.restitution); |
| 942 | |
| 943 | const Vector2 sizeScaleUVOffset = |
| 944 | GpuParticleCurves::getUVOffset(rendererInfo.sizeScaleFrameIdxCurveAlloc); |
| 945 | const float sizeScaleUVScale = |
| 946 | GpuParticleCurves::getUVScale(rendererInfo.sizeScaleFrameIdxCurveAlloc); |
| 947 | |
| 948 | gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveOffset.set(m->depthCollisionParams, sizeScaleUVOffset); |
| 949 | gGpuParticleDepthCollisionParamsDef.gSizeScaleCurveScale.set(m->depthCollisionParams, Vector2(sizeScaleUVScale, 0.0f)); |
| 950 | } |
| 951 | } |
| 952 | |
| 953 | void GpuParticleSimulation::clearTiles(const Vector<UINT32>& tiles) |
| 954 | { |
| 955 | const auto numTiles = (UINT32)tiles.size(); |
| 956 | if(numTiles == 0) |
| 957 | return; |
| 958 | |
| 959 | const UINT32 numIterations = Math::divideAndRoundUp(numTiles, GpuParticleHelperBuffers::NUM_SCRATCH_TILES); |
| 960 | |
| 961 | GpuParticleClearMat* clearMat = GpuParticleClearMat::get(); |
| 962 | clearMat->bind(m->helperBuffers.tileScratch); |
| 963 | |
| 964 | RenderAPI& rapi = RenderAPI::instance(); |
| 965 | rapi.setVertexDeclaration(m->helperBuffers.tileVertexDecl); |
| 966 | |
| 967 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.tileUVs }; |
| 968 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
| 969 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
| 970 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
| 971 | |
| 972 | UINT32 tileStart = 0; |
| 973 | for (UINT32 i = 0; i < numIterations; i++) |
| 974 | { |
| 975 | static_assert(GpuParticleHelperBuffers::NUM_SCRATCH_TILES % TILES_PER_INSTANCE == 0, |
| 976 | "Tile scratch buffer size must be divisble with number of tiles per instance." ); |
| 977 | |
| 978 | const UINT32 tileEnd = std::min(numTiles, tileStart + GpuParticleHelperBuffers::NUM_SCRATCH_TILES); |
| 979 | |
| 980 | auto* tileUVs = (Vector2*)m->helperBuffers.tileScratch->lock(GBL_WRITE_ONLY_DISCARD); |
| 981 | for (UINT32 j = tileStart; j < tileEnd; j++) |
| 982 | tileUVs[j - tileStart] = GpuParticleResources::getTileCoords(tiles[j]); |
| 983 | |
| 984 | const UINT32 alignedTileEnd = Math::divideAndRoundUp(tileEnd, TILES_PER_INSTANCE) * TILES_PER_INSTANCE; |
| 985 | for (UINT32 j = tileEnd; j < alignedTileEnd; j++) |
| 986 | tileUVs[j - tileEnd] = Vector2(2.0f, 2.0f); // Out of bounds (we don't want to accidentaly clear used tiles) |
| 987 | |
| 988 | m->helperBuffers.tileScratch->unlock(); |
| 989 | |
| 990 | const UINT32 numInstances = (alignedTileEnd - tileStart) / TILES_PER_INSTANCE; |
| 991 | rapi.drawIndexed(0, TILES_PER_INSTANCE * 6, 0, TILES_PER_INSTANCE * 4, numInstances); |
| 992 | |
| 993 | tileStart = alignedTileEnd; |
| 994 | } |
| 995 | } |
| 996 | |
| 997 | void GpuParticleSimulation::injectParticles(const Vector<GpuParticle>& particles) |
| 998 | { |
| 999 | const auto numParticles = (UINT32)particles.size(); |
| 1000 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES); |
| 1001 | |
| 1002 | GpuParticleInjectMat* injectMat = GpuParticleInjectMat::get(); |
| 1003 | injectMat->bind(); |
| 1004 | |
| 1005 | RenderAPI& rapi = RenderAPI::instance(); |
| 1006 | rapi.setVertexDeclaration(m->helperBuffers.injectVertexDecl); |
| 1007 | |
| 1008 | SPtr<VertexBuffer> buffers[] = { m->helperBuffers.injectScratch, m->helperBuffers.particleUVs }; |
| 1009 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
| 1010 | rapi.setIndexBuffer(m->helperBuffers.spriteIndices); |
| 1011 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
| 1012 | |
| 1013 | UINT32 particleStart = 0; |
| 1014 | for (UINT32 i = 0; i < numIterations; i++) |
| 1015 | { |
| 1016 | const UINT32 particleEnd = std::min(numParticles, particleStart + GpuParticleHelperBuffers::NUM_SCRATCH_PARTICLES); |
| 1017 | |
| 1018 | auto* particleData = (GpuParticleVertex*)m->helperBuffers.injectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
| 1019 | for (UINT32 j = particleStart; j < particleEnd; j++) |
| 1020 | particleData[j - particleStart] = particles[j].getVertex(); |
| 1021 | |
| 1022 | m->helperBuffers.injectScratch->unlock(); |
| 1023 | |
| 1024 | rapi.drawIndexed(0, 6, 0, 4, particleEnd - particleStart); |
| 1025 | particleStart = particleEnd; |
| 1026 | } |
| 1027 | } |
| 1028 | |
| 1029 | GpuParticleResources& GpuParticleSimulation::getResources() const |
| 1030 | { |
| 1031 | return m->resources; |
| 1032 | } |
| 1033 | |
| 1034 | SPtr<GpuParamBlockBuffer> createGpuParticleVertexInputBuffer() |
| 1035 | { |
| 1036 | SPtr<GpuParamBlockBuffer> inputBuffer = gGpuParticleTileVertexParamsDef.createBuffer(); |
| 1037 | |
| 1038 | // [0, 1] -> [-1, 1] and flip Y |
| 1039 | Vector4 uvToNdc(2.0f, -2.0f, -1.0f, 1.0f); |
| 1040 | |
| 1041 | const Conventions& rapiConventions = gCaps().conventions; |
| 1042 | |
| 1043 | // Either of these flips the Y axis, but if they're both true they cancel out |
| 1044 | if ((rapiConventions.uvYAxis == Conventions::Axis::Up) ^ (rapiConventions.ndcYAxis == Conventions::Axis::Down)) |
| 1045 | { |
| 1046 | uvToNdc.y = -uvToNdc.y; |
| 1047 | uvToNdc.w = -uvToNdc.w; |
| 1048 | } |
| 1049 | |
| 1050 | gGpuParticleTileVertexParamsDef.gUVToNDC.set(inputBuffer, uvToNdc); |
| 1051 | |
| 1052 | return inputBuffer; |
| 1053 | } |
| 1054 | |
| 1055 | GpuParticleClearMat::GpuParticleClearMat() |
| 1056 | { |
| 1057 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
| 1058 | |
| 1059 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
| 1060 | mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs" , mTileUVParam); |
| 1061 | } |
| 1062 | |
| 1063 | void GpuParticleClearMat::_initDefines(ShaderDefines& defines) |
| 1064 | { |
| 1065 | defines.set("TILES_PER_INSTANCE" , TILES_PER_INSTANCE); |
| 1066 | } |
| 1067 | |
| 1068 | void GpuParticleClearMat::bind(const SPtr<GpuBuffer>& tileUVs) |
| 1069 | { |
| 1070 | mTileUVParam.set(tileUVs); |
| 1071 | |
| 1072 | RendererMaterial::bind(); |
| 1073 | } |
| 1074 | |
| 1075 | GpuParticleInjectMat::GpuParticleInjectMat() |
| 1076 | { |
| 1077 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
| 1078 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
| 1079 | } |
| 1080 | |
| 1081 | GpuParticleCurveInjectMat::GpuParticleCurveInjectMat() |
| 1082 | { |
| 1083 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
| 1084 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
| 1085 | } |
| 1086 | |
| 1087 | GpuParticleSimulateMat::GpuParticleSimulateMat() |
| 1088 | { |
| 1089 | const SPtr<GpuParamBlockBuffer> inputBuffer = createGpuParticleVertexInputBuffer(); |
| 1090 | mParams->setParamBlockBuffer(GPT_VERTEX_PROGRAM, "Input" , inputBuffer); |
| 1091 | |
| 1092 | mParams->getParamInfo()->getBinding( |
| 1093 | GPT_FRAGMENT_PROGRAM, |
| 1094 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
| 1095 | "Params" , |
| 1096 | mParamsBinding |
| 1097 | ); |
| 1098 | |
| 1099 | mParams->getParamInfo()->getBinding( |
| 1100 | GPT_FRAGMENT_PROGRAM, |
| 1101 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
| 1102 | "VectorFieldParams" , |
| 1103 | mVectorFieldBinding |
| 1104 | ); |
| 1105 | |
| 1106 | mParams->getBufferParam(GPT_VERTEX_PROGRAM, "gTileUVs" , mTileUVParam); |
| 1107 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
| 1108 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVelocityTex" , mVelocityTexParam); |
| 1109 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gVectorFieldTex" , mVectorFieldTexParam); |
| 1110 | |
| 1111 | mSupportsDepthCollisions = mVariation.getUInt("DEPTH_COLLISIONS" ) > 0; |
| 1112 | if(mSupportsDepthCollisions) |
| 1113 | { |
| 1114 | mParams->getParamInfo()->getBinding( |
| 1115 | GPT_FRAGMENT_PROGRAM, |
| 1116 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
| 1117 | "PerCamera" , |
| 1118 | mPerCameraBinding |
| 1119 | ); |
| 1120 | |
| 1121 | mParams->getParamInfo()->getBinding( |
| 1122 | GPT_FRAGMENT_PROGRAM, |
| 1123 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
| 1124 | "PerObject" , |
| 1125 | mPerObjectBinding |
| 1126 | ); |
| 1127 | |
| 1128 | mParams->getParamInfo()->getBinding( |
| 1129 | GPT_FRAGMENT_PROGRAM, |
| 1130 | GpuPipelineParamInfoBase::ParamType::ParamBlock, |
| 1131 | "DepthCollisionParams" , |
| 1132 | mDepthCollisionBinding |
| 1133 | ); |
| 1134 | |
| 1135 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gSizeRotationTex" , mSizeRotationTexParam); |
| 1136 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gCurvesTex" , mCurvesTexParam); |
| 1137 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gDepthTex" , mDepthTexParam); |
| 1138 | mParams->getTextureParam(GPT_FRAGMENT_PROGRAM, "gNormalsTex" , mNormalsTexParam); |
| 1139 | } |
| 1140 | } |
| 1141 | |
| 1142 | void GpuParticleSimulateMat::_initDefines(ShaderDefines& defines) |
| 1143 | { |
| 1144 | defines.set("TILES_PER_INSTANCE" , TILES_PER_INSTANCE); |
| 1145 | } |
| 1146 | |
| 1147 | void GpuParticleSimulateMat::bindGlobal(GpuParticleResources& resources, const SPtr<GpuParamBlockBuffer>& viewParams, |
| 1148 | const SPtr<Texture>& depth, const SPtr<Texture>& normals, const SPtr<GpuParamBlockBuffer>& simulationParams) |
| 1149 | { |
| 1150 | GpuParticleStateTextures& prevState = resources.getPreviousState(); |
| 1151 | const GpuParticleStaticTextures& staticTextures = resources.getStaticTextures(); |
| 1152 | GpuParticleCurves& curveTexture = resources.getCurveTexture(); |
| 1153 | |
| 1154 | mParams->setParamBlockBuffer(mParamsBinding.set, mParamsBinding.slot, simulationParams); |
| 1155 | |
| 1156 | mPosAndTimeTexParam.set(prevState.positionAndTimeTex); |
| 1157 | mVelocityTexParam.set(prevState.velocityTex); |
| 1158 | |
| 1159 | if(mSupportsDepthCollisions) |
| 1160 | { |
| 1161 | mParams->setParamBlockBuffer(mPerCameraBinding.set, mPerCameraBinding.slot, viewParams); |
| 1162 | |
| 1163 | mSizeRotationTexParam.set(staticTextures.sizeAndRotationTex); |
| 1164 | mCurvesTexParam.set(curveTexture.getTexture()); |
| 1165 | mDepthTexParam.set(depth); |
| 1166 | mNormalsTexParam.set(normals); |
| 1167 | } |
| 1168 | |
| 1169 | RendererMaterial::bind(false); |
| 1170 | } |
| 1171 | |
| 1172 | void GpuParticleSimulateMat::bindPerCallParams(const SPtr<GpuBuffer>& tileUVs, |
| 1173 | const SPtr<GpuParamBlockBuffer>& perObjectParams, const SPtr<GpuParamBlockBuffer>& vectorFieldParams, |
| 1174 | const SPtr<Texture>& vectorFieldTexture, const SPtr<GpuParamBlockBuffer>& depthCollisionParams) |
| 1175 | { |
| 1176 | mTileUVParam.set(tileUVs); |
| 1177 | mParams->setParamBlockBuffer(mVectorFieldBinding.set, mVectorFieldBinding.slot, vectorFieldParams); |
| 1178 | mVectorFieldTexParam.set(vectorFieldTexture); |
| 1179 | |
| 1180 | if(mSupportsDepthCollisions) |
| 1181 | { |
| 1182 | mParams->setParamBlockBuffer(mPerObjectBinding.set, mPerObjectBinding.slot, perObjectParams); |
| 1183 | mParams->setParamBlockBuffer(mDepthCollisionBinding.set, mDepthCollisionBinding.slot, depthCollisionParams); |
| 1184 | } |
| 1185 | |
| 1186 | bindParams(); |
| 1187 | } |
| 1188 | |
| 1189 | GpuParticleSimulateMat* GpuParticleSimulateMat::getVariation(bool depthCollisions, bool localSpace) |
| 1190 | { |
| 1191 | if(depthCollisions) |
| 1192 | { |
| 1193 | if(localSpace) |
| 1194 | return get(getVariation<2>()); |
| 1195 | |
| 1196 | return get(getVariation<1>()); |
| 1197 | } |
| 1198 | |
| 1199 | return get(getVariation<0>()); |
| 1200 | } |
| 1201 | |
| 1202 | GpuParticleBoundsMat::GpuParticleBoundsMat() |
| 1203 | { |
| 1204 | mInputBuffer = gGpuParticleBoundsParamsDef.createBuffer(); |
| 1205 | mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input" , mInputBuffer); |
| 1206 | |
| 1207 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gParticleIndices" , mParticleIndicesParam); |
| 1208 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutput" , mOutputParam); |
| 1209 | mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
| 1210 | } |
| 1211 | |
| 1212 | void GpuParticleBoundsMat::_initDefines(ShaderDefines& defines) |
| 1213 | { |
| 1214 | defines.set("NUM_THREADS" , NUM_THREADS); |
| 1215 | } |
| 1216 | |
| 1217 | void GpuParticleBoundsMat::bind(const SPtr<Texture>& positionAndTime) |
| 1218 | { |
| 1219 | mPosAndTimeTexParam.set(positionAndTime); |
| 1220 | |
| 1221 | RendererMaterial::bind(); |
| 1222 | } |
| 1223 | |
| 1224 | AABox GpuParticleBoundsMat::execute(const SPtr<GpuBuffer>& indices, UINT32 numParticles) |
| 1225 | { |
| 1226 | static constexpr UINT32 MAX_NUM_GROUPS = 128; |
| 1227 | |
| 1228 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS); |
| 1229 | const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS); |
| 1230 | |
| 1231 | const UINT32 iterationsPerGroup = numIterations / numGroups; |
| 1232 | const UINT32 = numIterations % numGroups; |
| 1233 | |
| 1234 | gGpuParticleBoundsParamsDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup); |
| 1235 | gGpuParticleBoundsParamsDef.gNumExtraIterations.set(mInputBuffer, extraIterations); |
| 1236 | gGpuParticleBoundsParamsDef.gNumParticles.set(mInputBuffer, numParticles); |
| 1237 | |
| 1238 | GPU_BUFFER_DESC outputDesc; |
| 1239 | outputDesc.type = GBT_STANDARD; |
| 1240 | outputDesc.format = BF_32X2U; |
| 1241 | outputDesc.elementCount = numGroups * 2; |
| 1242 | outputDesc.usage = GBU_DYNAMIC; |
| 1243 | |
| 1244 | SPtr<GpuBuffer> output = GpuBuffer::create(outputDesc); |
| 1245 | |
| 1246 | mParticleIndicesParam.set(indices); |
| 1247 | mOutputParam.set(output); |
| 1248 | |
| 1249 | RenderAPI::instance().dispatchCompute(numGroups); |
| 1250 | |
| 1251 | Vector3 min = Vector3::INF; |
| 1252 | Vector3 max = -Vector3::INF; |
| 1253 | |
| 1254 | const Vector3* data = (Vector3*)output->lock(GBL_READ_ONLY); |
| 1255 | for(UINT32 i = 0; i < numGroups; i++) |
| 1256 | { |
| 1257 | min = Vector3::min(min, data[i * 2 + 0]); |
| 1258 | max = Vector3::min(max, data[i * 2 + 1]); |
| 1259 | } |
| 1260 | |
| 1261 | output->unlock(); |
| 1262 | |
| 1263 | return AABox(min, max); |
| 1264 | } |
| 1265 | |
| 1266 | GpuParticleSortPrepareMat::GpuParticleSortPrepareMat() |
| 1267 | { |
| 1268 | mInputBuffer = gGpuParticleSortPrepareParamDef.createBuffer(); |
| 1269 | mParams->setParamBlockBuffer(GPT_COMPUTE_PROGRAM, "Input" , mInputBuffer); |
| 1270 | |
| 1271 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gInputIndices" , mInputIndicesParam); |
| 1272 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputKeys" , mOutputKeysParam); |
| 1273 | mParams->getBufferParam(GPT_COMPUTE_PROGRAM, "gOutputIndices" , mOutputIndicesParam); |
| 1274 | mParams->getTextureParam(GPT_COMPUTE_PROGRAM, "gPosAndTimeTex" , mPosAndTimeTexParam); |
| 1275 | } |
| 1276 | |
| 1277 | void GpuParticleSortPrepareMat::_initDefines(ShaderDefines& defines) |
| 1278 | { |
| 1279 | defines.set("NUM_THREADS" , NUM_THREADS); |
| 1280 | } |
| 1281 | |
| 1282 | void GpuParticleSortPrepareMat::bind(const SPtr<Texture>& positionAndTime) |
| 1283 | { |
| 1284 | mPosAndTimeTexParam.set(positionAndTime); |
| 1285 | |
| 1286 | RendererMaterial::bind(false); |
| 1287 | } |
| 1288 | |
| 1289 | UINT32 GpuParticleSortPrepareMat::execute(const GpuParticleSystem& system, UINT32 systemIdx, const Vector3& viewOrigin, |
| 1290 | UINT32 offset, const SPtr<GpuBuffer>& outKeys, const SPtr<GpuBuffer>& outIndices) |
| 1291 | { |
| 1292 | static constexpr UINT32 MAX_NUM_GROUPS = 128; |
| 1293 | |
| 1294 | assert(systemIdx < std::pow(2, 16)); |
| 1295 | |
| 1296 | const UINT32 numParticles = system.getNumTiles() * GpuParticleResources::PARTICLES_PER_TILE; |
| 1297 | |
| 1298 | const UINT32 numIterations = Math::divideAndRoundUp(numParticles, NUM_THREADS); |
| 1299 | const UINT32 numGroups = std::min(numIterations, MAX_NUM_GROUPS); |
| 1300 | |
| 1301 | const UINT32 iterationsPerGroup = numIterations / numGroups; |
| 1302 | const UINT32 = numIterations % numGroups; |
| 1303 | |
| 1304 | Vector3 localViewOrigin; |
| 1305 | ParticleSystem* parentSystem = system.getParent(); |
| 1306 | if(parentSystem->getSettings().simulationSpace == ParticleSimulationSpace::Local) |
| 1307 | { |
| 1308 | const Matrix4& worldToLocal = parentSystem->getTransform().getInvMatrix(); |
| 1309 | localViewOrigin = worldToLocal.multiplyAffine(viewOrigin); |
| 1310 | } |
| 1311 | else |
| 1312 | localViewOrigin = viewOrigin; |
| 1313 | |
| 1314 | gGpuParticleSortPrepareParamDef.gIterationsPerGroup.set(mInputBuffer, iterationsPerGroup); |
| 1315 | gGpuParticleSortPrepareParamDef.gNumExtraIterations.set(mInputBuffer, extraIterations); |
| 1316 | gGpuParticleSortPrepareParamDef.gNumParticles.set(mInputBuffer, numParticles); |
| 1317 | gGpuParticleSortPrepareParamDef.gOutputOffset.set(mInputBuffer, offset); |
| 1318 | gGpuParticleSortPrepareParamDef.gSystemKey.set(mInputBuffer, systemIdx << 16); |
| 1319 | gGpuParticleSortPrepareParamDef.gLocalViewOrigin.set(mInputBuffer, localViewOrigin); |
| 1320 | |
| 1321 | mInputIndicesParam.set(system.getParticleIndices()); |
| 1322 | mOutputKeysParam.set(outKeys); |
| 1323 | mOutputIndicesParam.set(outIndices); |
| 1324 | |
| 1325 | bindParams(); |
| 1326 | RenderAPI::instance().dispatchCompute(numGroups); |
| 1327 | return numParticles; |
| 1328 | } |
| 1329 | |
| 1330 | struct GpuParticleCurveInject |
| 1331 | { |
| 1332 | Color color; |
| 1333 | Vector2 dataUV; |
| 1334 | }; |
| 1335 | |
| 1336 | GpuParticleCurves::GpuParticleCurves() |
| 1337 | { |
| 1338 | TEXTURE_DESC textureDesc; |
| 1339 | textureDesc.format = PF_RGBA16F; |
| 1340 | textureDesc.width = TEX_SIZE; |
| 1341 | textureDesc.height = TEX_SIZE; |
| 1342 | textureDesc.usage = TU_RENDERTARGET; |
| 1343 | |
| 1344 | mCurveTexture = Texture::create(textureDesc); |
| 1345 | |
| 1346 | RENDER_TEXTURE_DESC rtDesc; |
| 1347 | rtDesc.colorSurfaces[0].texture = mCurveTexture; |
| 1348 | |
| 1349 | mRT = RenderTexture::create(rtDesc); |
| 1350 | |
| 1351 | // Prepare vertex declaration for injecting new curves |
| 1352 | SPtr<VertexDataDesc> injectVertexDesc = bs_shared_ptr_new<VertexDataDesc>(); |
| 1353 | injectVertexDesc->addVertElem(VET_FLOAT4, VES_TEXCOORD, 0, 0, 1); // Color, per instance |
| 1354 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 1, 0, 1); // Data UV, per instance |
| 1355 | injectVertexDesc->addVertElem(VET_FLOAT2, VES_TEXCOORD, 2, 1); // Pixel texture coordinates |
| 1356 | |
| 1357 | mInjectVertexDecl = VertexDeclaration::create(injectVertexDesc); |
| 1358 | |
| 1359 | // Prepare UV coordinates for injecting curves |
| 1360 | VERTEX_BUFFER_DESC injectUVBufferDesc; |
| 1361 | injectUVBufferDesc.numVerts = 4; |
| 1362 | injectUVBufferDesc.vertexSize = injectVertexDesc->getVertexStride(1); |
| 1363 | |
| 1364 | mInjectUV = VertexBuffer::create(injectUVBufferDesc); |
| 1365 | |
| 1366 | auto* const tileUVData = (Vector2*)mInjectUV->lock(GBL_WRITE_ONLY_DISCARD); |
| 1367 | const float tileUVScale = 1.0f / (float)TEX_SIZE; |
| 1368 | tileUVData[0] = Vector2(0.0f, 0.0f) * tileUVScale; |
| 1369 | tileUVData[1] = Vector2(1.0f, 0.0f) * tileUVScale; |
| 1370 | tileUVData[2] = Vector2(1.0f, 1.0f) * tileUVScale; |
| 1371 | tileUVData[3] = Vector2(0.0f, 1.0f) * tileUVScale; |
| 1372 | |
| 1373 | mInjectUV->unlock(); |
| 1374 | |
| 1375 | // Prepare indices for injecting curves |
| 1376 | INDEX_BUFFER_DESC injectIndexBufferDesc; |
| 1377 | injectIndexBufferDesc.indexType = IT_16BIT; |
| 1378 | injectIndexBufferDesc.numIndices = 6; |
| 1379 | |
| 1380 | mInjectIndices = IndexBuffer::create(injectIndexBufferDesc); |
| 1381 | |
| 1382 | const Conventions& rapiConventions = gCaps().conventions; |
| 1383 | |
| 1384 | auto* const indices = (UINT16*)mInjectIndices->lock(GBL_WRITE_ONLY_DISCARD); |
| 1385 | |
| 1386 | // If UV is flipped, then our tile will be upside down so we need to change index order so it doesn't |
| 1387 | // get culled. |
| 1388 | if (rapiConventions.uvYAxis == Conventions::Axis::Up) |
| 1389 | { |
| 1390 | indices[0] = 2; indices[1] = 1; indices[2] = 0; |
| 1391 | indices[3] = 3; indices[4] = 2; indices[5] = 0; |
| 1392 | } |
| 1393 | else |
| 1394 | { |
| 1395 | indices[0] = 0; indices[1] = 1; indices[2] = 2; |
| 1396 | indices[3] = 0; indices[4] = 2; indices[5] = 3; |
| 1397 | } |
| 1398 | |
| 1399 | mInjectIndices->unlock(); |
| 1400 | |
| 1401 | // Prepare a scratch buffer we'll use to inject new curves |
| 1402 | VERTEX_BUFFER_DESC injectScratchBufferDesc; |
| 1403 | injectScratchBufferDesc.numVerts = SCRATCH_NUM_VERTICES; |
| 1404 | injectScratchBufferDesc.vertexSize = injectVertexDesc->getVertexStride(0); |
| 1405 | injectScratchBufferDesc.usage = GBU_DYNAMIC; |
| 1406 | |
| 1407 | mInjectScratch = VertexBuffer::create(injectScratchBufferDesc); |
| 1408 | } |
| 1409 | |
| 1410 | GpuParticleCurves::~GpuParticleCurves() |
| 1411 | { |
| 1412 | for(auto& entry : mPendingAllocations) |
| 1413 | mPendingAllocator.free(entry.pixels); |
| 1414 | |
| 1415 | mPendingAllocator.clear(); |
| 1416 | } |
| 1417 | |
| 1418 | TextureRowAllocation GpuParticleCurves::alloc(Color* pixels, uint32_t count) |
| 1419 | { |
| 1420 | PendingAllocation pendingAlloc; |
| 1421 | pendingAlloc.allocation = mRowAllocator.alloc(count); |
| 1422 | |
| 1423 | if(pendingAlloc.allocation.length == 0) |
| 1424 | return pendingAlloc.allocation; |
| 1425 | |
| 1426 | pendingAlloc.pixels = (Color*)mPendingAllocator.alloc(sizeof(Color) * count); |
| 1427 | memcpy(pendingAlloc.pixels, pixels, sizeof(Color) * count); |
| 1428 | |
| 1429 | mPendingAllocations.push_back(pendingAlloc); |
| 1430 | return pendingAlloc.allocation; |
| 1431 | } |
| 1432 | |
| 1433 | void GpuParticleCurves::free(const TextureRowAllocation& alloc) |
| 1434 | { |
| 1435 | mRowAllocator.free(alloc); |
| 1436 | } |
| 1437 | |
| 1438 | void GpuParticleCurves::applyChanges() |
| 1439 | { |
| 1440 | const auto numCurves = (UINT32)mPendingAllocations.size(); |
| 1441 | if(numCurves == 0) |
| 1442 | return; |
| 1443 | |
| 1444 | GpuParticleCurveInjectMat* injectMat = GpuParticleCurveInjectMat::get(); |
| 1445 | injectMat->bind(); |
| 1446 | |
| 1447 | RenderAPI& rapi = RenderAPI::instance(); |
| 1448 | rapi.setRenderTarget(mRT); |
| 1449 | rapi.setVertexDeclaration(mInjectVertexDecl); |
| 1450 | |
| 1451 | SPtr<VertexBuffer> buffers[] = { mInjectScratch, mInjectUV }; |
| 1452 | rapi.setVertexBuffers(0, buffers, (UINT32)bs_size(buffers)); |
| 1453 | rapi.setIndexBuffer(mInjectIndices); |
| 1454 | rapi.setDrawOperation(DOT_TRIANGLE_LIST); |
| 1455 | |
| 1456 | UINT32 curveIdx = 0; |
| 1457 | |
| 1458 | auto* data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
| 1459 | while(curveIdx < numCurves) |
| 1460 | { |
| 1461 | UINT32 count = 0; |
| 1462 | for(; curveIdx < numCurves; curveIdx++) |
| 1463 | { |
| 1464 | const PendingAllocation& pendingAlloc = mPendingAllocations[curveIdx]; |
| 1465 | |
| 1466 | const UINT32 entryCount = pendingAlloc.allocation.length; |
| 1467 | if((count + entryCount) > SCRATCH_NUM_VERTICES) |
| 1468 | break; |
| 1469 | |
| 1470 | for(UINT32 i = 0; i < entryCount; i++) |
| 1471 | { |
| 1472 | data[count].color = pendingAlloc.pixels[i]; |
| 1473 | data[count].dataUV = Vector2( |
| 1474 | (pendingAlloc.allocation.x + i) / (float)TEX_SIZE, |
| 1475 | pendingAlloc.allocation.y / (float)TEX_SIZE); |
| 1476 | |
| 1477 | count++; |
| 1478 | } |
| 1479 | } |
| 1480 | |
| 1481 | mInjectScratch->unlock(); |
| 1482 | rapi.drawIndexed(0, 6, 0, 4, count); |
| 1483 | |
| 1484 | data = (GpuParticleCurveInject*)mInjectScratch->lock(GBL_WRITE_ONLY_DISCARD); |
| 1485 | } |
| 1486 | |
| 1487 | mInjectScratch->unlock(); |
| 1488 | |
| 1489 | for(auto& entry : mPendingAllocations) |
| 1490 | mPendingAllocator.free(entry.pixels); |
| 1491 | |
| 1492 | mPendingAllocations.clear(); |
| 1493 | mPendingAllocator.clear(); |
| 1494 | } |
| 1495 | |
| 1496 | Vector2 GpuParticleCurves::getUVOffset(const TextureRowAllocation& alloc) |
| 1497 | { |
| 1498 | return Vector2( |
| 1499 | ((float)alloc.x + 0.5f) / TEX_SIZE, |
| 1500 | ((float)alloc.y + 0.5f) / TEX_SIZE |
| 1501 | ); |
| 1502 | } |
| 1503 | |
| 1504 | float GpuParticleCurves::getUVScale(const TextureRowAllocation& alloc) |
| 1505 | { |
| 1506 | if(alloc.length == 0) |
| 1507 | return 0.0f; |
| 1508 | |
| 1509 | return (alloc.length - 1) / (float)TEX_SIZE; |
| 1510 | } |
| 1511 | }} |
| 1512 | |