1//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
2//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
3#include "Particles/BsParticleManager.h"
4#include "Particles/BsParticleSystem.h"
5#include "Utility/BsTime.h"
6#include "Threading/BsTaskScheduler.h"
7#include "Allocators/BsPoolAlloc.h"
8#include "Private/Particles/BsParticleSet.h"
9#include "Animation/BsAnimationManager.h"
10#include "Image/BsPixelUtil.h"
11
12namespace bs
13{
14 /** Helper method used for writing particle data into the @p pixels buffer. */
15 template<class T, class PR>
16 void iterateOverPixels(PixelData& pixels, UINT32 count, UINT32 stride, PR predicate)
17 {
18 auto dest = (UINT8*)pixels.getData();
19
20 UINT32 x = 0;
21 for (UINT32 i = 0; i < count; i++)
22 {
23 predicate((T*)dest, i);
24
25 dest += stride;
26 x++;
27
28 if (x >= pixels.getWidth())
29 {
30 x = 0;
31 dest += pixels.getRowSkip() * PixelUtil::getNumElemBytes(pixels.getFormat());;
32 }
33 }
34 }
35
36 /** Helper method used for writing particle data into the @p pixels buffer. */
37 template<class T, class PR>
38 void iterateOverPixels(PixelData& pixels, UINT32 count, PR predicate)
39 {
40 iterateOverPixels<T>(pixels, count, sizeof(T), predicate);
41 }
42
43 /**
44 * Maintains a pool of buffers that are used for passing results of particle simulation from the simulation to the
45 * core thread.
46 */
47 class ParticleSimulationDataPool
48 {
49 /** Contains a list of buffers for the specified size. */
50 struct BuffersPerSize
51 {
52 Vector<ParticleRenderData*> buffers;
53 UINT32 nextFreeIdx = 0;
54 };
55
56 public:
57 ~ParticleSimulationDataPool()
58 {
59 Lock lock(mMutex);
60
61 for (auto& sizeEntry : mBillboardBufferList)
62 {
63 for (auto& entry : sizeEntry.second.buffers)
64 mBillboardAlloc.destruct(static_cast<ParticleBillboardRenderData*>(entry));
65 }
66
67 for (auto& sizeEntry : mMeshBufferList)
68 {
69 for (auto& entry : sizeEntry.second.buffers)
70 mMeshAlloc.destruct(static_cast<ParticleMeshRenderData*>(entry));
71 }
72
73 for (auto& entry : mGPUBufferList)
74 mGPUAlloc.destruct(entry);
75 }
76
77 /**
78 * Returns a set of buffers containing particle data from the provided particle set. Usable for rendering the
79 * results of the CPU particle simulation as billboards.
80 */
81 ParticleBillboardRenderData* allocCPUBillboard(const ParticleSet& particleSet)
82 {
83 const UINT32 size = particleSet.determineTextureSize();
84
85 ParticleBillboardRenderData* output = nullptr;
86
87 {
88 Lock lock(mMutex);
89
90 BuffersPerSize& buffers = mBillboardBufferList[size];
91 if (buffers.nextFreeIdx < (UINT32)buffers.buffers.size())
92 {
93 output = static_cast<ParticleBillboardRenderData*>(buffers.buffers[buffers.nextFreeIdx]);
94 buffers.nextFreeIdx++;
95 }
96 }
97
98 if (!output)
99 {
100 output = createNewBillboardBuffersCPU(size);
101
102 Lock lock(mMutex);
103
104 BuffersPerSize& buffers = mBillboardBufferList[size];
105 buffers.buffers.push_back(output);
106 buffers.nextFreeIdx++;
107 }
108
109 // Populate buffer contents
110 const UINT32 count = particleSet.getParticleCount();
111 const ParticleSetData& particles = particleSet.getParticles();
112
113 // TODO: Use non-temporal writes?
114 iterateOverPixels<Vector4>(output->positionAndRotation, count,
115 [&particles](Vector4* dst, UINT32 idx)
116 {
117 dst->x = particles.position[idx].x;
118 dst->y = particles.position[idx].y;
119 dst->z = particles.position[idx].z;
120 dst->w = particles.rotation[idx].x * Math::DEG2RAD;
121
122 });
123
124 iterateOverPixels<RGBA>(output->color, count,
125 [&particles](RGBA* dst, UINT32 idx)
126 {
127 *dst = particles.color[idx];
128 });
129
130 iterateOverPixels<UINT16>(output->sizeAndFrameIdx, count, sizeof(UINT16) * 4,
131 [&particles](UINT16* dst, UINT32 idx)
132 {
133 dst[0] = Bitwise::floatToHalf(particles.size[idx].x);
134 dst[1] = Bitwise::floatToHalf(particles.size[idx].y);
135 dst[2] = Bitwise::floatToHalf(particles.frame[idx]);
136 });
137
138 output->indices.clear();
139 output->indices.resize(count);
140
141 return output;
142 }
143
144 /**
145 * Returns a set of buffers containing particle data from the provided particle set. Usable for rendering the
146 * results of the CPU particle simulation as 3D meshes.
147 */
148 ParticleMeshRenderData* allocCPUMesh(const ParticleSet& particleSet)
149 {
150 const UINT32 size = particleSet.determineTextureSize();
151
152 ParticleMeshRenderData* output = nullptr;
153
154 {
155 Lock lock(mMutex);
156
157 BuffersPerSize& buffers = mMeshBufferList[size];
158 if (buffers.nextFreeIdx < (UINT32)buffers.buffers.size())
159 {
160 output = static_cast<ParticleMeshRenderData*>(buffers.buffers[buffers.nextFreeIdx]);
161 buffers.nextFreeIdx++;
162 }
163 }
164
165 if (!output)
166 {
167 output = createNewMeshBuffersCPU(size);
168
169 Lock lock(mMutex);
170
171 BuffersPerSize& buffers = mMeshBufferList[size];
172 buffers.buffers.push_back(output);
173 buffers.nextFreeIdx++;
174 }
175
176 // Populate buffer contents
177 const UINT32 count = particleSet.getParticleCount();
178 const ParticleSetData& particles = particleSet.getParticles();
179
180 // TODO: Use non-temporal writes?
181 iterateOverPixels<Vector4>(output->position, count,
182 [&particles](Vector4* dst, UINT32 idx)
183 {
184 dst->x = particles.position[idx].x;
185 dst->y = particles.position[idx].y;
186 dst->z = particles.position[idx].z;
187
188 });
189
190 iterateOverPixels<RGBA>(output->color, count,
191 [&particles](RGBA* dst, UINT32 idx)
192 {
193 *dst = particles.color[idx];
194 });
195
196 iterateOverPixels<UINT16>(output->rotation, count, sizeof(UINT16) * 4,
197 [&particles](UINT16* dst, UINT32 idx)
198 {
199 dst[0] = Bitwise::floatToHalf(particles.rotation[idx].x * Math::DEG2RAD);
200 dst[1] = Bitwise::floatToHalf(particles.rotation[idx].y * Math::DEG2RAD);
201 dst[2] = Bitwise::floatToHalf(particles.rotation[idx].z * Math::DEG2RAD);
202 });
203
204 iterateOverPixels<UINT16>(output->size, count, sizeof(UINT16) * 4,
205 [&particles](UINT16* dst, UINT32 idx)
206 {
207 dst[0] = Bitwise::floatToHalf(particles.size[idx].x);
208 dst[1] = Bitwise::floatToHalf(particles.size[idx].y);
209 dst[2] = Bitwise::floatToHalf(particles.size[idx].z);
210 });
211
212 output->indices.clear();
213 output->indices.resize(count);
214
215 return output;
216 }
217
218 /**
219 * Returns a list of particles from the provided particle set that may be used for inserting the particles into the
220 * GPU simulation.
221 */
222 ParticleGPUSimulationData* allocGPU(const ParticleSet& particleSet)
223 {
224 ParticleGPUSimulationData* output = nullptr;
225
226 {
227 Lock lock(mMutex);
228
229 if (mNextFreeGPUBuffer < (UINT32)mGPUBufferList.size())
230 {
231 output = mGPUBufferList[mNextFreeGPUBuffer];
232 mNextFreeGPUBuffer++;
233 }
234 }
235
236 if (!output)
237 {
238 output = createNewBuffersGPU();
239
240 Lock lock(mMutex);
241
242 mGPUBufferList.push_back(output);
243 mNextFreeGPUBuffer++;
244 }
245
246 // Populate buffer contents
247 const UINT32 count = particleSet.getParticleCount();
248 const ParticleSetData& particles = particleSet.getParticles();
249
250 output->particles.clear();
251 output->particles.resize(count);
252
253 // TODO: Use non-temporal writes?
254 for (UINT32 i = 0; i < count; i++)
255 {
256 GpuParticle particle;
257 particle.position = particles.position[i];
258 particle.lifetime = particles.lifetime[i];
259 particle.initialLifetime = particles.initialLifetime[i];
260 particle.velocity = particles.velocity[i];
261 particle.size = Vector2(particles.size[i].x, particles.size[i].z);
262 particle.rotation = particles.rotation[i].z;
263
264 output->particles[i] = particle;
265 }
266
267 return output;
268 }
269
270 /** Makes all the buffers available for allocations. Does not free internal buffer memory. */
271 void clear()
272 {
273 Lock lock(mMutex);
274
275 for(auto& buffers : mBillboardBufferList)
276 buffers.second.nextFreeIdx = 0;
277
278 for(auto& buffers : mMeshBufferList)
279 buffers.second.nextFreeIdx = 0;
280
281 mNextFreeGPUBuffer = 0;
282 }
283
284 private:
285 /** Allocates a new set of CPU buffers used for billboard rendering of the provided @p size width and height. */
286 ParticleBillboardRenderData* createNewBillboardBuffersCPU(UINT32 size)
287 {
288 auto output = mBillboardAlloc.construct<ParticleBillboardRenderData>();
289
290 output->positionAndRotation = PixelData(size, size, 1, PF_RGBA32F);
291 output->color = PixelData(size, size, 1, PF_RGBA8);
292 output->sizeAndFrameIdx = PixelData(size, size, 1, PF_RGBA16F);
293
294 // Note: Potentially allocate them all in one large block
295 output->positionAndRotation.allocateInternalBuffer();
296 output->color.allocateInternalBuffer();
297 output->sizeAndFrameIdx.allocateInternalBuffer();
298
299 return output;
300 }
301
302 /** Allocates a new set of CPU buffers used for mesh rendering of the provided @p size width and height. */
303 ParticleMeshRenderData* createNewMeshBuffersCPU(UINT32 size)
304 {
305 auto output = mMeshAlloc.construct<ParticleMeshRenderData>();
306
307 output->position = PixelData(size, size, 1, PF_RGBA32F);
308 output->color = PixelData(size, size, 1, PF_RGBA8);
309 output->size = PixelData(size, size, 1, PF_RGBA16F);
310 output->rotation = PixelData(size, size, 1, PF_RGBA16F);
311
312 // Note: Potentially allocate them all in one large block
313 output->position.allocateInternalBuffer();
314 output->color.allocateInternalBuffer();
315 output->size.allocateInternalBuffer();
316 output->rotation.allocateInternalBuffer();
317
318 return output;
319 }
320
321 /** Allocates a new set of GPU buffers of the provided @p size width and height. */
322 ParticleGPUSimulationData* createNewBuffersGPU()
323 {
324 return mGPUAlloc.construct<ParticleGPUSimulationData>();
325 }
326
327 UnorderedMap<UINT32, BuffersPerSize> mBillboardBufferList;
328 UnorderedMap<UINT32, BuffersPerSize> mMeshBufferList;
329 Vector<ParticleGPUSimulationData*> mGPUBufferList;
330 UINT32 mNextFreeGPUBuffer = 0;
331
332 PoolAlloc<sizeof(ParticleBillboardRenderData), 32, 4, true> mBillboardAlloc;
333 PoolAlloc<sizeof(ParticleMeshRenderData), 32, 4, true> mMeshAlloc;
334 PoolAlloc<sizeof(ParticleGPUSimulationData), 32, 4, true> mGPUAlloc;
335 Mutex mMutex;
336 };
337
338 struct ParticleManager::Members
339 {
340 // TODO - Perhaps sharing one pool is better
341 ParticleSimulationDataPool simDataPool[CoreThread::NUM_SYNC_BUFFERS];
342 };
343
344 ParticleManager::ParticleManager()
345 :m(bs_new<Members>())
346 { }
347
348 ParticleManager::~ParticleManager()
349 {
350 bs_delete(m);
351 }
352
353 ParticlePerFrameData* ParticleManager::update(const EvaluatedAnimationData& animData)
354 {
355 // Note: Allow the worker threads to work alongside the main thread? Would require extra synchronization but
356 // potentially no benefit?
357
358 // Advance the buffers (last write buffer becomes read buffer)
359 if (mSwapBuffers)
360 {
361 mReadBufferIdx = (mReadBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
362 mWriteBufferIdx = (mWriteBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
363
364 mSwapBuffers = false;
365 }
366
367 if(mPaused)
368 return &mSimulationData[mReadBufferIdx];
369
370 // TODO - Perform culling (but only on deterministic particle systems). In which case cache the bounds so we don't
371 // need to recalculate them below
372
373 // Prepare the write buffer
374 ParticlePerFrameData& simulationData = mSimulationData[mWriteBufferIdx];
375 simulationData.cpuData.clear();
376 simulationData.gpuData.clear();
377
378 // Queue evaluation tasks
379 {
380 Lock lock(mMutex);
381 mNumActiveWorkers = (UINT32)mSystems.size();
382 }
383
384 float timeDelta = gTime().getFrameDelta();
385
386 ParticleSimulationDataPool& simDataPool = m->simDataPool[mWriteBufferIdx];
387 simDataPool.clear();
388
389 for (auto& system : mSystems)
390 {
391 const auto evaluateWorker = [this, timeDelta, system, &animData, &simDataPool, &simulationData]()
392 {
393 // Advance the simulation
394 system->_simulate(timeDelta, &animData);
395
396 ParticleRenderData* simulationDataCPU = nullptr;
397 ParticleGPUSimulationData* simulationDataGPU = nullptr;
398 if(system->mParticleSet)
399 {
400 // Generate simulation data to transfer to the core thread
401 const UINT32 numParticles = system->mParticleSet->getParticleCount();
402 const ParticleSystemSettings& settings = system->getSettings();
403
404 if(settings.gpuSimulation)
405 simulationDataGPU = simDataPool.allocGPU(*system->mParticleSet);
406 else
407 {
408 if(settings.renderMode == ParticleRenderMode::Billboard)
409 simulationDataCPU = simDataPool.allocCPUBillboard(*system->mParticleSet);
410 else
411 simulationDataCPU = simDataPool.allocCPUMesh(*system->mParticleSet);
412
413 simulationDataCPU->numParticles = numParticles;
414
415 if(settings.useAutomaticBounds)
416 simulationDataCPU->bounds = system->_calculateBounds();
417 else
418 simulationDataCPU->bounds = settings.customBounds;
419
420 // If using a camera-independant sorting mode, sort the particles right away
421 switch (settings.sortMode)
422 {
423 default:
424 case ParticleSortMode::None: // No sort, just point the indices back to themselves
425 for (UINT32 i = 0; i < numParticles; i++)
426 simulationDataCPU->indices[i] = i;
427 break;
428 case ParticleSortMode::OldToYoung:
429 case ParticleSortMode::YoungToOld:
430 sortParticles(*system->mParticleSet, settings.sortMode, Vector3::ZERO, simulationDataCPU->indices.data());
431 break;
432 case ParticleSortMode::Distance: break;
433 }
434 }
435 }
436
437 {
438 Lock lock(mMutex);
439
440 assert(mNumActiveWorkers > 0);
441 mNumActiveWorkers--;
442
443 if(simulationDataCPU)
444 simulationData.cpuData[system->mId] = simulationDataCPU;
445 else if(simulationDataGPU)
446 simulationData.gpuData[system->mId] = simulationDataGPU;
447 }
448
449 mWorkerDoneSignal.notify_one();
450 };
451
452 SPtr<Task> task = Task::create("ParticleWorker", evaluateWorker);
453 TaskScheduler::instance().addTask(task);
454 }
455
456 // Wait for tasks to complete
457 TaskScheduler::instance().addWorker(); // Make the current core available for work (since this thread waits)
458 {
459 Lock lock(mMutex);
460
461 while (mNumActiveWorkers > 0)
462 mWorkerDoneSignal.wait(lock);
463 }
464 TaskScheduler::instance().removeWorker();
465
466 mSwapBuffers = true;
467
468 return &mSimulationData[mWriteBufferIdx];
469 }
470
471 void ParticleManager::sortParticles(const ParticleSet& set, ParticleSortMode sortMode, const Vector3& viewPoint,
472 UINT32* indices)
473 {
474 assert(sortMode != ParticleSortMode::None);
475
476 struct ParticleSortData
477 {
478 ParticleSortData(float key, UINT32 idx)
479 :key(key), idx(idx)
480 { }
481
482 float key;
483 UINT32 idx;
484 };
485
486 const UINT32 count = set.getParticleCount();
487 const ParticleSetData& particles = set.getParticles();
488
489 bs_frame_mark();
490 {
491 FrameVector<ParticleSortData> sortData;
492 sortData.reserve(count);
493
494 switch(sortMode)
495 {
496 default:
497 case ParticleSortMode::Distance:
498 for(UINT32 i = 0; i < count; i++)
499 {
500 float distance = viewPoint.squaredDistance(particles.position[i]);
501 sortData.emplace_back(distance, i);
502 }
503 break;
504 case ParticleSortMode::OldToYoung:
505 for(UINT32 i = 0; i < count; i++)
506 {
507 float lifetime = particles.lifetime[i];
508 sortData.emplace_back(lifetime, i);
509 }
510 break;
511 case ParticleSortMode::YoungToOld:
512 for(UINT32 i = 0; i < count; i++)
513 {
514 float lifetime = particles.initialLifetime[i] - particles.lifetime[i];
515 sortData.emplace_back(lifetime, i);
516 }
517 break;
518 }
519
520 std::sort(sortData.begin(), sortData.end(),
521 [](const ParticleSortData& lhs, const ParticleSortData& rhs)
522 {
523 return rhs.key < lhs.key;
524 });
525
526 for (UINT32 i = 0; i < count; i++)
527 indices[i] = sortData[i].idx;
528 }
529 bs_frame_clear();
530 }
531
532 UINT32 ParticleManager::registerParticleSystem(ParticleSystem* system)
533 {
534 mSystems.insert(system);
535
536 return mNextId++;
537 }
538
539 void ParticleManager::unregisterParticleSystem(ParticleSystem* system)
540 {
541 mSystems.erase(system);
542 }
543}
544