//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
#include "Particles/BsParticleManager.h"
#include "Particles/BsParticleSystem.h"
#include "Utility/BsTime.h"
#include "Threading/BsTaskScheduler.h"
#include "Allocators/BsPoolAlloc.h"
#include "Private/Particles/BsParticleSet.h"
#include "Animation/BsAnimationManager.h"
#include "Image/BsPixelUtil.h"

namespace bs
{
	/** Helper method used for writing particle data into the @p pixels buffer. */
	template<class T, class PR>
	void iterateOverPixels(PixelData& pixels, UINT32 count, UINT32 stride, PR predicate)
	{
		auto dest = (UINT8*)pixels.getData();

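		// Write one entry per particle. 'x' tracks the current column in the output texture; once a row is filled,
		// skip over any row padding bytes and continue with the next row.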
		UINT32 x = 0;
		for (UINT32 i = 0; i < count; i++)
		{
			predicate((T*)dest, i);

			dest += stride;
			x++;

			if (x >= pixels.getWidth())
			{
				x = 0;
				dest += pixels.getRowSkip() * PixelUtil::getNumElemBytes(pixels.getFormat());
			}
		}
	}

	/** Overload of iterateOverPixels() that assumes the output elements of type T are tightly packed. */
	template<class T, class PR>
	void iterateOverPixels(PixelData& pixels, UINT32 count, PR predicate)
	{
		iterateOverPixels<T>(pixels, count, sizeof(T), predicate);
	}

	/**
	 * Maintains a pool of buffers used for passing the results of the particle simulation from the simulation
	 * (worker) threads to the core thread.
	 */
	class ParticleSimulationDataPool
	{
		/** Contains a list of buffers for the specified size. */
		struct BuffersPerSize
		{
			Vector<ParticleRenderData*> buffers;
			UINT32 nextFreeIdx = 0;
		};

	public:
		~ParticleSimulationDataPool()
		{
			Lock lock(mMutex);

			for (auto& sizeEntry : mBillboardBufferList)
			{
				for (auto& entry : sizeEntry.second.buffers)
					mBillboardAlloc.destruct(static_cast<ParticleBillboardRenderData*>(entry));
			}

			for (auto& sizeEntry : mMeshBufferList)
			{
				for (auto& entry : sizeEntry.second.buffers)
					mMeshAlloc.destruct(static_cast<ParticleMeshRenderData*>(entry));
			}

			for (auto& entry : mGPUBufferList)
				mGPUAlloc.destruct(entry);
		}

		/**
		 * Returns a set of buffers containing particle data from the provided particle set. Usable for rendering the
		 * results of the CPU particle simulation as billboards.
		 */
		ParticleBillboardRenderData* allocCPUBillboard(const ParticleSet& particleSet)
		{
			const UINT32 size = particleSet.determineTextureSize();

			ParticleBillboardRenderData* output = nullptr;

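			// Try to re-use a free buffer of the matching size from the pool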
			{
				Lock lock(mMutex);

				BuffersPerSize& buffers = mBillboardBufferList[size];
				if (buffers.nextFreeIdx < (UINT32)buffers.buffers.size())
				{
					output = static_cast<ParticleBillboardRenderData*>(buffers.buffers[buffers.nextFreeIdx]);
					buffers.nextFreeIdx++;
				}
			}

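			// No free buffer of the matching size was available, so allocate a new one (outside of the lock) and
			// register it with the pool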
			if (!output)
			{
				output = createNewBillboardBuffersCPU(size);

				Lock lock(mMutex);

				BuffersPerSize& buffers = mBillboardBufferList[size];
				buffers.buffers.push_back(output);
				buffers.nextFreeIdx++;
			}

			// Populate buffer contents
			const UINT32 count = particleSet.getParticleCount();
			const ParticleSetData& particles = particleSet.getParticles();

			// TODO: Use non-temporal writes?
			iterateOverPixels<Vector4>(output->positionAndRotation, count,
				[&particles](Vector4* dst, UINT32 idx)
			{
				dst->x = particles.position[idx].x;
				dst->y = particles.position[idx].y;
				dst->z = particles.position[idx].z;
				dst->w = particles.rotation[idx].x * Math::DEG2RAD;
			});

			iterateOverPixels<RGBA>(output->color, count,
				[&particles](RGBA* dst, UINT32 idx)
			{
				*dst = particles.color[idx];
			});

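			// Pack the 2D size and the animation frame index as half-floats; the fourth component of each RGBA16F
			// texel is left unused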
			iterateOverPixels<UINT16>(output->sizeAndFrameIdx, count, sizeof(UINT16) * 4,
				[&particles](UINT16* dst, UINT32 idx)
			{
				dst[0] = Bitwise::floatToHalf(particles.size[idx].x);
				dst[1] = Bitwise::floatToHalf(particles.size[idx].y);
				dst[2] = Bitwise::floatToHalf(particles.frame[idx]);
			});

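			// Make room for one index per particle; the actual values are filled in later (either as an identity
			// mapping or in sorted order)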
			output->indices.clear();
			output->indices.resize(count);

			return output;
		}

		/**
		 * Returns a set of buffers containing particle data from the provided particle set. Usable for rendering the
		 * results of the CPU particle simulation as 3D meshes.
		 */
		ParticleMeshRenderData* allocCPUMesh(const ParticleSet& particleSet)
		{
			const UINT32 size = particleSet.determineTextureSize();

			ParticleMeshRenderData* output = nullptr;

			{
				Lock lock(mMutex);

				BuffersPerSize& buffers = mMeshBufferList[size];
				if (buffers.nextFreeIdx < (UINT32)buffers.buffers.size())
				{
					output = static_cast<ParticleMeshRenderData*>(buffers.buffers[buffers.nextFreeIdx]);
					buffers.nextFreeIdx++;
				}
			}

			if (!output)
			{
				output = createNewMeshBuffersCPU(size);

				Lock lock(mMutex);

				BuffersPerSize& buffers = mMeshBufferList[size];
				buffers.buffers.push_back(output);
				buffers.nextFreeIdx++;
			}

			// Populate buffer contents
			const UINT32 count = particleSet.getParticleCount();
			const ParticleSetData& particles = particleSet.getParticles();

			// TODO: Use non-temporal writes?
			iterateOverPixels<Vector4>(output->position, count,
				[&particles](Vector4* dst, UINT32 idx)
			{
				dst->x = particles.position[idx].x;
				dst->y = particles.position[idx].y;
				dst->z = particles.position[idx].z;
			});

			iterateOverPixels<RGBA>(output->color, count,
				[&particles](RGBA* dst, UINT32 idx)
			{
				*dst = particles.color[idx];
			});

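			// Rotation (converted to radians) and size are packed as half-floats; the fourth component of each
			// RGBA16F texel is left unused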
			iterateOverPixels<UINT16>(output->rotation, count, sizeof(UINT16) * 4,
				[&particles](UINT16* dst, UINT32 idx)
			{
				dst[0] = Bitwise::floatToHalf(particles.rotation[idx].x * Math::DEG2RAD);
				dst[1] = Bitwise::floatToHalf(particles.rotation[idx].y * Math::DEG2RAD);
				dst[2] = Bitwise::floatToHalf(particles.rotation[idx].z * Math::DEG2RAD);
			});

			iterateOverPixels<UINT16>(output->size, count, sizeof(UINT16) * 4,
				[&particles](UINT16* dst, UINT32 idx)
			{
				dst[0] = Bitwise::floatToHalf(particles.size[idx].x);
				dst[1] = Bitwise::floatToHalf(particles.size[idx].y);
				dst[2] = Bitwise::floatToHalf(particles.size[idx].z);
			});

			output->indices.clear();
			output->indices.resize(count);

			return output;
		}

		/**
		 * Returns a list of particles from the provided particle set that can be used for inserting them into the
		 * GPU simulation.
		 */
		ParticleGPUSimulationData* allocGPU(const ParticleSet& particleSet)
		{
			ParticleGPUSimulationData* output = nullptr;

			{
				Lock lock(mMutex);

				if (mNextFreeGPUBuffer < (UINT32)mGPUBufferList.size())
				{
					output = mGPUBufferList[mNextFreeGPUBuffer];
					mNextFreeGPUBuffer++;
				}
			}

			if (!output)
			{
				output = createNewBuffersGPU();

				Lock lock(mMutex);

				mGPUBufferList.push_back(output);
				mNextFreeGPUBuffer++;
			}

			// Populate buffer contents
			const UINT32 count = particleSet.getParticleCount();
			const ParticleSetData& particles = particleSet.getParticles();

			output->particles.clear();
			output->particles.resize(count);

			// TODO: Use non-temporal writes?
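			// Note that only the x/z size components and the z rotation component are transferred to the GPU
			// simulation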
			for (UINT32 i = 0; i < count; i++)
			{
				GpuParticle particle;
				particle.position = particles.position[i];
				particle.lifetime = particles.lifetime[i];
				particle.initialLifetime = particles.initialLifetime[i];
				particle.velocity = particles.velocity[i];
				particle.size = Vector2(particles.size[i].x, particles.size[i].z);
				particle.rotation = particles.rotation[i].z;

				output->particles[i] = particle;
			}

			return output;
		}

		/** Makes all the buffers available for allocations. Does not free internal buffer memory. */
		void clear()
		{
			Lock lock(mMutex);

			for(auto& buffers : mBillboardBufferList)
				buffers.second.nextFreeIdx = 0;

			for(auto& buffers : mMeshBufferList)
				buffers.second.nextFreeIdx = 0;

			mNextFreeGPUBuffer = 0;
		}

	private:
		/** Allocates a new set of CPU buffers used for billboard rendering, with @p size used as both width and height. */
		ParticleBillboardRenderData* createNewBillboardBuffersCPU(UINT32 size)
		{
			auto output = mBillboardAlloc.construct<ParticleBillboardRenderData>();

			output->positionAndRotation = PixelData(size, size, 1, PF_RGBA32F);
			output->color = PixelData(size, size, 1, PF_RGBA8);
			output->sizeAndFrameIdx = PixelData(size, size, 1, PF_RGBA16F);

			// Note: Potentially allocate them all in one large block
			output->positionAndRotation.allocateInternalBuffer();
			output->color.allocateInternalBuffer();
			output->sizeAndFrameIdx.allocateInternalBuffer();

			return output;
		}

		/** Allocates a new set of CPU buffers used for mesh rendering, with @p size used as both width and height. */
		ParticleMeshRenderData* createNewMeshBuffersCPU(UINT32 size)
		{
			auto output = mMeshAlloc.construct<ParticleMeshRenderData>();

			output->position = PixelData(size, size, 1, PF_RGBA32F);
			output->color = PixelData(size, size, 1, PF_RGBA8);
			output->size = PixelData(size, size, 1, PF_RGBA16F);
			output->rotation = PixelData(size, size, 1, PF_RGBA16F);

			// Note: Potentially allocate them all in one large block
			output->position.allocateInternalBuffer();
			output->color.allocateInternalBuffer();
			output->size.allocateInternalBuffer();
			output->rotation.allocateInternalBuffer();

			return output;
		}

		/** Allocates a new set of buffers used for passing particle data to the GPU simulation. */
		ParticleGPUSimulationData* createNewBuffersGPU()
		{
			return mGPUAlloc.construct<ParticleGPUSimulationData>();
		}

		UnorderedMap<UINT32, BuffersPerSize> mBillboardBufferList;
		UnorderedMap<UINT32, BuffersPerSize> mMeshBufferList;
		Vector<ParticleGPUSimulationData*> mGPUBufferList;
		UINT32 mNextFreeGPUBuffer = 0;

		PoolAlloc<sizeof(ParticleBillboardRenderData), 32, 4, true> mBillboardAlloc;
		PoolAlloc<sizeof(ParticleMeshRenderData), 32, 4, true> mMeshAlloc;
		PoolAlloc<sizeof(ParticleGPUSimulationData), 32, 4, true> mGPUAlloc;
		Mutex mMutex;
	};

	struct ParticleManager::Members
	{
		// TODO - Perhaps sharing one pool is better
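		// One pool per core thread sync buffer, so the core thread can keep reading the previous frame's data while
		// the simulation writes the next frame's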
		ParticleSimulationDataPool simDataPool[CoreThread::NUM_SYNC_BUFFERS];
	};

	ParticleManager::ParticleManager()
		:m(bs_new<Members>())
	{ }

	ParticleManager::~ParticleManager()
	{
		bs_delete(m);
	}

	ParticlePerFrameData* ParticleManager::update(const EvaluatedAnimationData& animData)
	{
		// Note: Should the worker threads be allowed to work alongside the main thread? That would require extra
		// synchronization, but potentially with no benefit.

		// Advance the buffers (last write buffer becomes read buffer)
		if (mSwapBuffers)
		{
			mReadBufferIdx = (mReadBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
			mWriteBufferIdx = (mWriteBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;

			mSwapBuffers = false;
		}

		if(mPaused)
			return &mSimulationData[mReadBufferIdx];

		// TODO - Perform culling (but only on deterministic particle systems), in which case cache the bounds so they
		// don't need to be recalculated below

		// Prepare the write buffer
		ParticlePerFrameData& simulationData = mSimulationData[mWriteBufferIdx];
		simulationData.cpuData.clear();
		simulationData.gpuData.clear();

		// Queue evaluation tasks
		{
			Lock lock(mMutex);
			mNumActiveWorkers = (UINT32)mSystems.size();
		}

		float timeDelta = gTime().getFrameDelta();

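		// Make all of this pool's buffers available again so they can be re-used for the systems simulated below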
		ParticleSimulationDataPool& simDataPool = m->simDataPool[mWriteBufferIdx];
		simDataPool.clear();

		for (auto& system : mSystems)
		{
			const auto evaluateWorker = [this, timeDelta, system, &animData, &simDataPool, &simulationData]()
			{
				// Advance the simulation
				system->_simulate(timeDelta, &animData);

				ParticleRenderData* simulationDataCPU = nullptr;
				ParticleGPUSimulationData* simulationDataGPU = nullptr;
				if(system->mParticleSet)
				{
					// Generate simulation data to transfer to the core thread
					const UINT32 numParticles = system->mParticleSet->getParticleCount();
					const ParticleSystemSettings& settings = system->getSettings();

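					// GPU simulated systems only need their particle data packaged for injection into the GPU
					// simulation, while CPU simulated systems require fully populated render data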
					if(settings.gpuSimulation)
						simulationDataGPU = simDataPool.allocGPU(*system->mParticleSet);
					else
					{
						if(settings.renderMode == ParticleRenderMode::Billboard)
							simulationDataCPU = simDataPool.allocCPUBillboard(*system->mParticleSet);
						else
							simulationDataCPU = simDataPool.allocCPUMesh(*system->mParticleSet);

						simulationDataCPU->numParticles = numParticles;

						if(settings.useAutomaticBounds)
							simulationDataCPU->bounds = system->_calculateBounds();
						else
							simulationDataCPU->bounds = settings.customBounds;

						// If using a camera-independent sorting mode, sort the particles right away
						switch (settings.sortMode)
						{
						default:
						case ParticleSortMode::None: // No sort, just point the indices back to themselves
							for (UINT32 i = 0; i < numParticles; i++)
								simulationDataCPU->indices[i] = i;
							break;
						case ParticleSortMode::OldToYoung:
						case ParticleSortMode::YoungToOld:
							sortParticles(*system->mParticleSet, settings.sortMode, Vector3::ZERO, simulationDataCPU->indices.data());
							break;
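						// Distance sorting depends on the camera position, so it is deferred until render time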
						case ParticleSortMode::Distance: break;
						}
					}
				}

				{
					Lock lock(mMutex);

					assert(mNumActiveWorkers > 0);
					mNumActiveWorkers--;

					if(simulationDataCPU)
						simulationData.cpuData[system->mId] = simulationDataCPU;
					else if(simulationDataGPU)
						simulationData.gpuData[system->mId] = simulationDataGPU;
				}

				mWorkerDoneSignal.notify_one();
			};

			SPtr<Task> task = Task::create("ParticleWorker", evaluateWorker);
			TaskScheduler::instance().addTask(task);
		}

		// Wait for tasks to complete
		TaskScheduler::instance().addWorker(); // Make the current core available for work (since this thread waits)
		{
			Lock lock(mMutex);

			while (mNumActiveWorkers > 0)
				mWorkerDoneSignal.wait(lock);
		}
		TaskScheduler::instance().removeWorker();

		mSwapBuffers = true;

		return &mSimulationData[mWriteBufferIdx];
	}

	void ParticleManager::sortParticles(const ParticleSet& set, ParticleSortMode sortMode, const Vector3& viewPoint,
		UINT32* indices)
	{
		assert(sortMode != ParticleSortMode::None);

		struct ParticleSortData
		{
			ParticleSortData(float key, UINT32 idx)
				:key(key), idx(idx)
			{ }

			float key;
			UINT32 idx;
		};

		const UINT32 count = set.getParticleCount();
		const ParticleSetData& particles = set.getParticles();

		bs_frame_mark();
		{
			FrameVector<ParticleSortData> sortData;
			sortData.reserve(count);

			switch(sortMode)
			{
			default:
			case ParticleSortMode::Distance:
				for(UINT32 i = 0; i < count; i++)
				{
					float distance = viewPoint.squaredDistance(particles.position[i]);
					sortData.emplace_back(distance, i);
				}
				break;
			case ParticleSortMode::OldToYoung:
				for(UINT32 i = 0; i < count; i++)
				{
					float lifetime = particles.lifetime[i];
					sortData.emplace_back(lifetime, i);
				}
				break;
			case ParticleSortMode::YoungToOld:
				for(UINT32 i = 0; i < count; i++)
				{
					float lifetime = particles.initialLifetime[i] - particles.lifetime[i];
					sortData.emplace_back(lifetime, i);
				}
				break;
			}

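			// Sort in descending key order: farthest, oldest or youngest particles first, depending on the sort mode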
			std::sort(sortData.begin(), sortData.end(),
				[](const ParticleSortData& lhs, const ParticleSortData& rhs)
			{
				return rhs.key < lhs.key;
			});

			for (UINT32 i = 0; i < count; i++)
				indices[i] = sortData[i].idx;
		}
		bs_frame_clear();
	}

	UINT32 ParticleManager::registerParticleSystem(ParticleSystem* system)
	{
		mSystems.insert(system);

		return mNextId++;
	}

	void ParticleManager::unregisterParticleSystem(ParticleSystem* system)
	{
		mSystems.erase(system);
	}
}