1/**
2 * Copyright (c) 2006-2023 LOVE Development Team
3 *
4 * This software is provided 'as-is', without any express or implied
5 * warranty. In no event will the authors be held liable for any damages
6 * arising from the use of this software.
7 *
8 * Permission is granted to anyone to use this software for any purpose,
9 * including commercial applications, and to alter it and redistribute it
10 * freely, subject to the following restrictions:
11 *
12 * 1. The origin of this software must not be misrepresented; you must not
13 * claim that you wrote the original software. If you use this software
14 * in a product, an acknowledgment in the product documentation would be
15 * appreciated but is not required.
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 * 3. This notice may not be removed or altered from any source distribution.
19 **/
20
21#include "common/config.h"
22#include "StreamBuffer.h"
23#include "OpenGL.h"
24#include "FenceSync.h"
25#include "graphics/Volatile.h"
26#include "common/Exception.h"
27#include "common/memory.h"
28
29#include <vector>
30#include <algorithm>
31
32namespace love
33{
34namespace graphics
35{
36namespace opengl
37{
38
// Number of per-frame sections kept in each fence-synchronized stream buffer.
// Three frames would normally be enough, but syncing currently only happens
// once per frame, so one extra frame is kept to lower the (small) chance of
// stalling.
static constexpr int BUFFER_FRAMES = 4;
42
43class StreamBufferClientMemory final : public love::graphics::StreamBuffer
44{
45public:
46
47 StreamBufferClientMemory(BufferType mode, size_t size)
48 : love::graphics::StreamBuffer(mode, size)
49 , data(nullptr)
50 {
51 try
52 {
53 data = new uint8[size];
54 }
55 catch (std::exception &)
56 {
57 throw love::Exception("Out of memory.");
58 }
59 }
60
61 virtual ~StreamBufferClientMemory()
62 {
63 delete[] data;
64 }
65
66 MapInfo map(size_t /*minsize*/) override
67 {
68 return MapInfo(data, bufferSize);
69 }
70
71 size_t unmap(size_t /*usedsize*/) override
72 {
73 return (size_t) data;
74 }
75
76 void markUsed(size_t /*usedsize*/) override { }
77 ptrdiff_t getHandle() const override { return 0; }
78
79private:
80
81 uint8 *data;
82
83}; // StreamBufferClientMemory
84
85class StreamBufferSubDataOrphan final : public love::graphics::StreamBuffer, public Volatile
86{
87public:
88
89 StreamBufferSubDataOrphan(BufferType mode, size_t size)
90 : love::graphics::StreamBuffer(mode, size)
91 , vbo(0)
92 , glMode(OpenGL::getGLBufferType(mode))
93 , data(nullptr)
94 , orphan(false)
95 {
96 try
97 {
98 data = new uint8[size];
99 }
100 catch (std::exception &)
101 {
102 throw love::Exception("Out of memory.");
103 }
104
105 loadVolatile();
106 }
107
108 virtual ~StreamBufferSubDataOrphan()
109 {
110 unloadVolatile();
111 delete[] data;
112 }
113
114 MapInfo map(size_t /*minsize*/) override
115 {
116 if (orphan)
117 {
118 orphan = false;
119 frameGPUReadOffset = 0;
120 gl.bindBuffer(mode, vbo);
121 glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
122 }
123
124 return MapInfo(data, bufferSize - frameGPUReadOffset);
125 }
126
127 size_t unmap(size_t usedsize) override
128 {
129 gl.bindBuffer(mode, vbo);
130 glBufferSubData(glMode, frameGPUReadOffset, usedsize, data);
131 return frameGPUReadOffset;
132 }
133
134 void markUsed(size_t usedsize) override
135 {
136 frameGPUReadOffset += usedsize;
137 }
138
139 void nextFrame() override
140 {
141 // Orphan the buffer before its first use in the next frame.
142 frameGPUReadOffset = 0;
143 orphan = true;
144 }
145
146 ptrdiff_t getHandle() const override { return vbo; }
147
148 bool loadVolatile() override
149 {
150 if (vbo != 0)
151 return true;
152
153 glGenBuffers(1, &vbo);
154 gl.bindBuffer(mode, vbo);
155 glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
156
157 frameGPUReadOffset = 0;
158 orphan = false;
159
160 return true;
161 }
162
163 void unloadVolatile() override
164 {
165 if (vbo == 0)
166 return;
167
168 gl.deleteBuffer(vbo);
169 vbo = 0;
170 }
171
172protected:
173
174 GLuint vbo;
175 GLenum glMode;
176
177 uint8 *data;
178
179 bool orphan;
180
181}; // StreamBufferSubDataOrphan
182
183class StreamBufferSync : public love::graphics::StreamBuffer
184{
185public:
186
187 StreamBufferSync(BufferType type, size_t size)
188 : love::graphics::StreamBuffer(type, size)
189 , frameIndex(0)
190 , syncs()
191 {}
192
193 virtual ~StreamBufferSync() {}
194
195 void nextFrame() override
196 {
197 // Insert a GPU fence for this frame's section of the data, we'll wait
198 // for it when we try to map that data for writing in subsequent frames.
199 syncs[frameIndex].fence();
200
201 frameIndex = (frameIndex + 1) % BUFFER_FRAMES;
202 frameGPUReadOffset = 0;
203 }
204
205 void markUsed(size_t usedsize) override
206 {
207 // We insert a fence for all data from this frame at the end of the
208 // frame (in nextFrame), rather than doing anything more fine-grained.
209 frameGPUReadOffset += usedsize;
210 }
211
212protected:
213
214 int frameIndex;
215 FenceSync syncs[BUFFER_FRAMES];
216
217}; // StreamBufferSync
218
219class StreamBufferMapSync final : public StreamBufferSync, public Volatile
220{
221public:
222
223 StreamBufferMapSync(BufferType type, size_t size)
224 : StreamBufferSync(type, size)
225 , vbo(0)
226 , glMode(OpenGL::getGLBufferType(mode))
227 {
228 loadVolatile();
229 }
230
231 ~StreamBufferMapSync()
232 {
233 unloadVolatile();
234 }
235
236 MapInfo map(size_t /*minsize*/) override
237 {
238 gl.bindBuffer(mode, vbo);
239
240 // Make sure this frame's section of the buffer is done being used.
241 syncs[frameIndex].cpuWait();
242
243 MapInfo info;
244 info.size = bufferSize - frameGPUReadOffset;
245
246 GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
247
248 size_t mapoffset = (frameIndex * bufferSize) + frameGPUReadOffset;
249 info.data = (uint8 *) glMapBufferRange(glMode, mapoffset, info.size, flags);
250
251 return info;
252 }
253
254 size_t unmap(size_t usedsize) override
255 {
256 gl.bindBuffer(mode, vbo);
257 glFlushMappedBufferRange(glMode, 0, usedsize);
258 glUnmapBuffer(glMode);
259
260 return (frameIndex * bufferSize) + frameGPUReadOffset;
261 }
262
263 ptrdiff_t getHandle() const override { return vbo; }
264
265 bool loadVolatile() override
266 {
267 if (vbo != 0)
268 return true;
269
270 glGenBuffers(1, &vbo);
271 gl.bindBuffer(mode, vbo);
272 glBufferData(glMode, bufferSize * BUFFER_FRAMES, nullptr, GL_STREAM_DRAW);
273
274 frameGPUReadOffset = 0;
275 frameIndex = 0;
276
277 return true;
278 }
279
280 void unloadVolatile() override
281 {
282 if (vbo != 0)
283 {
284 gl.deleteBuffer(vbo);
285 vbo = 0;
286 }
287
288 for (FenceSync &sync : syncs)
289 sync.cleanup();
290 }
291
292private:
293
294 GLuint vbo;
295 GLenum glMode;
296
297}; // StreamBufferMapSync
298
299class StreamBufferPersistentMapSync final : public StreamBufferSync, public Volatile
300{
301public:
302
303 // Coherent mapping is supposedly faster on intel/nvidia aside from a couple
304 // old nvidia GPUs.
305 StreamBufferPersistentMapSync(BufferType type, size_t size, bool coherent = true)
306 : StreamBufferSync(type, size)
307 , vbo(0)
308 , glMode(OpenGL::getGLBufferType(mode))
309 , data(nullptr)
310 , coherent(coherent)
311 {
312 loadVolatile();
313 }
314
315 ~StreamBufferPersistentMapSync()
316 {
317 unloadVolatile();
318 }
319
320 MapInfo map(size_t /*minsize*/) override
321 {
322 // Make sure this frame's section of the buffer is done being used.
323 syncs[frameIndex].cpuWait();
324
325 MapInfo info;
326 info.size = bufferSize - frameGPUReadOffset;
327 info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
328 return info;
329 }
330
331 size_t unmap(size_t usedsize) override
332 {
333 size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
334
335 if (!coherent)
336 {
337 gl.bindBuffer(mode, vbo);
338 glFlushMappedBufferRange(glMode, offset, usedsize);
339 }
340
341 return offset;
342 }
343
344 ptrdiff_t getHandle() const override { return vbo; }
345
346 bool loadVolatile() override
347 {
348 if (vbo != 0)
349 return true;
350
351 glGenBuffers(1, &vbo);
352 gl.bindBuffer(mode, vbo);
353
354 GLbitfield storageflags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
355 GLbitfield mapflags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
356
357 storageflags |= (coherent ? GL_MAP_COHERENT_BIT : 0);
358 mapflags |= (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT);
359
360 glBufferStorage(glMode, bufferSize * BUFFER_FRAMES, nullptr, storageflags);
361 data = (uint8 *) glMapBufferRange(glMode, 0, bufferSize * BUFFER_FRAMES, mapflags);
362
363 frameGPUReadOffset = 0;
364 frameIndex = 0;
365
366 return true;
367 }
368
369 void unloadVolatile() override
370 {
371 if (vbo != 0)
372 {
373 gl.bindBuffer(mode, vbo);
374 glUnmapBuffer(glMode);
375 gl.deleteBuffer(vbo);
376 vbo = 0;
377 }
378
379 for (FenceSync &sync : syncs)
380 sync.cleanup();
381 }
382
383private:
384
385 GLuint vbo;
386 GLenum glMode;
387 uint8 *data;
388 bool coherent;
389
390}; // StreamBufferPersistentMapSync
391
392class StreamBufferPinnedMemory final : public StreamBufferSync, public Volatile
393{
394public:
395
396 StreamBufferPinnedMemory(BufferType type, size_t size)
397 : StreamBufferSync(type, size)
398 , vbo(0)
399 , glMode(OpenGL::getGLBufferType(mode))
400 , data(nullptr)
401 , alignedSize(0)
402 {
403 size_t alignment = getPageSize();
404 alignedSize = alignUp(size * BUFFER_FRAMES, alignment);
405
406 if (!alignedMalloc((void **) &data, alignedSize, alignment))
407 throw love::Exception("Out of memory.");
408
409 if (!loadVolatile())
410 {
411 ptrdiff_t pointer = (ptrdiff_t) data;
412 alignedFree(data);
413 throw love::Exception("AMD Pinned Memory StreamBuffer implementation failed to create buffer (address: %p, alignment: %ld, aiigned size: %ld)", pointer, alignment, alignedSize);
414 }
415 }
416
417 ~StreamBufferPinnedMemory()
418 {
419 unloadVolatile();
420 alignedFree(data);
421 }
422
423 MapInfo map(size_t /*minsize*/) override
424 {
425 // Make sure this frame's section of the buffer is done being used.
426 syncs[frameIndex].cpuWait();
427
428 MapInfo info;
429 info.size = bufferSize - frameGPUReadOffset;
430 info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
431 return info;
432 }
433
434 size_t unmap(size_t /*usedsize*/) override
435 {
436 size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
437 return offset;
438 }
439
440 ptrdiff_t getHandle() const override { return vbo; }
441
442 bool loadVolatile() override
443 {
444 if (vbo != 0)
445 return true;
446
447 glGenBuffers(1, &vbo);
448
449 while (glGetError() != GL_NO_ERROR)
450 /* Clear errors. */;
451
452 glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, vbo);
453 glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, alignedSize, data, GL_STREAM_DRAW);
454
455 if (glGetError() != GL_NO_ERROR)
456 {
457 gl.deleteBuffer(vbo);
458 vbo = 0;
459 return false;
460 }
461
462 frameGPUReadOffset = 0;
463 frameIndex = 0;
464
465 return true;
466 }
467
468 void unloadVolatile() override
469 {
470 if (vbo != 0)
471 {
472 // Make sure the GPU has completed all work before freeing the
473 // memory. glFlush+sync.cpuWait doesn't seem to be enough.
474 glFinish();
475
476 gl.bindBuffer(mode, vbo);
477 gl.deleteBuffer(vbo);
478 vbo = 0;
479 }
480
481 for (FenceSync &sync : syncs)
482 sync.cleanup();
483 }
484
485private:
486
487 GLuint vbo;
488 GLenum glMode;
489 uint8 *data;
490 size_t alignedSize;
491
492}; // StreamBufferPinnedMemory
493
494love::graphics::StreamBuffer *CreateStreamBuffer(BufferType mode, size_t size)
495{
496 if (gl.isCoreProfile())
497 {
498 if (!gl.bugs.clientWaitSyncStalls)
499 {
500 // AMD's pinned memory seems to be faster than persistent mapping,
501 // on AMD GPUs.
502 if (GLAD_AMD_pinned_memory && gl.getVendor() == OpenGL::VENDOR_AMD)
503 {
504 try
505 {
506 return new StreamBufferPinnedMemory(mode, size);
507 }
508 catch (love::Exception &)
509 {
510 // According to the spec, pinned memory can fail if the RAM
511 // allocation can't be mapped to the GPU's address space.
512 // This seems to happen in practice on Mesa + amdgpu:
513 // https://bitbucket.org/rude/love/issues/1540
514 // Fall through to other implementations when that happens.
515 }
516 }
517
518 if (GLAD_VERSION_4_4 || GLAD_ARB_buffer_storage)
519 return new StreamBufferPersistentMapSync(mode, size);
520
521 // Most modern drivers have a separate internal thread which queues
522 // GL commands for the GPU. The queue causes mapping to stall until
523 // the items in the queue are flushed, which makes this approach
524 // slow on most drivers. On macOS, having a separate driver thread
525 // is opt-in via an API, and we don't do it, so we can use this
526 // instead of the (potentially slower) SubData approach.
527#ifdef LOVE_MACOSX
528 return new StreamBufferMapSync(mode, size);
529#endif
530 }
531
532 return new StreamBufferSubDataOrphan(mode, size);
533 }
534 else
535 return new StreamBufferClientMemory(mode, size);
536}
537
538} // opengl
539} // graphics
540} // love
541