StreamBuffer.cpp source code [LOVE/modules/graphics/opengl/StreamBuffer.cpp]

1	/**
2	* Copyright (c) 2006-2023 LOVE Development Team
3	*
4	* This software is provided 'as-is', without any express or implied
5	* warranty. In no event will the authors be held liable for any damages
6	* arising from the use of this software.
7	*
8	* Permission is granted to anyone to use this software for any purpose,
9	* including commercial applications, and to alter it and redistribute it
10	* freely, subject to the following restrictions:
11	*
12	* 1. The origin of this software must not be misrepresented; you must not
13	* claim that you wrote the original software. If you use this software
14	* in a product, an acknowledgment in the product documentation would be
15	* appreciated but is not required.
16	* 2. Altered source versions must be plainly marked as such, and must not be
17	* misrepresented as being the original software.
18	* 3. This notice may not be removed or altered from any source distribution.
19	**/
20
21	#include "common/config.h"
22	#include "StreamBuffer.h"
23	#include "OpenGL.h"
24	#include "FenceSync.h"
25	#include "graphics/Volatile.h"
26	#include "common/Exception.h"
27	#include "common/memory.h"
28
29	#include <vector>
30	#include <algorithm>
31
32	namespace love
33	{
34	namespace graphics
35	{
36	namespace opengl
37	{
38
// Number of per-frame sections allocated by the fence-synced stream buffers.
// Typically this should be 3 frames, but we only do per-frame syncing right now
// so we add an extra frame to reduce the (small) chance of stalls.
static const int BUFFER_FRAMES = 4;
42
43	class StreamBufferClientMemory final : public love::graphics::StreamBuffer
44	{
45	public:
46
47	StreamBufferClientMemory(BufferType mode, size_t size)
48	: love::graphics::StreamBuffer (mode, size)
49	, data(nullptr)
50	{
51	try
52	{
53	data = new uint8[size];
54	}
55	catch (std::exception &)
56	{
57	throw love::Exception ("Out of memory.");
58	}
59	}
60
61	virtual ~StreamBufferClientMemory()
62	{
63	delete[] data;
64	}
65
66	MapInfo map(size_t /minsize/) override
67	{
68	return MapInfo (data, bufferSize);
69	}
70
71	size_t unmap(size_t /usedsize/) override
72	{
73	return (size_t) data;
74	}
75
76	void markUsed(size_t /usedsize/) override { }
77	ptrdiff_t getHandle() const override { return `0`; }
78
79	private:
80
81	uint8 *data;
82
83	}; // StreamBufferClientMemory
84
85	class StreamBufferSubDataOrphan final : public love::graphics::StreamBuffer, public Volatile
86	{
87	public:
88
89	StreamBufferSubDataOrphan(BufferType mode, size_t size)
90	: love::graphics::StreamBuffer (mode, size)
91	, vbo(`0`)
92	, glMode(OpenGL::getGLBufferType(mode))
93	, data(nullptr)
94	, orphan(false)
95	{
96	try
97	{
98	data = new uint8[size];
99	}
100	catch (std::exception &)
101	{
102	throw love::Exception ("Out of memory.");
103	}
104
105	loadVolatile();
106	}
107
108	virtual ~StreamBufferSubDataOrphan()
109	{
110	unloadVolatile();
111	delete[] data;
112	}
113
114	MapInfo map(size_t /minsize/) override
115	{
116	if (orphan)
117	{
118	orphan = false;
119	frameGPUReadOffset = `0`;
120	gl.bindBuffer(mode, vbo);
121	glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
122	}
123
124	return MapInfo (data, bufferSize - frameGPUReadOffset);
125	}
126
127	size_t unmap(size_t usedsize) override
128	{
129	gl.bindBuffer(mode, vbo);
130	glBufferSubData(glMode, frameGPUReadOffset, usedsize, data);
131	return frameGPUReadOffset;
132	}
133
134	void markUsed(size_t usedsize) override
135	{
136	frameGPUReadOffset += usedsize;
137	}
138
139	void nextFrame() override
140	{
141	// Orphan the buffer before its first use in the next frame.
142	frameGPUReadOffset = `0`;
143	orphan = true;
144	}
145
146	ptrdiff_t getHandle() const override { return vbo; }
147
148	bool loadVolatile() override
149	{
150	if (vbo != `0`)
151	return true;
152
153	glGenBuffers(`1`, &vbo);
154	gl.bindBuffer(mode, vbo);
155	glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
156
157	frameGPUReadOffset = `0`;
158	orphan = false;
159
160	return true;
161	}
162
163	void unloadVolatile() override
164	{
165	if (vbo == `0`)
166	return;
167
168	gl.deleteBuffer(vbo);
169	vbo = `0`;
170	}
171
172	protected:
173
174	GLuint vbo;
175	GLenum glMode;
176
177	uint8 *data;
178
179	bool orphan;
180
181	}; // StreamBufferSubDataOrphan
182
183	class StreamBufferSync : public love::graphics::StreamBuffer
184	{
185	public:
186
187	StreamBufferSync(BufferType type, size_t size)
188	: love::graphics::StreamBuffer (type, size)
189	, frameIndex(`0`)
190	, syncs ()
191	{}
192
193	virtual ~StreamBufferSync() {}
194
195	void nextFrame() override
196	{
197	// Insert a GPU fence for this frame's section of the data, we'll wait
198	// for it when we try to map that data for writing in subsequent frames.
199	syncs[frameIndex].fence();
200
201	frameIndex = (frameIndex + `1`) % BUFFER_FRAMES;
202	frameGPUReadOffset = `0`;
203	}
204
205	void markUsed(size_t usedsize) override
206	{
207	// We insert a fence for all data from this frame at the end of the
208	// frame (in nextFrame), rather than doing anything more fine-grained.
209	frameGPUReadOffset += usedsize;
210	}
211
212	protected:
213
214	int frameIndex;
215	FenceSync syncs[BUFFER_FRAMES];
216
217	}; // StreamBufferSync
218
219	class StreamBufferMapSync final : public StreamBufferSync, public Volatile
220	{
221	public:
222
223	StreamBufferMapSync(BufferType type, size_t size)
224	: StreamBufferSync (type, size)
225	, vbo(`0`)
226	, glMode(OpenGL::getGLBufferType(mode))
227	{
228	loadVolatile();
229	}
230
231	~StreamBufferMapSync()
232	{
233	unloadVolatile();
234	}
235
236	MapInfo map(size_t /minsize/) override
237	{
238	gl.bindBuffer(mode, vbo);
239
240	// Make sure this frame's section of the buffer is done being used.
241	syncs[frameIndex].cpuWait();
242
243	MapInfo info;
244	info.size = bufferSize - frameGPUReadOffset;
245
246	GLbitfield flags = GL_MAP_WRITE_BIT \| GL_MAP_FLUSH_EXPLICIT_BIT \| GL_MAP_UNSYNCHRONIZED_BIT;
247
248	size_t mapoffset = (frameIndex * bufferSize) + frameGPUReadOffset;
249	info.data = (uint8 *) glMapBufferRange(glMode, mapoffset, info.size, flags);
250
251	return info;
252	}
253
254	size_t unmap(size_t usedsize) override
255	{
256	gl.bindBuffer(mode, vbo);
257	glFlushMappedBufferRange(glMode, `0`, usedsize);
258	glUnmapBuffer(glMode);
259
260	return (frameIndex * bufferSize) + frameGPUReadOffset;
261	}
262
263	ptrdiff_t getHandle() const override { return vbo; }
264
265	bool loadVolatile() override
266	{
267	if (vbo != `0`)
268	return true;
269
270	glGenBuffers(`1`, &vbo);
271	gl.bindBuffer(mode, vbo);
272	glBufferData(glMode, bufferSize * BUFFER_FRAMES, nullptr, GL_STREAM_DRAW);
273
274	frameGPUReadOffset = `0`;
275	frameIndex = `0`;
276
277	return true;
278	}
279
280	void unloadVolatile() override
281	{
282	if (vbo != `0`)
283	{
284	gl.deleteBuffer(vbo);
285	vbo = `0`;
286	}
287
288	for (FenceSync &sync : syncs)
289	sync.cleanup();
290	}
291
292	private:
293
294	GLuint vbo;
295	GLenum glMode;
296
297	}; // StreamBufferMapSync
298
299	class StreamBufferPersistentMapSync final : public StreamBufferSync, public Volatile
300	{
301	public:
302
303	// Coherent mapping is supposedly faster on intel/nvidia aside from a couple
304	// old nvidia GPUs.
305	StreamBufferPersistentMapSync(BufferType type, size_t size, bool coherent = true)
306	: StreamBufferSync (type, size)
307	, vbo(`0`)
308	, glMode(OpenGL::getGLBufferType(mode))
309	, data(nullptr)
310	, coherent(coherent)
311	{
312	loadVolatile();
313	}
314
315	~StreamBufferPersistentMapSync()
316	{
317	unloadVolatile();
318	}
319
320	MapInfo map(size_t /minsize/) override
321	{
322	// Make sure this frame's section of the buffer is done being used.
323	syncs[frameIndex].cpuWait();
324
325	MapInfo info;
326	info.size = bufferSize - frameGPUReadOffset;
327	info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
328	return info;
329	}
330
331	size_t unmap(size_t usedsize) override
332	{
333	size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
334
335	if (!coherent)
336	{
337	gl.bindBuffer(mode, vbo);
338	glFlushMappedBufferRange(glMode, offset, usedsize);
339	}
340
341	return offset;
342	}
343
344	ptrdiff_t getHandle() const override { return vbo; }
345
346	bool loadVolatile() override
347	{
348	if (vbo != `0`)
349	return true;
350
351	glGenBuffers(`1`, &vbo);
352	gl.bindBuffer(mode, vbo);
353
354	GLbitfield storageflags = GL_MAP_WRITE_BIT \| GL_MAP_PERSISTENT_BIT;
355	GLbitfield mapflags = GL_MAP_WRITE_BIT \| GL_MAP_PERSISTENT_BIT;
356
357	storageflags \|= (coherent ? GL_MAP_COHERENT_BIT : `0`);
358	mapflags \|= (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT);
359
360	glBufferStorage(glMode, bufferSize * BUFFER_FRAMES, nullptr, storageflags);
361	data = (uint8 ) glMapBufferRange(glMode, `0`, bufferSize BUFFER_FRAMES, mapflags);
362
363	frameGPUReadOffset = `0`;
364	frameIndex = `0`;
365
366	return true;
367	}
368
369	void unloadVolatile() override
370	{
371	if (vbo != `0`)
372	{
373	gl.bindBuffer(mode, vbo);
374	glUnmapBuffer(glMode);
375	gl.deleteBuffer(vbo);
376	vbo = `0`;
377	}
378
379	for (FenceSync &sync : syncs)
380	sync.cleanup();
381	}
382
383	private:
384
385	GLuint vbo;
386	GLenum glMode;
387	uint8 *data;
388	bool coherent;
389
390	}; // StreamBufferPersistentMapSync
391
392	class StreamBufferPinnedMemory final : public StreamBufferSync, public Volatile
393	{
394	public:
395
396	StreamBufferPinnedMemory(BufferType type, size_t size)
397	: StreamBufferSync (type, size)
398	, vbo(`0`)
399	, glMode(OpenGL::getGLBufferType(mode))
400	, data(nullptr)
401	, alignedSize(`0`)
402	{
403	size_t alignment = getPageSize();
404	alignedSize = alignUp(size * BUFFER_FRAMES, alignment);
405
406	if (!alignedMalloc((void **) &data, alignedSize, alignment))
407	throw love::Exception ("Out of memory.");
408
409	if (!loadVolatile())
410	{
411	ptrdiff_t pointer = (ptrdiff_t) data;
412	alignedFree(data);
413	throw love::Exception ("AMD Pinned Memory StreamBuffer implementation failed to create buffer (address: %p, alignment: %ld, aiigned size: %ld)", pointer, alignment, alignedSize);
414	}
415	}
416
417	~StreamBufferPinnedMemory()
418	{
419	unloadVolatile();
420	alignedFree(data);
421	}
422
423	MapInfo map(size_t /minsize/) override
424	{
425	// Make sure this frame's section of the buffer is done being used.
426	syncs[frameIndex].cpuWait();
427
428	MapInfo info;
429	info.size = bufferSize - frameGPUReadOffset;
430	info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
431	return info;
432	}
433
434	size_t unmap(size_t /usedsize/) override
435	{
436	size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
437	return offset;
438	}
439
440	ptrdiff_t getHandle() const override { return vbo; }
441
442	bool loadVolatile() override
443	{
444	if (vbo != `0`)
445	return true;
446
447	glGenBuffers(`1`, &vbo);
448
449	while (glGetError() != GL_NO_ERROR)
450	/ Clear errors. /;
451
452	glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, vbo);
453	glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, alignedSize, data, GL_STREAM_DRAW);
454
455	if (glGetError() != GL_NO_ERROR)
456	{
457	gl.deleteBuffer(vbo);
458	vbo = `0`;
459	return false;
460	}
461
462	frameGPUReadOffset = `0`;
463	frameIndex = `0`;
464
465	return true;
466	}
467
468	void unloadVolatile() override
469	{
470	if (vbo != `0`)
471	{
472	// Make sure the GPU has completed all work before freeing the
473	// memory. glFlush+sync.cpuWait doesn't seem to be enough.
474	glFinish();
475
476	gl.bindBuffer(mode, vbo);
477	gl.deleteBuffer(vbo);
478	vbo = `0`;
479	}
480
481	for (FenceSync &sync : syncs)
482	sync.cleanup();
483	}
484
485	private:
486
487	GLuint vbo;
488	GLenum glMode;
489	uint8 *data;
490	size_t alignedSize;
491
492	}; // StreamBufferPinnedMemory
493
494	love::graphics::StreamBuffer *CreateStreamBuffer(BufferType mode, size_t size)
495	{
496	if (gl.isCoreProfile())
497	{
498	if (!gl.bugs.clientWaitSyncStalls)
499	{
500	// AMD's pinned memory seems to be faster than persistent mapping,
501	// on AMD GPUs.
502	if (GLAD_AMD_pinned_memory && gl.getVendor() == OpenGL::VENDOR_AMD)
503	{
504	try
505	{
506	return new StreamBufferPinnedMemory (mode, size);
507	}
508	catch (love::Exception &)
509	{
510	// According to the spec, pinned memory can fail if the RAM
511	// allocation can't be mapped to the GPU's address space.
512	// This seems to happen in practice on Mesa + amdgpu:
513	// https://bitbucket.org/rude/love/issues/1540
514	// Fall through to other implementations when that happens.
515	}
516	}
517
518	if (GLAD_VERSION_4_4 \|\| GLAD_ARB_buffer_storage)
519	return new StreamBufferPersistentMapSync (mode, size);
520
521	// Most modern drivers have a separate internal thread which queues
522	// GL commands for the GPU. The queue causes mapping to stall until
523	// the items in the queue are flushed, which makes this approach
524	// slow on most drivers. On macOS, having a separate driver thread
525	// is opt-in via an API, and we don't do it, so we can use this
526	// instead of the (potentially slower) SubData approach.
527	#ifdef LOVE_MACOSX
528	return new StreamBufferMapSync(mode, size);
529	#endif
530	}
531
532	return new StreamBufferSubDataOrphan (mode, size);
533	}
534	else
535	return new StreamBufferClientMemory (mode, size);
536	}
537
538	} // opengl
539	} // graphics
540	} // love
541

Browse the source code of LOVE/modules/graphics/opengl/StreamBuffer.cpp