1//************************************ bs::framework - Copyright 2018 Marko Pintera **************************************//
2//*********** Licensed under the MIT license. See LICENSE.md for full terms. This notice is not to be removed. ***********//
3#pragma once
4
5#include "BsCorePrerequisites.h"
6#include "Utility/BsModule.h"
7#include "Profiling/BsRenderStats.h"
8#include "Allocators/BsPoolAlloc.h"
9
10namespace bs
11{
12 /** @addtogroup Profiling
13 * @{
14 */
15
16 /** Contains various profiler statistics about a single GPU profiling sample. */
17 struct GPUProfileSample
18 {
19 String name; /**< Name of the sample for easier identification. */
20 float timeMs; /**< Time in milliseconds it took to execute the sampled block. */
21
22 UINT32 numDrawCalls; /**< Number of draw calls that happened. */
23 UINT32 numRenderTargetChanges; /**< How many times was render target changed. */
24 UINT32 numPresents; /**< How many times did a buffer swap happen on a double buffered render target. */
25 UINT32 numClears; /**< How many times was render target cleared. */
26
27 UINT32 numVertices; /**< Total number of vertices sent to the GPU. */
28 UINT32 numPrimitives; /**< Total number of primitives sent to the GPU. */
29 UINT32 numDrawnSamples; /**< Number of samples drawn by the GPU. */
30
31 UINT32 numPipelineStateChanges; /**< How many times did the pipeline state change. */
32
33 UINT32 numGpuParamBinds; /**< How many times were GPU parameters bound. */
34 UINT32 numVertexBufferBinds; /**< How many times was a vertex buffer bound. */
35 UINT32 numIndexBufferBinds; /**< How many times was an index buffer bound. */
36
37 UINT32 numResourceWrites; /**< How many times were GPU resources written to. */
38 UINT32 numResourceReads; /**< How many times were GPU resources read from. */
39
40 UINT32 numObjectsCreated; /**< How many GPU objects were created. */
41 UINT32 numObjectsDestroyed; /**< How many GPU objects were destroyed. */
42
43 Vector<GPUProfileSample> children;
44 };
45
46 /** Profiler report containing information about GPU sampling data from a single frame. */
47 struct GPUProfilerReport
48 {
49 GPUProfileSample frameSample; /**< Sample containing data for entire frame. */
50 };
51
52 /**
53 * Profiler that measures time and amount of various GPU operations.
54 *
55 * @note Core thread only except where noted otherwise.
56 */
57 class BS_CORE_EXPORT ProfilerGPU : public Module<ProfilerGPU>
58 {
59 private:
60 struct ProfiledSample
61 {
62 ProfilerString name;
63 RenderStatsData startStats;
64 RenderStatsData endStats;
65 SPtr<ct::TimerQuery> activeTimeQuery;
66 SPtr<ct::OcclusionQuery> activeOcclusionQuery;
67
68 Vector<ProfiledSample*> children;
69 };
70
71 public:
72 ProfilerGPU();
73 ~ProfilerGPU();
74
75 /**
76 * Signals a start of a new frame. Every frame will generate a separate profiling report. This call must be followed
77 * by endFrame(), and any sampling operations must happen between beginFrame() and endFrame().
78 */
79 void beginFrame();
80
81 /**
82 * Signals an end of the currently sampled frame. Results of the sampling will be available once
83 * getNumAvailableReports increments. This may take a while as the sampling is scheduled on the core thread and
84 * on the GPU.
85 *
86 * @param[in] discard If true, the results of the frame will not be resolved and it will be discarded.
87 */
88 void endFrame(bool discard = false);
89
90 /**
91 * Begins sample measurement. Must be followed by endSample().
92 *
93 * @param[in] name Unique name for the sample you can later use to find the sampling data.
94 *
95 * @note Must be called between beginFrame()/endFrame() calls.
96 */
97 void beginSample(ProfilerString name);
98
99 /**
100 * Ends sample measurement.
101 *
102 * @param[in] name Unique name for the sample.
103 *
104 * @note
105 * Unique name is primarily needed to more easily identify mismatched begin/end sample pairs. Otherwise the name in
106 * beginSample() would be enough. Must be called between beginFrame()/endFrame() calls.
107 */
108 void endSample(const ProfilerString& name);
109
110 /**
111 * Returns number of profiling reports that are ready but haven't been retrieved yet.
112 *
113 * @note
114 * There is an internal limit of maximum number of available reports, where oldest ones will get deleted so make
115 * sure to call this often if you don't want to miss some.
116 * @note
117 * Thread safe.
118 */
119 UINT32 getNumAvailableReports();
120
121 /**
122 * Gets the oldest report available and removes it from the internal list. Throws an exception if no reports are
123 * available.
124 *
125 * @note Thread safe.
126 */
127 GPUProfilerReport getNextReport();
128
129 public:
130 // ***** INTERNAL ******
131 /** @name Internal
132 * @{
133 */
134
135 /**
136 * To be called once per frame from the Core thread.
137 */
138 void _update();
139
140 /** @} */
141
142 private:
143 /** Assigns start values for the provided sample. */
144 void beginSampleInternal(ProfiledSample& sample, bool issueOcclusion);
145
146 /** Assigns end values for the provided sample. */
147 void endSampleInternal(ProfiledSample& sample);
148
149 /** Creates a new timer query or returns an existing free query. */
150 SPtr<ct::TimerQuery> getTimerQuery() const;
151
152 /** Creates a new occlusion query or returns an existing free query. */
153 SPtr<ct::OcclusionQuery> getOcclusionQuery() const;
154
155 /** Frees the memory used by all the child samples. */
156 void freeSample(ProfiledSample& sample);
157
158 /** Resolves an active sample and converts it to report sample. */
159 void resolveSample(const ProfiledSample& sample, GPUProfileSample& reportSample);
160
161 private:
162 ProfiledSample mFrameSample;
163 bool mIsFrameActive = false;
164 Stack<ProfiledSample*> mActiveSamples;
165
166 Queue<ProfiledSample> mUnresolvedFrames;
167 GPUProfilerReport* mReadyReports = nullptr;
168
169 static const UINT32 MAX_QUEUE_ELEMENTS;
170 UINT32 mReportHeadPos = 0;
171 UINT32 mReportCount = 0;
172
173 PoolAlloc<sizeof(ProfiledSample), 256> mSamplePool;
174
175 mutable Stack<SPtr<ct::TimerQuery>> mFreeTimerQueries;
176 mutable Stack<SPtr<ct::OcclusionQuery>> mFreeOcclusionQueries;
177
178 Mutex mMutex;
179 };
180
181 /** Provides global access to ProfilerGPU instance. */
182 BS_CORE_EXPORT ProfilerGPU& gProfilerGPU();
183
184 /** Profiling macros that allow profiling functionality to be disabled at compile time. */
185#if BS_PROFILING_ENABLED
186 #define BS_GPU_PROFILE_BEGIN(name) gProfilerGPU().beginSample(name);
187 #define BS_GPU_PROFILE_END(name) gProfilerGPU().endSample(name);
188#else
189 #define BS_GPU_PROFILE_BEGIN(name)
190 #define BS_GPU_PROFILE_END(name)
191#endif
192
193 /**
194 * Helper class that performs GPU profiling in the current block. Profiling sample is started when the class is
195 * constructed and ended upon destruction.
196 */
197 struct ProfileGPUBlock
198 {
199#if BS_PROFILING_ENABLED
200 ProfileGPUBlock(ProfilerString name)
201 {
202 mSampleName = std::move(name);
203 gProfilerGPU().beginSample(mSampleName);
204 }
205#else
206 ProfileGPUBlock(const ProfilerString& name)
207 { }
208#endif
209
210#if BS_PROFILING_ENABLED
211 ~ProfileGPUBlock()
212 {
213 gProfilerGPU().endSample(mSampleName);
214 }
215#endif
216
217 private:
218#if BS_PROFILING_ENABLED
219 ProfilerString mSampleName;
220#endif
221 };
222
223 /** @} */
224}