| 1 | /* |
| 2 | * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. |
| 3 | * |
| 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property |
| 5 | * and proprietary rights in and to this software, related documentation |
| 6 | * and any modifications thereto. Any use, reproduction, disclosure or |
| 7 | * distribution of this software and related documentation without an express |
| 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. |
| 9 | */ |
| 10 | |
| 11 | #ifndef PX_GPU_DISPATCHER_H |
| 12 | #define PX_GPU_DISPATCHER_H |
| 13 | |
| 14 | #include "pxtask/PxTask.h" |
| 15 | #include "pxtask/PxGpuCopyDesc.h" |
| 16 | |
| 17 | /* forward decl to avoid including <cuda.h> */ |
| 18 | typedef struct CUstream_st* CUstream; |
| 19 | |
| 20 | #ifndef PX_DOXYGEN |
| 21 | namespace physx |
| 22 | { |
| 23 | #endif |
| 24 | |
| 25 | PX_PUSH_PACK_DEFAULT |
| 26 | |
| 27 | class PxCudaContextManager; |
| 28 | class PxTaskManager; |
| 29 | |
| 30 | /** \brief A GpuTask dispatcher |
| 31 | * |
| 32 | * A PxGpuDispatcher executes GpuTasks submitted by one or more TaskManagers (one |
| 33 | * or more scenes). It maintains a CPU worker thread which waits on GpuTask |
| 34 | * "groups" to be submitted. The submission API is explicitly sessioned so that |
| 35 | * GpuTasks are dispatched together as a group whenever possible to improve |
| 36 | * parallelism on the GPU. |
| 37 | * |
| 38 | * A PxGpuDispatcher cannot be allocated ad-hoc, they are created as a result of |
| 39 | * creating a PxCudaContextManager. Every PxCudaContextManager has a PxGpuDispatcher |
| 40 | * instance that can be queried. In this way, each PxGpuDispatcher is tied to |
| 41 | * exactly one CUDA context. |
| 42 | * |
| 43 | * A scene will use CPU fallback Tasks for GpuTasks if the PxTaskManager provided |
| 44 | * to it does not have a PxGpuDispatcher. For this reason, the PxGpuDispatcher must |
| 45 | * be assigned to the PxTaskManager before the PxTaskManager is given to a scene. |
| 46 | * |
| 47 | * Multiple TaskManagers may safely share a single PxGpuDispatcher instance, thus |
| 48 | * enabling scenes to share a CUDA context. |
| 49 | * |
 * Only failureDetected() is intended for use by the user. The rest of the
 * PxGpuDispatcher public methods are reserved for internal use by
 * TaskManagers and GpuTasks.
| 53 | */ |
class PxGpuDispatcher
{
public:
	/** \brief Record the start of a simulation step
	 *
	 * A PxTaskManager calls this function to record the beginning of a simulation
	 * step. The PxGpuDispatcher uses this notification to initialize the
	 * profiler state.
	 */
	virtual void startSimulation() = 0;

	/** \brief Record the start of a GpuTask batch submission
	 *
	 * A PxTaskManager calls this function to notify the PxGpuDispatcher that one or
	 * more GpuTasks are about to be submitted for execution. The PxGpuDispatcher
	 * will not read the incoming task queue until it receives one finishGroup()
	 * call for each startGroup() call. This is to ensure as many GpuTasks as
	 * possible are executed together as a group, generating optimal parallelism
	 * on the GPU.
	 */
	virtual void startGroup() = 0;

	/** \brief Submit a GpuTask for execution
	 *
	 * Submitted tasks are pushed onto an incoming queue. The PxGpuDispatcher
	 * will take the contents of this queue every time the pending group count
	 * reaches 0 and run the group of submitted GpuTasks as an interleaved
	 * group.
	 *
	 * \param task The GpuTask to queue for execution in the current group.
	 */
	virtual void submitTask(PxTask& task) = 0;

	/** \brief Record the end of a GpuTask batch submission
	 *
	 * A PxTaskManager calls this function to notify the PxGpuDispatcher that it is
	 * done submitting a group of GpuTasks (GpuTasks which were all made ready
	 * to run by the same prerequisite dependency becoming resolved). If no
	 * other group submissions are in progress, the PxGpuDispatcher will execute
	 * the set of ready tasks.
	 */
	virtual void finishGroup() = 0;

	/** \brief Add a CUDA completion prerequisite dependency to a task
	 *
	 * A GpuTask calls this function to add a prerequisite dependency on another
	 * task (usually a CpuTask) preventing that task from starting until all of
	 * the CUDA kernels and copies already launched have been completed. The
	 * PxGpuDispatcher will increment that task's reference count, blocking its
	 * execution, until the CUDA work is complete.
	 *
	 * This is generally only required when a CPU task is expecting the results
	 * of the CUDA kernels to have been copied into host memory.
	 *
	 * This mechanism is not required to ensure CUDA kernels and
	 * copies are issued in the correct order. Kernel issue order is determined
	 * by normal task dependencies. The rule of thumb is to only use a blocking
	 * completion prerequisite if the task in question depends on a completed
	 * GPU->Host DMA.
	 *
	 * The PxGpuDispatcher issues a blocking event record to CUDA for the purposes
	 * of tracking the already submitted CUDA work. When this event is
	 * resolved, the PxGpuDispatcher manually decrements the reference count of
	 * the specified task, allowing it to execute (assuming it does not have
	 * other pending prerequisites).
	 *
	 * \param task The task whose execution must wait on already-launched CUDA work.
	 */
	virtual void addCompletionPrereq(PxBaseTask& task) = 0;

	/** \brief Retrieve the PxCudaContextManager associated with this
	 * PxGpuDispatcher
	 *
	 * Every PxCudaContextManager has one PxGpuDispatcher, and every PxGpuDispatcher
	 * has one PxCudaContextManager.
	 *
	 * \return The PxCudaContextManager this dispatcher is bound to.
	 */
	virtual PxCudaContextManager* getCudaContextManager() = 0;

	/** \brief Record the end of a simulation frame
	 *
	 * A PxTaskManager calls this function to record the completion of its
	 * dependency graph. If profiling is enabled, the PxGpuDispatcher will
	 * trigger the retrieval of profiling data from the GPU at this point.
	 */
	virtual void stopSimulation() = 0;

	/** \brief Returns true if a CUDA call has returned a non-recoverable error
	 *
	 * A return value of true indicates a fatal error has occurred. To protect
	 * itself, the PxGpuDispatcher enters a fall through mode that allows GpuTasks
	 * to complete without being executed. This allows simulations to continue
	 * but leaves GPU content static or corrupted.
	 *
	 * The user may try to recover from these failures by deleting GPU content
	 * so the visual artifacts are minimized. But there is no way to recover
	 * the state of the GPU actors before the failure. Once a CUDA context is
	 * in this state, the only recourse is to create a new CUDA context, a new
	 * scene, and start over.
	 *
	 * This is our "Best Effort" attempt to not turn a soft failure into a hard
	 * failure because continued use of a CUDA context after it has returned an
	 * error will usually result in a driver reset. However if the initial
	 * failure was serious enough, a reset may have already occurred by the time
	 * we learn of it.
	 *
	 * \return True if the dispatcher is in failure (fall-through) mode.
	 */
	virtual bool failureDetected() const = 0;

	/** \brief Force the PxGpuDispatcher into failure mode
	 *
	 * This API should be used if user code detects a non-recoverable CUDA
	 * error. This ensures the PxGpuDispatcher does not launch any further
	 * CUDA work. Subsequent calls to failureDetected() will return true.
	 */
	virtual void forceFailureMode() = 0;

	/** \brief Returns a pointer to the current in-use profile buffer
	 *
	 * The returned pointer should be passed to all kernel launches to enable
	 * CTA/Warp level profiling. If a data collector is not attached, or CTA
	 * profiling is not enabled, the pointer will be zero.
	 *
	 * \return The active profile buffer, or zero when profiling is unavailable.
	 */
	virtual void* getCurrentProfileBuffer() const = 0;

	/** \brief Register kernel names with PlatformAnalyzer
	 *
	 * The returned PxU16 must be stored and used as a base offset for the ID
	 * passed to the KERNEL_START|STOP_EVENT macros.
	 *
	 * \param count The number of kernel name strings being registered.
	 * \return The base offset assigned to the registered kernel names.
	 */
	virtual PxU16 registerKernelNames(const char**, PxU16 count) = 0;

	/** \brief Launch a copy kernel with arbitrary number of copy commands
	 *
	 * This method is intended to be called from Kernel GpuTasks, but it can
	 * function outside of that context as well.
	 *
	 * If count is 1, the descriptor is passed to the kernel as arguments, so it
	 * may be declared on the stack.
	 *
	 * If count is greater than 1, the kernel will read the descriptors out of
	 * host memory. Because of this, the descriptor array must be located in
	 * page locked (pinned) memory. The provided descriptors may be modified by
	 * this method (converting host pointers to their GPU mapped equivalents)
	 * and should be considered *owned* by CUDA until the current batch of work
	 * has completed, so descriptor arrays should not be freed or modified until
	 * you have received a completion notification.
	 *
	 * If your GPU does not support mapping of page locked memory (SM>=1.1),
	 * this function degrades to calling CUDA copy methods.
	 *
	 * \param desc   Pointer to one or more copy descriptors (pinned memory when count > 1).
	 * \param count  Number of descriptors in the array.
	 * \param stream CUDA stream on which the copy work is issued.
	 */
	virtual void launchCopyKernel(PxGpuCopyDesc* desc, PxU32 count, CUstream stream) = 0;

	/** \brief Query pre launch task that runs before launching gpu kernels.
	 *
	 * This is part of an optional feature to schedule multiple gpu features
	 * at the same time to get kernels to run in parallel.
	 * \note Do *not* set the continuation on the returned task, but use addPreLaunchDependent().
	 */
	virtual PxBaseTask& getPreLaunchTask() = 0;

	/** \brief Adds a gpu launch task that gets executed after the pre launch task.
	 *
	 * This is part of an optional feature to schedule multiple gpu features
	 * at the same time to get kernels to run in parallel.
	 * \note Each call adds a reference to the pre-launch task.
	 *
	 * \param dependent The task to run once the pre-launch task has executed.
	 */
	virtual void addPreLaunchDependent(PxBaseTask& dependent) = 0;

	/** \brief Query post launch task that runs after the gpu is done.
	 *
	 * This is part of an optional feature to schedule multiple gpu features
	 * at the same time to get kernels to run in parallel.
	 * \note Do *not* set the continuation on the returned task, but use addPostLaunchDependent().
	 */
	virtual PxBaseTask& getPostLaunchTask() = 0;

	/** \brief Adds a task that gets executed after the post launch task.
	 *
	 * This is part of an optional feature to schedule multiple gpu features
	 * at the same time to get kernels to run in parallel.
	 * \note Each call adds a reference to the post-launch task.
	 *   (Original said "pre-launch task" — a copy-paste slip from
	 *   addPreLaunchDependent(); this method mirrors that API for the
	 *   post-launch task returned by getPostLaunchTask().)
	 *
	 * \param dependent The task to run once the post launch task has executed.
	 */
	virtual void addPostLaunchDependent(PxBaseTask& dependent) = 0;

protected:
	/** \brief protected destructor
	 *
	 * GpuDispatchers are allocated and freed by their PxCudaContextManager.
	 */
	virtual ~PxGpuDispatcher() {}
};
| 240 | |
| 241 | PX_POP_PACK |
| 242 | |
| 243 | #ifndef PX_DOXYGEN |
| 244 | } // end physx namespace |
| 245 | #endif |
| 246 | |
| 247 | #endif |
| 248 | |