/*
 * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 */


#ifndef PX_CUDA_CONTEXT_MANAGER_H
#define PX_CUDA_CONTEXT_MANAGER_H

#include "pxtask/PxCudaMemoryManager.h"

/* Forward decl to avoid inclusion of cuda.h */
typedef struct CUctx_st *CUcontext;
typedef struct CUgraphicsResource_st *CUgraphicsResource;

#ifndef PX_DOXYGEN
namespace physx
{
#endif

class PxProfileZoneManager;

/** \brief Possible graphics/CUDA interoperability modes for a context */
struct PxCudaInteropMode
{
    /**
     * \brief Possible graphics/CUDA interoperability modes for a context
     */
    enum Enum
    {
        NO_INTEROP = 0,
        D3D10_INTEROP,
        D3D11_INTEROP,
        OGL_INTEROP,

        COUNT
    };
};


//! \brief Descriptor used to create a PxCudaContextManager
class PxCudaContextManagerDesc
{
public:
    /**
     * \brief The CUDA context to manage
     *
     * If left NULL, the PxCudaContextManager will create a new context. If
     * graphicsDevice is also not NULL, this new CUDA context will be bound to
     * that graphics device, enabling the use of CUDA/Graphics interop features.
     *
     * If ctx is not NULL, the specified context must be applied to the thread
     * that is allocating the PxCudaContextManager at creation time (that is, it
     * cannot be popped). The PxCudaContextManager will take ownership of the
     * context until the manager is released. All access to the context must be
     * gated by lock acquisition.
     *
     * If the user provides a context for the PxCudaContextManager, the context
     * _must_ have been created either on the GPU ordinal returned by
     * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device.
     *
     * It is perfectly acceptable to allocate device or host pinned memory from
     * the context outside the scope of the PxCudaMemoryManager, so long as you
     * manage its eventual cleanup.
     */
    CUcontext *ctx;

    /**
     * \brief D3D device pointer or OpenGL context handle
     *
     * Only applicable when ctx is NULL, thus forcing a new context to be
     * created. In that case, the created context will be bound to this
     * graphics device.
     */
    void *graphicsDevice;

#if PX_SUPPORT_GPU_PHYSX
    /**
     * \brief Application-specific GUID
     *
     * If your application employs PhysX modules that use CUDA, you need to use
     * a GUID so that patches for new architectures can be released for your
     * game. You can obtain a GUID for your application from NVIDIA.
     */
    const char* appGUID;
#endif
    /**
     * \brief The CUDA/Graphics interop mode of this context
     *
     * If ctx is NULL, this value describes the nature of the graphicsDevice
     * pointer provided by the user. Otherwise it describes the nature of the
     * context provided by the user.
     */
    PxCudaInteropMode::Enum interopMode;


    /**
     * \brief Size of persistent memory
     *
     * This memory is allocated up front and stays allocated until the
     * PxCudaContextManager is released. The size is given in bytes and must be
     * a power of two larger than the page size. Set to 0 to use only dynamic
     * pages.
     *
     * Note: On Windows Vista and later, there is a per-memory-allocation
     * overhead to every CUDA work submission, so we recommend that you
     * carefully tune this initial base memory size to closely approximate the
     * amount of memory your application will consume.
     */
    PxU32 memoryBaseSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Size of memory pages
     *
     * The memory manager will dynamically grow and shrink in blocks that are
     * multiples of this page size. The size must be a power of two greater
     * than 0.
     */
    PxU32 memoryPageSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Maximum amount of memory the memory manager will allocate
     */
    PxU32 maxMemorySize[PxCudaBufferMemorySpace::COUNT];

    PX_INLINE PxCudaContextManagerDesc()
    {
        ctx = NULL;
        interopMode = PxCudaInteropMode::NO_INTEROP;
        graphicsDevice = 0;
#if PX_SUPPORT_GPU_PHYSX
        appGUID = NULL;
#endif
        for(PxU32 i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
        {
            memoryBaseSize[i] = 0;
            memoryPageSize[i] = 2 * 1024*1024;
            maxMemorySize[i] = PX_MAX_U32;
        }
    }
};
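
/*
 * Example (non-normative sketch): filling out a PxCudaContextManagerDesc and
 * letting the manager create its own CUDA context. The per-space sizes below
 * are arbitrary illustrative values; gFoundation is an assumed application
 * PxFoundation instance, and PxCreateCudaContextManager() is declared later in
 * this header under PX_SUPPORT_GPU_PHYSX.
 *
 * \code
 * physx::PxCudaContextManagerDesc desc;            // ctx == NULL: a new context is created
 * desc.interopMode = physx::PxCudaInteropMode::NO_INTEROP;
 * for(physx::PxU32 i = 0; i < physx::PxCudaBufferMemorySpace::COUNT; i++)
 * {
 *     desc.memoryBaseSize[i] = 8 * 1024 * 1024;    // 8 MB persistent heap per memory space
 *     desc.memoryPageSize[i] = 2 * 1024 * 1024;    // grow/shrink in 2 MB pages
 * }
 * physx::PxCudaContextManager* mgr =
 *     physx::PxCreateCudaContextManager(*gFoundation, desc, NULL);  // NULL: no CUDA profiling
 * \endcode
 */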


/**
 * \brief Manages memory, thread locks, and task scheduling for a CUDA context
 *
 * A PxCudaContextManager manages access to a single CUDA context, allowing it to
 * be shared between multiple scenes. Memory allocations are dynamic: they start
 * from an initial heap size and grow on demand by a configurable page size.
 * The context must be acquired from the manager before using any CUDA APIs.
 *
 * The PxCudaContextManager is based on the CUDA driver API and explicitly does
 * not support the CUDA runtime API (aka CUDART).
 *
 * To enable CUDA use by an APEX scene, create a PxCudaContextManager (supplying
 * your own CUDA context, or allowing a new context to be allocated for you),
 * retrieve its PxGpuDispatcher via the getGpuDispatcher() method, and assign
 * that dispatcher to the TaskManager given to the scene via its NxApexSceneDesc.
 */
class PxCudaContextManager
{
public:
    /**
     * \brief Acquire the CUDA context for the current thread
     *
     * Acquisitions are allowed to be recursive within a single thread.
     * You can acquire the context multiple times, so long as you release
     * it the same number of times.
     *
     * The context must be acquired before using most CUDA functions.
     *
     * It is not necessary to acquire the CUDA context inside GpuTask
     * launch functions, because the PxGpuDispatcher will have already
     * acquired the context for its worker thread. However, it is not
     * harmful to (re)acquire the context in code that is shared between
     * GpuTasks and non-task functions.
     */
    virtual void acquireContext() = 0;

    /**
     * \brief Release the CUDA context from the current thread
     *
     * The CUDA context should be released as soon as practically
     * possible, to allow other CPU threads (including the
     * PxGpuDispatcher) to work efficiently.
     */
    virtual void releaseContext() = 0;
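
    /*
     * Example (non-normative sketch): bracketing raw CUDA driver API work with
     * acquireContext()/releaseContext(), where mgr is a pointer to this
     * PxCudaContextManager. This assumes cuda.h has been included by the
     * caller; the allocation size is arbitrary. The PxScopedCudaLock helper
     * defined later in this header performs the same bracketing automatically.
     *
     * \code
     * mgr->acquireContext();                           // bind the managed context to this thread
     * CUdeviceptr dptr = 0;
     * CUresult err = cuMemAlloc(&dptr, 1024 * 1024);   // driver API call under the lock
     * if (err == CUDA_SUCCESS)
     *     cuMemFree(dptr);
     * mgr->releaseContext();                           // release as soon as practical
     * \endcode
     */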

    /**
     * \brief Return the PxCudaMemoryManager instance associated with this
     * CUDA context
     */
    virtual PxCudaMemoryManager *getMemoryManager() = 0;

    /**
     * \brief Return the PxGpuDispatcher instance associated with this
     * CUDA context
     */
    virtual class PxGpuDispatcher *getGpuDispatcher() = 0;

    /**
     * \brief Check whether the context manager has a valid CUDA context
     *
     * This method should be called after creating a PxCudaContextManager,
     * especially if the manager was responsible for allocating its own
     * CUDA context (desc.ctx == NULL). If it returns false, there is
     * no point in assigning this manager's PxGpuDispatcher to a
     * TaskManager, as it will be unable to execute GpuTasks.
     */
    virtual bool contextIsValid() const = 0;

    /* Query CUDA context and device properties, without acquiring the context */

    virtual bool supportsArchSM10() const = 0;  //!< G80
    virtual bool supportsArchSM11() const = 0;  //!< G92
    virtual bool supportsArchSM12() const = 0;  //!< GT200
    virtual bool supportsArchSM13() const = 0;  //!< GT260
    virtual bool supportsArchSM20() const = 0;  //!< GF100
    virtual bool supportsArchSM30() const = 0;  //!< GK100
    virtual bool supportsArchSM35() const = 0;  //!< GK110
    virtual bool supportsArchSM50() const = 0;  //!< GM100
    virtual bool supportsArchSM52() const = 0;  //!< GM200
    virtual bool isIntegrated() const = 0;      //!< true if the GPU is an integrated (MCP) part
    virtual bool canMapHostMemory() const = 0;  //!< true if the GPU can map host memory (zero-copy)
    virtual int getDriverVersion() const = 0;   //!< returns the cached value of cuDriverGetVersion()
    virtual size_t getDeviceTotalMemBytes() const = 0;  //!< returns the cached device memory size
    virtual int getMultiprocessorCount() const = 0;     //!< returns the cached SM unit count
    virtual unsigned int getClockRate() const = 0;      //!< returns the cached SM clock frequency
    virtual int getSharedMemPerBlock() const = 0;       //!< returns the total amount of shared memory available per block, in bytes
    virtual unsigned int getMaxThreadsPerBlock() const = 0;  //!< returns the maximum number of threads per block
    virtual const char *getDeviceName() const = 0;      //!< returns the device name retrieved from the driver
    virtual PxCudaInteropMode::Enum getInteropMode() const = 0;  //!< returns the interop mode the context was created with

    virtual void setUsingConcurrentStreams(bool) = 0;   //!< turn on/off the use of concurrent streams for GPU work
    virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams
    /* End of query methods that do not require the context to be acquired */
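
    /*
     * Example (non-normative sketch): logging a few cached device properties,
     * where mgr is a pointer to this PxCudaContextManager. None of these calls
     * require the CUDA context to be acquired; printf is used purely for
     * illustration.
     *
     * \code
     * printf("GPU: %s, %d SMs, %zu bytes of device memory, driver %d\n",
     *        mgr->getDeviceName(),
     *        mgr->getMultiprocessorCount(),
     *        mgr->getDeviceTotalMemBytes(),
     *        mgr->getDriverVersion());
     * \endcode
     */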

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from OpenGL) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param buffer [IN] GLuint buffer index to be mapped to CUDA
     */
    virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, PxU32 buffer) = 0;
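
    /*
     * Example (non-normative sketch): registering an OpenGL buffer object for
     * interop and unregistering it before the buffer is destroyed. Here mgr is
     * a pointer to this PxCudaContextManager, glBufferId is assumed to be a
     * valid GLuint created by the application, and the manager is assumed to
     * have been created with OGL_INTEROP.
     *
     * \code
     * CUgraphicsResource cudaResource = NULL;
     * if (mgr->registerResourceInCudaGL(cudaResource, glBufferId))
     * {
     *     // ... map and use the resource with the CUDA driver API ...
     *     mgr->unregisterResourceInCuda(cudaResource);  // must happen before the GL buffer is released
     * }
     * \endcode
     */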

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from Direct3D) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param resourcePointer [IN] a pointer to either an IDirect3DResource9, ID3D10Device, or ID3D11Resource to be registered
     */
    virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer) = 0;

    /**
     * \brief Unregister a rendering resource with CUDA
     *
     * If a render resource was successfully registered with CUDA using
     * the registerResourceInCuda***() methods, this function must be called
     * to unregister the resource before it can be released.
     */
    virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0;

    /**
     * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
     * \note If using CUDA Interop, this will always return 0
     * \returns 1 if there is a dedicated PhysX GPU
     * \returns 0 if there is NOT a dedicated PhysX GPU
     * \returns -1 if the routine is not implemented
     */
    virtual int usingDedicatedPhysXGPU() const = 0;

    /**
     * \brief Release the PxCudaContextManager
     *
     * When the manager instance is released, it also releases its
     * PxGpuDispatcher instance and PxCudaMemoryManager. Before the memory
     * manager is released, it frees all allocated memory pages. If the
     * PxCudaContextManager created the CUDA context it was responsible
     * for, it also frees that context.
     *
     * Do not release the PxCudaContextManager if there are any scenes
     * using its PxGpuDispatcher. Those scenes must be released first,
     * since there is no safe way to remove a PxGpuDispatcher from a
     * TaskManager once the TaskManager has been given to a scene.
     */
    virtual void release() = 0;

protected:

    /**
     * \brief Protected destructor; use the release() method
     */
    virtual ~PxCudaContextManager() {}
};

/**
 * \brief Convenience class for holding the CUDA lock within a scope
 */
class PxScopedCudaLock
{
public:
    /**
     * \brief PxScopedCudaLock constructor
     */
    PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx)
    {
        mCtx->acquireContext();
    }

    /**
     * \brief PxScopedCudaLock destructor
     */
    ~PxScopedCudaLock()
    {
        mCtx->releaseContext();
    }

protected:

    /**
     * \brief CUDA context manager pointer (initialized in the constructor)
     */
    PxCudaContextManager* mCtx;
};
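
/*
 * Example (non-normative sketch): using PxScopedCudaLock to acquire and
 * release the context automatically around a block of CUDA work. Here mgr is
 * an assumed pointer to a valid PxCudaContextManager; the body of the block
 * is elided.
 *
 * \code
 * {
 *     physx::PxScopedCudaLock lock(*mgr);   // acquireContext() in the constructor
 *     // ... CUDA driver API calls ...
 * }                                         // releaseContext() in the destructor
 * \endcode
 */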

#if PX_SUPPORT_GPU_PHYSX
/**
 * \brief Ask the NVIDIA control panel which GPU has been selected for use by
 * PhysX. Returns -1 if no PhysX-capable GPU is found or if GPU PhysX has
 * been disabled.
 */
int PxGetSuggestedCudaDeviceOrdinal(PxErrorCallback& errc);

/**
 * \brief Allocate a CUDA context manager, complete with heaps and task dispatcher.
 * You only need one CUDA context manager per GPU device you intend to use for
 * CUDA tasks. If mgr is NULL, no profiling of CUDA code will be possible.
 */
PxCudaContextManager* PxCreateCudaContextManager(PxFoundation& foundation, const PxCudaContextManagerDesc& desc, physx::PxProfileZoneManager* mgr);
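
/*
 * Example (non-normative sketch): end-to-end creation of a context manager and
 * hand-off of its PxGpuDispatcher to a task manager. gFoundation, gErrorCallback
 * and gTaskManager are assumed application objects, and the
 * PxTaskManager::setGpuDispatcher() call is an assumption about the task API
 * that may differ in your SDK version.
 *
 * \code
 * if (physx::PxGetSuggestedCudaDeviceOrdinal(gErrorCallback) >= 0)
 * {
 *     physx::PxCudaContextManagerDesc desc;    // defaults: new context, no interop
 *     physx::PxCudaContextManager* mgr = physx::PxCreateCudaContextManager(*gFoundation, desc, NULL);
 *     if (mgr && !mgr->contextIsValid())
 *     {
 *         mgr->release();                      // context creation failed; fall back to CPU
 *         mgr = NULL;
 *     }
 *     if (mgr)
 *         gTaskManager->setGpuDispatcher(*mgr->getGpuDispatcher());   // assumed task API
 * }
 * \endcode
 */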

/**
 * \brief Get the handle of the PhysX GPU module
 */
void* PxLoadPhysxGPUModule(const char* appGUID = NULL);
#endif

#ifndef PX_DOXYGEN
} // end physx namespace
#endif

#endif