/*
 * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto. Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 */


#ifndef PX_CUDA_CONTEXT_MANAGER_H
#define PX_CUDA_CONTEXT_MANAGER_H

#include "pxtask/PxCudaMemoryManager.h"

/* Forward decl to avoid inclusion of cuda.h */
typedef struct CUctx_st *CUcontext;
typedef struct CUgraphicsResource_st *CUgraphicsResource;

#ifndef PX_DOXYGEN
namespace physx
{
#endif

class PxProfileZoneManager;

/** \brief Possible graphics/CUDA interoperability modes for a context */
struct PxCudaInteropMode
{
    /**
     * \brief Possible graphics/CUDA interoperability modes for a context
     */
    enum Enum
    {
        NO_INTEROP = 0,
        D3D10_INTEROP,
        D3D11_INTEROP,
        OGL_INTEROP,

        COUNT
    };
};


//! \brief Descriptor used to create a PxCudaContextManager
class PxCudaContextManagerDesc
{
public:
    /**
     * \brief The CUDA context to manage
     *
     * If left NULL, the PxCudaContextManager will create a new context. If
     * graphicsDevice is also not NULL, this new CUDA context will be bound to
     * that graphics device, enabling the use of CUDA/Graphics interop features.
     *
     * If ctx is not NULL, the specified context must be applied to the thread
     * that is allocating the PxCudaContextManager at creation time (that is, it
     * cannot be popped). The PxCudaContextManager will take ownership of the
     * context until the manager is released. All access to the context must be
     * gated by lock acquisition.
     *
     * If the user provides a context for the PxCudaContextManager, the context
     * _must_ have been created either on the GPU ordinal returned by
     * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device.
     *
     * It is perfectly acceptable to allocate device or host pinned memory from
     * the context outside the scope of the PxCudaMemoryManager, so long as you
     * manage its eventual cleanup.
     */
    CUcontext *ctx;

    /**
     * \brief D3D device pointer or OpenGL context handle
     *
     * Only applicable when ctx is NULL, thus forcing a new context to be
     * created. In that case, the created context will be bound to this
     * graphics device.
     */
    void *graphicsDevice;

#if PX_SUPPORT_GPU_PHYSX
    /**
     * \brief Application-specific GUID
     *
     * If your application employs PhysX modules that use CUDA, you need to use
     * a GUID so that patches for new architectures can be released for your
     * game. You can obtain a GUID for your application from NVIDIA.
     */
    const char* appGUID;
#endif
    /**
     * \brief The CUDA/Graphics interop mode of this context
     *
     * If ctx is NULL, this value describes the nature of the graphicsDevice
     * pointer provided by the user. Otherwise it describes the nature of the
     * context provided by the user.
     */
    PxCudaInteropMode::Enum interopMode;


    /**
     * \brief Size of persistent memory
     *
     * This memory is allocated up front and stays allocated until the
     * PxCudaContextManager is released. The size is given in bytes and must be
     * a power of two larger than the page size. Set to 0 to use only dynamic
     * pages.
     *
     * Note: On Windows Vista and later, there is a per-memory-allocation
     * overhead to every CUDA work submission, so we recommend that you
     * carefully tune this initial base memory size to closely approximate the
     * amount of memory your application will consume.
     */
    PxU32 memoryBaseSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Size of memory pages
     *
     * The memory manager will dynamically grow and shrink in blocks that are
     * multiples of this page size. The size must be a power of two greater
     * than 0.
     */
    PxU32 memoryPageSize[PxCudaBufferMemorySpace::COUNT];

    /**
     * \brief Maximum amount of memory the memory manager will allocate
     */
    PxU32 maxMemorySize[PxCudaBufferMemorySpace::COUNT];

    PX_INLINE PxCudaContextManagerDesc()
    {
        ctx = NULL;
        interopMode = PxCudaInteropMode::NO_INTEROP;
        graphicsDevice = 0;
#if PX_SUPPORT_GPU_PHYSX
        appGUID = NULL;
#endif
        for(PxU32 i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
        {
            memoryBaseSize[i] = 0;
            memoryPageSize[i] = 2 * 1024*1024;
            maxMemorySize[i] = PX_MAX_U32;
        }
    }
};
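
/*
 * Example (non-normative sketch): filling out a PxCudaContextManagerDesc and
 * letting the manager create its own CUDA context. The per-space sizes below
 * are arbitrary illustrative values; gFoundation is an assumed application
 * PxFoundation instance, and PxCreateCudaContextManager() is declared later in
 * this header under PX_SUPPORT_GPU_PHYSX.
 *
 * \code
 * physx::PxCudaContextManagerDesc desc;            // ctx == NULL: a new context is created
 * desc.interopMode = physx::PxCudaInteropMode::NO_INTEROP;
 * for(physx::PxU32 i = 0; i < physx::PxCudaBufferMemorySpace::COUNT; i++)
 * {
 *     desc.memoryBaseSize[i] = 8 * 1024 * 1024;    // 8 MB persistent heap per memory space
 *     desc.memoryPageSize[i] = 2 * 1024 * 1024;    // grow/shrink in 2 MB pages
 * }
 * physx::PxCudaContextManager* mgr =
 *     physx::PxCreateCudaContextManager(*gFoundation, desc, NULL);  // NULL: no CUDA profiling
 * \endcode
 */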


/**
 * \brief Manages memory, thread locks, and task scheduling for a CUDA context
 *
 * A PxCudaContextManager manages access to a single CUDA context, allowing it to
 * be shared between multiple scenes. Memory allocations are dynamic: they start
 * from an initial heap size and grow on demand by a configurable page size.
 * The context must be acquired from the manager before using any CUDA APIs.
 *
 * The PxCudaContextManager is based on the CUDA driver API and explicitly does
 * not support the CUDA runtime API (aka CUDART).
 *
 * To enable CUDA use by an APEX scene, create a PxCudaContextManager (supplying
 * your own CUDA context, or allowing a new context to be allocated for you),
 * retrieve its PxGpuDispatcher via the getGpuDispatcher() method, and assign
 * that dispatcher to the TaskManager given to the scene via its NxApexSceneDesc.
 */
class PxCudaContextManager
{
public:
    /**
     * \brief Acquire the CUDA context for the current thread
     *
     * Acquisitions are allowed to be recursive within a single thread.
     * You can acquire the context multiple times, so long as you release
     * it the same number of times.
     *
     * The context must be acquired before using most CUDA functions.
     *
     * It is not necessary to acquire the CUDA context inside GpuTask
     * launch functions, because the PxGpuDispatcher will have already
     * acquired the context for its worker thread. However, it is not
     * harmful to (re)acquire the context in code that is shared between
     * GpuTasks and non-task functions.
     */
    virtual void acquireContext() = 0;

    /**
     * \brief Release the CUDA context from the current thread
     *
     * The CUDA context should be released as soon as practically
     * possible, to allow other CPU threads (including the
     * PxGpuDispatcher) to work efficiently.
     */
    virtual void releaseContext() = 0;
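
    /*
     * Example (non-normative sketch): bracketing raw CUDA driver API work with
     * acquireContext()/releaseContext(), where mgr is a pointer to this
     * PxCudaContextManager. This assumes cuda.h has been included by the
     * caller; the allocation size is arbitrary. The PxScopedCudaLock helper
     * defined later in this header performs the same bracketing automatically.
     *
     * \code
     * mgr->acquireContext();                           // bind the managed context to this thread
     * CUdeviceptr dptr = 0;
     * CUresult err = cuMemAlloc(&dptr, 1024 * 1024);   // driver API call under the lock
     * if (err == CUDA_SUCCESS)
     *     cuMemFree(dptr);
     * mgr->releaseContext();                           // release as soon as practical
     * \endcode
     */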

    /**
     * \brief Return the PxCudaMemoryManager instance associated with this
     * CUDA context
     */
    virtual PxCudaMemoryManager *getMemoryManager() = 0;

    /**
     * \brief Return the PxGpuDispatcher instance associated with this
     * CUDA context
     */
    virtual class PxGpuDispatcher *getGpuDispatcher() = 0;

    /**
     * \brief Check whether the context manager has a valid CUDA context
     *
     * This method should be called after creating a PxCudaContextManager,
     * especially if the manager was responsible for allocating its own
     * CUDA context (desc.ctx == NULL). If it returns false, there is
     * no point in assigning this manager's PxGpuDispatcher to a
     * TaskManager, as it will be unable to execute GpuTasks.
     */
    virtual bool contextIsValid() const = 0;

    /* Query CUDA context and device properties, without acquiring the context */

    virtual bool supportsArchSM10() const = 0;  //!< G80
    virtual bool supportsArchSM11() const = 0;  //!< G92
    virtual bool supportsArchSM12() const = 0;  //!< GT200
    virtual bool supportsArchSM13() const = 0;  //!< GT260
    virtual bool supportsArchSM20() const = 0;  //!< GF100
    virtual bool supportsArchSM30() const = 0;  //!< GK100
    virtual bool supportsArchSM35() const = 0;  //!< GK110
    virtual bool supportsArchSM50() const = 0;  //!< GM100
    virtual bool supportsArchSM52() const = 0;  //!< GM200
    virtual bool isIntegrated() const = 0;      //!< true if the GPU is an integrated (MCP) part
    virtual bool canMapHostMemory() const = 0;  //!< true if the GPU can map host memory (zero-copy)
    virtual int getDriverVersion() const = 0;   //!< returns the cached value of cuDriverGetVersion()
    virtual size_t getDeviceTotalMemBytes() const = 0;  //!< returns the cached device memory size
    virtual int getMultiprocessorCount() const = 0;     //!< returns the cached SM unit count
    virtual unsigned int getClockRate() const = 0;      //!< returns the cached SM clock frequency
    virtual int getSharedMemPerBlock() const = 0;       //!< returns the total amount of shared memory available per block, in bytes
    virtual unsigned int getMaxThreadsPerBlock() const = 0;  //!< returns the maximum number of threads per block
    virtual const char *getDeviceName() const = 0;      //!< returns the device name retrieved from the driver
    virtual PxCudaInteropMode::Enum getInteropMode() const = 0;  //!< returns the interop mode the context was created with

    virtual void setUsingConcurrentStreams(bool) = 0;   //!< turn on/off the use of concurrent streams for GPU work
    virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams
    /* End of query methods that do not require the context to be acquired */
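
    /*
     * Example (non-normative sketch): logging a few cached device properties,
     * where mgr is a pointer to this PxCudaContextManager. None of these calls
     * require the CUDA context to be acquired; printf is used purely for
     * illustration.
     *
     * \code
     * printf("GPU: %s, %d SMs, %zu bytes of device memory, driver %d\n",
     *        mgr->getDeviceName(),
     *        mgr->getMultiprocessorCount(),
     *        mgr->getDeviceTotalMemBytes(),
     *        mgr->getDriverVersion());
     * \endcode
     */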

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from OpenGL) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param buffer [IN] GLuint buffer index to be mapped to CUDA
     */
    virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, PxU32 buffer) = 0;
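
    /*
     * Example (non-normative sketch): registering an OpenGL buffer object for
     * interop and unregistering it before the buffer is destroyed. Here mgr is
     * a pointer to this PxCudaContextManager, glBufferId is assumed to be a
     * valid GLuint created by the application, and the manager is assumed to
     * have been created with OGL_INTEROP.
     *
     * \code
     * CUgraphicsResource cudaResource = NULL;
     * if (mgr->registerResourceInCudaGL(cudaResource, glBufferId))
     * {
     *     // ... map and use the resource with the CUDA driver API ...
     *     mgr->unregisterResourceInCuda(cudaResource);  // must happen before the GL buffer is released
     * }
     * \endcode
     */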

    /**
     * \brief Register a rendering resource with CUDA
     *
     * This function is called to register render resources (allocated
     * from Direct3D) with CUDA so that the memory may be shared
     * between the two systems. This is only required for render
     * resources that are designed for interop use. In APEX, each
     * render resource descriptor that could support interop has a
     * 'registerInCUDA' boolean variable.
     *
     * The function must be called again any time your graphics device
     * is reset, to re-register the resource.
     *
     * Returns true if the registration succeeded. A registered
     * resource must be unregistered before it can be released.
     *
     * \param resource [OUT] the handle to the resource that can be used with CUDA
     * \param resourcePointer [IN] a pointer to either an IDirect3DResource9, ID3D10Device, or ID3D11Resource to be registered
     */
    virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer) = 0;

    /**
     * \brief Unregister a rendering resource with CUDA
     *
     * If a render resource was successfully registered with CUDA using
     * the registerResourceInCuda***() methods, this function must be called
     * to unregister the resource before it can be released.
     */
    virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0;

    /**
     * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
     * \note If using CUDA Interop, this will always return 0
     * \returns 1 if there is a dedicated PhysX GPU
     * \returns 0 if there is NOT a dedicated PhysX GPU
     * \returns -1 if the routine is not implemented
     */
    virtual int usingDedicatedPhysXGPU() const = 0;

    /**
     * \brief Release the PxCudaContextManager
     *
     * When the manager instance is released, it also releases its
     * PxGpuDispatcher instance and PxCudaMemoryManager. Before the memory
     * manager is released, it frees all allocated memory pages. If the
     * PxCudaContextManager created the CUDA context it was responsible
     * for, it also frees that context.
     *
     * Do not release the PxCudaContextManager if there are any scenes
     * using its PxGpuDispatcher. Those scenes must be released first,
     * since there is no safe way to remove a PxGpuDispatcher from a
     * TaskManager once the TaskManager has been given to a scene.
     */
    virtual void release() = 0;

protected:

    /**
     * \brief Protected destructor; use the release() method
     */
    virtual ~PxCudaContextManager() {}
};

/**
 * \brief Convenience class for holding the CUDA lock within a scope
 */
class PxScopedCudaLock
{
public:
    /**
     * \brief PxScopedCudaLock constructor
     */
    PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx)
    {
        mCtx->acquireContext();
    }

    /**
     * \brief PxScopedCudaLock destructor
     */
    ~PxScopedCudaLock()
    {
        mCtx->releaseContext();
    }

protected:

    /**
     * \brief CUDA context manager pointer (initialized in the constructor)
     */
    PxCudaContextManager* mCtx;
};
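
/*
 * Example (non-normative sketch): using PxScopedCudaLock to acquire and
 * release the context automatically around a block of CUDA work. Here mgr is
 * an assumed pointer to a valid PxCudaContextManager; the body of the block
 * is elided.
 *
 * \code
 * {
 *     physx::PxScopedCudaLock lock(*mgr);   // acquireContext() in the constructor
 *     // ... CUDA driver API calls ...
 * }                                         // releaseContext() in the destructor
 * \endcode
 */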

#if PX_SUPPORT_GPU_PHYSX
/**
 * \brief Ask the NVIDIA control panel which GPU has been selected for use by
 * PhysX. Returns -1 if no PhysX-capable GPU is found or if GPU PhysX has
 * been disabled.
 */
int PxGetSuggestedCudaDeviceOrdinal(PxErrorCallback& errc);

/**
 * \brief Allocate a CUDA context manager, complete with heaps and task dispatcher.
 * You only need one CUDA context manager per GPU device you intend to use for
 * CUDA tasks. If mgr is NULL, no profiling of CUDA code will be possible.
 */
PxCudaContextManager* PxCreateCudaContextManager(PxFoundation& foundation, const PxCudaContextManagerDesc& desc, physx::PxProfileZoneManager* mgr);
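
/*
 * Example (non-normative sketch): end-to-end creation of a context manager and
 * hand-off of its PxGpuDispatcher to a task manager. gFoundation, gErrorCallback
 * and gTaskManager are assumed application objects, and the
 * PxTaskManager::setGpuDispatcher() call is an assumption about the task API
 * that may differ in your SDK version.
 *
 * \code
 * if (physx::PxGetSuggestedCudaDeviceOrdinal(gErrorCallback) >= 0)
 * {
 *     physx::PxCudaContextManagerDesc desc;    // defaults: new context, no interop
 *     physx::PxCudaContextManager* mgr = physx::PxCreateCudaContextManager(*gFoundation, desc, NULL);
 *     if (mgr && !mgr->contextIsValid())
 *     {
 *         mgr->release();                      // context creation failed; fall back to CPU
 *         mgr = NULL;
 *     }
 *     if (mgr)
 *         gTaskManager->setGpuDispatcher(*mgr->getGpuDispatcher());   // assumed task API
 * }
 * \endcode
 */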

/**
 * \brief Get the handle of the PhysX GPU module
 */
void* PxLoadPhysxGPUModule(const char* appGUID = NULL);
#endif

#ifndef PX_DOXYGEN
} // end physx namespace
#endif

#endif