1 | /* |
2 | * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. |
3 | * |
4 | * NVIDIA CORPORATION and its licensors retain all intellectual property |
5 | * and proprietary rights in and to this software, related documentation |
6 | * and any modifications thereto. Any use, reproduction, disclosure or |
7 | * distribution of this software and related documentation without an express |
8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. |
9 | */ |
10 | |
11 | #ifndef PX_SPU_TASK_H |
12 | #define PX_SPU_TASK_H |
13 | |
14 | #include "pxtask/PxTask.h" |
15 | #include "pxtask/PxSpuDispatcher.h" |
16 | |
17 | #include "physxprofilesdk/PxProfileZone.h" |
18 | |
19 | #ifndef PX_DOXYGEN |
20 | namespace physx |
21 | { |
22 | #endif |
23 | |
24 | /** |
25 | \brief A task to be executed on one or more SPUs |
26 | |
27 | Each PxSpuTask can run in a data parallel fashion on up to 6 SPUs. To coordinate the |
28 | workers, each SPU will be passed its own set of arguments. |
29 | |
30 | When all SPU workers have completed their work, the task is considered complete and the |
31 | SpuDispatcher will call release on the task, this in turn will call removeReference() |
32 | on the task's continuation. |
33 | |
34 | In this way LightCpuTasks may be launched automatically at PxSpuTask completion and vice versa. |
35 | |
36 | Users should not need to implement or create SpuTasks directly. The SDK creates the tasks |
37 | internally and will submit them to the TaskManager's SpuDispatcher for execution. The |
38 | SpuDispatcher that will be used is configured on a per-scene basis through the PxSceneDesc. |
39 | |
40 | @see SpuDispatcher |
41 | @see PxSceneDesc |
42 | */ |
43 | class PxSpuTask : public PxLightCpuTask |
44 | { |
45 | public: |
46 | |
47 | static const PxU32 kMaxSpus = 6; //!< The maximum number of SPUs |
48 | static const PxU32 kArgsPerSpu = 2; //!< Arguments per SPU |
49 | |
50 | /** |
51 | \brief Construct a new PxSpuTask object |
52 | \param[in] elfStart The starting address of the embedded SPU binary |
53 | \param[in] elfSize The size in bytes of the embedded SPU binary |
54 | \param[in] numSpus The number of SPU workers this task will run across |
55 | \param[in] args A pointer to an array of arguments, must be at least kArgsPerSpu*numSpus big |
56 | */ |
57 | PxSpuTask(const void* elfStart, PxU32 elfSize, PxU32 numSpus=1, const PxU32* args=NULL) |
58 | : mElfStart(elfStart) |
59 | , mElfSize(elfSize) |
60 | , mNbSpusToRun(numSpus) |
61 | , mNbSpusFinished(0) |
62 | , mEmitProfile(false) |
63 | { |
64 | if (args) |
65 | { |
66 | memcpy(mArgs, args, mNbSpusToRun*kArgsPerSpu*sizeof(PxU32)); |
67 | } |
68 | } |
69 | |
70 | virtual ~PxSpuTask() {} |
71 | |
72 | /** |
73 | \brief Return the number of SPUs used to run this task |
74 | */ |
75 | PX_INLINE PxU32 getSpuCount() const |
76 | { |
77 | return mNbSpusToRun; |
78 | } |
79 | |
80 | /** |
81 | \brief Set the number of SPUs to be used when running this task |
82 | */ |
83 | PX_INLINE void setSpuCount(PxU32 numSpusToRun) |
84 | { |
85 | PX_ASSERT(numSpusToRun); |
86 | mNbSpusToRun = numSpusToRun; |
87 | } |
88 | |
89 | /** |
90 | \brief Retrieve the per-SPU argument |
91 | \param[in] spuIndex The SPU that we want to retrieve the argument for |
92 | \return A pointer to the parameters for the given SPU index |
93 | */ |
94 | PX_INLINE const PxU32* getArgs(PxU32 spuIndex) const |
95 | { |
96 | PX_ASSERT(spuIndex < kMaxSpus); |
97 | return mArgs[spuIndex]; |
98 | } |
99 | |
100 | |
101 | /** |
102 | \brief Set the arguments for a given SPU worker |
103 | \param[in] spuIndex The index of the SPU worker whose arguments are to be set |
104 | \param[in] arg0 The first argument to be passed to this worker |
105 | \param[in] arg1 The second argument to be passed to this worker |
106 | */ |
107 | PX_INLINE void setArgs(PxU32 spuIndex, PxU32 arg0, PxU32 arg1) |
108 | { |
109 | PX_ASSERT(spuIndex < kMaxSpus); |
110 | PxU32* arguments = mArgs[spuIndex]; |
111 | arguments[0]=arg0; |
112 | arguments[1]=arg1; |
113 | } |
114 | |
115 | /** |
116 | \brief Return the address to the start of the embedded elf binary for this task |
117 | */ |
118 | PX_INLINE const void* getElfStart() const |
119 | { |
120 | return mElfStart; |
121 | } |
122 | |
123 | /** |
124 | \brief Return the size of the embedded elf binary for this task |
125 | */ |
126 | PX_INLINE PxU32 getElfSize() const |
127 | { |
128 | return mElfSize; |
129 | } |
130 | |
131 | /** |
132 | \brief Called by the SpuDispatcher when a SPU worker has completed, when all |
133 | workers have completed the task is considered finished and the continuation will |
134 | have it's ref count decremented. |
135 | */ |
136 | PX_INLINE void notifySpuFinish() |
137 | { |
138 | ++mNbSpusFinished; |
139 | |
140 | // if all SPU tasks have finished clean-up and release |
141 | if (mNbSpusFinished == mNbSpusToRun) |
142 | { |
143 | // emit profiling event |
144 | if (mEmitProfile) |
145 | { |
146 | getTaskManager()->emitStopEvent(*this, PxProfileEventSender::CrossThreadId); |
147 | mEmitProfile = false; |
148 | } |
149 | |
150 | mNbSpusFinished = 0; |
151 | release(); |
152 | } |
153 | } |
154 | |
155 | /** |
156 | \brief Modifies PxLightCpuTask's behavior by submitting to the SpuDispatcher |
157 | */ |
158 | virtual void removeReference() |
159 | { |
160 | PX_ASSERT(mTm); |
161 | mTm->decrReference(*this); |
162 | } |
163 | |
164 | /** |
165 | \brief Allow the task to perform PPU side intialization before the task is |
166 | scheduled to the SPUs. |
167 | |
168 | This should be called by the SpuDispatcher from whichever thread calls |
169 | submitTask(); the task should be scheduled to the SPUs immediately |
170 | following this function returning. |
171 | */ |
172 | virtual void run() {} |
173 | |
174 | /** |
175 | \brief The same as run() but will emit PVD profile events. |
176 | */ |
177 | void runProfiled() |
178 | { |
179 | // emit profiling event |
180 | getTaskManager()->emitStartEvent(*this, PxProfileEventSender::CrossThreadId); |
181 | mEmitProfile = true; |
182 | |
183 | run(); |
184 | } |
185 | |
186 | |
187 | protected: |
188 | |
189 | const void* mElfStart; //!< A pointer to the start of the ELF image |
190 | PxU32 mElfSize; //!< The size of the ELF image |
191 | PxU32 mNbSpusToRun; //!< The number of SPUs to run |
192 | PxU32 mNbSpusFinished; //!< The number of SPUs finished |
193 | PxU32 mArgs[kMaxSpus][kArgsPerSpu]; //!< The arguments for the SPUs |
194 | bool mEmitProfile; //!< Stores the profile event state if runProfiled() is used |
195 | |
196 | } |
197 | // wrap this in a macro so Doxygen doesn't get confused and output it |
198 | #ifndef PX_DOXYGEN |
199 | PX_ALIGN_SUFFIX(16) |
200 | #endif |
201 | ; |
202 | |
203 | #ifndef PX_DOXYGEN |
204 | } // end physx namespace |
205 | #endif |
206 | |
207 | #endif |
208 | |