1 | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit OpenMP nodes as LLVM code. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGCleanup.h" |
14 | #include "CGOpenMPRuntime.h" |
15 | #include "CodeGenFunction.h" |
16 | #include "CodeGenModule.h" |
17 | #include "TargetInfo.h" |
18 | #include "clang/AST/ASTContext.h" |
19 | #include "clang/AST/Attr.h" |
20 | #include "clang/AST/DeclOpenMP.h" |
21 | #include "clang/AST/OpenMPClause.h" |
22 | #include "clang/AST/Stmt.h" |
23 | #include "clang/AST/StmtOpenMP.h" |
24 | #include "clang/AST/StmtVisitor.h" |
25 | #include "clang/Basic/OpenMPKinds.h" |
26 | #include "clang/Basic/PrettyStackTrace.h" |
27 | #include "llvm/ADT/SmallSet.h" |
28 | #include "llvm/BinaryFormat/Dwarf.h" |
29 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
30 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
31 | #include "llvm/IR/Constants.h" |
32 | #include "llvm/IR/DebugInfoMetadata.h" |
33 | #include "llvm/IR/Instructions.h" |
34 | #include "llvm/IR/IntrinsicInst.h" |
35 | #include "llvm/IR/Metadata.h" |
36 | #include "llvm/Support/AtomicOrdering.h" |
37 | #include <optional> |
38 | using namespace clang; |
39 | using namespace CodeGen; |
40 | using namespace llvm::omp; |
41 | |
42 | static const VarDecl *getBaseDecl(const Expr *Ref); |
43 | |
44 | namespace { |
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  /// Emits the pre-init declarations attached to the directive's clauses as
  /// local variables, so captured expressions are available before the
  /// construct body is generated.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and register cleanups, but
              // deliberately skip emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  /// Maps captured variables to the addresses of their current values while
  /// the directive is emitted inline.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block and therefore must be accessed via the capture
  /// machinery rather than directly.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  /// \param CapturedRegion When set, the captures of the corresponding
  ///        captured statement are privatized to the addresses of their
  ///        current values (for inlined emission of the directive); when
  ///        not set, only the pre-init statements are handled.
  /// \param EmitPreInitStmt Whether to emit the clauses' pre-init
  ///        declarations (subclasses disable this for some directive kinds).
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        // Bind the variable to the address of its current value so that
        // references inside the inlined region resolve to it.
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
105 | |
106 | /// Lexical scope for OpenMP parallel construct, that handles correct codegen |
107 | /// for captured expressions. |
108 | class OMPParallelScope final : public OMPLexicalScope { |
109 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
110 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
111 | return !(isOpenMPTargetExecutionDirective(Kind) || |
112 | isOpenMPLoopBoundSharingDirective(Kind)) && |
113 | isOpenMPParallelDirective(Kind); |
114 | } |
115 | |
116 | public: |
117 | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
118 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
119 | EmitPreInitStmt(S)) {} |
120 | }; |
121 | |
122 | /// Lexical scope for OpenMP teams construct, that handles correct codegen |
123 | /// for captured expressions. |
124 | class OMPTeamsScope final : public OMPLexicalScope { |
125 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
126 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
127 | return !isOpenMPTargetExecutionDirective(Kind) && |
128 | isOpenMPTeamsDirective(Kind); |
129 | } |
130 | |
131 | public: |
132 | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
133 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
134 | EmitPreInitStmt(S)) {} |
135 | }; |
136 | |
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  /// Emits everything the loop directive needs before the loop itself can be
  /// generated: temporaries for loop counters, undef placeholders for private
  /// variables used in the loop pre-condition, helper variables of C++
  /// range-for loops, and the directive's pre-init declarations.
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      // Redirect each loop counter to a fresh temporary for the duration of
      // this mapping so the original variables are left untouched.
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          // Skip variables already handled as loop counters above.
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            // Returning false continues the walk over all loop levels.
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    // Pre-init declarations are emitted while the counter temporaries are
    // still mapped in; the original addresses are restored afterwards.
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
202 | |
/// Lexical scope for inlined ("simd-only") emission of OpenMP executable
/// directives: emits clause pre-init declarations, captured-expression
/// declarations of use_device_ptr/use_device_addr clauses, private variables,
/// the taskgroup reduction descriptor, and privatizes all captured variables
/// through the whole nest of captured statements.
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  /// Maps captured variables to the addresses of their current values.
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  /// Returns true if \p VD is captured by the enclosing lambda, captured
  /// statement, or block and therefore must be accessed via the capture
  /// machinery rather than directly.
  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit: allocate storage and register cleanups,
              // but deliberately skip emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        // Emit the captured-expression declarations backing the clause's
        // variable list.
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    // For simd directives the private clause is handled by the simd codegen
    // itself; for all other directives emit it here.
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    // Walk the whole nest of captured statements and privatize every capture
    // to the address of its current value.
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
283 | |
284 | } // namespace |
285 | |
286 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
287 | const OMPExecutableDirective &S, |
288 | const RegionCodeGenTy &CodeGen); |
289 | |
290 | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
291 | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { |
292 | if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { |
293 | OrigVD = OrigVD->getCanonicalDecl(); |
294 | bool IsCaptured = |
295 | LambdaCaptureFields.lookup(OrigVD) || |
296 | (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || |
297 | (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); |
298 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
299 | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
300 | return EmitLValue(&DRE); |
301 | } |
302 | } |
303 | return EmitLValue(E); |
304 | } |
305 | |
306 | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
307 | ASTContext &C = getContext(); |
308 | llvm::Value *Size = nullptr; |
309 | auto SizeInChars = C.getTypeSizeInChars(Ty); |
310 | if (SizeInChars.isZero()) { |
311 | // getTypeSizeInChars() returns 0 for a VLA. |
312 | while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) { |
313 | VlaSizePair VlaSize = getVLASize(VAT); |
314 | Ty = VlaSize.Type; |
315 | Size = |
316 | Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts; |
317 | } |
318 | SizeInChars = C.getTypeSizeInChars(Ty); |
319 | if (SizeInChars.isZero()) |
320 | return llvm::ConstantInt::get(SizeTy, /*V=*/0); |
321 | return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); |
322 | } |
323 | return CGM.getSize(SizeInChars); |
324 | } |
325 | |
/// Collects, for each capture of \p S, the llvm::Value that must be passed to
/// the outlined function: VLA sizes, 'this', by-copy values (round-tripped
/// through uintptr storage when they are not pointers), and addresses for
/// by-reference captures. Results are appended to \p CapturedVars in capture
/// order.
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  // The capture-init expressions, record fields and captures advance in
  // lockstep: one entry of each per captured entity.
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      // Pass the previously computed VLA dimension size directly.
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        // View the uintptr-typed temporary through a pointer to the field's
        // real type so the store below uses the original representation.
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
    }
  }
}
371 | |
372 | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
373 | QualType DstType, StringRef Name, |
374 | LValue AddrLV) { |
375 | ASTContext &Ctx = CGF.getContext(); |
376 | |
377 | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
378 | AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), |
379 | Ctx.getPointerType(DstType), Loc); |
380 | Address TmpAddr = |
381 | CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF); |
382 | return TmpAddr; |
383 | } |
384 | |
385 | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
386 | if (T->isLValueReferenceType()) |
387 | return C.getLValueReferenceType( |
388 | getCanonicalParamType(C, T.getNonReferenceType()), |
389 | /*SpelledAsLValue=*/false); |
390 | if (T->isPointerType()) |
391 | return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); |
392 | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
393 | if (const auto *VLA = dyn_cast<VariableArrayType>(A)) |
394 | return getCanonicalParamType(C, VLA->getElementType()); |
395 | if (!A->isVariablyModifiedType()) |
396 | return C.getCanonicalType(T); |
397 | } |
398 | return C.getCanonicalParamType(T); |
399 | } |
400 | |
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  // Note: RegisterCastedArgsOnly is only honored together with
  // UIntPtrCastRequired; the constructor clears it otherwise.
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace
424 | |
/// Builds the prologue of an outlined function for \p FO.S: constructs the
/// argument list (turning non-pointer by-copy captures and VLA sizes into
/// uintptr parameters when FO.UIntPtrCastRequired), creates the
/// llvm::Function, starts its body, and records in \p LocalAddrs and
/// \p VLASizes how each argument maps back to the captured entity.
/// \p CXXThisValue receives the loaded 'this' value if it is captured.
/// Returns the newly created function, positioned for body emission.
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  // Copy the CapturedDecl parameters that precede the context parameter.
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    // For the non-casted (debug) variant, create a FunctionDecl to own the
    // ParmVarDecls built below.
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted to
    // uintptr. This is necessary given that the runtime library is only able to
    // deal with pointers. We can pass in the same way the VLA type sizes to the
    // outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      // Thread-local captured variables get a dedicated parameter kind.
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamDecl::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      // Debug variant: real ParmVarDecls with the capture's source locations.
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  // Copy the CapturedDecl parameters that follow the context parameter.
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  // Walk captures again, now mapping each emitted argument back to the
  // entity it carries.
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      // VLA sizes arrive uintptr-cast; cast back and record the size value
      // under the argument so the body can resolve the VLA dimension.
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      // By-reference capture: load through the reference/pointer to reach
      // the variable's real address.
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
608 | |
/// Generates the outlined function for captured statement \p S and emits its
/// body. When debug info is enabled, a "_debug__"-suffixed function with the
/// original parameter types is emitted first, plus a wrapper with the
/// uintptr-cast signature whose body simply forwards to it; the wrapper is
/// returned in that case.
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  // Make the captured entities visible in the body under their original
  // declarations.
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  // Register the VLA dimension sizes so the body can resolve VLA types.
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Emit the wrapper: carries the runtime-facing helper name and the
  // uintptr-cast argument types, and its body is a single call to the debug
  // function F with re-loaded/cast arguments.
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            LV.getAddress(WrapperCGF),
            PI->getType()->getPointerTo(
                LV.getAddress(WrapperCGF).getAddressSpace()),
            PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        // VLA sizes are passed through as the already-computed value.
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
694 | |
695 | //===----------------------------------------------------------------------===// |
696 | // OpenMP Directive Emission |
697 | //===----------------------------------------------------------------------===// |
/// Emits an element-by-element copy loop from \p SrcAddr to \p DestAddr for
/// an array of \p OriginalType. \p CopyGen is invoked once per element with
/// the (destination, source) element addresses and emits the actual copy.
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge incoming values come from whatever block CopyGen left us
  // in, not necessarily BodyBB itself.
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
762 | |
763 | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
764 | Address SrcAddr, const VarDecl *DestVD, |
765 | const VarDecl *SrcVD, const Expr *Copy) { |
766 | if (OriginalType->isArrayType()) { |
767 | const auto *BO = dyn_cast<BinaryOperator>(Copy); |
768 | if (BO && BO->getOpcode() == BO_Assign) { |
769 | // Perform simple memcpy for simple copying. |
770 | LValue Dest = MakeAddrLValue(DestAddr, OriginalType); |
771 | LValue Src = MakeAddrLValue(SrcAddr, OriginalType); |
772 | EmitAggregateAssign(Dest, Src, OriginalType); |
773 | } else { |
774 | // For arrays with complex element types perform element by element |
775 | // copying. |
776 | EmitOMPAggregateAssign( |
777 | DestAddr, SrcAddr, OriginalType, |
778 | [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
779 | // Working with the single array element, so have to remap |
780 | // destination and source variables to corresponding array |
781 | // elements. |
782 | CodeGenFunction::OMPPrivateScope Remap(*this); |
783 | Remap.addPrivate(DestVD, DestElement); |
784 | Remap.addPrivate(SrcVD, SrcElement); |
785 | (void)Remap.Privatize(); |
786 | EmitIgnoredExpr(Copy); |
787 | }); |
788 | } |
789 | } else { |
790 | // Remap pseudo source variable to private copy. |
791 | CodeGenFunction::OMPPrivateScope Remap(*this); |
792 | Remap.addPrivate(SrcVD, SrcAddr); |
793 | Remap.addPrivate(DestVD, DestAddr); |
794 | (void)Remap.Privatize(); |
795 | // Emit copying of the whole variable. |
796 | EmitIgnoredExpr(Copy); |
797 | } |
798 | } |
799 | |
/// Emit privatization code for 'firstprivate' variables of directive \p D and
/// register the resulting private copies in \p PrivateScope.
/// \returns true if at least one firstprivate variable is also listed in a
/// 'lastprivate' clause, i.e. the caller must also emit the lastprivate
/// copy-back for it.
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  // True when emitting a target region on the device side; const variables
  // captured by reference do not need a local copy there.
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  // Canonical decl -> lastprivate modifier, used to detect variables that are
  // both firstprivate and lastprivate (and conditional lastprivates).
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      // A variable captured by value in an outlined region already acts as a
      // firstprivate copy; skip the explicit copy unless it is also
      // lastprivate or uses a custom allocator.
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target regions,
      // captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        // VDInit is the pseudo variable through which the private copy's
        // initializer refers to the original value.
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  // The temporary mapping must not leak past this element.
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            // Re-point the private decl at the conditional-lastprivate
            // storage so later references use it.
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
944 | |
945 | void CodeGenFunction::EmitOMPPrivateClause( |
946 | const OMPExecutableDirective &D, |
947 | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
948 | if (!HaveInsertPoint()) |
949 | return; |
950 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
951 | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
952 | auto IRef = C->varlist_begin(); |
953 | for (const Expr *IInit : C->private_copies()) { |
954 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
955 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
956 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
957 | EmitDecl(*VD); |
958 | // Emit private VarDecl with copy init. |
959 | bool IsRegistered = |
960 | PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD)); |
961 | assert(IsRegistered && "private var already registered as private" ); |
962 | // Silence the warning about unused variable. |
963 | (void)IsRegistered; |
964 | } |
965 | ++IRef; |
966 | } |
967 | } |
968 | } |
969 | |
/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies into the current thread's copies. The copy code is guarded so the
/// master thread itself does not copy.
/// \returns true if any copy code was emitted (the caller must then emit the
/// implicit barrier).
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emitted pattern:
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          // Drop the capture mapping so the EmitLValue below resolves to the
          // thread-local copy rather than the master's address.
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // At first check if current thread is a master thread. If it is, no
          // need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt =
              Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
          auto *PrivateAddrInt =
              Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
1041 | |
/// Emit initialization of private copies for 'lastprivate' variables of
/// directive \p D and register them in \p PrivateScope. The final copy-back
/// to the original variables is emitted later by
/// EmitOMPLastprivateClauseFinal.
/// \returns true if the directive has at least one 'lastprivate' clause.
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  // Loop control variables of a simd directive are privatized by the simd
  // codegen itself; collect them here so they are not privatized twice.
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    // Taskloop lastprivates are initialized by the runtime support library;
    // only record that a lastprivate clause exists (unless in simd-only mode).
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization, it is done in
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        // Map the pseudo destination variable to the original's address so
        // the final copy-back can find it.
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in codegen
        // for 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
1104 | |
/// Emit the final copy of 'lastprivate' values back into the original
/// variables, guarded by \p IsLastIterCond (the "is this the last iteration"
/// flag) when it is non-null. \p NoFinals suppresses emission of loop-counter
/// final updates (they were emitted elsewhere).
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not a simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  // Loop counter -> 'final' update expression; the counter must be brought to
  // its post-loop value before being copied back.
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If lastprivate variable is a loop control variable for loop-based
        // directive, update its value before copyin back to original
        // variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        // For reference-typed privates, copy from the referenced storage.
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
1195 | |
1196 | void CodeGenFunction::EmitOMPReductionClauseInit( |
1197 | const OMPExecutableDirective &D, |
1198 | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1199 | if (!HaveInsertPoint()) |
1200 | return; |
1201 | SmallVector<const Expr *, 4> Shareds; |
1202 | SmallVector<const Expr *, 4> Privates; |
1203 | SmallVector<const Expr *, 4> ReductionOps; |
1204 | SmallVector<const Expr *, 4> LHSs; |
1205 | SmallVector<const Expr *, 4> RHSs; |
1206 | OMPTaskDataTy Data; |
1207 | SmallVector<const Expr *, 4> TaskLHSs; |
1208 | SmallVector<const Expr *, 4> TaskRHSs; |
1209 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1210 | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1211 | continue; |
1212 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
1213 | Privates.append(C->privates().begin(), C->privates().end()); |
1214 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1215 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1216 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1217 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1218 | Data.ReductionVars.append(C->privates().begin(), C->privates().end()); |
1219 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
1220 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
1221 | Data.ReductionOps.append(C->reduction_ops().begin(), |
1222 | C->reduction_ops().end()); |
1223 | TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1224 | TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1225 | } |
1226 | } |
1227 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1228 | unsigned Count = 0; |
1229 | auto *ILHS = LHSs.begin(); |
1230 | auto *IRHS = RHSs.begin(); |
1231 | auto *IPriv = Privates.begin(); |
1232 | for (const Expr *IRef : Shareds) { |
1233 | const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); |
1234 | // Emit private VarDecl with reduction init. |
1235 | RedCG.emitSharedOrigLValue(*this, Count); |
1236 | RedCG.emitAggregateType(*this, Count); |
1237 | AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); |
1238 | RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), |
1239 | RedCG.getSharedLValue(Count).getAddress(*this), |
1240 | [&Emission](CodeGenFunction &CGF) { |
1241 | CGF.EmitAutoVarInit(Emission); |
1242 | return true; |
1243 | }); |
1244 | EmitAutoVarCleanups(Emission); |
1245 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1246 | *this, Count, Emission.getAllocatedAddress()); |
1247 | bool IsRegistered = |
1248 | PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr); |
1249 | assert(IsRegistered && "private var already registered as private" ); |
1250 | // Silence the warning about unused variable. |
1251 | (void)IsRegistered; |
1252 | |
1253 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
1254 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
1255 | QualType Type = PrivateVD->getType(); |
1256 | bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); |
1257 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { |
1258 | // Store the address of the original variable associated with the LHS |
1259 | // implicit variable. |
1260 | PrivateScope.addPrivate(LHSVD, |
1261 | RedCG.getSharedLValue(Count).getAddress(*this)); |
1262 | PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD)); |
1263 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || |
1264 | isa<ArraySubscriptExpr>(IRef)) { |
1265 | // Store the address of the original variable associated with the LHS |
1266 | // implicit variable. |
1267 | PrivateScope.addPrivate(LHSVD, |
1268 | RedCG.getSharedLValue(Count).getAddress(*this)); |
1269 | PrivateScope.addPrivate(RHSVD, |
1270 | GetAddrOfLocalVar(PrivateVD).withElementType( |
1271 | ConvertTypeForMem(RHSVD->getType()))); |
1272 | } else { |
1273 | QualType Type = PrivateVD->getType(); |
1274 | bool IsArray = getContext().getAsArrayType(Type) != nullptr; |
1275 | Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this); |
1276 | // Store the address of the original variable associated with the LHS |
1277 | // implicit variable. |
1278 | if (IsArray) { |
1279 | OriginalAddr = |
1280 | OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType())); |
1281 | } |
1282 | PrivateScope.addPrivate(LHSVD, OriginalAddr); |
1283 | PrivateScope.addPrivate( |
1284 | RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType( |
1285 | ConvertTypeForMem(RHSVD->getType())) |
1286 | : GetAddrOfLocalVar(PrivateVD)); |
1287 | } |
1288 | ++ILHS; |
1289 | ++IRHS; |
1290 | ++IPriv; |
1291 | ++Count; |
1292 | } |
1293 | if (!Data.ReductionVars.empty()) { |
1294 | Data.IsReductionWithTaskMod = true; |
1295 | Data.IsWorksharingReduction = |
1296 | isOpenMPWorksharingDirective(D.getDirectiveKind()); |
1297 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1298 | *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); |
1299 | const Expr *TaskRedRef = nullptr; |
1300 | switch (D.getDirectiveKind()) { |
1301 | case OMPD_parallel: |
1302 | TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); |
1303 | break; |
1304 | case OMPD_for: |
1305 | TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); |
1306 | break; |
1307 | case OMPD_sections: |
1308 | TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); |
1309 | break; |
1310 | case OMPD_parallel_for: |
1311 | TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); |
1312 | break; |
1313 | case OMPD_parallel_master: |
1314 | TaskRedRef = |
1315 | cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); |
1316 | break; |
1317 | case OMPD_parallel_sections: |
1318 | TaskRedRef = |
1319 | cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); |
1320 | break; |
1321 | case OMPD_target_parallel: |
1322 | TaskRedRef = |
1323 | cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); |
1324 | break; |
1325 | case OMPD_target_parallel_for: |
1326 | TaskRedRef = |
1327 | cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); |
1328 | break; |
1329 | case OMPD_distribute_parallel_for: |
1330 | TaskRedRef = |
1331 | cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); |
1332 | break; |
1333 | case OMPD_teams_distribute_parallel_for: |
1334 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) |
1335 | .getTaskReductionRefExpr(); |
1336 | break; |
1337 | case OMPD_target_teams_distribute_parallel_for: |
1338 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) |
1339 | .getTaskReductionRefExpr(); |
1340 | break; |
1341 | case OMPD_simd: |
1342 | case OMPD_for_simd: |
1343 | case OMPD_section: |
1344 | case OMPD_single: |
1345 | case OMPD_master: |
1346 | case OMPD_critical: |
1347 | case OMPD_parallel_for_simd: |
1348 | case OMPD_task: |
1349 | case OMPD_taskyield: |
1350 | case OMPD_error: |
1351 | case OMPD_barrier: |
1352 | case OMPD_taskwait: |
1353 | case OMPD_taskgroup: |
1354 | case OMPD_flush: |
1355 | case OMPD_depobj: |
1356 | case OMPD_scan: |
1357 | case OMPD_ordered: |
1358 | case OMPD_atomic: |
1359 | case OMPD_teams: |
1360 | case OMPD_target: |
1361 | case OMPD_cancellation_point: |
1362 | case OMPD_cancel: |
1363 | case OMPD_target_data: |
1364 | case OMPD_target_enter_data: |
1365 | case OMPD_target_exit_data: |
1366 | case OMPD_taskloop: |
1367 | case OMPD_taskloop_simd: |
1368 | case OMPD_master_taskloop: |
1369 | case OMPD_master_taskloop_simd: |
1370 | case OMPD_parallel_master_taskloop: |
1371 | case OMPD_parallel_master_taskloop_simd: |
1372 | case OMPD_distribute: |
1373 | case OMPD_target_update: |
1374 | case OMPD_distribute_parallel_for_simd: |
1375 | case OMPD_distribute_simd: |
1376 | case OMPD_target_parallel_for_simd: |
1377 | case OMPD_target_simd: |
1378 | case OMPD_teams_distribute: |
1379 | case OMPD_teams_distribute_simd: |
1380 | case OMPD_teams_distribute_parallel_for_simd: |
1381 | case OMPD_target_teams: |
1382 | case OMPD_target_teams_distribute: |
1383 | case OMPD_target_teams_distribute_parallel_for_simd: |
1384 | case OMPD_target_teams_distribute_simd: |
1385 | case OMPD_declare_target: |
1386 | case OMPD_end_declare_target: |
1387 | case OMPD_threadprivate: |
1388 | case OMPD_allocate: |
1389 | case OMPD_declare_reduction: |
1390 | case OMPD_declare_mapper: |
1391 | case OMPD_declare_simd: |
1392 | case OMPD_requires: |
1393 | case OMPD_declare_variant: |
1394 | case OMPD_begin_declare_variant: |
1395 | case OMPD_end_declare_variant: |
1396 | case OMPD_unknown: |
1397 | default: |
1398 | llvm_unreachable("Enexpected directive with task reductions." ); |
1399 | } |
1400 | |
1401 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); |
1402 | EmitVarDecl(*VD); |
1403 | EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), |
1404 | /*Volatile=*/false, TaskRedRef->getType()); |
1405 | } |
1406 | } |
1407 | |
1408 | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1409 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1410 | if (!HaveInsertPoint()) |
1411 | return; |
1412 | llvm::SmallVector<const Expr *, 8> Privates; |
1413 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1414 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1415 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1416 | bool HasAtLeastOneReduction = false; |
1417 | bool IsReductionWithTaskMod = false; |
1418 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1419 | // Do not emit for inscan reductions. |
1420 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1421 | continue; |
1422 | HasAtLeastOneReduction = true; |
1423 | Privates.append(C->privates().begin(), C->privates().end()); |
1424 | LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1425 | RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1426 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1427 | IsReductionWithTaskMod = |
1428 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1429 | } |
1430 | if (HasAtLeastOneReduction) { |
1431 | if (IsReductionWithTaskMod) { |
1432 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1433 | *this, D.getBeginLoc(), |
1434 | isOpenMPWorksharingDirective(D.getDirectiveKind())); |
1435 | } |
1436 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1437 | isOpenMPParallelDirective(D.getDirectiveKind()) || |
1438 | ReductionKind == OMPD_simd; |
1439 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1440 | // Emit nowait reduction if nowait clause is present or directive is a |
1441 | // parallel directive (it always has implicit barrier). |
1442 | CGM.getOpenMPRuntime().emitReduction( |
1443 | *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1444 | {WithNowait, SimpleReduction, ReductionKind}); |
1445 | } |
1446 | } |
1447 | |
/// Emits the post-update expressions of all reduction clauses of directive
/// \p D, if any. \p CondGen may produce a condition value; when it does (the
/// first time a post-update expression is found), the updates are emitted in
/// a conditional block that branches to a common done-block.
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  // Nothing to emit if the insertion point was cleared (unreachable code).
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu" );
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done" );
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  // Close the conditional region, if one was opened.
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
1472 | |
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
/// The callback receives the directive and the vector of captured values to
/// which the bound parameters should be appended.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
1482 | |
1483 | static void |
1484 | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1485 | const OMPExecutableDirective &S) { |
1486 | if (CGF.getLangOpts().OpenMP < 50) |
1487 | return; |
1488 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1489 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1490 | for (const Expr *Ref : C->varlists()) { |
1491 | if (!Ref->getType()->isScalarType()) |
1492 | continue; |
1493 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1494 | if (!DRE) |
1495 | continue; |
1496 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1497 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1498 | } |
1499 | } |
1500 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1501 | for (const Expr *Ref : C->varlists()) { |
1502 | if (!Ref->getType()->isScalarType()) |
1503 | continue; |
1504 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1505 | if (!DRE) |
1506 | continue; |
1507 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1508 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1509 | } |
1510 | } |
1511 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1512 | for (const Expr *Ref : C->varlists()) { |
1513 | if (!Ref->getType()->isScalarType()) |
1514 | continue; |
1515 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1516 | if (!DRE) |
1517 | continue; |
1518 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1519 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1520 | } |
1521 | } |
1522 | // Privates should ne analyzed since they are not captured at all. |
1523 | // Task reductions may be skipped - tasks are ignored. |
1524 | // Firstprivates do not return value but may be passed by reference - no need |
1525 | // to check for updated lastprivate conditional. |
1526 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1527 | for (const Expr *Ref : C->varlists()) { |
1528 | if (!Ref->getType()->isScalarType()) |
1529 | continue; |
1530 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1531 | if (!DRE) |
1532 | continue; |
1533 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1534 | } |
1535 | } |
1536 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1537 | CGF, S, PrivateDecls); |
1538 | } |
1539 | |
/// Common codegen for standalone and combined 'parallel' directives: creates
/// the outlined parallel function, handles num_threads/proc_bind/if clauses
/// and emits the runtime call that forks the parallel region.
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *NumThreads = nullptr;
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
          CodeGen);
  // Evaluate the num_threads expression in its own cleanup scope and tell the
  // runtime before the actual fork.
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                    /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  // Pick the if-clause that applies to 'parallel': either unmodified or with
  // the 'parallel' name modifier.
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond, NumThreads);
}
1582 | |
1583 | static bool isAllocatableDecl(const VarDecl *VD) { |
1584 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1585 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1586 | return false; |
1587 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1588 | // Use the default allocation. |
1589 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1590 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1591 | !AA->getAllocator()); |
1592 | } |
1593 | |
/// No-op CodeGenBoundParametersTy callback for directives that do not need to
/// pass extra (distribute chunk) bounds to the outlined parallel function.
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
1597 | |
1598 | static void emitOMPCopyinClause(CodeGenFunction &CGF, |
1599 | const OMPExecutableDirective &S) { |
1600 | bool Copyins = CGF.EmitOMPCopyinClause(S); |
1601 | if (Copyins) { |
1602 | // Emit implicit barrier to synchronize threads and avoid data races on |
1603 | // propagation master's thread values of threadprivate variables to local |
1604 | // instances of that variables of all other implicit threads. |
1605 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1606 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
1607 | /*ForceSimpleCall=*/true); |
1608 | } |
1609 | } |
1610 | |
/// Returns the address for a local variable that must be allocated through
/// the OpenMP allocate machinery (OMPAllocateDeclAttr), or an invalid address
/// if default allocation applies. Emits the __kmpc alloc/free pair via the
/// OpenMPIRBuilder and registers the free call as a cleanup.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
    CodeGenFunction &CGF, const VarDecl *VD) {
  CodeGenModule &CGM = CGF.CGM;
  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!isAllocatableDecl(CVD))
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }

  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator." );
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);

  llvm::Value *Addr = OMPBuilder.createOMPAlloc(
      CGF.Builder, Size, Allocator,
      getNameWithSeparators({CVD->getName(), ".void.addr" }, "." , "." ));
  llvm::CallInst *FreeCI =
      OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);

  // Free the allocation when the enclosing scope ends (normal or EH path).
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getNameWithSeparators({CVD->getName(), ".addr" }, "." , "." ));
  return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
}
1660 | |
/// Returns the address of the thread-private copy of \p VD. When the target
/// supports TLS (and it is enabled), the original address already is the
/// per-thread copy; otherwise a cached thread-private lookup is emitted via
/// the OpenMPIRBuilder.
Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
    SourceLocation Loc) {
  CodeGenModule &CGM = CGF.CGM;
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Data =
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
  // Cache name is derived from the mangled variable name plus ".cache.".
  std::string Suffix = getNameWithSeparators({"cache" , "" });
  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);

  llvm::CallInst *ThreadPrivateCacheCall =
      OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);

  return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
}
1683 | |
1684 | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1685 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1686 | SmallString<128> Buffer; |
1687 | llvm::raw_svector_ostream OS(Buffer); |
1688 | StringRef Sep = FirstSeparator; |
1689 | for (StringRef Part : Parts) { |
1690 | OS << Sep << Part; |
1691 | Sep = Separator; |
1692 | } |
1693 | return OS.str().str(); |
1694 | } |
1695 | |
/// Emits the body of an inlined (non-outlined) OpenMP region at \p CodeGenIP:
/// splits off a finalization block, emits the statement inside an
/// InlinedRegionBodyRAII scope, and branches to the finalization block.
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after" );

  {
    OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  // Only emit the fall-through branch if the body did not clear the insertion
  // point (e.g. via a terminator).
  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}
1712 | |
/// Emits the body of an outlined OpenMP region at \p CodeGenIP. Mirrors
/// EmitOMPInlinedRegionBody but uses the OutlinedRegionBodyRAII scope.
void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
    CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, Twine RegionName) {
  CGBuilderTy &Builder = CGF.Builder;
  Builder.restoreIP(CodeGenIP);
  llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
                                               "." + RegionName + ".after" );

  {
    OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
    CGF.EmitStmt(RegionBodyStmt);
  }

  // Only emit the fall-through branch if the body did not clear the insertion
  // point (e.g. via a terminator).
  if (Builder.saveIP().isSet())
    Builder.CreateBr(FiniBB);
}
1729 | |
/// Emits a '#pragma omp parallel' directive. Two paths exist: the
/// OpenMPIRBuilder-based path (when -fopenmp-enable-irbuilder is on) and the
/// classic CGOpenMPRuntime path that outlines the region explicitly.
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // Check if we have any if clause associated with the directive.
    // NOTE(review): unlike the clause loop in emitCommonOMPParallelDirective,
    // this does not filter by the if-clause name modifier
    // (e.g. 'if(parallel: ...)') — confirm this is intended.
    llvm::Value *IfCond = nullptr;
    if (const auto *C = S.getSingleClause<OMPIfClause>())
      IfCond = EmitScalarExpr(C->getCondition(),
                              /*IgnoreResultAssign=*/true);

    llvm::Value *NumThreads = nullptr;
    if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
      NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                  /*IgnoreResultAssign=*/true);

    ProcBindKind ProcBind = OMP_PROC_BIND_default;
    if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
      ProcBind = ProcBindClause->getProcBindKind();

    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    // The cleanup callback that finalizes all variabels at the given location,
    // thus calls destructors etc.
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
    const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();

    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
          *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel" );
    };

    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(
        OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
    return;
  }

  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    // Copyin must be handled before the privatization of the other clauses.
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
1811 | |
/// Emits a '#pragma omp metadirective': the selected variant is stored on the
/// directive as an if-statement, so just emit that.
void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
  EmitStmt(S.getIfStmt());
}
1815 | |
1816 | namespace { |
1817 | /// RAII to handle scopes for loop transformation directives. |
1818 | class OMPTransformDirectiveScopeRAII { |
1819 | OMPLoopScope *Scope = nullptr; |
1820 | CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; |
1821 | CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; |
1822 | |
1823 | OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = |
1824 | delete; |
1825 | OMPTransformDirectiveScopeRAII & |
1826 | operator=(const OMPTransformDirectiveScopeRAII &) = delete; |
1827 | |
1828 | public: |
1829 | OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { |
1830 | if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { |
1831 | Scope = new OMPLoopScope(CGF, *Dir); |
1832 | CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); |
1833 | CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); |
1834 | } |
1835 | } |
1836 | ~OMPTransformDirectiveScopeRAII() { |
1837 | if (!Scope) |
1838 | return; |
1839 | delete CapInfoRAII; |
1840 | delete CGSI; |
1841 | delete Scope; |
1842 | } |
1843 | }; |
1844 | } // namespace |
1845 | |
/// Recursively emits the body of a collapsed loop nest: walks through
/// compound statements, unwraps the next inner loop (possibly through a loop
/// transformation or canonical-loop wrapper) until \p MaxLevel nesting levels
/// have been consumed, then emits the innermost body.
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
                     int MaxLevel, int Level = 0) {
  assert(Level < MaxLevel && "Too deep lookup during loop body codegen." );
  const Stmt *SimplifiedS = S->IgnoreContainers();
  if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
    PrettyStackTraceLoc CrashInfo(
        CGF.getContext().getSourceManager(), CS->getLBracLoc(),
        "LLVM IR generation of compound statement ('{}')" );

    // Keep track of the current cleanup stack depth, including debug scopes.
    CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
    // Note: Level is NOT incremented here — a compound statement is only a
    // container, not a loop level.
    for (const Stmt *CurStmt : CS->body())
      emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
    return;
  }
  if (SimplifiedS == NextLoop) {
    // Descend into the transformed/canonical loop before inspecting it.
    if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
      SimplifiedS = Dir->getTransformedStmt();
    if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
      SimplifiedS = CanonLoop->getLoopStmt();
    if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
      S = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(SimplifiedS) &&
             "Expected canonical for loop or range-based for loop." );
      const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
      // The loop variable declaration must still be emitted explicitly.
      CGF.EmitStmt(CXXFor->getLoopVarStmt());
      S = CXXFor->getBody();
    }
    if (Level + 1 < MaxLevel) {
      NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
          S, /*TryImperfectlyNestedLoops=*/true);
      emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
      return;
    }
  }
  CGF.EmitStmt(S);
}
1884 | |
/// Emits the body of an OpenMP loop directive: updates loop counters and
/// linear variables for the current iteration, checks non-rectangular bounds,
/// sets up inscan-reduction dispatch blocks when needed, and emits the
/// (possibly collapsed) loop body.
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counters values on current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, no
  // need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }

  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue" );
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that loop counter in non-rectangular nest fits into the iteration
    // space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next" );
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }

  OMPPrivateScope InscanScope(*this);
  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
  bool IsInscanRegion = InscanScope.Privatize();
  if (IsInscanRegion) {
    // Need to remember the block before and after scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For inclusive scan the natural
    // order of the blocks is used, for exclusive clause the blocks must be
    // executed in reverse order.
    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb" );
    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb" );
    // No need to allocate inscan exit block, in simd mode it is selected in the
    // codegen for the scan directive.
    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb" );
    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch" );
    EmitBranch(OMPScanDispatch);
    EmitBlock(OMPBeforeScanBlock);
  }

  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  // Emit loop body.
  emitBody(*this, Body,
           OMPLoopBasedDirective::tryToFindNextInnerLoop(
               Body, /*TryImperfectlyNestedLoops=*/true),
           D.getLoopsNumber());

  // Jump to the dispatcher at the end of the loop body.
  if (IsInscanRegion)
    EmitBranch(OMPScanExitBlock);

  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
1952 | |
/// Pair of the outlined function for a captured statement and the value of
/// its captured closure context (pointer to the capture struct).
using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;

/// Emit a captured statement and return the function as well as its captured
/// closure context.
static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
                                             const CapturedStmt *S) {
  // The capture struct is materialized in the parent function; the outlined
  // function itself is generated in a fresh CodeGenFunction context.
  LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
  CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
  std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
      std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);

  return {F, CapStruct.getPointer(ParentCGF)};
}
1968 | |
1969 | /// Emit a call to a previously captured closure. |
1970 | static llvm::CallInst * |
1971 | emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, |
1972 | llvm::ArrayRef<llvm::Value *> Args) { |
1973 | // Append the closure context to the argument. |
1974 | SmallVector<llvm::Value *> EffectiveArgs; |
1975 | EffectiveArgs.reserve(Args.size() + 1); |
1976 | llvm::append_range(EffectiveArgs, Args); |
1977 | EffectiveArgs.push_back(Cap.second); |
1978 | |
1979 | return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); |
1980 | } |
1981 | |
/// Emits \p Depth collapsed canonical loops nested in \p S via the
/// OpenMPIRBuilder and returns the CanonicalLoopInfo of the outermost one.
/// Currently only Depth == 1 is supported.
llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
  assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented" );

  // The caller is processing the loop-associated directive processing the \p
  // Depth loops nested in \p S. Put the previous pending loop-associated
  // directive to the stack. If the current loop-associated directive is a loop
  // transformation directive, it will push its generated loops onto the stack
  // such that together with the loops left here they form the combined loop
  // nest for the parent loop-associated directive.
  int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
  ExpectedOMPLoopDepth = Depth;

  EmitStmt(S);
  assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops" );

  // The last added loop is the outermost one.
  llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();

  // Pop the \p Depth loops requested by the call from that stack and restore
  // the previous context.
  OMPLoopNestStack.pop_back_n(Depth);
  ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;

  return Result;
}
2008 | |
/// Emits an OMPCanonicalLoop node via the OpenMPIRBuilder: emits the loop's
/// init statements, outlines the distance and loop-variable functions, builds
/// the canonical loop structure, and pushes the resulting CanonicalLoopInfo
/// for consumption by the enclosing loop-associated directive. Falls back to
/// plain statement emission when the IRBuilder is disabled.
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
  const Stmt *SyntacticalLoop = S->getLoopStmt();
  if (!getLangOpts().OpenMPIRBuilder) {
    // Ignore if OpenMPIRBuilder is not enabled.
    EmitStmt(SyntacticalLoop);
    return;
  }

  LexicalScope ForScope(*this, S->getSourceRange());

  // Emit init statements. The Distance/LoopVar funcs may reference variable
  // declarations they contain.
  const Stmt *BodyStmt;
  if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
    if (const Stmt *InitStmt = For->getInit())
      EmitStmt(InitStmt);
    BodyStmt = For->getBody();
  } else if (const auto *RangeFor =
                 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
    // Range-based for: emit range/begin/end/loop-variable declarations.
    if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
      EmitStmt(RangeStmt);
    if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
      EmitStmt(BeginStmt);
    if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
      EmitStmt(EndStmt);
    if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
      EmitStmt(LoopVarStmt);
    BodyStmt = RangeFor->getBody();
  } else
    llvm_unreachable("Expected for-stmt or range-based for-stmt" );

  // Emit closure for later use. By-value captures will be captured here.
  const CapturedStmt *DistanceFunc = S->getDistanceFunc();
  EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
  const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
  EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);

  // Call the distance function to get the number of iterations of the loop to
  // come.
  QualType LogicalTy = DistanceFunc->getCapturedDecl()
                           ->getParam(0)
                           ->getType()
                           .getNonReferenceType();
  Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr" );
  emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
  llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count" );

  // Emit the loop structure.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                           llvm::Value *IndVar) {
    Builder.restoreIP(CodeGenIP);

    // Emit the loop body: Convert the logical iteration number to the loop
    // variable and emit the body.
    const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
    LValue LCVal = EmitLValue(LoopVarRef);
    Address LoopVarAddress = LCVal.getAddress(*this);
    emitCapturedStmtCall(*this, LoopVarClosure,
                         {LoopVarAddress.getPointer(), IndVar});

    RunCleanupsScope BodyScope(*this);
    EmitStmt(BodyStmt);
  };
  llvm::CanonicalLoopInfo *CL =
      OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);

  // Finish up the loop.
  Builder.restoreIP(CL->getAfterIP());
  ForScope.ForceCleanup();

  // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
  OMPLoopNestStack.push_back(CL);
}
2083 | |
/// Emits the inner loop of an OpenMP loop directive as cond/body/inc blocks:
/// tests \p LoopCond, runs \p BodyGen, then emits \p IncExpr and \p
/// PostIncGen and branches back. \p RequiresCleanup inserts an extra block to
/// run cleanups on the loop-exit path.
void CodeGenFunction::EmitOMPInnerLoop(
    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end" );

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond" );
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();

  // If attributes are attached, push to the basic block with them.
  const auto &OMPED = cast<OMPExecutableDirective>(S);
  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
  const Stmt *SS = ICS->getCapturedStmt();
  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
  OMPLoopNestStack.clear();
  if (AS)
    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));
  else
    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                   SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup" );

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body" );

  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(LoopBody);
  incrementProfileCounter(&S);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc" );
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  BodyGen(*this);

  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
2144 | |
/// Emits initializers for the private copies of linear-clause variables of
/// directive \p D, plus pre-calculation of non-constant linear steps.
/// Returns true if any linear variable was initialized.
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        // The init refers to the original variable: build a DeclRefExpr to it
        // (respecting captures) and initialize the private copy from it.
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(
            &DRE, VD,
            MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
            /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
2182 | |
/// Emit the 'final' expressions of all 'linear' clauses of \p D, optionally
/// guarded by a condition produced by \p CondGen (a null condition means the
/// updates are emitted unconditionally).
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu" );
          DoneBB = createBasicBlock(".omp.linear.pu.done" );
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      // Build a reference to the original variable (captured if necessary) to
      // obtain its address.
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
      // Temporarily map the variable to that address while emitting the
      // final-value expression F.
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    // Emit the clause's post-update expression, if any.
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2220 | |
2221 | static void emitAlignedClause(CodeGenFunction &CGF, |
2222 | const OMPExecutableDirective &D) { |
2223 | if (!CGF.HaveInsertPoint()) |
2224 | return; |
2225 | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2226 | llvm::APInt ClauseAlignment(64, 0); |
2227 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2228 | auto *AlignmentCI = |
2229 | cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
2230 | ClauseAlignment = AlignmentCI->getValue(); |
2231 | } |
2232 | for (const Expr *E : Clause->varlists()) { |
2233 | llvm::APInt Alignment(ClauseAlignment); |
2234 | if (Alignment == 0) { |
2235 | // OpenMP [2.8.1, Description] |
2236 | // If no optional parameter is specified, implementation-defined default |
2237 | // alignments for SIMD instructions on the target platforms are assumed. |
2238 | Alignment = |
2239 | CGF.getContext() |
2240 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2241 | E->getType()->getPointeeType())) |
2242 | .getQuantity(); |
2243 | } |
2244 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2245 | "alignment is not power of 2" ); |
2246 | if (Alignment != 0) { |
2247 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2248 | CGF.emitAlignmentAssumption( |
2249 | PtrValue, E, /*No second loc needed*/ SourceLocation(), |
2250 | llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); |
2251 | } |
2252 | } |
2253 | } |
2254 | } |
2255 | |
/// Create private copies of the loop counters of \p S and register them in
/// \p LoopScope; also privatizes extra counters required by 'ordered(n)'.
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    // Drop the mapping created by EmitAutoVarAlloca; the private counter is
    // re-registered through LoopScope below.
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      // The original counter is addressable here (local, captured or global):
      // map the private counter decl to the original variable's address.
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                      LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                      E->getType(), VK_LValue, E->getExprLoc());
      (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
    } else {
      // Otherwise the private counter uses the freshly allocated storage.
      (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid re-emission
      // of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(
            VD, CreateMemTemp(DRE->getType(), VD->getName()));
      }
    }
  }
}
2297 | |
/// Emit the loop precondition \p Cond of \p S, branching to \p TrueBlock when
/// the loop executes at least once and to \p FalseBlock otherwise. Loop
/// counters are temporarily privatized so the condition can be evaluated.
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    // Privatize the counters just for the duration of the init expressions.
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator." );
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that loop is executed at least one time.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  // Undo the temporary counter mappings.
  PreCondVars.restore(CGF);
}
2335 | |
2336 | void CodeGenFunction::EmitOMPLinearClause( |
2337 | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2338 | if (!HaveInsertPoint()) |
2339 | return; |
2340 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2341 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
2342 | const auto *LoopDirective = cast<OMPLoopDirective>(&D); |
2343 | for (const Expr *C : LoopDirective->counters()) { |
2344 | SIMDLCVs.insert( |
2345 | cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); |
2346 | } |
2347 | } |
2348 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2349 | auto CurPrivate = C->privates().begin(); |
2350 | for (const Expr *E : C->varlists()) { |
2351 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2352 | const auto *PrivateVD = |
2353 | cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); |
2354 | if (!SIMDLCVs.count(VD->getCanonicalDecl())) { |
2355 | // Emit private VarDecl with copy init. |
2356 | EmitVarDecl(*PrivateVD); |
2357 | bool IsRegistered = |
2358 | PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); |
2359 | assert(IsRegistered && "linear var already registered as private" ); |
2360 | // Silence the warning about unused variable. |
2361 | (void)IsRegistered; |
2362 | } else { |
2363 | EmitVarDecl(*PrivateVD); |
2364 | } |
2365 | ++CurPrivate; |
2366 | } |
2367 | } |
2368 | } |
2369 | |
2370 | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2371 | const OMPExecutableDirective &D) { |
2372 | if (!CGF.HaveInsertPoint()) |
2373 | return; |
2374 | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2375 | RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), |
2376 | /*ignoreResult=*/true); |
2377 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2378 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2379 | // In presence of finite 'safelen', it may be unsafe to mark all |
2380 | // the memory instructions parallel, because loop-carried |
2381 | // dependences of 'safelen' iterations are possible. |
2382 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2383 | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2384 | RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), |
2385 | /*ignoreResult=*/true); |
2386 | auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2387 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2388 | // In presence of finite 'safelen', it may be unsafe to mark all |
2389 | // the memory instructions parallel, because loop-carried |
2390 | // dependences of 'safelen' iterations are possible. |
2391 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2392 | } |
2393 | } |
2394 | |
/// Configure the loop stack for a simd loop described by \p D: enable
/// vectorization, apply simdlen/safelen, and honor order(concurrent) and
/// inscan reductions.
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/lastprivate.
  LoopStack.setParallel(/*Enable=*/true);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D);
  // order(concurrent) re-enables the parallel annotation even if safelen
  // disabled it above.
  if (const auto *C = D.getSingleClause<OMPOrderClause>())
    if (C->getKind() == OMPC_ORDER_concurrent)
      LoopStack.setParallel(/*Enable=*/true);
  if ((D.getDirectiveKind() == OMPD_simd ||
       (getLangOpts().OpenMPSimd &&
        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   }))
    // Disable parallel access in case of prefix sum.
    LoopStack.setParallel(/*Enable=*/false);
}
2413 | |
/// Emit the 'final' expressions for the loop counters of \p D whose original
/// variables are addressable here, optionally guarded by a condition produced
/// by \p CondGen (a null condition means unconditional emission).
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    // Only emit the final value for counters visible here (local, captured,
    // global, or backed by a captured expression).
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then" );
          DoneBB = createBasicBlock(".omp.final.done" );
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        // Captured expression: evaluate its init to obtain the address.
        OrigAddr =
            EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress(*this);
      }
      // Map the counter to OrigAddr while emitting the 'final' expression F.
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, OrigAddr);
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
2459 | |
/// Emit the body of loop directive \p S followed by a stop point for the
/// directive itself.
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
2466 | |
2467 | /// Emit a helper variable and return corresponding lvalue. |
2468 | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2469 | const DeclRefExpr *Helper) { |
2470 | auto VDecl = cast<VarDecl>(Helper->getDecl()); |
2471 | CGF.EmitVarDecl(*VDecl); |
2472 | return CGF.EmitLValue(Helper); |
2473 | } |
2474 | |
2475 | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2476 | const RegionCodeGenTy &SimdInitGen, |
2477 | const RegionCodeGenTy &BodyCodeGen) { |
2478 | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2479 | PrePostActionTy &) { |
2480 | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2481 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2482 | SimdInitGen(CGF); |
2483 | |
2484 | BodyCodeGen(CGF); |
2485 | }; |
2486 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2487 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2488 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2489 | |
2490 | BodyCodeGen(CGF); |
2491 | }; |
2492 | const Expr *IfCond = nullptr; |
2493 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
2494 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2495 | if (CGF.getLangOpts().OpenMP >= 50 && |
2496 | (C->getNameModifier() == OMPD_unknown || |
2497 | C->getNameModifier() == OMPD_simd)) { |
2498 | IfCond = C->getCondition(); |
2499 | break; |
2500 | } |
2501 | } |
2502 | } |
2503 | if (IfCond) { |
2504 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2505 | } else { |
2506 | RegionCodeGenTy ThenRCG(ThenGen); |
2507 | ThenRCG(CGF); |
2508 | } |
2509 | } |
2510 | |
2511 | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2512 | PrePostActionTy &Action) { |
2513 | Action.Enter(CGF); |
2514 | assert(isOpenMPSimdDirective(S.getDirectiveKind()) && |
2515 | "Expected simd directive" ); |
2516 | OMPLoopScope PreInitScope(CGF, S); |
2517 | // if (PreCond) { |
2518 | // for (IV in 0..LastIteration) BODY; |
2519 | // <Final counter/linear vars updates>; |
2520 | // } |
2521 | // |
2522 | if (isOpenMPDistributeDirective(S.getDirectiveKind()) || |
2523 | isOpenMPWorksharingDirective(S.getDirectiveKind()) || |
2524 | isOpenMPTaskLoopDirective(S.getDirectiveKind())) { |
2525 | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable())); |
2526 | (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable())); |
2527 | } |
2528 | |
2529 | // Emit: if (PreCond) - begin. |
2530 | // If the condition constant folds and can be elided, avoid emitting the |
2531 | // whole loop. |
2532 | bool CondConstant; |
2533 | llvm::BasicBlock *ContBlock = nullptr; |
2534 | if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
2535 | if (!CondConstant) |
2536 | return; |
2537 | } else { |
2538 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then" ); |
2539 | ContBlock = CGF.createBasicBlock("simd.if.end" ); |
2540 | emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, |
2541 | CGF.getProfileCount(&S)); |
2542 | CGF.EmitBlock(ThenBlock); |
2543 | CGF.incrementProfileCounter(&S); |
2544 | } |
2545 | |
2546 | // Emit the loop iteration variable. |
2547 | const Expr *IVExpr = S.getIterationVariable(); |
2548 | const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); |
2549 | CGF.EmitVarDecl(*IVDecl); |
2550 | CGF.EmitIgnoredExpr(S.getInit()); |
2551 | |
2552 | // Emit the iterations count variable. |
2553 | // If it is not a variable, Sema decided to calculate iterations count on |
2554 | // each iteration (e.g., it is foldable into a constant). |
2555 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
2556 | CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
2557 | // Emit calculation of the iterations count. |
2558 | CGF.EmitIgnoredExpr(S.getCalcLastIteration()); |
2559 | } |
2560 | |
2561 | emitAlignedClause(CGF, S); |
2562 | (void)CGF.EmitOMPLinearClauseInit(S); |
2563 | { |
2564 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2565 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2566 | CGF.EmitOMPLinearClause(S, LoopScope); |
2567 | CGF.EmitOMPPrivateClause(S, LoopScope); |
2568 | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
2569 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2570 | CGF, S, CGF.EmitLValue(S.getIterationVariable())); |
2571 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
2572 | (void)LoopScope.Privatize(); |
2573 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
2574 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
2575 | |
2576 | emitCommonSimdLoop( |
2577 | CGF, S, |
2578 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2579 | CGF.EmitOMPSimdInit(S); |
2580 | }, |
2581 | [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2582 | CGF.EmitOMPInnerLoop( |
2583 | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
2584 | [&S](CodeGenFunction &CGF) { |
2585 | emitOMPLoopBodyWithStopPoint(CGF, S, |
2586 | CodeGenFunction::JumpDest()); |
2587 | }, |
2588 | [](CodeGenFunction &) {}); |
2589 | }); |
2590 | CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2591 | // Emit final copy of the lastprivate variables at the end of loops. |
2592 | if (HasLastprivateClause) |
2593 | CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); |
2594 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); |
2595 | emitPostUpdateForReductionClause(CGF, S, |
2596 | [](CodeGenFunction &) { return nullptr; }); |
2597 | LoopScope.restoreMap(); |
2598 | CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2599 | } |
2600 | // Emit: if (PreCond) - end. |
2601 | if (ContBlock) { |
2602 | CGF.EmitBranch(ContBlock); |
2603 | CGF.EmitBlock(ContBlock, true); |
2604 | } |
2605 | } |
2606 | |
2607 | static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { |
2608 | // Check for unsupported clauses |
2609 | for (OMPClause *C : S.clauses()) { |
2610 | // Currently only order, simdlen and safelen clauses are supported |
2611 | if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) || |
2612 | isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C))) |
2613 | return false; |
2614 | } |
2615 | |
2616 | // Check if we have a statement with the ordered directive. |
2617 | // Visit the statement hierarchy to find a compound statement |
2618 | // with a ordered directive in it. |
2619 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) { |
2620 | if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { |
2621 | for (const Stmt *SubStmt : SyntacticalLoop->children()) { |
2622 | if (!SubStmt) |
2623 | continue; |
2624 | if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) { |
2625 | for (const Stmt *CSSubStmt : CS->children()) { |
2626 | if (!CSSubStmt) |
2627 | continue; |
2628 | if (isa<OMPOrderedDirective>(CSSubStmt)) { |
2629 | return false; |
2630 | } |
2631 | } |
2632 | } |
2633 | } |
2634 | } |
2635 | } |
2636 | return true; |
2637 | } |
/// Collect the pointer values from the 'aligned' clauses of \p S together with
/// their alignments (explicit, or the target default when unspecified), for
/// use by the OpenMPIRBuilder-based simd codegen.
static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
    // Clause-level alignment; zero means "not specified".
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlists()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined default
        // alignments for SIMD instructions on the target platforms are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2" );
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
    }
  }
  return AlignedVars;
}
2668 | |
/// Emit code for '#pragma omp simd', using the OpenMPIRBuilder path when it is
/// enabled and supports this directive, and the classic codegen otherwise.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len =
              this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for if clause. Support for if clause is done earlier.
        OMPBuilder.applySimd(CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  // Classic codegen path.
  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
  OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
2740 | |
/// Emit code for '#pragma omp tile' by emitting the already-transformed
/// (de-sugared) loop nest attached to the directive.
void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}
2746 | |
/// Emit code for '#pragma omp unroll'. With the OpenMPIRBuilder the loop is
/// consumed and unrolled via the builder API; otherwise unroll metadata is set
/// for the next emitted loop.
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    // A parent loop-associated construct still expects a loop on the stack.
    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      // Factor 0 means "let the builder choose the unroll factor".
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid" );
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set" );
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid" );
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
2808 | |
/// Emit the outer dispatch loop for worksharing/distribute constructs that
/// need one: repeatedly obtain a chunk (statically or via the runtime for
/// dynamic/ordered schedules), run the inner loop over it, and advance.
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end" );

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond" );
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    // Ask the runtime for the next chunk; it also reports whether one exists.
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup" );

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body" );
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc" );
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without ordered clause.
        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // when 'distribute' is not combined with a 'for':
        // while (idx <= UB) { BODY; ++idx; }
        // when 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
2924 | |
2925 | void CodeGenFunction::EmitOMPForOuterLoop( |
2926 | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
2927 | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
2928 | const OMPLoopArguments &LoopArgs, |
2929 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2930 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2931 | |
2932 | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
2933 | const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule); |
2934 | |
2935 | assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, |
2936 | LoopArgs.Chunk != nullptr)) && |
2937 | "static non-chunked schedule does not need outer loop" ); |
2938 | |
2939 | // Emit outer loop. |
2940 | // |
2941 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2942 | // When schedule(dynamic,chunk_size) is specified, the iterations are |
2943 | // distributed to threads in the team in chunks as the threads request them. |
2944 | // Each thread executes a chunk of iterations, then requests another chunk, |
2945 | // until no chunks remain to be distributed. Each chunk contains chunk_size |
2946 | // iterations, except for the last chunk to be distributed, which may have |
2947 | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
2948 | // |
2949 | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
2950 | // to threads in the team in chunks as the executing threads request them. |
2951 | // Each thread executes a chunk of iterations, then requests another chunk, |
2952 | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
2953 | // each chunk is proportional to the number of unassigned iterations divided |
2954 | // by the number of threads in the team, decreasing to 1. For a chunk_size |
2955 | // with value k (greater than 1), the size of each chunk is determined in the |
2956 | // same way, with the restriction that the chunks do not contain fewer than k |
2957 | // iterations (except for the last chunk to be assigned, which may have fewer |
2958 | // than k iterations). |
2959 | // |
2960 | // When schedule(auto) is specified, the decision regarding scheduling is |
2961 | // delegated to the compiler and/or runtime system. The programmer gives the |
2962 | // implementation the freedom to choose any possible mapping of iterations to |
2963 | // threads in the team. |
2964 | // |
2965 | // When schedule(runtime) is specified, the decision regarding scheduling is |
2966 | // deferred until run time, and the schedule and chunk size are taken from the |
2967 | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
2968 | // implementation defined |
2969 | // |
2970 | // while(__kmpc_dispatch_next(&LB, &UB)) { |
2971 | // idx = LB; |
2972 | // while (idx <= UB) { BODY; ++idx; |
2973 | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
2974 | // } // inner loop |
2975 | // } |
2976 | // |
2977 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2978 | // When schedule(static, chunk_size) is specified, iterations are divided into |
2979 | // chunks of size chunk_size, and the chunks are assigned to the threads in |
2980 | // the team in a round-robin fashion in the order of the thread number. |
2981 | // |
2982 | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
2983 | // while (idx <= UB) { BODY; ++idx; } // inner loop |
2984 | // LB = LB + ST; |
2985 | // UB = UB + ST; |
2986 | // } |
2987 | // |
2988 | |
2989 | const Expr *IVExpr = S.getIterationVariable(); |
2990 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
2991 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2992 | |
2993 | if (DynamicOrOrdered) { |
2994 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
2995 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
2996 | llvm::Value *LBVal = DispatchBounds.first; |
2997 | llvm::Value *UBVal = DispatchBounds.second; |
2998 | CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, |
2999 | LoopArgs.Chunk}; |
3000 | RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, |
3001 | IVSigned, Ordered, DipatchRTInputValues); |
3002 | } else { |
3003 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3004 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
3005 | LoopArgs.ST, LoopArgs.Chunk); |
3006 | RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), |
3007 | ScheduleKind, StaticInit); |
3008 | } |
3009 | |
3010 | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
3011 | const unsigned IVSize, |
3012 | const bool IVSigned) { |
3013 | if (Ordered) { |
3014 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
3015 | IVSigned); |
3016 | } |
3017 | }; |
3018 | |
3019 | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
3020 | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
3021 | OuterLoopArgs.IncExpr = S.getInc(); |
3022 | OuterLoopArgs.Init = S.getInit(); |
3023 | OuterLoopArgs.Cond = S.getCond(); |
3024 | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
3025 | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
3026 | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, |
3027 | emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
3028 | } |
3029 | |
3030 | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
3031 | const unsigned IVSize, const bool IVSigned) {} |
3032 | |
3033 | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
3034 | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
3035 | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
3036 | const CodeGenLoopTy &CodeGenLoopContent) { |
3037 | |
3038 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3039 | |
3040 | // Emit outer loop. |
3041 | // Same behavior as a OMPForOuterLoop, except that schedule cannot be |
3042 | // dynamic |
3043 | // |
3044 | |
3045 | const Expr *IVExpr = S.getIterationVariable(); |
3046 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
3047 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3048 | |
3049 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3050 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
3051 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
3052 | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); |
3053 | |
3054 | // for combined 'distribute' and 'for' the increment expression of distribute |
3055 | // is stored in DistInc. For 'distribute' alone, it is in Inc. |
3056 | Expr *IncExpr; |
3057 | if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) |
3058 | IncExpr = S.getDistInc(); |
3059 | else |
3060 | IncExpr = S.getInc(); |
3061 | |
3062 | // this routine is shared by 'omp distribute parallel for' and |
3063 | // 'omp distribute': select the right EUB expression depending on the |
3064 | // directive |
3065 | OMPLoopArguments OuterLoopArgs; |
3066 | OuterLoopArgs.LB = LoopArgs.LB; |
3067 | OuterLoopArgs.UB = LoopArgs.UB; |
3068 | OuterLoopArgs.ST = LoopArgs.ST; |
3069 | OuterLoopArgs.IL = LoopArgs.IL; |
3070 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
3071 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3072 | ? S.getCombinedEnsureUpperBound() |
3073 | : S.getEnsureUpperBound(); |
3074 | OuterLoopArgs.IncExpr = IncExpr; |
3075 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3076 | ? S.getCombinedInit() |
3077 | : S.getInit(); |
3078 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3079 | ? S.getCombinedCond() |
3080 | : S.getCond(); |
3081 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3082 | ? S.getCombinedNextLowerBound() |
3083 | : S.getNextLowerBound(); |
3084 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3085 | ? S.getCombinedNextUpperBound() |
3086 | : S.getNextUpperBound(); |
3087 | |
3088 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
3089 | LoopScope, OuterLoopArgs, CodeGenLoopContent, |
3090 | emitEmptyOrdered); |
3091 | } |
3092 | |
/// Emits the inner ('for') loop lower/upper bound helper variables for a
/// combined 'distribute parallel for' and initializes them from the chunk
/// bounds computed by the enclosing 'distribute' schedule.
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  // Materialize the helper variables for the inner worksharing loop bounds.
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the
  // the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  // The previous-schedule bounds may have a different integer type than the
  // inner iteration variable, so convert before storing.
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}
3128 | |
3129 | /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then |
3130 | /// we need to use the LB and UB expressions generated by the worksharing |
3131 | /// code generation support, whereas in non combined situations we would |
3132 | /// just emit 0 and the LastIteration expression |
3133 | /// This function is necessary due to the difference of the LB and UB |
3134 | /// types for the RT emission routines for 'for_static_init' and |
3135 | /// 'for_dispatch_init' |
3136 | static std::pair<llvm::Value *, llvm::Value *> |
3137 | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
3138 | const OMPExecutableDirective &S, |
3139 | Address LB, Address UB) { |
3140 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); |
3141 | const Expr *IVExpr = LS.getIterationVariable(); |
3142 | // when implementing a dynamic schedule for a 'for' combined with a |
3143 | // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop |
3144 | // is not normalized as each team only executes its own assigned |
3145 | // distribute chunk |
3146 | QualType IteratorTy = IVExpr->getType(); |
3147 | llvm::Value *LBVal = |
3148 | CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
3149 | llvm::Value *UBVal = |
3150 | CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); |
3151 | return {LBVal, UBVal}; |
3152 | } |
3153 | |
3154 | static void emitDistributeParallelForDistributeInnerBoundParams( |
3155 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3156 | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
3157 | const auto &Dir = cast<OMPLoopDirective>(S); |
3158 | LValue LB = |
3159 | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); |
3160 | llvm::Value *LBCast = |
3161 | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), |
3162 | CGF.SizeTy, /*isSigned=*/false); |
3163 | CapturedVars.push_back(LBCast); |
3164 | LValue UB = |
3165 | CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); |
3166 | |
3167 | llvm::Value *UBCast = |
3168 | CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), |
3169 | CGF.SizeTy, /*isSigned=*/false); |
3170 | CapturedVars.push_back(UBCast); |
3171 | } |
3172 | |
/// Emits the 'parallel for[ simd]' part of a combined
/// 'distribute parallel for[ simd]' directive: outlines a parallel region
/// whose body runs the worksharing loop over the enclosing distribute chunk.
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    // '#pragma omp cancel for' may appear only in the non-simd combined
    // variants; probe each one that can carry a cancel region.
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    // The inner loop iterates over the chunk assigned by 'distribute', so its
    // bounds come from the previous (distribute) schedule.
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
3203 | |
3204 | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
3205 | const OMPDistributeParallelForDirective &S) { |
3206 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3207 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
3208 | S.getDistInc()); |
3209 | }; |
3210 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3211 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3212 | } |
3213 | |
3214 | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
3215 | const OMPDistributeParallelForSimdDirective &S) { |
3216 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3217 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
3218 | S.getDistInc()); |
3219 | }; |
3220 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3221 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3222 | } |
3223 | |
3224 | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
3225 | const OMPDistributeSimdDirective &S) { |
3226 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3227 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
3228 | }; |
3229 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3230 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3231 | } |
3232 | |
/// Emits the device-side outlined function for a 'target simd' directive and
/// registers it as an offload entry under \p ParentName.
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the 'simd' region as the body of the target region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
3246 | |
3247 | void CodeGenFunction::EmitOMPTargetSimdDirective( |
3248 | const OMPTargetSimdDirective &S) { |
3249 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3250 | emitOMPSimdRegion(CGF, S, Action); |
3251 | }; |
3252 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
3253 | } |
3254 | |
namespace {
/// Bundles an OpenMP 'schedule' clause kind with its (up to two) modifiers,
/// e.g. schedule(nonmonotonic, simd: dynamic).
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;   // schedule kind: static, dynamic, ...
  OpenMPScheduleClauseModifier M1; // first modifier, if present
  OpenMPScheduleClauseModifier M2; // second modifier, if present
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
3266 | |
/// Emits code for a worksharing loop ('for', 'for simd' and their combined
/// variants). \p EUB is the "ensure upper bound" expression passed to the
/// outer-loop emission, \p CodeGenLoopBounds produces the LB/UB helper
/// variables and \p CGDispatchBounds the bounds for a dispatch-schedule init.
/// Returns true if the directive has a lastprivate clause.
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Set only when the precondition holds (see below); the early return on a
  // constant-false precondition never reads it.
  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    // An 'ordered' clause with a parameter introduces doacross dependences;
    // without a parameter it requests ordered-iteration execution.
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races on
        // initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        // A compile-time chunk of 1 enables the StaticChunkedOne fast path
        // below.
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      // Static non-chunked (or chunk-one combined) schedules without 'ordered'
      // need no outer dispatch loop: a single static_init/static_fini pair
      // around the inner loop suffices.
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is divided
              // into chunks that are approximately equal in size, and at most
              // one chunk is distributed to each thread. Note that the size of
              // the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(CGF),
                  LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
                  StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(
            LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
            IL.getAddress(*this), Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
3507 | |
3508 | /// The following two functions generate expressions for the loop lower |
3509 | /// and upper bounds in case of static and dynamic (dispatch) schedule |
3510 | /// of the associated 'for' or 'distribute' loop. |
3511 | static std::pair<LValue, LValue> |
3512 | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3513 | const auto &LS = cast<OMPLoopDirective>(S); |
3514 | LValue LB = |
3515 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); |
3516 | LValue UB = |
3517 | EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); |
3518 | return {LB, UB}; |
3519 | } |
3520 | |
3521 | /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not |
3522 | /// consider the lower and upper bound expressions generated by the |
3523 | /// worksharing loop support, but we use 0 and the iteration space size as |
3524 | /// constants |
3525 | static std::pair<llvm::Value *, llvm::Value *> |
3526 | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3527 | Address LB, Address UB) { |
3528 | const auto &LS = cast<OMPLoopDirective>(S); |
3529 | const Expr *IVExpr = LS.getIterationVariable(); |
3530 | const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); |
3531 | llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); |
3532 | llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); |
3533 | return {LBVal, UBVal}; |
3534 | } |
3535 | |
3536 | /// Emits internal temp array declarations for the directive with inscan |
3537 | /// reductions. |
3538 | /// The code is the following: |
3539 | /// \code |
3540 | /// size num_iters = <num_iters>; |
3541 | /// <type> buffer[num_iters]; |
3542 | /// \endcode |
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // Temp buffers are sized by the trip count, widened to size_t.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Gather, across all inscan reduction clauses, the shared variables, their
  // private copies, the reduction operations and the per-variable temp arrays.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variables.
    // ReductionCodeGen is required to emit correctly the code for array
    // reductions.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      // Bind the temp buffer's VLA size expression (an OpaqueValueExpr) to the
      // computed iteration count before emitting the buffer declaration.
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}
3589 | |
3590 | /// Copies final inscan reductions values to the original variables. |
3591 | /// The code is the following: |
3592 | /// \code |
3593 | /// <orig_var> = buffer[num_iters-1]; |
3594 | /// \endcode |
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  // Trip count of the scanned loop; the temp buffers hold one element per
  // iteration, so the final combined value lives at index num_iters - 1.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Gather, across all inscan reduction clauses, the original (shared)
  // variables, the copy-helper decls/ops, and the buffer-element expressions.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected." );
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    // Bind the opaque index of buffer[idx] to num_iters - 1 while emitting
    // the lvalue for the buffer element.
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    // <orig_var> = buffer[num_iters - 1], using the clause's copy operation.
    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
                    SrcLVal.getAddress(CGF),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
  }
}
3640 | |
3641 | /// Emits the code for the directive with inscan reductions. |
3642 | /// The code is the following: |
3643 | /// \code |
3644 | /// #pragma omp ... |
3645 | /// for (i: 0..<num_iters>) { |
3646 | /// <input phase>; |
3647 | /// buffer[i] = red; |
3648 | /// } |
3649 | /// #pragma omp master // in parallel region |
3650 | /// for (int k = 0; k != ceil(log2(num_iters)); ++k) |
///     for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
///       buffer[cnt] op= buffer[cnt-pow(2,k)];
3653 | /// #pragma omp barrier // in parallel region |
3654 | /// #pragma omp ... |
3655 | /// for (0..<num_iters>) { |
3656 | /// red = InclusiveScan ? buffer[i] : buffer[i-1]; |
3657 | /// <scan phase>; |
3658 | /// } |
3659 | /// \endcode |
static void emitScanBasedDirective(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
  // Trip count == number of elements in each per-variable scan buffer.
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  // Collect the helper expressions from all inscan reduction clauses.
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected." );
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  {
    // Emit loop with input phase:
    // #pragma omp ...
    // for (i: 0..<num_iters>) {
    //   <input phase>;
    //   buffer[i] = red;
    // }
    CGF.OMPFirstScanLoop = true;
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    FirstGen(CGF);
  }
  // #pragma omp barrier // in parallel region
  auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
                    &ReductionOps,
                    &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Emit prefix reduction:
    // #pragma omp master // in parallel region
    // for (int k = 0; k <= ceil(log2(n)); ++k)
    llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
    llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body" );
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit" );
    // Compute ceil(log2(num_iters)): the number of combining passes required
    // until every buffer element holds its prefix-combined value.
    llvm::Function *F =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
    llvm::Value *Arg =
        CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
    llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
    F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
    LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
    LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
    llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
        OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
    auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
    CGF.EmitBlock(LoopBB);
    // Outer-loop PHIs: pass counter k and the current stride pow2k == 2^k.
    auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
    // size pow2k = 1;
    auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
    Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
    // for (size i = n - 1; i >= 2 ^ k; --i)
    //   tmp[i] op= tmp[i-pow2k];
    llvm::BasicBlock *InnerLoopBB =
        CGF.createBasicBlock("omp.inner.log.scan.body" );
    llvm::BasicBlock *InnerExitBB =
        CGF.createBasicBlock("omp.inner.log.scan.exit" );
    llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerLoopBB);
    auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
    IVal->addIncoming(NMin1, LoopBB);
    {
      // Privatize the reduction LHS/RHS helper decls so the generic reduction
      // emitter combines buffer[i] (LHS) with buffer[i - pow2k] (RHS) in
      // place.
      CodeGenFunction::OMPPrivateScope PrivScope(CGF);
      auto *ILHS = LHSs.begin();
      auto *IRHS = RHSs.begin();
      for (const Expr *CopyArrayElem : CopyArrayElems) {
        const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
        Address LHSAddr = Address::invalid();
        {
          // buffer[i]: bind the opaque subscript index to the current i.
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(IVal));
          LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(LHSVD, LHSAddr);
        Address RHSAddr = Address::invalid();
        {
          // buffer[i - pow2k]: same element expression, shifted index.
          llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
          CodeGenFunction::OpaqueValueMapping IdxMapping(
              CGF,
              cast<OpaqueValueExpr>(
                  cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
              RValue::get(OffsetIVal));
          RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
        }
        PrivScope.addPrivate(RHSVD, RHSAddr);
        ++ILHS;
        ++IRHS;
      }
      PrivScope.Privatize();
      CGF.CGM.getOpenMPRuntime().emitReduction(
          CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
    }
    // --i; continue the inner loop while i >= pow2k.
    llvm::Value *NextIVal =
        CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
    IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
    CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
    CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
    CGF.EmitBlock(InnerExitBB);
    // ++k; continue the outer loop while k != ceil(log2(n)).
    llvm::Value *Next =
        CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
    Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
    // pow2k <<= 1;
    llvm::Value *NextPow2K =
        CGF.Builder.CreateShl(Pow2K, 1, "" , /*HasNUW=*/true);
    Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
    llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
    CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
    auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
    CGF.EmitBlock(ExitBB);
  };
  // Inside a combined parallel construct the reduction pass must run on a
  // single thread (master region) followed by a barrier; otherwise it is
  // emitted inline on the current thread.
  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
    CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
    CGF.CGM.getOpenMPRuntime().emitBarrierCall(
        CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
        /*ForceSimpleCall=*/true);
  } else {
    RegionCodeGenTy RCG(CodeGen);
    RCG(CGF);
  }

  // Emit the second (scan-phase) loop, which reads combined values back from
  // the buffers.
  CGF.OMPFirstScanLoop = false;
  SecondGen(CGF);
}
3800 | |
/// Emits a worksharing loop for \p S; returns true if the directive has
/// lastprivate clauses that require a finalization barrier. Inscan reductions
/// take the special two-pass scan-based codegen path.
static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                     const OMPLoopDirective &S,
                                     bool HasCancel) {
  bool HasLastprivates;
  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                   [](const OMPReductionClause *C) {
                     return C->getModifier() == OMPC_REDUCTION_inscan;
                   })) {
    // Computes the loop trip count; run in its own local-decl scope so any
    // pre-init declarations do not leak into the enclosing function.
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    // First pass (input phase): fill the scan buffers.
    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                       emitForLoopBounds,
                                       emitDispatchForLoopBounds);
      // Emit an implicit barrier at the end.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                 OMPD_for);
    };
    // Second pass (scan phase): this one determines the lastprivate result.
    const auto &&SecondGen = [&S, HasCancel,
                              &HasLastprivates](CodeGenFunction &CGF) {
      CodeGenFunction::OMPCancelStackRAII CancelRegion(
          CGF, S.getDirectiveKind(), HasCancel);
      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                   emitForLoopBounds,
                                                   emitDispatchForLoopBounds);
    };
    // Scan buffer declarations and finalization are emitted here only for
    // standalone (non-combined-parallel) directives.
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
  } else {
    // Regular (non-inscan) worksharing loop.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  }
  return HasLastprivates;
}
3846 | |
3847 | static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { |
3848 | if (S.hasCancel()) |
3849 | return false; |
3850 | for (OMPClause *C : S.clauses()) { |
3851 | if (isa<OMPNowaitClause>(C)) |
3852 | continue; |
3853 | |
3854 | if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { |
3855 | if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3856 | return false; |
3857 | if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3858 | return false; |
3859 | switch (SC->getScheduleKind()) { |
3860 | case OMPC_SCHEDULE_auto: |
3861 | case OMPC_SCHEDULE_dynamic: |
3862 | case OMPC_SCHEDULE_runtime: |
3863 | case OMPC_SCHEDULE_guided: |
3864 | case OMPC_SCHEDULE_static: |
3865 | continue; |
3866 | case OMPC_SCHEDULE_unknown: |
3867 | return false; |
3868 | } |
3869 | } |
3870 | |
3871 | return false; |
3872 | } |
3873 | |
3874 | return true; |
3875 | } |
3876 | |
3877 | static llvm::omp::ScheduleKind |
3878 | convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { |
3879 | switch (ScheduleClauseKind) { |
3880 | case OMPC_SCHEDULE_unknown: |
3881 | return llvm::omp::OMP_SCHEDULE_Default; |
3882 | case OMPC_SCHEDULE_auto: |
3883 | return llvm::omp::OMP_SCHEDULE_Auto; |
3884 | case OMPC_SCHEDULE_dynamic: |
3885 | return llvm::omp::OMP_SCHEDULE_Dynamic; |
3886 | case OMPC_SCHEDULE_guided: |
3887 | return llvm::omp::OMP_SCHEDULE_Guided; |
3888 | case OMPC_SCHEDULE_runtime: |
3889 | return llvm::omp::OMP_SCHEDULE_Runtime; |
3890 | case OMPC_SCHEDULE_static: |
3891 | return llvm::omp::OMP_SCHEDULE_Static; |
3892 | } |
3893 | llvm_unreachable("Unhandled schedule kind" ); |
3894 | } |
3895 | |
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  // Prefer the OpenMPIRBuilder lowering when it is enabled and can handle
  // every clause on this directive.
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      // Map the 'schedule' clause (if any) onto the IR builder's scheme.
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    // Classic codegen path.
    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end (the IR builder path handled the
    // barrier itself via NeedsBarrier).
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3950 | |
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  // 'for simd' has no cancellation point, hence HasCancel=false.
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end; needed even with 'nowait' when
  // lastprivates must be synchronized.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
3970 | |
3971 | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
3972 | const Twine &Name, |
3973 | llvm::Value *Init = nullptr) { |
3974 | LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); |
3975 | if (Init) |
3976 | CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true); |
3977 | return LVal; |
3978 | } |
3979 | |
/// Emits an OpenMP 'sections' region: the sections are lowered to a
/// statically-scheduled worksharing loop whose body switches on the iteration
/// variable to select the section to execute.
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  // A CompoundStmt holds one sub-statement per section; a non-compound
  // associated statement is treated as a single section (CS == nullptr).
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits: lower/upper bound, stride, is-last flag.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb." ,
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub." , GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st." ,
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il." ,
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv." );
    // Synthesize AST nodes (opaque refs, condition IV <= UB, increment ++IV)
    // so the generic inner-loop emitter can drive the section dispatch.
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      //   ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit" );
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case" );
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        // Single non-compound statement: the only section is case 0.
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case" );
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of lastprivate
      // variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
        LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  // Cancellation is only possible on the standalone and combined-parallel
  // 'sections' directives.
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
4125 | |
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  // OpenMPIRBuilder path: build one body-gen callback per section and let the
  // builder lower the whole construct.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      // One callback per '#pragma omp section' sub-statement.
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section" );
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      // A single non-compound statement forms the only section.
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section" );
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  // Classic path: EmitSections lowers the construct to a worksharing loop.
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}
4194 | |
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  // OpenMPIRBuilder path: hand the section body off to createSection.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section" );
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  // Classic path: the enclosing 'sections' codegen (EmitSections) does the
  // dispatch, so just emit the associated statement here.
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}
4221 | |
4222 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
4223 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
4224 | llvm::SmallVector<const Expr *, 8> DestExprs; |
4225 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
4226 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
4227 | // Check if there are any 'copyprivate' clauses associated with this |
4228 | // 'single' construct. |
4229 | // Build a list of copyprivate variables along with helper expressions |
4230 | // (<source>, <destination>, <destination>=<source> expressions) |
4231 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
4232 | CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); |
4233 | DestExprs.append(C->destination_exprs().begin(), |
4234 | C->destination_exprs().end()); |
4235 | SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); |
4236 | AssignmentOps.append(C->assignment_ops().begin(), |
4237 | C->assignment_ops().end()); |
4238 | } |
4239 | // Emit code for 'single' region along with 'copyprivate' clauses |
4240 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4241 | Action.Enter(CGF); |
4242 | OMPPrivateScope SingleScope(CGF); |
4243 | (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); |
4244 | CGF.EmitOMPPrivateClause(S, SingleScope); |
4245 | (void)SingleScope.Privatize(); |
4246 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4247 | }; |
4248 | { |
4249 | auto LPCRegion = |
4250 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4251 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4252 | CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), |
4253 | CopyprivateVars, DestExprs, |
4254 | SrcExprs, AssignmentOps); |
4255 | } |
4256 | // Emit an implicit barrier at the end (to avoid data race on firstprivate |
4257 | // init or if no 'nowait' clause was specified and no 'copyprivate' clause). |
4258 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { |
4259 | CGM.getOpenMPRuntime().emitBarrierCall( |
4260 | *this, S.getBeginLoc(), |
4261 | S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); |
4262 | } |
4263 | // Check for outer lastprivate conditional update. |
4264 | checkForLastprivateConditionalUpdate(*this, S); |
4265 | } |
4266 | |
4267 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4268 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4269 | Action.Enter(CGF); |
4270 | CGF.EmitStmt(S.getRawStmt()); |
4271 | }; |
4272 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); |
4273 | } |
4274 | |
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  // OpenMPIRBuilder path: lower via createMaster with region callbacks.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master" );
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  // Classic path.
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}
4302 | |
4303 | static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4304 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4305 | Action.Enter(CGF); |
4306 | CGF.EmitStmt(S.getRawStmt()); |
4307 | }; |
4308 | Expr *Filter = nullptr; |
4309 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4310 | Filter = FilterClause->getThreadID(); |
4311 | CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), |
4312 | Filter); |
4313 | } |
4314 | |
4315 | void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { |
4316 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4317 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4318 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4319 | |
4320 | const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); |
4321 | const Expr *Filter = nullptr; |
4322 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4323 | Filter = FilterClause->getThreadID(); |
4324 | llvm::Value *FilterVal = Filter |
4325 | ? EmitScalarExpr(Filter, CGM.Int32Ty) |
4326 | : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); |
4327 | |
4328 | auto FiniCB = [this](InsertPointTy IP) { |
4329 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4330 | }; |
4331 | |
4332 | auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, |
4333 | InsertPointTy CodeGenIP) { |
4334 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4335 | *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked" ); |
4336 | }; |
4337 | |
4338 | LexicalScope Scope(*this, S.getSourceRange()); |
4339 | EmitStopPoint(&S); |
4340 | Builder.restoreIP( |
4341 | OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); |
4342 | |
4343 | return; |
4344 | } |
4345 | LexicalScope Scope(*this, S.getSourceRange()); |
4346 | EmitStopPoint(&S); |
4347 | emitMasked(*this, S); |
4348 | } |
4349 | |
4350 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
4351 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4352 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4353 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4354 | |
4355 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
4356 | const Expr *Hint = nullptr; |
4357 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4358 | Hint = HintClause->getHint(); |
4359 | |
4360 | // TODO: This is slightly different from what's currently being done in |
4361 | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
4362 | // about typing is final. |
4363 | llvm::Value *HintInst = nullptr; |
4364 | if (Hint) |
4365 | HintInst = |
4366 | Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); |
4367 | |
4368 | auto FiniCB = [this](InsertPointTy IP) { |
4369 | OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
4370 | }; |
4371 | |
4372 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
4373 | InsertPointTy CodeGenIP) { |
4374 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4375 | *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical" ); |
4376 | }; |
4377 | |
4378 | LexicalScope Scope(*this, S.getSourceRange()); |
4379 | EmitStopPoint(&S); |
4380 | Builder.restoreIP(OMPBuilder.createCritical( |
4381 | Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), |
4382 | HintInst)); |
4383 | |
4384 | return; |
4385 | } |
4386 | |
4387 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4388 | Action.Enter(CGF); |
4389 | CGF.EmitStmt(S.getAssociatedStmt()); |
4390 | }; |
4391 | const Expr *Hint = nullptr; |
4392 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4393 | Hint = HintClause->getHint(); |
4394 | LexicalScope Scope(*this, S.getSourceRange()); |
4395 | EmitStopPoint(&S); |
4396 | CGM.getOpenMPRuntime().emitCriticalRegion(*this, |
4397 | S.getDirectiveName().getAsString(), |
4398 | CodeGen, S.getBeginLoc(), Hint); |
4399 | } |
4400 | |
4401 | void CodeGenFunction::EmitOMPParallelForDirective( |
4402 | const OMPParallelForDirective &S) { |
4403 | // Emit directive as a combined directive that consists of two implicit |
4404 | // directives: 'parallel' with 'for' directive. |
4405 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4406 | Action.Enter(CGF); |
4407 | emitOMPCopyinClause(CGF, S); |
4408 | (void)emitWorksharingDirective(CGF, S, S.hasCancel()); |
4409 | }; |
4410 | { |
4411 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4412 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4413 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4414 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4415 | OMPLoopScope LoopScope(CGF, S); |
4416 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4417 | }; |
4418 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4419 | [](const OMPReductionClause *C) { |
4420 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4421 | }); |
4422 | if (IsInscan) |
4423 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4424 | auto LPCRegion = |
4425 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4426 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
4427 | emitEmptyBoundParameters); |
4428 | if (IsInscan) |
4429 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4430 | } |
4431 | // Check for outer lastprivate conditional update. |
4432 | checkForLastprivateConditionalUpdate(*this, S); |
4433 | } |
4434 | |
4435 | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
4436 | const OMPParallelForSimdDirective &S) { |
4437 | // Emit directive as a combined directive that consists of two implicit |
4438 | // directives: 'parallel' with 'for' directive. |
4439 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4440 | Action.Enter(CGF); |
4441 | emitOMPCopyinClause(CGF, S); |
4442 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4443 | }; |
4444 | { |
4445 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4446 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4447 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4448 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4449 | OMPLoopScope LoopScope(CGF, S); |
4450 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4451 | }; |
4452 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4453 | [](const OMPReductionClause *C) { |
4454 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4455 | }); |
4456 | if (IsInscan) |
4457 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4458 | auto LPCRegion = |
4459 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4460 | emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, |
4461 | emitEmptyBoundParameters); |
4462 | if (IsInscan) |
4463 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4464 | } |
4465 | // Check for outer lastprivate conditional update. |
4466 | checkForLastprivateConditionalUpdate(*this, S); |
4467 | } |
4468 | |
4469 | void CodeGenFunction::EmitOMPParallelMasterDirective( |
4470 | const OMPParallelMasterDirective &S) { |
4471 | // Emit directive as a combined directive that consists of two implicit |
4472 | // directives: 'parallel' with 'master' directive. |
4473 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4474 | Action.Enter(CGF); |
4475 | OMPPrivateScope PrivateScope(CGF); |
4476 | emitOMPCopyinClause(CGF, S); |
4477 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4478 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4479 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4480 | (void)PrivateScope.Privatize(); |
4481 | emitMaster(CGF, S); |
4482 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4483 | }; |
4484 | { |
4485 | auto LPCRegion = |
4486 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4487 | emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, |
4488 | emitEmptyBoundParameters); |
4489 | emitPostUpdateForReductionClause(*this, S, |
4490 | [](CodeGenFunction &) { return nullptr; }); |
4491 | } |
4492 | // Check for outer lastprivate conditional update. |
4493 | checkForLastprivateConditionalUpdate(*this, S); |
4494 | } |
4495 | |
4496 | void CodeGenFunction::EmitOMPParallelMaskedDirective( |
4497 | const OMPParallelMaskedDirective &S) { |
4498 | // Emit directive as a combined directive that consists of two implicit |
4499 | // directives: 'parallel' with 'masked' directive. |
4500 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4501 | Action.Enter(CGF); |
4502 | OMPPrivateScope PrivateScope(CGF); |
4503 | emitOMPCopyinClause(CGF, S); |
4504 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4505 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4506 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4507 | (void)PrivateScope.Privatize(); |
4508 | emitMasked(CGF, S); |
4509 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4510 | }; |
4511 | { |
4512 | auto LPCRegion = |
4513 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4514 | emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, |
4515 | emitEmptyBoundParameters); |
4516 | emitPostUpdateForReductionClause(*this, S, |
4517 | [](CodeGenFunction &) { return nullptr; }); |
4518 | } |
4519 | // Check for outer lastprivate conditional update. |
4520 | checkForLastprivateConditionalUpdate(*this, S); |
4521 | } |
4522 | |
4523 | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
4524 | const OMPParallelSectionsDirective &S) { |
4525 | // Emit directive as a combined directive that consists of two implicit |
4526 | // directives: 'parallel' with 'sections' directive. |
4527 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4528 | Action.Enter(CGF); |
4529 | emitOMPCopyinClause(CGF, S); |
4530 | CGF.EmitSections(S); |
4531 | }; |
4532 | { |
4533 | auto LPCRegion = |
4534 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4535 | emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, |
4536 | emitEmptyBoundParameters); |
4537 | } |
4538 | // Check for outer lastprivate conditional update. |
4539 | checkForLastprivateConditionalUpdate(*this, S); |
4540 | } |
4541 | |
4542 | namespace { |
4543 | /// Get the list of variables declared in the context of the untied tasks. |
4544 | class CheckVarsEscapingUntiedTaskDeclContext final |
4545 | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
4546 | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
4547 | |
4548 | public: |
4549 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
4550 | virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
4551 | void VisitDeclStmt(const DeclStmt *S) { |
4552 | if (!S) |
4553 | return; |
4554 | // Need to privatize only local vars, static locals can be processed as is. |
4555 | for (const Decl *D : S->decls()) { |
4556 | if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) |
4557 | if (VD->hasLocalStorage()) |
4558 | PrivateDecls.push_back(VD); |
4559 | } |
4560 | } |
4561 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} |
4562 | void VisitCapturedStmt(const CapturedStmt *) {} |
4563 | void VisitLambdaExpr(const LambdaExpr *) {} |
4564 | void VisitBlockExpr(const BlockExpr *) {} |
4565 | void VisitStmt(const Stmt *S) { |
4566 | if (!S) |
4567 | return; |
4568 | for (const Stmt *Child : S->children()) |
4569 | if (Child) |
4570 | Visit(Child); |
4571 | } |
4572 | |
4573 | /// Swaps list of vars with the provided one. |
4574 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
4575 | }; |
4576 | } // anonymous namespace |
4577 | |
4578 | static void buildDependences(const OMPExecutableDirective &S, |
4579 | OMPTaskDataTy &Data) { |
4580 | |
4581 | // First look for 'omp_all_memory' and add this first. |
4582 | bool OmpAllMemory = false; |
4583 | if (llvm::any_of( |
4584 | S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) { |
4585 | return C->getDependencyKind() == OMPC_DEPEND_outallmemory || |
4586 | C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; |
4587 | })) { |
4588 | OmpAllMemory = true; |
4589 | // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are |
4590 | // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to |
4591 | // simplify. |
4592 | OMPTaskDataTy::DependData &DD = |
4593 | Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory, |
4594 | /*IteratorExpr=*/nullptr); |
4595 | // Add a nullptr Expr to simplify the codegen in emitDependData. |
4596 | DD.DepExprs.push_back(nullptr); |
4597 | } |
4598 | // Add remaining dependences skipping any 'out' or 'inout' if they are |
4599 | // overridden by 'omp_all_memory'. |
4600 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4601 | OpenMPDependClauseKind Kind = C->getDependencyKind(); |
4602 | if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) |
4603 | continue; |
4604 | if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) |
4605 | continue; |
4606 | OMPTaskDataTy::DependData &DD = |
4607 | Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
4608 | DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
4609 | } |
4610 | } |
4611 | |
4612 | void CodeGenFunction::EmitOMPTaskBasedDirective( |
4613 | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
4614 | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
4615 | OMPTaskDataTy &Data) { |
4616 | // Emit outlined function for task construct. |
4617 | const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); |
4618 | auto I = CS->getCapturedDecl()->param_begin(); |
4619 | auto PartId = std::next(I); |
4620 | auto TaskT = std::next(I, 4); |
4621 | // Check if the task is final |
4622 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
4623 | // If the condition constant folds and can be elided, try to avoid emitting |
4624 | // the condition and the dead arm of the if/else. |
4625 | const Expr *Cond = Clause->getCondition(); |
4626 | bool CondConstant; |
4627 | if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) |
4628 | Data.Final.setInt(CondConstant); |
4629 | else |
4630 | Data.Final.setPointer(EvaluateExprAsBool(Cond)); |
4631 | } else { |
4632 | // By default the task is not final. |
4633 | Data.Final.setInt(/*IntVal=*/false); |
4634 | } |
4635 | // Check if the task has 'priority' clause. |
4636 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
4637 | const Expr *Prio = Clause->getPriority(); |
4638 | Data.Priority.setInt(/*IntVal=*/true); |
4639 | Data.Priority.setPointer(EmitScalarConversion( |
4640 | EmitScalarExpr(Prio), Prio->getType(), |
4641 | getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
4642 | Prio->getExprLoc())); |
4643 | } |
4644 | // The first function argument for tasks is a thread id, the second one is a |
4645 | // part id (0 for tied tasks, >=0 for untied task). |
4646 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
4647 | // Get list of private variables. |
4648 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
4649 | auto IRef = C->varlist_begin(); |
4650 | for (const Expr *IInit : C->private_copies()) { |
4651 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4652 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4653 | Data.PrivateVars.push_back(*IRef); |
4654 | Data.PrivateCopies.push_back(IInit); |
4655 | } |
4656 | ++IRef; |
4657 | } |
4658 | } |
4659 | EmittedAsPrivate.clear(); |
4660 | // Get list of firstprivate variables. |
4661 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4662 | auto IRef = C->varlist_begin(); |
4663 | auto IElemInitRef = C->inits().begin(); |
4664 | for (const Expr *IInit : C->private_copies()) { |
4665 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4666 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4667 | Data.FirstprivateVars.push_back(*IRef); |
4668 | Data.FirstprivateCopies.push_back(IInit); |
4669 | Data.FirstprivateInits.push_back(*IElemInitRef); |
4670 | } |
4671 | ++IRef; |
4672 | ++IElemInitRef; |
4673 | } |
4674 | } |
4675 | // Get list of lastprivate variables (for taskloops). |
4676 | llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
4677 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
4678 | auto IRef = C->varlist_begin(); |
4679 | auto ID = C->destination_exprs().begin(); |
4680 | for (const Expr *IInit : C->private_copies()) { |
4681 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4682 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4683 | Data.LastprivateVars.push_back(*IRef); |
4684 | Data.LastprivateCopies.push_back(IInit); |
4685 | } |
4686 | LastprivateDstsOrigs.insert( |
4687 | std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), |
4688 | cast<DeclRefExpr>(*IRef))); |
4689 | ++IRef; |
4690 | ++ID; |
4691 | } |
4692 | } |
4693 | SmallVector<const Expr *, 4> LHSs; |
4694 | SmallVector<const Expr *, 4> RHSs; |
4695 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
4696 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
4697 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
4698 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
4699 | Data.ReductionOps.append(C->reduction_ops().begin(), |
4700 | C->reduction_ops().end()); |
4701 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
4702 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
4703 | } |
4704 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
4705 | *this, S.getBeginLoc(), LHSs, RHSs, Data); |
4706 | // Build list of dependences. |
4707 | buildDependences(S, Data); |
4708 | // Get list of local vars for untied tasks. |
4709 | if (!Data.Tied) { |
4710 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
4711 | Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4712 | Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), |
4713 | Checker.getPrivateDecls().end()); |
4714 | } |
4715 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
4716 | CapturedRegion](CodeGenFunction &CGF, |
4717 | PrePostActionTy &Action) { |
4718 | llvm::MapVector<CanonicalDeclPtr<const VarDecl>, |
4719 | std::pair<Address, Address>> |
4720 | UntiedLocalVars; |
4721 | // Set proper addresses for generated private copies. |
4722 | OMPPrivateScope Scope(CGF); |
4723 | // Generate debug info for variables present in shared clause. |
4724 | if (auto *DI = CGF.getDebugInfo()) { |
4725 | llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = |
4726 | CGF.CapturedStmtInfo->getCaptureFields(); |
4727 | llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); |
4728 | if (CaptureFields.size() && ContextValue) { |
4729 | unsigned CharWidth = CGF.getContext().getCharWidth(); |
4730 | // The shared variables are packed together as members of structure. |
4731 | // So the address of each shared variable can be computed by adding |
4732 | // offset of it (within record) to the base address of record. For each |
4733 | // shared variable, debug intrinsic llvm.dbg.declare is generated with |
4734 | // appropriate expressions (DIExpression). |
4735 | // Ex: |
4736 | // %12 = load %struct.anon*, %struct.anon** %__context.addr.i |
4737 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4738 | // metadata !svar1, |
4739 | // metadata !DIExpression(DW_OP_deref)) |
4740 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4741 | // metadata !svar2, |
4742 | // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) |
4743 | for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { |
4744 | const VarDecl *SharedVar = It->first; |
4745 | RecordDecl *CaptureRecord = It->second->getParent(); |
4746 | const ASTRecordLayout &Layout = |
4747 | CGF.getContext().getASTRecordLayout(CaptureRecord); |
4748 | unsigned Offset = |
4749 | Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; |
4750 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4751 | (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, |
4752 | CGF.Builder, false); |
4753 | llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); |
4754 | // Get the call dbg.declare instruction we just created and update |
4755 | // its DIExpression to add offset to base address. |
4756 | if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) { |
4757 | SmallVector<uint64_t, 8> Ops; |
4758 | // Add offset to the base address if non zero. |
4759 | if (Offset) { |
4760 | Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); |
4761 | Ops.push_back(Offset); |
4762 | } |
4763 | Ops.push_back(llvm::dwarf::DW_OP_deref); |
4764 | auto &Ctx = DDI->getContext(); |
4765 | llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops); |
4766 | Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr)); |
4767 | } |
4768 | } |
4769 | } |
4770 | } |
4771 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
4772 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
4773 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
4774 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
4775 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
4776 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
4777 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
4778 | CS->getCapturedDecl()->getParam(PrivatesParam))); |
4779 | // Map privates. |
4780 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
4781 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4782 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
4783 | CallArgs.push_back(PrivatesPtr); |
4784 | ParamTypes.push_back(PrivatesPtr->getType()); |
4785 | for (const Expr *E : Data.PrivateVars) { |
4786 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4787 | Address PrivatePtr = CGF.CreateMemTemp( |
4788 | CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr" ); |
4789 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4790 | CallArgs.push_back(PrivatePtr.getPointer()); |
4791 | ParamTypes.push_back(PrivatePtr.getType()); |
4792 | } |
4793 | for (const Expr *E : Data.FirstprivateVars) { |
4794 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4795 | Address PrivatePtr = |
4796 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
4797 | ".firstpriv.ptr.addr" ); |
4798 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4799 | FirstprivatePtrs.emplace_back(VD, PrivatePtr); |
4800 | CallArgs.push_back(PrivatePtr.getPointer()); |
4801 | ParamTypes.push_back(PrivatePtr.getType()); |
4802 | } |
4803 | for (const Expr *E : Data.LastprivateVars) { |
4804 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4805 | Address PrivatePtr = |
4806 | CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
4807 | ".lastpriv.ptr.addr" ); |
4808 | PrivatePtrs.emplace_back(VD, PrivatePtr); |
4809 | CallArgs.push_back(PrivatePtr.getPointer()); |
4810 | ParamTypes.push_back(PrivatePtr.getType()); |
4811 | } |
4812 | for (const VarDecl *VD : Data.PrivateLocals) { |
4813 | QualType Ty = VD->getType().getNonReferenceType(); |
4814 | if (VD->getType()->isLValueReferenceType()) |
4815 | Ty = CGF.getContext().getPointerType(Ty); |
4816 | if (isAllocatableDecl(VD)) |
4817 | Ty = CGF.getContext().getPointerType(Ty); |
4818 | Address PrivatePtr = CGF.CreateMemTemp( |
4819 | CGF.getContext().getPointerType(Ty), ".local.ptr.addr" ); |
4820 | auto Result = UntiedLocalVars.insert( |
4821 | std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); |
4822 | // If key exists update in place. |
4823 | if (Result.second == false) |
4824 | *Result.first = std::make_pair( |
4825 | VD, std::make_pair(PrivatePtr, Address::invalid())); |
4826 | CallArgs.push_back(PrivatePtr.getPointer()); |
4827 | ParamTypes.push_back(PrivatePtr.getType()); |
4828 | } |
4829 | auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), |
4830 | ParamTypes, /*isVarArg=*/false); |
4831 | CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4832 | CopyFn, CopyFnTy->getPointerTo()); |
4833 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4834 | CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
4835 | for (const auto &Pair : LastprivateDstsOrigs) { |
4836 | const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); |
4837 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
4838 | /*RefersToEnclosingVariableOrCapture=*/ |
4839 | CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, |
4840 | Pair.second->getType(), VK_LValue, |
4841 | Pair.second->getExprLoc()); |
4842 | Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF)); |
4843 | } |
4844 | for (const auto &Pair : PrivatePtrs) { |
4845 | Address Replacement = Address( |
4846 | CGF.Builder.CreateLoad(Pair.second), |
4847 | CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), |
4848 | CGF.getContext().getDeclAlign(Pair.first)); |
4849 | Scope.addPrivate(Pair.first, Replacement); |
4850 | if (auto *DI = CGF.getDebugInfo()) |
4851 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4852 | (void)DI->EmitDeclareOfAutoVariable( |
4853 | Pair.first, Pair.second.getPointer(), CGF.Builder, |
4854 | /*UsePointerValue*/ true); |
4855 | } |
4856 | // Adjust mapping for internal locals by mapping actual memory instead of |
4857 | // a pointer to this memory. |
4858 | for (auto &Pair : UntiedLocalVars) { |
4859 | QualType VDType = Pair.first->getType().getNonReferenceType(); |
4860 | if (Pair.first->getType()->isLValueReferenceType()) |
4861 | VDType = CGF.getContext().getPointerType(VDType); |
4862 | if (isAllocatableDecl(Pair.first)) { |
4863 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4864 | Address Replacement( |
4865 | Ptr, |
4866 | CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)), |
4867 | CGF.getPointerAlign()); |
4868 | Pair.second.first = Replacement; |
4869 | Ptr = CGF.Builder.CreateLoad(Replacement); |
4870 | Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType), |
4871 | CGF.getContext().getDeclAlign(Pair.first)); |
4872 | Pair.second.second = Replacement; |
4873 | } else { |
4874 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); |
4875 | Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType), |
4876 | CGF.getContext().getDeclAlign(Pair.first)); |
4877 | Pair.second.first = Replacement; |
4878 | } |
4879 | } |
4880 | } |
4881 | if (Data.Reductions) { |
4882 | OMPPrivateScope FirstprivateScope(CGF); |
4883 | for (const auto &Pair : FirstprivatePtrs) { |
4884 | Address Replacement( |
4885 | CGF.Builder.CreateLoad(Pair.second), |
4886 | CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()), |
4887 | CGF.getContext().getDeclAlign(Pair.first)); |
4888 | FirstprivateScope.addPrivate(Pair.first, Replacement); |
4889 | } |
4890 | (void)FirstprivateScope.Privatize(); |
4891 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
4892 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
4893 | Data.ReductionCopies, Data.ReductionOps); |
4894 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
4895 | CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); |
4896 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
4897 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4898 | RedCG.emitAggregateType(CGF, Cnt); |
4899 | // FIXME: This must removed once the runtime library is fixed. |
4900 | // Emit required threadprivate variables for |
4901 | // initializer/combiner/finalizer. |
4902 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4903 | RedCG, Cnt); |
4904 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4905 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4906 | Replacement = |
4907 | Address(CGF.EmitScalarConversion( |
4908 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4909 | CGF.getContext().getPointerType( |
4910 | Data.ReductionCopies[Cnt]->getType()), |
4911 | Data.ReductionCopies[Cnt]->getExprLoc()), |
4912 | CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()), |
4913 | Replacement.getAlignment()); |
4914 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4915 | Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
4916 | } |
4917 | } |
4918 | // Privatize all private variables except for in_reduction items. |
4919 | (void)Scope.Privatize(); |
4920 | SmallVector<const Expr *, 4> InRedVars; |
4921 | SmallVector<const Expr *, 4> InRedPrivs; |
4922 | SmallVector<const Expr *, 4> InRedOps; |
4923 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
4924 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
4925 | auto IPriv = C->privates().begin(); |
4926 | auto IRed = C->reduction_ops().begin(); |
4927 | auto ITD = C->taskgroup_descriptors().begin(); |
4928 | for (const Expr *Ref : C->varlists()) { |
4929 | InRedVars.emplace_back(Ref); |
4930 | InRedPrivs.emplace_back(*IPriv); |
4931 | InRedOps.emplace_back(*IRed); |
4932 | TaskgroupDescriptors.emplace_back(*ITD); |
4933 | std::advance(IPriv, 1); |
4934 | std::advance(IRed, 1); |
4935 | std::advance(ITD, 1); |
4936 | } |
4937 | } |
4938 | // Privatize in_reduction items here, because taskgroup descriptors must be |
4939 | // privatized earlier. |
4940 | OMPPrivateScope InRedScope(CGF); |
4941 | if (!InRedVars.empty()) { |
4942 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
4943 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
4944 | RedCG.emitSharedOrigLValue(CGF, Cnt); |
4945 | RedCG.emitAggregateType(CGF, Cnt); |
4946 | // The taskgroup descriptor variable is always implicit firstprivate and |
4947 | // privatized already during processing of the firstprivates. |
4948 | // FIXME: This must removed once the runtime library is fixed. |
4949 | // Emit required threadprivate variables for |
4950 | // initializer/combiner/finalizer. |
4951 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), |
4952 | RedCG, Cnt); |
4953 | llvm::Value *ReductionsPtr; |
4954 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
4955 | ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), |
4956 | TRExpr->getExprLoc()); |
4957 | } else { |
4958 | ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4959 | } |
4960 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4961 | CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); |
4962 | Replacement = Address( |
4963 | CGF.EmitScalarConversion( |
4964 | Replacement.getPointer(), CGF.getContext().VoidPtrTy, |
4965 | CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), |
4966 | InRedPrivs[Cnt]->getExprLoc()), |
4967 | CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()), |
4968 | Replacement.getAlignment()); |
4969 | Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); |
4970 | InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement); |
4971 | } |
4972 | } |
4973 | (void)InRedScope.Privatize(); |
4974 | |
4975 | CGOpenMPRuntime::UntiedTaskLocalDeclsRAII (CGF, |
4976 | UntiedLocalVars); |
4977 | Action.Enter(CGF); |
4978 | BodyGen(CGF); |
4979 | }; |
4980 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
4981 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, |
4982 | Data.NumberOfParts); |
4983 | OMPLexicalScope Scope(*this, S, std::nullopt, |
4984 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
4985 | !isOpenMPSimdDirective(S.getDirectiveKind())); |
4986 | TaskGen(*this, OutlinedFn, Data); |
4987 | } |
4988 | |
4989 | static ImplicitParamDecl * |
4990 | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
4991 | QualType Ty, CapturedDecl *CD, |
4992 | SourceLocation Loc) { |
4993 | auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4994 | ImplicitParamDecl::Other); |
4995 | auto *OrigRef = DeclRefExpr::Create( |
4996 | C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, |
4997 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
4998 | auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
4999 | ImplicitParamDecl::Other); |
5000 | auto *PrivateRef = DeclRefExpr::Create( |
5001 | C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, |
5002 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
5003 | QualType ElemType = C.getBaseElementType(Ty); |
5004 | auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, |
5005 | ImplicitParamDecl::Other); |
5006 | auto *InitRef = DeclRefExpr::Create( |
5007 | C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
5008 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
5009 | PrivateVD->setInitStyle(VarDecl::CInit); |
5010 | PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, |
5011 | InitRef, /*BasePath=*/nullptr, |
5012 | VK_PRValue, FPOptionsOverride())); |
5013 | Data.FirstprivateVars.emplace_back(OrigRef); |
5014 | Data.FirstprivateCopies.emplace_back(PrivateRef); |
5015 | Data.FirstprivateInits.emplace_back(InitRef); |
5016 | return OrigVD; |
5017 | } |
5018 | |
/// Emit a task-based directive wrapping a target region (e.g. 'target'
/// with 'nowait'/'depend', which is implemented as a task). The mapping
/// arrays in \p InputInfo (base pointers, pointers, sizes, and optionally
/// mappers) are captured as implicit firstprivates of the generated task so
/// they remain valid when the task runs, and \p BodyGen is invoked inside
/// the outlined task function to emit the actual target invocation.
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Parameters of the outlined task function: *I is the task context,
  // *PartId the part id, and *TaskT (param_begin()+4) the task descriptor.
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Collect in_reduction items; Data is consumed later by processInReduction.
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  OMPPrivateScope TargetScope(*this);
  // Implicit firstprivate decls for the mapping arrays (null when there are
  // no target items / no privatized mapper array).
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    // Base pointers, pointers, and mappers all use void*[NumberOfTargetItems].
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    // Sizes are passed as int64_t[NumberOfTargetItems].
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
    TargetScope.addPrivate(PVD, InputInfo.PointersArray);
    TargetScope.addPrivate(SVD, InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.getPointer())) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD, InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  buildDependences(S, Data);
  // Body of the outlined task function. BPVD/PVD/SVD/MVD are captured by
  // value; Data/S/BodyGen/InputInfo by reference.
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      // Indices of the privates buffer / copy function in the outlined
      // function's parameter list.
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      // One out-pointer per firstprivate; the copy function fills them in.
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr" );
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CopyFn, CopyFnTy->getPointerTo());
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      // Register the filled-in addresses as the private copies.
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
      }
    }
    CGF.processInReduction(S, Data, CGF, CS, Scope);
    // Redirect InputInfo to the task-private copies of the mapping arrays.
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  // The task is emitted as if guarded by if(<nowait present>): with 'nowait'
  // the condition is 1 (deferred task), otherwise 0 (undeferred).
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
5158 | |
/// Remap reduction and in_reduction items of a task-based directive \p S to
/// their task-private storage obtained from the runtime.
///
/// First, any items recorded in \p Data's reduction lists are replaced in
/// \p Scope using the reduction descriptor loaded from the outlined task
/// function, and \p Scope is privatized. Then the directive's in_reduction
/// clause items are privatized in a nested scope, using each item's
/// taskgroup descriptor (or a null pointer when none was recorded).
void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    // The reduction descriptor is the 5th parameter of the outlined task
    // function (same slot as TaskT in the callers).
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // Ask the runtime for the task-private copy of the item.
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // Cast the returned void* to a pointer to the private copy's type.
      Replacement =
          Address(CGF.EmitScalarConversion(
                      Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                      CGF.getContext().getPointerType(
                          Data.ReductionCopies[Cnt]->getType()),
                      Data.ReductionCopies[Cnt]->getExprLoc()),
                  CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
                  Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)Scope.Privatize();
  // Collect in_reduction items together with their taskgroup descriptors.
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlists()) {
      InRedVars.emplace_back(Ref);
      InRedPrivs.emplace_back(*IPriv);
      InRedOps.emplace_back(*IRed);
      TaskgroupDescriptors.emplace_back(*ITD);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ITD, 1);
    }
  }
  // Privatize in_reduction items in a nested scope (after Scope, so the
  // descriptors privatized above are visible).
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      // Use the item's taskgroup descriptor if one was recorded; otherwise
      // pass a null pointer to the runtime.
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
      } else {
        ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      // Cast the returned void* to a pointer to the private copy's type.
      Replacement = Address(
          CGF.EmitScalarConversion(
              Replacement.getPointer(), CGF.getContext().VoidPtrTy,
              CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
              InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)InRedScope.Privatize();
}
5245 | |
5246 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
5247 | // Emit outlined function for task construct. |
5248 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
5249 | Address CapturedStruct = GenerateCapturedStmtArgument(*CS); |
5250 | QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); |
5251 | const Expr *IfCond = nullptr; |
5252 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
5253 | if (C->getNameModifier() == OMPD_unknown || |
5254 | C->getNameModifier() == OMPD_task) { |
5255 | IfCond = C->getCondition(); |
5256 | break; |
5257 | } |
5258 | } |
5259 | |
5260 | OMPTaskDataTy Data; |
5261 | // Check if we should emit tied or untied task. |
5262 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
5263 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
5264 | CGF.EmitStmt(CS->getCapturedStmt()); |
5265 | }; |
5266 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
5267 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
5268 | const OMPTaskDataTy &Data) { |
5269 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, |
5270 | SharedsTy, CapturedStruct, IfCond, |
5271 | Data); |
5272 | }; |
5273 | auto LPCRegion = |
5274 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
5275 | EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); |
5276 | } |
5277 | |
5278 | void CodeGenFunction::EmitOMPTaskyieldDirective( |
5279 | const OMPTaskyieldDirective &S) { |
5280 | CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); |
5281 | } |
5282 | |
5283 | void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { |
5284 | const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); |
5285 | Expr *ME = MC ? MC->getMessageString() : nullptr; |
5286 | const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); |
5287 | bool IsFatal = false; |
5288 | if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) |
5289 | IsFatal = true; |
5290 | CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal); |
5291 | } |
5292 | |
5293 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
5294 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); |
5295 | } |
5296 | |
5297 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
5298 | OMPTaskDataTy Data; |
5299 | // Build list of dependences |
5300 | buildDependences(S, Data); |
5301 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
5302 | CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); |
5303 | } |
5304 | |
5305 | bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { |
5306 | return T.clauses().empty(); |
5307 | } |
5308 | |
/// Emit code for '#pragma omp taskgroup'. Uses the OpenMPIRBuilder when it
/// is enabled and the directive has no clauses; otherwise emits a runtime
/// taskgroup region, initializing a task reduction descriptor first when
/// 'task_reduction' clauses are present.
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                           AllocaInsertPt->getIterator());

    // Emit the captured statement at the builder-provided insertion point.
    auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
                               InsertPointTy CodeGenIP) {
      Builder.restoreIP(CodeGenIP);
      EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };
    // Provide a placeholder CGCapturedStmtInfo if none is active so the
    // captured statement can be emitted. NOTE(review): CapturedStmtInfo is
    // left pointing at this local after the call — assumed not read again
    // once this function returns; confirm against other users of the member.
    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
    if (!CapturedStmtInfo)
      CapturedStmtInfo = &CapStmtInfo;
    Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
    return;
  }
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // If Sema created a reduction reference variable, there are
    // task_reduction clauses: initialize the taskgroup's reduction
    // descriptor and store it into that variable.
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
        Data.ReductionOps.append(C->reduction_ops().begin(),
                                 C->reduction_ops().end());
        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
5356 | |
5357 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
5358 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
5359 | ? llvm::AtomicOrdering::NotAtomic |
5360 | : llvm::AtomicOrdering::AcquireRelease; |
5361 | CGM.getOpenMPRuntime().emitFlush( |
5362 | *this, |
5363 | [&S]() -> ArrayRef<const Expr *> { |
5364 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
5365 | return llvm::ArrayRef(FlushClause->varlist_begin(), |
5366 | FlushClause->varlist_end()); |
5367 | return std::nullopt; |
5368 | }(), |
5369 | S.getBeginLoc(), AO); |
5370 | } |
5371 | |
5372 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
5373 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
5374 | LValue DOLVal = EmitLValue(DO->getDepobj()); |
5375 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
5376 | OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), |
5377 | DC->getModifier()); |
5378 | Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); |
5379 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
5380 | *this, Dependencies, DC->getBeginLoc()); |
5381 | EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); |
5382 | return; |
5383 | } |
5384 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
5385 | CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); |
5386 | return; |
5387 | } |
5388 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
5389 | CGM.getOpenMPRuntime().emitUpdateClause( |
5390 | *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); |
5391 | return; |
5392 | } |
5393 | } |
5394 | |
/// Emit code for '#pragma omp scan', which splits the body of an enclosing
/// loop with an inscan reduction into an input phase and a scan phase.
/// The directive is a no-op unless a parent loop directive is currently
/// being emitted (OMPParentLoopDirectiveForScan is set by the loop codegen,
/// which also provides the OMPScanDispatch/OMPBeforeScanBlock/
/// OMPAfterScanBlock/OMPScanExitBlock blocks and the OMPFirstScanLoop flag).
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
  if (!OMPParentLoopDirectiveForScan)
    return;
  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
  // Gather the inscan reduction items from the parent directive's
  // reduction(inscan, ...) clauses.
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
    if (C->getModifier() != OMPC_REDUCTION_inscan)
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  if (ParentDir.getDirectiveKind() == OMPD_simd ||
      (getLangOpts().OpenMPSimd &&
       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
    // For simd directive and simd-based directives in simd only mode, use the
    // following codegen:
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan inclusive(x)
    //   <second part>
    // }
    // is transformed to:
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <first part>
    //   x = x_priv + x;
    //   x_priv = x;
    //   <second part>
    // }
    // and
    // int x = 0;
    // #pragma omp simd reduction(inscan, +: x)
    // for (..) {
    //   <first part>
    //   #pragma omp scan exclusive(x)
    //   <second part>
    // }
    // to
    // int x = 0;
    // for (..) {
    //   int x_priv = 0;
    //   <second part>
    //   int temp = x;
    //   x = x_priv + x;
    //   x_priv = temp;
    //   <first part>
    // }
    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce" );
    EmitBranch(IsInclusive
                   ? OMPScanReduce
                   : BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanDispatch);
    {
      // New scope for correct construction/destruction of temp variables for
      // exclusive scan.
      LexicalScope Scope(*this, S.getSourceRange());
      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
      EmitBlock(OMPScanReduce);
      if (!IsInclusive) {
        // Create temp var and copy LHS value to this temp value.
        // TMP = LHS;
        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
          const Expr *PrivateExpr = Privates[I];
          const Expr *TempExpr = CopyArrayTemps[I];
          EmitAutoVarDecl(
              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
          LValue DestLVal = EmitLValue(TempExpr);
          LValue SrcLVal = EmitLValue(LHSs[I]);
          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                      SrcLVal.getAddress(*this),
                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                      CopyOps[I]);
        }
      }
      // x = x_priv + x; (emitted as a simple, nowait reduction).
      CGM.getOpenMPRuntime().emitReduction(
          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
      // x_priv = x; (inclusive) or x_priv = temp; (exclusive).
      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
        const Expr *PrivateExpr = Privates[I];
        LValue DestLVal;
        LValue SrcLVal;
        if (IsInclusive) {
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(LHSs[I]);
        } else {
          const Expr *TempExpr = CopyArrayTemps[I];
          DestLVal = EmitLValue(RHSs[I]);
          SrcLVal = EmitLValue(TempExpr);
        }
        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                    SrcLVal.getAddress(*this),
                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                    CopyOps[I]);
      }
    }
    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
    OMPScanExitBlock = IsInclusive
                           ? BreakContinueStack.back().ContinueBlock.getBlock()
                           : OMPScanReduce;
    EmitBlock(OMPAfterScanBlock);
    return;
  }
  // Non-simd path: the loop is emitted twice (input-phase loop and
  // scan-phase loop, selected via OMPFirstScanLoop) with a copy buffer
  // between the two phases.
  if (!IsInclusive) {
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
    EmitBlock(OMPScanExitBlock);
  }
  if (OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the buffer subscript's opaque index to the current iteration.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue DestLVal = EmitLValue(CopyArrayElem);
      LValue SrcLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
  }
  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  if (IsInclusive) {
    EmitBlock(OMPScanExitBlock);
    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
  }
  EmitBlock(OMPScanDispatch);
  if (!OMPFirstScanLoop) {
    // Emit red = buffer[i]; at the entrance to the scan phase.
    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
                             .getIterationVariable()
                             ->IgnoreParenImpCasts();
    LValue IdxLVal = EmitLValue(IVExpr);
    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
    llvm::BasicBlock *ExclusiveExitBB = nullptr;
    if (!IsInclusive) {
      // Exclusive scan at iteration 0 has nothing to read: skip the copy.
      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec" );
      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit" );
      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
      EmitBlock(ContBB);
      // Use idx - 1 iteration for exclusive scan.
      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
    }
    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
      const Expr *PrivateExpr = Privates[I];
      const Expr *OrigExpr = Shareds[I];
      const Expr *CopyArrayElem = CopyArrayElems[I];
      // Bind the buffer subscript's opaque index to the (adjusted) iteration.
      OpaqueValueMapping IdxMapping(
          *this,
          cast<OpaqueValueExpr>(
              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
          RValue::get(IdxVal));
      LValue SrcLVal = EmitLValue(CopyArrayElem);
      LValue DestLVal = EmitLValue(OrigExpr);
      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
                  SrcLVal.getAddress(*this),
                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
                  CopyOps[I]);
    }
    if (!IsInclusive) {
      EmitBlock(ExclusiveExitBB);
    }
  }
  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
                                               : OMPAfterScanBlock);
  EmitBlock(OMPAfterScanBlock);
}
5597 | |
5598 | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
5599 | const CodeGenLoopTy &CodeGenLoop, |
5600 | Expr *IncExpr) { |
5601 | // Emit the loop iteration variable. |
5602 | const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); |
5603 | const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl()); |
5604 | EmitVarDecl(*IVDecl); |
5605 | |
5606 | // Emit the iterations count variable. |
5607 | // If it is not a variable, Sema decided to calculate iterations count on each |
5608 | // iteration (e.g., it is foldable into a constant). |
5609 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { |
5610 | EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); |
5611 | // Emit calculation of the iterations count. |
5612 | EmitIgnoredExpr(S.getCalcLastIteration()); |
5613 | } |
5614 | |
5615 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
5616 | |
5617 | bool HasLastprivateClause = false; |
5618 | // Check pre-condition. |
5619 | { |
5620 | OMPLoopScope PreInitScope(*this, S); |
5621 | // Skip the entire loop if we don't meet the precondition. |
5622 | // If the condition constant folds and can be elided, avoid emitting the |
5623 | // whole loop. |
5624 | bool CondConstant; |
5625 | llvm::BasicBlock *ContBlock = nullptr; |
5626 | if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { |
5627 | if (!CondConstant) |
5628 | return; |
5629 | } else { |
5630 | llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then" ); |
5631 | ContBlock = createBasicBlock("omp.precond.end" ); |
5632 | emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, |
5633 | getProfileCount(&S)); |
5634 | EmitBlock(ThenBlock); |
5635 | incrementProfileCounter(&S); |
5636 | } |
5637 | |
5638 | emitAlignedClause(*this, S); |
5639 | // Emit 'then' code. |
5640 | { |
5641 | // Emit helper vars inits. |
5642 | |
5643 | LValue LB = EmitOMPHelperVar( |
5644 | *this, cast<DeclRefExpr>( |
5645 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5646 | ? S.getCombinedLowerBoundVariable() |
5647 | : S.getLowerBoundVariable()))); |
5648 | LValue UB = EmitOMPHelperVar( |
5649 | *this, cast<DeclRefExpr>( |
5650 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5651 | ? S.getCombinedUpperBoundVariable() |
5652 | : S.getUpperBoundVariable()))); |
5653 | LValue ST = |
5654 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); |
5655 | LValue IL = |
5656 | EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); |
5657 | |
5658 | OMPPrivateScope LoopScope(*this); |
5659 | if (EmitOMPFirstprivateClause(S, LoopScope)) { |
5660 | // Emit implicit barrier to synchronize threads and avoid data races |
5661 | // on initialization of firstprivate variables and post-update of |
5662 | // lastprivate variables. |
5663 | CGM.getOpenMPRuntime().emitBarrierCall( |
5664 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
5665 | /*ForceSimpleCall=*/true); |
5666 | } |
5667 | EmitOMPPrivateClause(S, LoopScope); |
5668 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5669 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5670 | !isOpenMPTeamsDirective(S.getDirectiveKind())) |
5671 | EmitOMPReductionClauseInit(S, LoopScope); |
5672 | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
5673 | EmitOMPPrivateLoopCounters(S, LoopScope); |
5674 | (void)LoopScope.Privatize(); |
5675 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
5676 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
5677 | |
5678 | // Detect the distribute schedule kind and chunk. |
5679 | llvm::Value *Chunk = nullptr; |
5680 | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
5681 | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
5682 | ScheduleKind = C->getDistScheduleKind(); |
5683 | if (const Expr *Ch = C->getChunkSize()) { |
5684 | Chunk = EmitScalarExpr(Ch); |
5685 | Chunk = EmitScalarConversion(Chunk, Ch->getType(), |
5686 | S.getIterationVariable()->getType(), |
5687 | S.getBeginLoc()); |
5688 | } |
5689 | } else { |
5690 | // Default behaviour for dist_schedule clause. |
5691 | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
5692 | *this, S, ScheduleKind, Chunk); |
5693 | } |
5694 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
5695 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
5696 | |
5697 | // OpenMP [2.10.8, distribute Construct, Description] |
5698 | // If dist_schedule is specified, kind must be static. If specified, |
5699 | // iterations are divided into chunks of size chunk_size, chunks are |
5700 | // assigned to the teams of the league in a round-robin fashion in the |
5701 | // order of the team number. When no chunk_size is specified, the |
5702 | // iteration space is divided into chunks that are approximately equal |
5703 | // in size, and at most one chunk is distributed to each team of the |
5704 | // league. The size of the chunks is unspecified in this case. |
5705 | bool StaticChunked = |
5706 | RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && |
5707 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
5708 | if (RT.isStaticNonchunked(ScheduleKind, |
5709 | /* Chunked */ Chunk != nullptr) || |
5710 | StaticChunked) { |
5711 | CGOpenMPRuntime::StaticRTInput StaticInit( |
5712 | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this), |
5713 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
5714 | StaticChunked ? Chunk : nullptr); |
5715 | RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, |
5716 | StaticInit); |
5717 | JumpDest LoopExit = |
5718 | getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit" )); |
5719 | // UB = min(UB, GlobalUB); |
5720 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5721 | ? S.getCombinedEnsureUpperBound() |
5722 | : S.getEnsureUpperBound()); |
5723 | // IV = LB; |
5724 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5725 | ? S.getCombinedInit() |
5726 | : S.getInit()); |
5727 | |
5728 | const Expr *Cond = |
5729 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5730 | ? S.getCombinedCond() |
5731 | : S.getCond(); |
5732 | |
5733 | if (StaticChunked) |
5734 | Cond = S.getCombinedDistCond(); |
5735 | |
5736 | // For static unchunked schedules generate: |
5737 | // |
5738 | // 1. For distribute alone, codegen |
5739 | // while (idx <= UB) { |
5740 | // BODY; |
5741 | // ++idx; |
5742 | // } |
5743 | // |
5744 | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
5745 | // while (idx <= UB) { |
5746 | // <CodeGen rest of pragma>(LB, UB); |
5747 | // idx += ST; |
5748 | // } |
5749 | // |
5750 | // For static chunk one schedule generate: |
5751 | // |
5752 | // while (IV <= GlobalUB) { |
5753 | // <CodeGen rest of pragma>(LB, UB); |
5754 | // LB += ST; |
5755 | // UB += ST; |
5756 | // UB = min(UB, GlobalUB); |
5757 | // IV = LB; |
5758 | // } |
5759 | // |
5760 | emitCommonSimdLoop( |
5761 | *this, S, |
5762 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5763 | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
5764 | CGF.EmitOMPSimdInit(S); |
5765 | }, |
5766 | [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
5767 | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
5768 | CGF.EmitOMPInnerLoop( |
5769 | S, LoopScope.requiresCleanups(), Cond, IncExpr, |
5770 | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
5771 | CodeGenLoop(CGF, S, LoopExit); |
5772 | }, |
5773 | [&S, StaticChunked](CodeGenFunction &CGF) { |
5774 | if (StaticChunked) { |
5775 | CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); |
5776 | CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); |
5777 | CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); |
5778 | CGF.EmitIgnoredExpr(S.getCombinedInit()); |
5779 | } |
5780 | }); |
5781 | }); |
5782 | EmitBlock(LoopExit.getBlock()); |
5783 | // Tell the runtime we are done. |
5784 | RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); |
5785 | } else { |
5786 | // Emit the outer loop, which requests its work chunk [LB..UB] from |
5787 | // runtime and runs the inner loop to process it. |
5788 | const OMPLoopArguments LoopArguments = { |
5789 | LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
5790 | IL.getAddress(*this), Chunk}; |
5791 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, |
5792 | CodeGenLoop); |
5793 | } |
5794 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
5795 | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
5796 | return CGF.Builder.CreateIsNotNull( |
5797 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5798 | }); |
5799 | } |
5800 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5801 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5802 | !isOpenMPTeamsDirective(S.getDirectiveKind())) { |
5803 | EmitOMPReductionClauseFinal(S, OMPD_simd); |
5804 | // Emit post-update of the reduction variables if IsLastIter != 0. |
5805 | emitPostUpdateForReductionClause( |
5806 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
5807 | return CGF.Builder.CreateIsNotNull( |
5808 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5809 | }); |
5810 | } |
5811 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
5812 | if (HasLastprivateClause) { |
5813 | EmitOMPLastprivateClauseFinal( |
5814 | S, /*NoFinals=*/false, |
5815 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
5816 | } |
5817 | } |
5818 | |
5819 | // We're now done with the loop, so jump to the continuation block. |
5820 | if (ContBlock) { |
5821 | EmitBranch(ContBlock); |
5822 | EmitBlock(ContBlock, true); |
5823 | } |
5824 | } |
5825 | } |
5826 | |
5827 | void CodeGenFunction::EmitOMPDistributeDirective( |
5828 | const OMPDistributeDirective &S) { |
5829 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5830 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
5831 | }; |
5832 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5833 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
5834 | } |
5835 | |
5836 | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
5837 | const CapturedStmt *S, |
5838 | SourceLocation Loc) { |
5839 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
5840 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5841 | CGF.CapturedStmtInfo = &CapStmtInfo; |
5842 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); |
5843 | Fn->setDoesNotRecurse(); |
5844 | return Fn; |
5845 | } |
5846 | |
5847 | template <typename T> |
5848 | static void emitRestoreIP(CodeGenFunction &CGF, const T *C, |
5849 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, |
5850 | llvm::OpenMPIRBuilder &OMPBuilder) { |
5851 | |
5852 | unsigned NumLoops = C->getNumLoops(); |
5853 | QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( |
5854 | /*DestWidth=*/64, /*Signed=*/1); |
5855 | llvm::SmallVector<llvm::Value *> StoreValues; |
5856 | for (unsigned I = 0; I < NumLoops; I++) { |
5857 | const Expr *CounterVal = C->getLoopData(I); |
5858 | assert(CounterVal); |
5859 | llvm::Value *StoreValue = CGF.EmitScalarConversion( |
5860 | CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, |
5861 | CounterVal->getExprLoc()); |
5862 | StoreValues.emplace_back(StoreValue); |
5863 | } |
5864 | OMPDoacrossKind<T> ODK; |
5865 | bool IsDependSource = ODK.isSource(C); |
5866 | CGF.Builder.restoreIP( |
5867 | OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops, |
5868 | StoreValues, ".cnt.addr" , IsDependSource)); |
5869 | } |
5870 | |
/// Emit code for '#pragma omp ordered'. Two independent lowerings exist:
/// the OpenMPIRBuilder path (when -fopenmp-enable-irbuilder is on) and the
/// classic CGOpenMPRuntime path; both distinguish the depend/doacross form
/// (a stand-alone cross-iteration dependency) from the block-associated form
/// (threads/simd or no clause).
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    if (S.hasClausesOfKind<OMPDependClause>() ||
        S.hasClausesOfKind<OMPDoacrossClause>()) {
      // The ordered directive with depend clause.
      assert(!S.hasAssociatedStmt() && "No associated statement must be in "
                                       "ordered depend|doacross construct.");
      // Counter variables for the dependency are allocated at the function's
      // alloca insertion point.
      InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
                             AllocaInsertPt->getIterator());
      for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
      for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
        emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
    } else {
      // The ordered directive with threads or simd clause, or without clause.
      // Without clause, it behaves as if the threads clause is specified.
      // C is non-null iff the 'simd' clause is present.
      const auto *C = S.getSingleClause<OMPSIMDClause>();

      auto FiniCB = [this](InsertPointTy IP) {
        OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
      };

      auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
                                     InsertPointTy CodeGenIP) {
        Builder.restoreIP(CodeGenIP);

        const CapturedStmt *CS = S.getInnermostCapturedStmt();
        if (C) {
          // 'ordered simd': outline the region body and call it.
          llvm::BasicBlock *FiniBB = splitBBWithSuffix(
              Builder, /*CreateBranch=*/false, ".ordered.after");
          llvm::SmallVector<llvm::Value *, 16> CapturedVars;
          GenerateOpenMPCapturedVars(*CS, CapturedVars);
          llvm::Function *OutlinedFn =
              emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
          assert(S.getBeginLoc().isValid() &&
                 "Outlined function call location must be valid.");
          ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
          OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
                                               OutlinedFn, CapturedVars);
        } else {
          // 'ordered [threads]': emit the body inline within the region.
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
        }
      };

      OMPLexicalScope Scope(*this, S, OMPD_unknown);
      // !C selects the 'threads' flavor of the runtime entry point.
      Builder.restoreIP(
          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
    }
    return;
  }

  // Classic runtime lowering: stand-alone depend/doacross forms first.
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  if (S.hasClausesOfKind<OMPDoacrossClause>()) {
    assert(!S.hasAssociatedStmt() &&
           "No associated statement must be in ordered doacross construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  // Block-associated form: outline for 'simd', emit inline otherwise.
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn =
          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  // !C => emit the region guarded by the 'threads' ordered runtime calls.
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
5959 | |
5960 | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
5961 | QualType SrcType, QualType DestType, |
5962 | SourceLocation Loc) { |
5963 | assert(CGF.hasScalarEvaluationKind(DestType) && |
5964 | "DestType must have scalar evaluation kind." ); |
5965 | assert(!Val.isAggregate() && "Must be a scalar or complex." ); |
5966 | return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, |
5967 | DestType, Loc) |
5968 | : CGF.EmitComplexToScalarConversion( |
5969 | Val.getComplexVal(), SrcType, DestType, Loc); |
5970 | } |
5971 | |
5972 | static CodeGenFunction::ComplexPairTy |
5973 | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
5974 | QualType DestType, SourceLocation Loc) { |
5975 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
5976 | "DestType must have complex evaluation kind." ); |
5977 | CodeGenFunction::ComplexPairTy ComplexVal; |
5978 | if (Val.isScalar()) { |
5979 | // Convert the input element to the element type of the complex. |
5980 | QualType DestElementType = |
5981 | DestType->castAs<ComplexType>()->getElementType(); |
5982 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
5983 | Val.getScalarVal(), SrcType, DestElementType, Loc); |
5984 | ComplexVal = CodeGenFunction::ComplexPairTy( |
5985 | ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType())); |
5986 | } else { |
5987 | assert(Val.isComplex() && "Must be a scalar or complex." ); |
5988 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
5989 | QualType DestElementType = |
5990 | DestType->castAs<ComplexType>()->getElementType(); |
5991 | ComplexVal.first = CGF.EmitScalarConversion( |
5992 | Val.getComplexVal().first, SrcElementType, DestElementType, Loc); |
5993 | ComplexVal.second = CGF.EmitScalarConversion( |
5994 | Val.getComplexVal().second, SrcElementType, DestElementType, Loc); |
5995 | } |
5996 | return ComplexVal; |
5997 | } |
5998 | |
5999 | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6000 | LValue LVal, RValue RVal) { |
6001 | if (LVal.isGlobalReg()) |
6002 | CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); |
6003 | else |
6004 | CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); |
6005 | } |
6006 | |
6007 | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
6008 | llvm::AtomicOrdering AO, LValue LVal, |
6009 | SourceLocation Loc) { |
6010 | if (LVal.isGlobalReg()) |
6011 | return CGF.EmitLoadOfLValue(LVal, Loc); |
6012 | return CGF.EmitAtomicLoad( |
6013 | LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), |
6014 | LVal.isVolatile()); |
6015 | } |
6016 | |
6017 | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
6018 | QualType RValTy, SourceLocation Loc) { |
6019 | switch (getEvaluationKind(LVal.getType())) { |
6020 | case TEK_Scalar: |
6021 | EmitStoreThroughLValue(RValue::get(convertToScalarValue( |
6022 | *this, RVal, RValTy, LVal.getType(), Loc)), |
6023 | LVal); |
6024 | break; |
6025 | case TEK_Complex: |
6026 | EmitStoreOfComplex( |
6027 | convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, |
6028 | /*isInit=*/false); |
6029 | break; |
6030 | case TEK_Aggregate: |
6031 | llvm_unreachable("Must be a scalar or complex." ); |
6032 | } |
6033 | } |
6034 | |
/// Emit 'v = x;' for '#pragma omp atomic read'.
///
/// \param AO Ordering chosen from the directive's memory-order clause.
/// \param X  The lvalue expression read atomically.
/// \param V  The lvalue expression that receives x's value.
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  // Perform the (possibly non-atomic, for global registers) load of 'x'.
  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
  // OpenMP, 2.17.7, atomic Construct
  // If the read or capture clause is specified and the acquire, acq_rel, or
  // seq_cst clause is specified then the strong flush on exit from the atomic
  // operation is also an acquire flush.
  switch (AO) {
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Acquire);
    break;
  case llvm::AtomicOrdering::Monotonic:
  case llvm::AtomicOrdering::Release:
    // No acquire flush is required for relaxed or release reads.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
  // Store into 'v', converting from x's (non-reference) type if necessary.
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
6065 | |
/// Emit 'x = expr;' for '#pragma omp atomic write'.
///
/// \param AO Ordering chosen from the directive's memory-order clause.
/// \param X  The lvalue expression written atomically.
/// \param E  The expression whose value is stored into 'x'.
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
                                   llvm::AtomicOrdering AO, const Expr *X,
                                   const Expr *E, SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No release flush is required for relaxed or acquire writes.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6092 | |
/// Try to lower an atomic update 'x = x binop expr' to a single 'atomicrmw'
/// instruction.
///
/// \param X              Lvalue of the updated variable.
/// \param Update         The rvalue combined with 'x'.
/// \param BO             The binary operation of the update expression.
/// \param AO             Atomic ordering for the instruction.
/// \param IsXLHSInRHSPart True for 'x = x binop expr', false for
///                       'x = expr binop x' (matters for non-commutative ops).
/// \returns {true, old value of 'x'} if an 'atomicrmw' was emitted,
///          {false, nullptr} if the caller must fall back to a
///          compare-and-swap loop.
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
  // expression is simple and atomic is allowed for the given type for the
  // target platform.
  if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress(CGF).getElementType())) ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));

  // atomicrmw supports all integer types; floating point is limited to
  // add/sub on power-of-two-sized types.
  auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
    if (T->isIntegerTy())
      return true;

    if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
      return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));

    return false;
  };

  if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
      !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
    return std::make_pair(false, RValue::get(nullptr));

  bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
  // Map the AST binary operator to the corresponding atomicrmw operation.
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
    break;
  case BO_Sub:
    // 'x = expr - x' is not a subtraction of 'x'; no atomicrmw form exists.
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    // 'x = x < e ? x : e' is min; 'x = e < x ? e : x' is also min, but with
    // operands swapped the sense inverts — select min/max accordingly, and
    // pick the signed/unsigned/float variant from the type of 'x'.
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                     : llvm::AtomicRMWInst::Max)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                     : llvm::AtomicRMWInst::UMax);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
                              : llvm::AtomicRMWInst::FMax;
    break;
  case BO_GT:
    if (IsInteger)
      RMWOp = X.getType()->hasSignedIntegerRepresentation()
                  ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                     : llvm::AtomicRMWInst::Min)
                  : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                     : llvm::AtomicRMWInst::UMin);
    else
      RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
                              : llvm::AtomicRMWInst::FMin;
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  // Operations with no atomicrmw equivalent: fall back to cmpxchg loop.
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  // Operators that can never appear in a valid 'atomic update' expression.
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  // Constant operands may need to be adjusted to the element type of 'x'.
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    if (IsInteger)
      UpdateVal = CGF.Builder.CreateIntCast(
          IC, X.getAddress(CGF).getElementType(),
          X.getType()->hasSignedIntegerRepresentation());
    else
      UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
                                         X.getAddress(CGF).getElementType());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
6211 | |
6212 | std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( |
6213 | LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, |
6214 | llvm::AtomicOrdering AO, SourceLocation Loc, |
6215 | const llvm::function_ref<RValue(RValue)> CommonGen) { |
6216 | // Update expressions are allowed to have the following forms: |
6217 | // x binop= expr; -> xrval + expr; |
6218 | // x++, ++x -> xrval + 1; |
6219 | // x--, --x -> xrval - 1; |
6220 | // x = x binop expr; -> xrval binop expr |
6221 | // x = expr Op x; - > expr binop xrval; |
6222 | auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart); |
6223 | if (!Res.first) { |
6224 | if (X.isGlobalReg()) { |
6225 | // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop |
6226 | // 'xrval'. |
6227 | EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); |
6228 | } else { |
6229 | // Perform compare-and-swap procedure. |
6230 | EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); |
6231 | } |
6232 | } |
6233 | return Res; |
6234 | } |
6235 | |
/// Emit '#pragma omp atomic update' for the expression 'x = x binop expr'
/// (or one of its equivalent forms).
///
/// \param AO             Ordering chosen from the directive's memory-order
///                       clause.
/// \param X              The updated lvalue expression.
/// \param E              The 'expr' operand.
/// \param UE             The full update expression built by Sema, with 'x'
///                       and 'expr' represented as OpaqueValueExprs.
/// \param IsXLHSInRHSPart True when 'x' is the LHS of the binop inside UE.
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
                                    llvm::AtomicOrdering AO, const Expr *X,
                                    const Expr *E, const Expr *UE,
                                    bool IsXLHSInRHSPart, SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval + expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr
  // x = expr Op x; - > expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  // Identify which opaque value stands for 'x' and which for 'expr' based on
  // the operand order inside the update expression.
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  // Generator for the new value of 'x': bind the opaque values to the current
  // value of 'x' and the pre-computed 'expr', then evaluate UE.
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
  // OpenMP, 2.17.7, atomic Construct
  // If the write, update, or capture clause is specified and the release,
  // acq_rel, or seq_cst clause is specified then the strong flush on entry to
  // the atomic operation is also a release flush.
  switch (AO) {
  case llvm::AtomicOrdering::Release:
  case llvm::AtomicOrdering::AcquireRelease:
  case llvm::AtomicOrdering::SequentiallyConsistent:
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                         llvm::AtomicOrdering::Release);
    break;
  case llvm::AtomicOrdering::Acquire:
  case llvm::AtomicOrdering::Monotonic:
    // No release flush required for relaxed or acquire updates.
    break;
  case llvm::AtomicOrdering::NotAtomic:
  case llvm::AtomicOrdering::Unordered:
    llvm_unreachable("Unexpected ordering.");
  }
}
6283 | |
6284 | static RValue convertToType(CodeGenFunction &CGF, RValue Value, |
6285 | QualType SourceType, QualType ResType, |
6286 | SourceLocation Loc) { |
6287 | switch (CGF.getEvaluationKind(ResType)) { |
6288 | case TEK_Scalar: |
6289 | return RValue::get( |
6290 | convertToScalarValue(CGF, Value, SourceType, ResType, Loc)); |
6291 | case TEK_Complex: { |
6292 | auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc); |
6293 | return RValue::getComplex(Res.first, Res.second); |
6294 | } |
6295 | case TEK_Aggregate: |
6296 | break; |
6297 | } |
6298 | llvm_unreachable("Must be a scalar or complex." ); |
6299 | } |
6300 | |
/// Emit '#pragma omp atomic capture': atomically update (or overwrite) 'x'
/// and store its old or new value into 'v'.
///
/// \param AO              Ordering chosen from the directive's memory-order
///                        clause.
/// \param IsPostfixUpdate True when 'v' must receive the value of 'x' from
///                        *before* the update (e.g. 'v = x++').
/// \param V               The capture target lvalue.
/// \param X               The atomically accessed lvalue.
/// \param E               The 'expr' operand.
/// \param UE              The update expression, or null when 'x' is simply
///                        overwritten with 'expr'.
/// \param IsXLHSInRHSPart True when 'x' is the LHS of the binop inside UE.
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  // Value eventually stored into 'v' (old or new value of 'x').
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval + expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr
    // x = expr Op x; - > expr binop xrval;
    // Identify which opaque value stands for 'x' and which for 'expr'.
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    // Generator for the updated value; as a side effect it records in
    // NewVVal either the old value of 'x' (postfix) or the result (prefix).
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    // Generator used by the cmpxchg fallback: record the old value of 'x'
    // and store 'expr' unchanged.
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
  // OpenMP 5.1 removes the required flush for capture clause.
  if (CGF.CGM.getLangOpts().OpenMP < 51) {
    // OpenMP, 2.17.7, atomic Construct
    // If the write, update, or capture clause is specified and the release,
    // acq_rel, or seq_cst clause is specified then the strong flush on entry to
    // the atomic operation is also a release flush.
    // If the read or capture clause is specified and the acquire, acq_rel, or
    // seq_cst clause is specified then the strong flush on exit from the atomic
    // operation is also an acquire flush.
    switch (AO) {
    case llvm::AtomicOrdering::Release:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Release);
      break;
    case llvm::AtomicOrdering::Acquire:
      CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
                                           llvm::AtomicOrdering::Acquire);
      break;
    case llvm::AtomicOrdering::AcquireRelease:
    case llvm::AtomicOrdering::SequentiallyConsistent:
      CGF.CGM.getOpenMPRuntime().emitFlush(
          CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
      break;
    case llvm::AtomicOrdering::Monotonic:
      // Relaxed atomics require no flush.
      break;
    case llvm::AtomicOrdering::NotAtomic:
    case llvm::AtomicOrdering::Unordered:
      llvm_unreachable("Unexpected ordering.");
    }
  }
}
6407 | |
/// Emit an 'omp atomic compare' (optionally with a capture) through the
/// OpenMPIRBuilder.
///
/// \param X  The atomically accessed lvalue 'x'.
/// \param V  Capture target for the old/new value of 'x', or null.
/// \param R  Target receiving the comparison result, or null.
/// \param E  The value 'x' is compared with / conditionally assigned from.
/// \param D  The value stored when the comparison fails, or null.
/// \param CE The comparison; must be a BinaryOperator with ==, < or >.
/// \param IsXBinopExpr    True if 'x' is an operand of the binary expression.
/// \param IsPostfixUpdate True if 'v' captures the value of 'x' from before
///                        the update.
/// \param IsFailOnly      True for the form where 'v' is only written when
///                        the comparison fails.
static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
                                     llvm::AtomicOrdering AO, const Expr *X,
                                     const Expr *V, const Expr *R,
                                     const Expr *E, const Expr *D,
                                     const Expr *CE, bool IsXBinopExpr,
                                     bool IsPostfixUpdate, bool IsFailOnly,
                                     SourceLocation Loc) {
  llvm::OpenMPIRBuilder &OMPBuilder =
      CGF.CGM.getOpenMPRuntime().getOMPBuilder();

  // Map the source-level comparison operator onto the IRBuilder's compare
  // operation: '<' selects MIN, '>' selects MAX, '==' selects EQ.
  OMPAtomicCompareOp Op;
  assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator" );
  switch (cast<BinaryOperator>(CE)->getOpcode()) {
  case BO_EQ:
    Op = OMPAtomicCompareOp::EQ;
    break;
  case BO_LT:
    Op = OMPAtomicCompareOp::MIN;
    break;
  case BO_GT:
    Op = OMPAtomicCompareOp::MAX;
    break;
  default:
    llvm_unreachable("unsupported atomic compare binary operator" );
  }

  LValue XLVal = CGF.EmitLValue(X);
  Address XAddr = XLVal.getAddress(CGF);

  // Emit 'E' (or 'D') as a scalar of 'X's type. If the operand was wrapped in
  // an implicit cast to match 'x', strip the as-written implicit cast and
  // convert the underlying value explicitly instead.
  auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
    if (X->getType() == E->getType())
      return CGF.EmitScalarExpr(E);
    const Expr *NewE = E->IgnoreImplicitAsWritten();
    llvm::Value *V = CGF.EmitScalarExpr(NewE);
    if (NewE->getType() == X->getType())
      return V;
    return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
  };

  llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
  llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
  // Constant-integer operands may still differ in width from the in-memory
  // element type of 'x'; cast them to that type here.
  if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
    EVal = CGF.Builder.CreateIntCast(
        CI, XLVal.getAddress(CGF).getElementType(),
        E->getType()->hasSignedIntegerRepresentation());
  if (DVal)
    if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
      DVal = CGF.Builder.CreateIntCast(
          CI, XLVal.getAddress(CGF).getElementType(),
          D->getType()->hasSignedIntegerRepresentation());

  // Package the 'x', 'v' and 'r' lvalues (pointer, element type, signedness,
  // volatility) for the IRBuilder. VOpVal/ROpVal stay default-initialized
  // when the corresponding expression is absent.
  llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
      XAddr.getPointer(), XAddr.getElementType(),
      X->getType()->hasSignedIntegerRepresentation(),
      X->getType().isVolatileQualified()};
  llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
  if (V) {
    LValue LV = CGF.EmitLValue(V);
    Address Addr = LV.getAddress(CGF);
    VOpVal = {Addr.getPointer(), Addr.getElementType(),
              V->getType()->hasSignedIntegerRepresentation(),
              V->getType().isVolatileQualified()};
  }
  if (R) {
    LValue LV = CGF.EmitLValue(R);
    Address Addr = LV.getAddress(CGF);
    ROpVal = {Addr.getPointer(), Addr.getElementType(),
              R->getType()->hasSignedIntegerRepresentation(),
              R->getType().isVolatileQualified()};
  }

  // The builder emits the atomic sequence and returns the new insertion
  // point, which must be restored into the current builder.
  CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
      CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
      IsPostfixUpdate, IsFailOnly));
}
6483 | |
6484 | static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
6485 | llvm::AtomicOrdering AO, bool IsPostfixUpdate, |
6486 | const Expr *X, const Expr *V, const Expr *R, |
6487 | const Expr *E, const Expr *UE, const Expr *D, |
6488 | const Expr *CE, bool IsXLHSInRHSPart, |
6489 | bool IsFailOnly, SourceLocation Loc) { |
6490 | switch (Kind) { |
6491 | case OMPC_read: |
6492 | emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); |
6493 | break; |
6494 | case OMPC_write: |
6495 | emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); |
6496 | break; |
6497 | case OMPC_unknown: |
6498 | case OMPC_update: |
6499 | emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); |
6500 | break; |
6501 | case OMPC_capture: |
6502 | emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, |
6503 | IsXLHSInRHSPart, Loc); |
6504 | break; |
6505 | case OMPC_compare: { |
6506 | emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart, |
6507 | IsPostfixUpdate, IsFailOnly, Loc); |
6508 | break; |
6509 | } |
6510 | default: |
6511 | llvm_unreachable("Clause is not allowed in 'omp atomic'." ); |
6512 | } |
6513 | } |
6514 | |
6515 | void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
6516 | llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; |
6517 | bool MemOrderingSpecified = false; |
6518 | if (S.getSingleClause<OMPSeqCstClause>()) { |
6519 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
6520 | MemOrderingSpecified = true; |
6521 | } else if (S.getSingleClause<OMPAcqRelClause>()) { |
6522 | AO = llvm::AtomicOrdering::AcquireRelease; |
6523 | MemOrderingSpecified = true; |
6524 | } else if (S.getSingleClause<OMPAcquireClause>()) { |
6525 | AO = llvm::AtomicOrdering::Acquire; |
6526 | MemOrderingSpecified = true; |
6527 | } else if (S.getSingleClause<OMPReleaseClause>()) { |
6528 | AO = llvm::AtomicOrdering::Release; |
6529 | MemOrderingSpecified = true; |
6530 | } else if (S.getSingleClause<OMPRelaxedClause>()) { |
6531 | AO = llvm::AtomicOrdering::Monotonic; |
6532 | MemOrderingSpecified = true; |
6533 | } |
6534 | llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered; |
6535 | OpenMPClauseKind Kind = OMPC_unknown; |
6536 | for (const OMPClause *C : S.clauses()) { |
6537 | // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause, |
6538 | // if it is first). |
6539 | OpenMPClauseKind K = C->getClauseKind(); |
6540 | if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire || |
6541 | K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint) |
6542 | continue; |
6543 | Kind = K; |
6544 | KindsEncountered.insert(K); |
6545 | } |
6546 | // We just need to correct Kind here. No need to set a bool saying it is |
6547 | // actually compare capture because we can tell from whether V and R are |
6548 | // nullptr. |
6549 | if (KindsEncountered.contains(OMPC_compare) && |
6550 | KindsEncountered.contains(OMPC_capture)) |
6551 | Kind = OMPC_compare; |
6552 | if (!MemOrderingSpecified) { |
6553 | llvm::AtomicOrdering DefaultOrder = |
6554 | CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6555 | if (DefaultOrder == llvm::AtomicOrdering::Monotonic || |
6556 | DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || |
6557 | (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && |
6558 | Kind == OMPC_capture)) { |
6559 | AO = DefaultOrder; |
6560 | } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { |
6561 | if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { |
6562 | AO = llvm::AtomicOrdering::Release; |
6563 | } else if (Kind == OMPC_read) { |
6564 | assert(Kind == OMPC_read && "Unexpected atomic kind." ); |
6565 | AO = llvm::AtomicOrdering::Acquire; |
6566 | } |
6567 | } |
6568 | } |
6569 | |
6570 | LexicalScope Scope(*this, S.getSourceRange()); |
6571 | EmitStopPoint(S.getAssociatedStmt()); |
6572 | emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(), |
6573 | S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(), |
6574 | S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(), |
6575 | S.getBeginLoc()); |
6576 | } |
6577 | |
6578 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
6579 | const OMPExecutableDirective &S, |
6580 | const RegionCodeGenTy &CodeGen) { |
6581 | assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); |
6582 | CodeGenModule &CGM = CGF.CGM; |
6583 | |
6584 | // On device emit this construct as inlined code. |
6585 | if (CGM.getLangOpts().OpenMPIsTargetDevice) { |
6586 | OMPLexicalScope Scope(CGF, S, OMPD_target); |
6587 | CGM.getOpenMPRuntime().emitInlinedDirective( |
6588 | CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6589 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
6590 | }); |
6591 | return; |
6592 | } |
6593 | |
6594 | auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
6595 | llvm::Function *Fn = nullptr; |
6596 | llvm::Constant *FnID = nullptr; |
6597 | |
6598 | const Expr *IfCond = nullptr; |
6599 | // Check for the at most one if clause associated with the target region. |
6600 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
6601 | if (C->getNameModifier() == OMPD_unknown || |
6602 | C->getNameModifier() == OMPD_target) { |
6603 | IfCond = C->getCondition(); |
6604 | break; |
6605 | } |
6606 | } |
6607 | |
6608 | // Check if we have any device clause associated with the directive. |
6609 | llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( |
6610 | nullptr, OMPC_DEVICE_unknown); |
6611 | if (auto *C = S.getSingleClause<OMPDeviceClause>()) |
6612 | Device.setPointerAndInt(C->getDevice(), C->getModifier()); |
6613 | |
6614 | // Check if we have an if clause whose conditional always evaluates to false |
6615 | // or if we do not have any targets specified. If so the target region is not |
6616 | // an offload entry point. |
6617 | bool IsOffloadEntry = true; |
6618 | if (IfCond) { |
6619 | bool Val; |
6620 | if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) |
6621 | IsOffloadEntry = false; |
6622 | } |
6623 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
6624 | IsOffloadEntry = false; |
6625 | |
6626 | if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { |
6627 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
6628 | DiagnosticsEngine::Error, |
6629 | "No offloading entry generated while offloading is mandatory." ); |
6630 | CGM.getDiags().Report(DiagID); |
6631 | } |
6632 | |
6633 | assert(CGF.CurFuncDecl && "No parent declaration for target region!" ); |
6634 | StringRef ParentName; |
6635 | // In case we have Ctors/Dtors we use the complete type variant to produce |
6636 | // the mangling of the device outlined kernel. |
6637 | if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) |
6638 | ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); |
6639 | else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) |
6640 | ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); |
6641 | else |
6642 | ParentName = |
6643 | CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); |
6644 | |
6645 | // Emit target region as a standalone region. |
6646 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, |
6647 | IsOffloadEntry, CodeGen); |
6648 | OMPLexicalScope Scope(CGF, S, OMPD_task); |
6649 | auto &&SizeEmitter = |
6650 | [IsOffloadEntry](CodeGenFunction &CGF, |
6651 | const OMPLoopDirective &D) -> llvm::Value * { |
6652 | if (IsOffloadEntry) { |
6653 | OMPLoopScope(CGF, D); |
6654 | // Emit calculation of the iterations count. |
6655 | llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); |
6656 | NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, |
6657 | /*isSigned=*/false); |
6658 | return NumIterations; |
6659 | } |
6660 | return nullptr; |
6661 | }; |
6662 | CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, |
6663 | SizeEmitter); |
6664 | } |
6665 | |
6666 | static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, |
6667 | PrePostActionTy &Action) { |
6668 | Action.Enter(CGF); |
6669 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6670 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6671 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6672 | (void)PrivateScope.Privatize(); |
6673 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6674 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6675 | |
6676 | CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); |
6677 | CGF.EnsureInsertPoint(); |
6678 | } |
6679 | |
6680 | void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
6681 | StringRef ParentName, |
6682 | const OMPTargetDirective &S) { |
6683 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6684 | emitTargetRegion(CGF, S, Action); |
6685 | }; |
6686 | llvm::Function *Fn; |
6687 | llvm::Constant *Addr; |
6688 | // Emit target region as a standalone region. |
6689 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6690 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6691 | assert(Fn && Addr && "Target device function emission failed." ); |
6692 | } |
6693 | |
6694 | void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { |
6695 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6696 | emitTargetRegion(CGF, S, Action); |
6697 | }; |
6698 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6699 | } |
6700 | |
6701 | static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, |
6702 | const OMPExecutableDirective &S, |
6703 | OpenMPDirectiveKind InnermostKind, |
6704 | const RegionCodeGenTy &CodeGen) { |
6705 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); |
6706 | llvm::Function *OutlinedFn = |
6707 | CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( |
6708 | CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, |
6709 | CodeGen); |
6710 | |
6711 | const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); |
6712 | const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
6713 | if (NT || TL) { |
6714 | const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; |
6715 | const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; |
6716 | |
6717 | CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, |
6718 | S.getBeginLoc()); |
6719 | } |
6720 | |
6721 | OMPTeamsScope Scope(CGF, S); |
6722 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6723 | CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
6724 | CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, |
6725 | CapturedVars); |
6726 | } |
6727 | |
6728 | void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |
6729 | // Emit teams region as a standalone region. |
6730 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6731 | Action.Enter(CGF); |
6732 | OMPPrivateScope PrivateScope(CGF); |
6733 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6734 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6735 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6736 | (void)PrivateScope.Privatize(); |
6737 | CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); |
6738 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6739 | }; |
6740 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6741 | emitPostUpdateForReductionClause(*this, S, |
6742 | [](CodeGenFunction &) { return nullptr; }); |
6743 | } |
6744 | |
6745 | static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6746 | const OMPTargetTeamsDirective &S) { |
6747 | auto *CS = S.getCapturedStmt(OMPD_teams); |
6748 | Action.Enter(CGF); |
6749 | // Emit teams region as a standalone region. |
6750 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6751 | Action.Enter(CGF); |
6752 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6753 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6754 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6755 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6756 | (void)PrivateScope.Privatize(); |
6757 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6758 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6759 | CGF.EmitStmt(CS->getCapturedStmt()); |
6760 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6761 | }; |
6762 | emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); |
6763 | emitPostUpdateForReductionClause(CGF, S, |
6764 | [](CodeGenFunction &) { return nullptr; }); |
6765 | } |
6766 | |
6767 | void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( |
6768 | CodeGenModule &CGM, StringRef ParentName, |
6769 | const OMPTargetTeamsDirective &S) { |
6770 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6771 | emitTargetTeamsRegion(CGF, Action, S); |
6772 | }; |
6773 | llvm::Function *Fn; |
6774 | llvm::Constant *Addr; |
6775 | // Emit target region as a standalone region. |
6776 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6777 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6778 | assert(Fn && Addr && "Target device function emission failed." ); |
6779 | } |
6780 | |
6781 | void CodeGenFunction::EmitOMPTargetTeamsDirective( |
6782 | const OMPTargetTeamsDirective &S) { |
6783 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6784 | emitTargetTeamsRegion(CGF, Action, S); |
6785 | }; |
6786 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6787 | } |
6788 | |
6789 | static void |
6790 | emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6791 | const OMPTargetTeamsDistributeDirective &S) { |
6792 | Action.Enter(CGF); |
6793 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6794 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6795 | }; |
6796 | |
6797 | // Emit teams region as a standalone region. |
6798 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6799 | PrePostActionTy &Action) { |
6800 | Action.Enter(CGF); |
6801 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6802 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6803 | (void)PrivateScope.Privatize(); |
6804 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6805 | CodeGenDistribute); |
6806 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6807 | }; |
6808 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); |
6809 | emitPostUpdateForReductionClause(CGF, S, |
6810 | [](CodeGenFunction &) { return nullptr; }); |
6811 | } |
6812 | |
6813 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( |
6814 | CodeGenModule &CGM, StringRef ParentName, |
6815 | const OMPTargetTeamsDistributeDirective &S) { |
6816 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6817 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6818 | }; |
6819 | llvm::Function *Fn; |
6820 | llvm::Constant *Addr; |
6821 | // Emit target region as a standalone region. |
6822 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6823 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6824 | assert(Fn && Addr && "Target device function emission failed." ); |
6825 | } |
6826 | |
6827 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( |
6828 | const OMPTargetTeamsDistributeDirective &S) { |
6829 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6830 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6831 | }; |
6832 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6833 | } |
6834 | |
6835 | static void emitTargetTeamsDistributeSimdRegion( |
6836 | CodeGenFunction &CGF, PrePostActionTy &Action, |
6837 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6838 | Action.Enter(CGF); |
6839 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6840 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6841 | }; |
6842 | |
6843 | // Emit teams region as a standalone region. |
6844 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6845 | PrePostActionTy &Action) { |
6846 | Action.Enter(CGF); |
6847 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6848 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6849 | (void)PrivateScope.Privatize(); |
6850 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6851 | CodeGenDistribute); |
6852 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6853 | }; |
6854 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); |
6855 | emitPostUpdateForReductionClause(CGF, S, |
6856 | [](CodeGenFunction &) { return nullptr; }); |
6857 | } |
6858 | |
6859 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( |
6860 | CodeGenModule &CGM, StringRef ParentName, |
6861 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6862 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6863 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6864 | }; |
6865 | llvm::Function *Fn; |
6866 | llvm::Constant *Addr; |
6867 | // Emit target region as a standalone region. |
6868 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6869 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6870 | assert(Fn && Addr && "Target device function emission failed." ); |
6871 | } |
6872 | |
6873 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( |
6874 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6875 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6876 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6877 | }; |
6878 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6879 | } |
6880 | |
6881 | void CodeGenFunction::EmitOMPTeamsDistributeDirective( |
6882 | const OMPTeamsDistributeDirective &S) { |
6883 | |
6884 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6885 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6886 | }; |
6887 | |
6888 | // Emit teams region as a standalone region. |
6889 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6890 | PrePostActionTy &Action) { |
6891 | Action.Enter(CGF); |
6892 | OMPPrivateScope PrivateScope(CGF); |
6893 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6894 | (void)PrivateScope.Privatize(); |
6895 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6896 | CodeGenDistribute); |
6897 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6898 | }; |
6899 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6900 | emitPostUpdateForReductionClause(*this, S, |
6901 | [](CodeGenFunction &) { return nullptr; }); |
6902 | } |
6903 | |
6904 | void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( |
6905 | const OMPTeamsDistributeSimdDirective &S) { |
6906 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6907 | CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); |
6908 | }; |
6909 | |
6910 | // Emit teams region as a standalone region. |
6911 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6912 | PrePostActionTy &Action) { |
6913 | Action.Enter(CGF); |
6914 | OMPPrivateScope PrivateScope(CGF); |
6915 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6916 | (void)PrivateScope.Privatize(); |
6917 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, |
6918 | CodeGenDistribute); |
6919 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6920 | }; |
6921 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); |
6922 | emitPostUpdateForReductionClause(*this, S, |
6923 | [](CodeGenFunction &) { return nullptr; }); |
6924 | } |
6925 | |
6926 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( |
6927 | const OMPTeamsDistributeParallelForDirective &S) { |
6928 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6929 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
6930 | S.getDistInc()); |
6931 | }; |
6932 | |
6933 | // Emit teams region as a standalone region. |
6934 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6935 | PrePostActionTy &Action) { |
6936 | Action.Enter(CGF); |
6937 | OMPPrivateScope PrivateScope(CGF); |
6938 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6939 | (void)PrivateScope.Privatize(); |
6940 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6941 | CodeGenDistribute); |
6942 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6943 | }; |
6944 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); |
6945 | emitPostUpdateForReductionClause(*this, S, |
6946 | [](CodeGenFunction &) { return nullptr; }); |
6947 | } |
6948 | |
6949 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( |
6950 | const OMPTeamsDistributeParallelForSimdDirective &S) { |
6951 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6952 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
6953 | S.getDistInc()); |
6954 | }; |
6955 | |
6956 | // Emit teams region as a standalone region. |
6957 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6958 | PrePostActionTy &Action) { |
6959 | Action.Enter(CGF); |
6960 | OMPPrivateScope PrivateScope(CGF); |
6961 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6962 | (void)PrivateScope.Privatize(); |
6963 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
6964 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
6965 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6966 | }; |
6967 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, |
6968 | CodeGen); |
6969 | emitPostUpdateForReductionClause(*this, S, |
6970 | [](CodeGenFunction &) { return nullptr; }); |
6971 | } |
6972 | |
/// Emit an 'omp interop' directive: evaluate the optional device clause and
/// any depend clauses, then dispatch to the IRBuilder call matching the
/// action clause (init, destroy, or use).
void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  llvm::Value *Device = nullptr;
  llvm::Value *NumDependences = nullptr;
  llvm::Value *DependenceList = nullptr;

  // Evaluate the device number if a device clause is present.
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());

  // Build list and emit dependences
  OMPTaskDataTy Data;
  buildDependences(S, Data);
  if (!Data.Dependences.empty()) {
    Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.getPointer();
  }
  Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();

  // 'nowait' must accompany one of the init/destroy/use action clauses.
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "OMPNowaitClause clause is used separately in OMPInteropDirective." );

  // Dispatch on the action clause.
  if (const auto *C = S.getSingleClause<OMPInitClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    // The interop-type is either 'target' or 'targetsync'.
    llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
    if (C->getIsTarget()) {
      InteropType = llvm::omp::OMPInteropType::Target;
    } else {
      assert(C->getIsTargetSync() && "Expected interop-type target/targetsync" );
      InteropType = llvm::omp::OMPInteropType::TargetSync;
    }
    OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
                                    NumDependences, DependenceList,
                                    Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
                                       NumDependences, DependenceList,
                                       Data.HasNowaitClause);
  } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
    llvm::Value *InteropvarPtr =
        EmitLValue(C->getInteropVar()).getPointer(*this);
    OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
                                   NumDependences, DependenceList,
                                   Data.HasNowaitClause);
  }
}
7026 | |
7027 | static void emitTargetTeamsDistributeParallelForRegion( |
7028 | CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, |
7029 | PrePostActionTy &Action) { |
7030 | Action.Enter(CGF); |
7031 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7032 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7033 | S.getDistInc()); |
7034 | }; |
7035 | |
7036 | // Emit teams region as a standalone region. |
7037 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7038 | PrePostActionTy &Action) { |
7039 | Action.Enter(CGF); |
7040 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7041 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7042 | (void)PrivateScope.Privatize(); |
7043 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7044 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7045 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7046 | }; |
7047 | |
7048 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
7049 | CodeGenTeams); |
7050 | emitPostUpdateForReductionClause(CGF, S, |
7051 | [](CodeGenFunction &) { return nullptr; }); |
7052 | } |
7053 | |
7054 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( |
7055 | CodeGenModule &CGM, StringRef ParentName, |
7056 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7057 | // Emit SPMD target teams distribute parallel for region as a standalone |
7058 | // region. |
7059 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7060 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7061 | }; |
7062 | llvm::Function *Fn; |
7063 | llvm::Constant *Addr; |
7064 | // Emit target region as a standalone region. |
7065 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7066 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7067 | assert(Fn && Addr && "Target device function emission failed." ); |
7068 | } |
7069 | |
7070 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( |
7071 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7072 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7073 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7074 | }; |
7075 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7076 | } |
7077 | |
// Emit the device-side body of a combined
// 'target teams distribute parallel for simd' directive: a teams region
// running a distribute loop whose chunks are emitted as inner parallel-for
// regions (with simd lowering handled further down the call chain).
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Codegen for the 'distribute' part; the loop body is emitted via
  // emitInnerParallelForWhenCombined using the combined-construct increment.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize teams-level reduction variables around the inlined
    // 'distribute' region, then emit the final reduction for the teams level.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  // Emit the reduction post-update expressions (if any); no extra guarding
  // condition is supplied here.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7105 | |
7106 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( |
7107 | CodeGenModule &CGM, StringRef ParentName, |
7108 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7109 | // Emit SPMD target teams distribute parallel for simd region as a standalone |
7110 | // region. |
7111 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7112 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7113 | }; |
7114 | llvm::Function *Fn; |
7115 | llvm::Constant *Addr; |
7116 | // Emit target region as a standalone region. |
7117 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7118 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7119 | assert(Fn && Addr && "Target device function emission failed." ); |
7120 | } |
7121 | |
7122 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( |
7123 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7124 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7125 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7126 | }; |
7127 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7128 | } |
7129 | |
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  // Delegate to the OpenMP runtime: emit the cancellation-point check for the
  // region kind named on the directive.
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}
7135 | |
void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  // Find an 'if' clause applicable to 'cancel' (either unmodified or with the
  // 'cancel' directive-name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    // TODO: This check is necessary as we only generate `omp parallel` through
    // the OpenMPIRBuilder for now.
    if (S.getCancelRegion() == OMPD_parallel ||
        S.getCancelRegion() == OMPD_sections ||
        S.getCancelRegion() == OMPD_section) {
      llvm::Value *IfCondition = nullptr;
      if (IfCond)
        IfCondition = EmitScalarExpr(IfCond,
                                     /*IgnoreResultAssign=*/true);
      // Let the OpenMPIRBuilder emit the cancellation and update our insert
      // point to continue after it.
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
    }
  }

  // Legacy (non-IRBuilder) path: emit the runtime cancel call directly.
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}
7164 | |
7165 | CodeGenFunction::JumpDest |
7166 | CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { |
7167 | if (Kind == OMPD_parallel || Kind == OMPD_task || |
7168 | Kind == OMPD_target_parallel || Kind == OMPD_taskloop || |
7169 | Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) |
7170 | return ReturnBlock; |
7171 | assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || |
7172 | Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || |
7173 | Kind == OMPD_distribute_parallel_for || |
7174 | Kind == OMPD_target_parallel_for || |
7175 | Kind == OMPD_teams_distribute_parallel_for || |
7176 | Kind == OMPD_target_teams_distribute_parallel_for); |
7177 | return OMPCancelStack.getExitBlock(); |
7178 | } |
7179 | |
/// Privatize the variables listed on a 'use_device_ptr' clause: each listed
/// pointer is remapped to the device address recorded for it in
/// \p CaptureDeviceAddrMap by the mapping logic.
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Handle each variable only once even if it appears repeatedly in the list.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *OrigVarIt : C.varlists()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
    if (!Processed.insert(OrigVD).second)
      continue;

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
             "Base should be the current struct!" );
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    // Return the address of the private variable.
    // NOTE(review): the alignment used is that of 'void *', presumably because
    // the runtime hands back an opaque pointer — confirm if a stricter
    // alignment is ever required.
    bool IsRegistered = PrivateScope.addPrivate(
        OrigVD,
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
    assert(IsRegistered && "firstprivate var already registered as private" );
    // Silence the warning about unused variable.
    (void)IsRegistered;
  }
}
7221 | |
7222 | static const VarDecl *getBaseDecl(const Expr *Ref) { |
7223 | const Expr *Base = Ref->IgnoreParenImpCasts(); |
7224 | while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) |
7225 | Base = OASE->getBase()->IgnoreParenImpCasts(); |
7226 | while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) |
7227 | Base = ASE->getBase()->IgnoreParenImpCasts(); |
7228 | return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); |
7229 | } |
7230 | |
/// Privatize the variables listed on a 'use_device_addr' clause: each listed
/// item is remapped to the device address recorded for it in
/// \p CaptureDeviceAddrMap by the mapping logic.
void CodeGenFunction::EmitOMPUseDeviceAddrClause(
    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, llvm::Value *>
        CaptureDeviceAddrMap) {
  // Handle each base variable only once even if it appears repeatedly.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const Expr *Ref : C.varlists()) {
    // List items may be array sections/subscripts; resolve to the base decl.
    const VarDecl *OrigVD = getBaseDecl(Ref);
    if (!Processed.insert(OrigVD).second)
      continue;
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privative fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!" );
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());

    Address PrivAddr =
        Address(InitAddrIt->second, Ty,
                getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable length array need to load the pointer for
    // correct mapping, since the pointer to the data was passed to the runtime.
    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
        MatchingVD->getType()->isArrayType()) {
      QualType PtrTy = getContext().getPointerType(
          OrigVD->getType().getNonReferenceType());
      PrivAddr =
          EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
                            PtrTy->castAs<PointerType>());
    }

    (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
  }
}
7278 | |
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  // Request device-pointer info and separate begin/end mapping calls so that
  // use_device_ptr/use_device_addr privatization can consult the map results.
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                       /*SeparateBeginEndCalls=*/true);

  // Create a pre/post action to signal the privatization of the device pointer.
  // This action can be replaced by the OpenMP runtime code generation to
  // deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Codegen for the body of the 'target data' region itself.
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
                                         Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        // If we don't have target devices, don't bother emitting the data
        // mapping code.
        std::optional<OpenMPDirectiveKind> CaptureRegion;
        if (CGM.getLangOpts().OMPTargetTriples.empty()) {
          // Emit helper decls of the use_device_ptr/use_device_addr clauses.
          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = cast<DeclRefExpr>(E)->getDecl();
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
          for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
            for (const Expr *E : C->varlists()) {
              const Decl *D = getBaseDecl(E);
              if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
                CGF.EmitVarDecl(*OED);
            }
        } else {
          CaptureRegion = OMPD_unknown;
        }

        OMPLexicalScope Scope(CGF, S, CaptureRegion);
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Notwithstanding the body of the region is emitted as inlined directive,
    // we don't use an inline scope as changes in the references inside the
    // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
7391 | |
7392 | void CodeGenFunction::EmitOMPTargetEnterDataDirective( |
7393 | const OMPTargetEnterDataDirective &S) { |
7394 | // If we don't have target devices, don't bother emitting the data mapping |
7395 | // code. |
7396 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7397 | return; |
7398 | |
7399 | // Check if we have any if clause associated with the directive. |
7400 | const Expr *IfCond = nullptr; |
7401 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7402 | IfCond = C->getCondition(); |
7403 | |
7404 | // Check if we have any device clause associated with the directive. |
7405 | const Expr *Device = nullptr; |
7406 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7407 | Device = C->getDevice(); |
7408 | |
7409 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7410 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7411 | } |
7412 | |
7413 | void CodeGenFunction::EmitOMPTargetExitDataDirective( |
7414 | const OMPTargetExitDataDirective &S) { |
7415 | // If we don't have target devices, don't bother emitting the data mapping |
7416 | // code. |
7417 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7418 | return; |
7419 | |
7420 | // Check if we have any if clause associated with the directive. |
7421 | const Expr *IfCond = nullptr; |
7422 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7423 | IfCond = C->getCondition(); |
7424 | |
7425 | // Check if we have any device clause associated with the directive. |
7426 | const Expr *Device = nullptr; |
7427 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7428 | Device = C->getDevice(); |
7429 | |
7430 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7431 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7432 | } |
7433 | |
// Emit the body of a 'target parallel' construct: a parallel region whose
// body is the directive's captured statement.
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Privatize firstprivate/private/reduction variables before emitting the
    // region body.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  // Emit the reduction post-update expressions (if any); no extra guarding
  // condition is supplied here.
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
7458 | |
7459 | void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( |
7460 | CodeGenModule &CGM, StringRef ParentName, |
7461 | const OMPTargetParallelDirective &S) { |
7462 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7463 | emitTargetParallelRegion(CGF, S, Action); |
7464 | }; |
7465 | llvm::Function *Fn; |
7466 | llvm::Constant *Addr; |
7467 | // Emit target region as a standalone region. |
7468 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7469 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7470 | assert(Fn && Addr && "Target device function emission failed." ); |
7471 | } |
7472 | |
7473 | void CodeGenFunction::EmitOMPTargetParallelDirective( |
7474 | const OMPTargetParallelDirective &S) { |
7475 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7476 | emitTargetParallelRegion(CGF, S, Action); |
7477 | }; |
7478 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7479 | } |
7480 | |
// Emit the body of a 'target parallel for' construct as a combined
// parallel + worksharing-loop region.
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    // Track cancellation for this construct so 'cancel' can branch to the
    // proper exit block.
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
7497 | |
7498 | void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( |
7499 | CodeGenModule &CGM, StringRef ParentName, |
7500 | const OMPTargetParallelForDirective &S) { |
7501 | // Emit SPMD target parallel for region as a standalone region. |
7502 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7503 | emitTargetParallelForRegion(CGF, S, Action); |
7504 | }; |
7505 | llvm::Function *Fn; |
7506 | llvm::Constant *Addr; |
7507 | // Emit target region as a standalone region. |
7508 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7509 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7510 | assert(Fn && Addr && "Target device function emission failed." ); |
7511 | } |
7512 | |
7513 | void CodeGenFunction::EmitOMPTargetParallelForDirective( |
7514 | const OMPTargetParallelForDirective &S) { |
7515 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7516 | emitTargetParallelForRegion(CGF, S, Action); |
7517 | }; |
7518 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7519 | } |
7520 | |
// Emit the body of a 'target parallel for simd' construct as a combined
// parallel + worksharing-loop region (simd handling occurs further down the
// worksharing-loop emission).
static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  // Note the inner directive kind passed here is OMPD_simd (unlike the
  // non-simd variant, which passes OMPD_for).
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
7536 | |
7537 | void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( |
7538 | CodeGenModule &CGM, StringRef ParentName, |
7539 | const OMPTargetParallelForSimdDirective &S) { |
7540 | // Emit SPMD target parallel for region as a standalone region. |
7541 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7542 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7543 | }; |
7544 | llvm::Function *Fn; |
7545 | llvm::Constant *Addr; |
7546 | // Emit target region as a standalone region. |
7547 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7548 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7549 | assert(Fn && Addr && "Target device function emission failed." ); |
7550 | } |
7551 | |
7552 | void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( |
7553 | const OMPTargetParallelForSimdDirective &S) { |
7554 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7555 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7556 | }; |
7557 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7558 | } |
7559 | |
7560 | /// Emit a helper variable and return corresponding lvalue. |
7561 | static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, |
7562 | const ImplicitParamDecl *PVD, |
7563 | CodeGenFunction::OMPPrivateScope &Privates) { |
7564 | const auto *VDecl = cast<VarDecl>(Helper->getDecl()); |
7565 | Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD)); |
7566 | } |
7567 | |
/// Common emission for all taskloop-based directives (taskloop, taskloop
/// simd, master taskloop, ...): outlines the loop body as a task and emits
/// the runtime taskloop call, optionally wrapped in a taskgroup.
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = Address::invalid();
  {
    // Build the aggregate of captured values; pre-init statements are
    // deliberately not emitted inside this scope.
    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
  }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  // Find an 'if' clause applicable to the taskloop (no modifier or the
  // 'taskloop' directive-name modifier).
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }

  // Codegen for the loop body that runs inside each generated task.
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then" );
      ContBlock = CGF.createBasicBlock("taskloop.if.end" );
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    (void)CGF.EmitOMPLinearClauseInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    // Positions of the taskloop bound/stride/last-iteration parameters in the
    // outlined function's parameter list (the first five are other captures).
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    // Remap the directive's helper variables onto those parameters.
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    {
      // Emit the inner loop; for simd variants, emit simd-specific loop
      // metadata first.
      OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
      emitCommonSimdLoop(
          CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
                },
                [](CodeGenFunction &) {});
          });
    }
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
    LoopScope.restoreMap();
    // Linear-clause finalization is also guarded by the last-iteration flag.
    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
                               (*LIP)->getType(), S.getBeginLoc()));
    });
  };
  // Codegen that issues the actual runtime taskloop call with the outlined
  // task function and the captured shareds.
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  // Per the 'nogroup' clause, either emit the task directly or wrap the
  // emission in an implicit taskgroup region.
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
7729 | |
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  // Disable lastprivate-conditional tracking for the duration of this
  // directive, then defer to the common taskloop emission.
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
7735 | |
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  // Disable lastprivate-conditional tracking, open the directive's lexical
  // scope, then defer to the common taskloop emission (which handles simd).
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}
7743 | |
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  // The taskloop is emitted inside a 'master' region.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  // No capture region and no pre-init statements for this combined form.
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
7755 | |
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  // The taskloop-simd is emitted inside a 'master' region.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
7767 | |
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  // Emit a 'parallel' region whose body is a 'master' region containing the
  // taskloop emission.
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}
7785 | |
7786 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( |
7787 | const OMPParallelMasterTaskLoopSimdDirective &S) { |
7788 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7789 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7790 | PrePostActionTy &Action) { |
7791 | Action.Enter(CGF); |
7792 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7793 | }; |
7794 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7795 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, |
7796 | S.getBeginLoc()); |
7797 | }; |
7798 | auto LPCRegion = |
7799 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7800 | emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, |
7801 | emitEmptyBoundParameters); |
7802 | } |
7803 | |
7804 | // Generate the instructions for '#pragma omp target update' directive. |
7805 | void CodeGenFunction::EmitOMPTargetUpdateDirective( |
7806 | const OMPTargetUpdateDirective &S) { |
7807 | // If we don't have target devices, don't bother emitting the data mapping |
7808 | // code. |
7809 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7810 | return; |
7811 | |
7812 | // Check if we have any if clause associated with the directive. |
7813 | const Expr *IfCond = nullptr; |
7814 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7815 | IfCond = C->getCondition(); |
7816 | |
7817 | // Check if we have any device clause associated with the directive. |
7818 | const Expr *Device = nullptr; |
7819 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7820 | Device = C->getDevice(); |
7821 | |
7822 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7823 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7824 | } |
7825 | |
7826 | void CodeGenFunction::EmitOMPGenericLoopDirective( |
7827 | const OMPGenericLoopDirective &S) { |
7828 | // Unimplemented, just inline the underlying statement for now. |
7829 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7830 | // Emit the loop iteration variable. |
7831 | const Stmt *CS = |
7832 | cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); |
7833 | const auto *ForS = dyn_cast<ForStmt>(CS); |
7834 | if (ForS && !isa<DeclStmt>(ForS->getInit())) { |
7835 | OMPPrivateScope LoopScope(CGF); |
7836 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7837 | (void)LoopScope.Privatize(); |
7838 | CGF.EmitStmt(CS); |
7839 | LoopScope.restoreMap(); |
7840 | } else { |
7841 | CGF.EmitStmt(CS); |
7842 | } |
7843 | }; |
7844 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
7845 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); |
7846 | } |
7847 | |
7848 | void CodeGenFunction::EmitOMPParallelGenericLoopDirective( |
7849 | const OMPLoopDirective &S) { |
7850 | // Emit combined directive as if its consituent constructs are 'parallel' |
7851 | // and 'for'. |
7852 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7853 | Action.Enter(CGF); |
7854 | emitOMPCopyinClause(CGF, S); |
7855 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
7856 | }; |
7857 | { |
7858 | auto LPCRegion = |
7859 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7860 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
7861 | emitEmptyBoundParameters); |
7862 | } |
7863 | // Check for outer lastprivate conditional update. |
7864 | checkForLastprivateConditionalUpdate(*this, S); |
7865 | } |
7866 | |
7867 | void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( |
7868 | const OMPTeamsGenericLoopDirective &S) { |
7869 | // To be consistent with current behavior of 'target teams loop', emit |
7870 | // 'teams loop' as if its constituent constructs are 'distribute, |
7871 | // 'parallel, and 'for'. |
7872 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7873 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7874 | S.getDistInc()); |
7875 | }; |
7876 | |
7877 | // Emit teams region as a standalone region. |
7878 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7879 | PrePostActionTy &Action) { |
7880 | Action.Enter(CGF); |
7881 | OMPPrivateScope PrivateScope(CGF); |
7882 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7883 | (void)PrivateScope.Privatize(); |
7884 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
7885 | CodeGenDistribute); |
7886 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7887 | }; |
7888 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); |
7889 | emitPostUpdateForReductionClause(*this, S, |
7890 | [](CodeGenFunction &) { return nullptr; }); |
7891 | } |
7892 | |
7893 | static void |
7894 | emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, |
7895 | const OMPTargetTeamsGenericLoopDirective &S, |
7896 | PrePostActionTy &Action) { |
7897 | Action.Enter(CGF); |
7898 | // Emit 'teams loop' as if its constituent constructs are 'distribute, |
7899 | // 'parallel, and 'for'. |
7900 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7901 | CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, |
7902 | S.getDistInc()); |
7903 | }; |
7904 | |
7905 | // Emit teams region as a standalone region. |
7906 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7907 | PrePostActionTy &Action) { |
7908 | Action.Enter(CGF); |
7909 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7910 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7911 | (void)PrivateScope.Privatize(); |
7912 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7913 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7914 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7915 | }; |
7916 | |
7917 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
7918 | CodeGenTeams); |
7919 | emitPostUpdateForReductionClause(CGF, S, |
7920 | [](CodeGenFunction &) { return nullptr; }); |
7921 | } |
7922 | |
7923 | /// Emit combined directive 'target teams loop' as if its constituent |
7924 | /// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. |
7925 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( |
7926 | const OMPTargetTeamsGenericLoopDirective &S) { |
7927 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7928 | emitTargetTeamsGenericLoopRegion(CGF, S, Action); |
7929 | }; |
7930 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7931 | } |
7932 | |
7933 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( |
7934 | CodeGenModule &CGM, StringRef ParentName, |
7935 | const OMPTargetTeamsGenericLoopDirective &S) { |
7936 | // Emit SPMD target parallel loop region as a standalone region. |
7937 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7938 | emitTargetTeamsGenericLoopRegion(CGF, S, Action); |
7939 | }; |
7940 | llvm::Function *Fn; |
7941 | llvm::Constant *Addr; |
7942 | // Emit target region as a standalone region. |
7943 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7944 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7945 | assert(Fn && Addr && |
7946 | "Target device function emission failed for 'target teams loop'." ); |
7947 | } |
7948 | |
7949 | static void emitTargetParallelGenericLoopRegion( |
7950 | CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, |
7951 | PrePostActionTy &Action) { |
7952 | Action.Enter(CGF); |
7953 | // Emit as 'parallel for'. |
7954 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7955 | Action.Enter(CGF); |
7956 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7957 | CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); |
7958 | CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, |
7959 | emitDispatchForLoopBounds); |
7960 | }; |
7961 | emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, |
7962 | emitEmptyBoundParameters); |
7963 | } |
7964 | |
7965 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( |
7966 | CodeGenModule &CGM, StringRef ParentName, |
7967 | const OMPTargetParallelGenericLoopDirective &S) { |
7968 | // Emit target parallel loop region as a standalone region. |
7969 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7970 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
7971 | }; |
7972 | llvm::Function *Fn; |
7973 | llvm::Constant *Addr; |
7974 | // Emit target region as a standalone region. |
7975 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7976 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7977 | assert(Fn && Addr && "Target device function emission failed." ); |
7978 | } |
7979 | |
7980 | /// Emit combined directive 'target parallel loop' as if its constituent |
7981 | /// constructs are 'target', 'parallel', and 'for'. |
7982 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( |
7983 | const OMPTargetParallelGenericLoopDirective &S) { |
7984 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7985 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
7986 | }; |
7987 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7988 | } |
7989 | |
7990 | void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
7991 | const OMPExecutableDirective &D) { |
7992 | if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { |
7993 | EmitOMPScanDirective(*SD); |
7994 | return; |
7995 | } |
7996 | if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
7997 | return; |
7998 | auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7999 | OMPPrivateScope GlobalsScope(CGF); |
8000 | if (isOpenMPTaskingDirective(D.getDirectiveKind())) { |
8001 | // Capture global firstprivates to avoid crash. |
8002 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
8003 | for (const Expr *Ref : C->varlists()) { |
8004 | const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
8005 | if (!DRE) |
8006 | continue; |
8007 | const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); |
8008 | if (!VD || VD->hasLocalStorage()) |
8009 | continue; |
8010 | if (!CGF.LocalDeclMap.count(VD)) { |
8011 | LValue GlobLVal = CGF.EmitLValue(Ref); |
8012 | GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF)); |
8013 | } |
8014 | } |
8015 | } |
8016 | } |
8017 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
8018 | (void)GlobalsScope.Privatize(); |
8019 | ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); |
8020 | emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); |
8021 | } else { |
8022 | if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { |
8023 | for (const Expr *E : LD->counters()) { |
8024 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
8025 | if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { |
8026 | LValue GlobLVal = CGF.EmitLValue(E); |
8027 | GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF)); |
8028 | } |
8029 | if (isa<OMPCapturedExprDecl>(VD)) { |
8030 | // Emit only those that were not explicitly referenced in clauses. |
8031 | if (!CGF.LocalDeclMap.count(VD)) |
8032 | CGF.EmitVarDecl(*VD); |
8033 | } |
8034 | } |
8035 | for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
8036 | if (!C->getNumForLoops()) |
8037 | continue; |
8038 | for (unsigned I = LD->getLoopsNumber(), |
8039 | E = C->getLoopNumIterations().size(); |
8040 | I < E; ++I) { |
8041 | if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
8042 | cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { |
8043 | // Emit only those that were not explicitly referenced in clauses. |
8044 | if (!CGF.LocalDeclMap.count(VD)) |
8045 | CGF.EmitVarDecl(*VD); |
8046 | } |
8047 | } |
8048 | } |
8049 | } |
8050 | (void)GlobalsScope.Privatize(); |
8051 | CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); |
8052 | } |
8053 | }; |
8054 | if (D.getDirectiveKind() == OMPD_atomic || |
8055 | D.getDirectiveKind() == OMPD_critical || |
8056 | D.getDirectiveKind() == OMPD_section || |
8057 | D.getDirectiveKind() == OMPD_master || |
8058 | D.getDirectiveKind() == OMPD_masked) { |
8059 | EmitStmt(D.getAssociatedStmt()); |
8060 | } else { |
8061 | auto LPCRegion = |
8062 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); |
8063 | OMPSimdLexicalScope Scope(*this, D); |
8064 | CGM.getOpenMPRuntime().emitInlinedDirective( |
8065 | *this, |
8066 | isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd |
8067 | : D.getDirectiveKind(), |
8068 | CodeGen); |
8069 | } |
8070 | // Check for outer lastprivate conditional update. |
8071 | checkForLastprivateConditionalUpdate(*this, D); |
8072 | } |
8073 | |