1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGOpenMPRuntime.h"
15#include "CodeGenFunction.h"
16#include "CodeGenModule.h"
17#include "TargetInfo.h"
18#include "clang/AST/ASTContext.h"
19#include "clang/AST/Attr.h"
20#include "clang/AST/DeclOpenMP.h"
21#include "clang/AST/OpenMPClause.h"
22#include "clang/AST/Stmt.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/OpenMPKinds.h"
26#include "clang/Basic/PrettyStackTrace.h"
27#include "llvm/ADT/SmallSet.h"
28#include "llvm/BinaryFormat/Dwarf.h"
29#include "llvm/Frontend/OpenMP/OMPConstants.h"
30#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/IR/Instructions.h"
34#include "llvm/IR/IntrinsicInst.h"
35#include "llvm/IR/Metadata.h"
36#include "llvm/Support/AtomicOrdering.h"
37#include <optional>
38using namespace clang;
39using namespace CodeGen;
40using namespace llvm::omp;
41
42static const VarDecl *getBaseDecl(const Expr *Ref);
43
44namespace {
45/// Lexical scope for OpenMP executable constructs, that handles correct codegen
46/// for captured expressions.
47class OMPLexicalScope : public CodeGenFunction::LexicalScope {
48 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
49 for (const auto *C : S.clauses()) {
50 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
51 if (const auto *PreInit =
52 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
53 for (const auto *I : PreInit->decls()) {
54 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
55 CGF.EmitVarDecl(cast<VarDecl>(*I));
56 } else {
57 CodeGenFunction::AutoVarEmission Emission =
58 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
59 CGF.EmitAutoVarCleanups(Emission);
60 }
61 }
62 }
63 }
64 }
65 }
66 CodeGenFunction::OMPPrivateScope InlinedShareds;
67
68 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
69 return CGF.LambdaCaptureFields.lookup(VD) ||
70 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
71 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
72 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
73 }
74
75public:
76 OMPLexicalScope(
77 CodeGenFunction &CGF, const OMPExecutableDirective &S,
78 const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
79 const bool EmitPreInitStmt = true)
80 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
81 InlinedShareds(CGF) {
82 if (EmitPreInitStmt)
83 emitPreInitStmt(CGF, S);
84 if (!CapturedRegion)
85 return;
86 assert(S.hasAssociatedStmt() &&
87 "Expected associated statement for inlined directive.");
88 const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
89 for (const auto &C : CS->captures()) {
90 if (C.capturesVariable() || C.capturesVariableByCopy()) {
91 auto *VD = C.getCapturedVar();
92 assert(VD == VD->getCanonicalDecl() &&
93 "Canonical decl must be captured.");
94 DeclRefExpr DRE(
95 CGF.getContext(), const_cast<VarDecl *>(VD),
96 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
97 InlinedShareds.isGlobalVarCaptured(VD)),
98 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
99 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
100 }
101 }
102 (void)InlinedShareds.Privatize();
103 }
104};
105
106/// Lexical scope for OpenMP parallel construct, that handles correct codegen
107/// for captured expressions.
108class OMPParallelScope final : public OMPLexicalScope {
109 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
110 OpenMPDirectiveKind Kind = S.getDirectiveKind();
111 return !(isOpenMPTargetExecutionDirective(Kind) ||
112 isOpenMPLoopBoundSharingDirective(Kind)) &&
113 isOpenMPParallelDirective(Kind);
114 }
115
116public:
117 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
118 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
119 EmitPreInitStmt(S)) {}
120};
121
122/// Lexical scope for OpenMP teams construct, that handles correct codegen
123/// for captured expressions.
124class OMPTeamsScope final : public OMPLexicalScope {
125 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
126 OpenMPDirectiveKind Kind = S.getDirectiveKind();
127 return !isOpenMPTargetExecutionDirective(Kind) &&
128 isOpenMPTeamsDirective(Kind);
129 }
130
131public:
132 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
133 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
134 EmitPreInitStmt(S)) {}
135};
136
137/// Private scope for OpenMP loop-based directives, that supports capturing
138/// of used expression from loop statement.
139class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
140 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
141 const DeclStmt *PreInits;
142 CodeGenFunction::OMPMapVars PreCondVars;
143 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
144 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
145 for (const auto *E : LD->counters()) {
146 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
147 EmittedAsPrivate.insert(VD->getCanonicalDecl());
148 (void)PreCondVars.setVarAddr(
149 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
150 }
151 // Mark private vars as undefs.
152 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
153 for (const Expr *IRef : C->varlists()) {
154 const auto *OrigVD =
155 cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
156 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
157 QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
158 (void)PreCondVars.setVarAddr(
159 CGF, OrigVD,
160 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
161 CGF.getContext().getPointerType(OrigVDTy))),
162 CGF.ConvertTypeForMem(OrigVDTy),
163 CGF.getContext().getDeclAlign(OrigVD)));
164 }
165 }
166 }
167 (void)PreCondVars.apply(CGF);
168 // Emit init, __range and __end variables for C++ range loops.
169 (void)OMPLoopBasedDirective::doForAllLoops(
170 LD->getInnermostCapturedStmt()->getCapturedStmt(),
171 /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
172 [&CGF](unsigned Cnt, const Stmt *CurStmt) {
173 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
174 if (const Stmt *Init = CXXFor->getInit())
175 CGF.EmitStmt(Init);
176 CGF.EmitStmt(CXXFor->getRangeStmt());
177 CGF.EmitStmt(CXXFor->getEndStmt());
178 }
179 return false;
180 });
181 PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
182 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
183 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
184 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
185 PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
186 } else {
187 llvm_unreachable("Unknown loop-based directive kind.");
188 }
189 if (PreInits) {
190 for (const auto *I : PreInits->decls())
191 CGF.EmitVarDecl(cast<VarDecl>(*I));
192 }
193 PreCondVars.restore(CGF);
194 }
195
196public:
197 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
198 : CodeGenFunction::RunCleanupsScope(CGF) {
199 emitPreInitStmt(CGF, S);
200 }
201};
202
203class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
204 CodeGenFunction::OMPPrivateScope InlinedShareds;
205
206 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
207 return CGF.LambdaCaptureFields.lookup(VD) ||
208 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
209 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
210 cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
211 }
212
213public:
214 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
215 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
216 InlinedShareds(CGF) {
217 for (const auto *C : S.clauses()) {
218 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
219 if (const auto *PreInit =
220 cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
221 for (const auto *I : PreInit->decls()) {
222 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
223 CGF.EmitVarDecl(cast<VarDecl>(*I));
224 } else {
225 CodeGenFunction::AutoVarEmission Emission =
226 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
227 CGF.EmitAutoVarCleanups(Emission);
228 }
229 }
230 }
231 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
232 for (const Expr *E : UDP->varlists()) {
233 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
234 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
235 CGF.EmitVarDecl(*OED);
236 }
237 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
238 for (const Expr *E : UDP->varlists()) {
239 const Decl *D = getBaseDecl(E);
240 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
241 CGF.EmitVarDecl(*OED);
242 }
243 }
244 }
245 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
246 CGF.EmitOMPPrivateClause(S, InlinedShareds);
247 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
248 if (const Expr *E = TG->getReductionRef())
249 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
250 }
251 // Temp copy arrays for inscan reductions should not be emitted as they are
252 // not used in simd only mode.
253 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
254 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
255 if (C->getModifier() != OMPC_REDUCTION_inscan)
256 continue;
257 for (const Expr *E : C->copy_array_temps())
258 CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
259 }
260 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
261 while (CS) {
262 for (auto &C : CS->captures()) {
263 if (C.capturesVariable() || C.capturesVariableByCopy()) {
264 auto *VD = C.getCapturedVar();
265 if (CopyArrayTemps.contains(VD))
266 continue;
267 assert(VD == VD->getCanonicalDecl() &&
268 "Canonical decl must be captured.");
269 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
270 isCapturedVar(CGF, VD) ||
271 (CGF.CapturedStmtInfo &&
272 InlinedShareds.isGlobalVarCaptured(VD)),
273 VD->getType().getNonReferenceType(), VK_LValue,
274 C.getLocation());
275 InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
276 }
277 }
278 CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
279 }
280 (void)InlinedShareds.Privatize();
281 }
282};
283
284} // namespace
285
286static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
287 const OMPExecutableDirective &S,
288 const RegionCodeGenTy &CodeGen);
289
290LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
291 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
292 if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
293 OrigVD = OrigVD->getCanonicalDecl();
294 bool IsCaptured =
295 LambdaCaptureFields.lookup(OrigVD) ||
296 (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
297 (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
298 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
299 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
300 return EmitLValue(&DRE);
301 }
302 }
303 return EmitLValue(E);
304}
305
306llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
307 ASTContext &C = getContext();
308 llvm::Value *Size = nullptr;
309 auto SizeInChars = C.getTypeSizeInChars(Ty);
310 if (SizeInChars.isZero()) {
311 // getTypeSizeInChars() returns 0 for a VLA.
312 while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
313 VlaSizePair VlaSize = getVLASize(VAT);
314 Ty = VlaSize.Type;
315 Size =
316 Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
317 }
318 SizeInChars = C.getTypeSizeInChars(Ty);
319 if (SizeInChars.isZero())
320 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
321 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
322 }
323 return CGM.getSize(SizeInChars);
324}
325
326void CodeGenFunction::GenerateOpenMPCapturedVars(
327 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
328 const RecordDecl *RD = S.getCapturedRecordDecl();
329 auto CurField = RD->field_begin();
330 auto CurCap = S.captures().begin();
331 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
332 E = S.capture_init_end();
333 I != E; ++I, ++CurField, ++CurCap) {
334 if (CurField->hasCapturedVLAType()) {
335 const VariableArrayType *VAT = CurField->getCapturedVLAType();
336 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
337 CapturedVars.push_back(Val);
338 } else if (CurCap->capturesThis()) {
339 CapturedVars.push_back(CXXThisValue);
340 } else if (CurCap->capturesVariableByCopy()) {
341 llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
342
343 // If the field is not a pointer, we need to save the actual value
344 // and load it as a void pointer.
345 if (!CurField->getType()->isAnyPointerType()) {
346 ASTContext &Ctx = getContext();
347 Address DstAddr = CreateMemTemp(
348 Ctx.getUIntPtrType(),
349 Twine(CurCap->getCapturedVar()->getName(), ".casted"));
350 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
351
352 llvm::Value *SrcAddrVal = EmitScalarConversion(
353 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
354 Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
355 LValue SrcLV =
356 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
357
358 // Store the value using the source type pointer.
359 EmitStoreThroughLValue(RValue::get(CV), SrcLV);
360
361 // Load the value using the destination type pointer.
362 CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
363 }
364 CapturedVars.push_back(CV);
365 } else {
366 assert(CurCap->capturesVariable() && "Expected capture by reference.");
367 CapturedVars.push_back(EmitLValue(*I).getAddress(*this).getPointer());
368 }
369 }
370}
371
372static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
373 QualType DstType, StringRef Name,
374 LValue AddrLV) {
375 ASTContext &Ctx = CGF.getContext();
376
377 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
378 AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(),
379 Ctx.getPointerType(DstType), Loc);
380 Address TmpAddr =
381 CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
382 return TmpAddr;
383}
384
385static QualType getCanonicalParamType(ASTContext &C, QualType T) {
386 if (T->isLValueReferenceType())
387 return C.getLValueReferenceType(
388 getCanonicalParamType(C, T.getNonReferenceType()),
389 /*SpelledAsLValue=*/false);
390 if (T->isPointerType())
391 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
392 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
393 if (const auto *VLA = dyn_cast<VariableArrayType>(A))
394 return getCanonicalParamType(C, VLA->getElementType());
395 if (!A->isVariablyModifiedType())
396 return C.getCanonicalType(T);
397 }
398 return C.getCanonicalParamType(T);
399}
400
401namespace {
402/// Contains required data for proper outlined function codegen.
403struct FunctionOptions {
404 /// Captured statement for which the function is generated.
405 const CapturedStmt *S = nullptr;
406 /// true if cast to/from UIntPtr is required for variables captured by
407 /// value.
408 const bool UIntPtrCastRequired = true;
409 /// true if only casted arguments must be registered as local args or VLA
410 /// sizes.
411 const bool RegisterCastedArgsOnly = false;
412 /// Name of the generated function.
413 const StringRef FunctionName;
414 /// Location of the non-debug version of the outlined function.
415 SourceLocation Loc;
416 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
417 bool RegisterCastedArgsOnly, StringRef FunctionName,
418 SourceLocation Loc)
419 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
420 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
421 FunctionName(FunctionName), Loc(Loc) {}
422};
423} // namespace
424
425static llvm::Function *emitOutlinedFunctionPrologue(
426 CodeGenFunction &CGF, FunctionArgList &Args,
427 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
428 &LocalAddrs,
429 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
430 &VLASizes,
431 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
432 const CapturedDecl *CD = FO.S->getCapturedDecl();
433 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
434 assert(CD->hasBody() && "missing CapturedDecl body");
435
436 CXXThisValue = nullptr;
437 // Build the argument list.
438 CodeGenModule &CGM = CGF.CGM;
439 ASTContext &Ctx = CGM.getContext();
440 FunctionArgList TargetArgs;
441 Args.append(CD->param_begin(),
442 std::next(CD->param_begin(), CD->getContextParamPosition()));
443 TargetArgs.append(
444 CD->param_begin(),
445 std::next(CD->param_begin(), CD->getContextParamPosition()));
446 auto I = FO.S->captures().begin();
447 FunctionDecl *DebugFunctionDecl = nullptr;
448 if (!FO.UIntPtrCastRequired) {
449 FunctionProtoType::ExtProtoInfo EPI;
450 QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
451 DebugFunctionDecl = FunctionDecl::Create(
452 Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
453 SourceLocation(), DeclarationName(), FunctionTy,
454 Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
455 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
456 /*hasWrittenPrototype=*/false);
457 }
458 for (const FieldDecl *FD : RD->fields()) {
459 QualType ArgType = FD->getType();
460 IdentifierInfo *II = nullptr;
461 VarDecl *CapVar = nullptr;
462
463 // If this is a capture by copy and the type is not a pointer, the outlined
464 // function argument type should be uintptr and the value properly casted to
465 // uintptr. This is necessary given that the runtime library is only able to
466 // deal with pointers. We can pass in the same way the VLA type sizes to the
467 // outlined function.
468 if (FO.UIntPtrCastRequired &&
469 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
470 I->capturesVariableArrayType()))
471 ArgType = Ctx.getUIntPtrType();
472
473 if (I->capturesVariable() || I->capturesVariableByCopy()) {
474 CapVar = I->getCapturedVar();
475 II = CapVar->getIdentifier();
476 } else if (I->capturesThis()) {
477 II = &Ctx.Idents.get("this");
478 } else {
479 assert(I->capturesVariableArrayType());
480 II = &Ctx.Idents.get("vla");
481 }
482 if (ArgType->isVariablyModifiedType())
483 ArgType = getCanonicalParamType(Ctx, ArgType);
484 VarDecl *Arg;
485 if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
486 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
487 II, ArgType,
488 ImplicitParamDecl::ThreadPrivateVar);
489 } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
490 Arg = ParmVarDecl::Create(
491 Ctx, DebugFunctionDecl,
492 CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
493 CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
494 /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
495 } else {
496 Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
497 II, ArgType, ImplicitParamDecl::Other);
498 }
499 Args.emplace_back(Arg);
500 // Do not cast arguments if we emit function with non-original types.
501 TargetArgs.emplace_back(
502 FO.UIntPtrCastRequired
503 ? Arg
504 : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
505 ++I;
506 }
507 Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
508 CD->param_end());
509 TargetArgs.append(
510 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
511 CD->param_end());
512
513 // Create the function declaration.
514 const CGFunctionInfo &FuncInfo =
515 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
516 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
517
518 auto *F =
519 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
520 FO.FunctionName, &CGM.getModule());
521 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
522 if (CD->isNothrow())
523 F->setDoesNotThrow();
524 F->setDoesNotRecurse();
525
526 // Always inline the outlined function if optimizations are enabled.
527 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
528 F->removeFnAttr(llvm::Attribute::NoInline);
529 F->addFnAttr(llvm::Attribute::AlwaysInline);
530 }
531
532 // Generate the function.
533 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
534 FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
535 FO.UIntPtrCastRequired ? FO.Loc
536 : CD->getBody()->getBeginLoc());
537 unsigned Cnt = CD->getContextParamPosition();
538 I = FO.S->captures().begin();
539 for (const FieldDecl *FD : RD->fields()) {
540 // Do not map arguments if we emit function with non-original types.
541 Address LocalAddr(Address::invalid());
542 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
543 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
544 TargetArgs[Cnt]);
545 } else {
546 LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
547 }
548 // If we are capturing a pointer by copy we don't need to do anything, just
549 // use the value that we get from the arguments.
550 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
551 const VarDecl *CurVD = I->getCapturedVar();
552 if (!FO.RegisterCastedArgsOnly)
553 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
554 ++Cnt;
555 ++I;
556 continue;
557 }
558
559 LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
560 AlignmentSource::Decl);
561 if (FD->hasCapturedVLAType()) {
562 if (FO.UIntPtrCastRequired) {
563 ArgLVal = CGF.MakeAddrLValue(
564 castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
565 Args[Cnt]->getName(), ArgLVal),
566 FD->getType(), AlignmentSource::Decl);
567 }
568 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
569 const VariableArrayType *VAT = FD->getCapturedVLAType();
570 VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
571 } else if (I->capturesVariable()) {
572 const VarDecl *Var = I->getCapturedVar();
573 QualType VarTy = Var->getType();
574 Address ArgAddr = ArgLVal.getAddress(CGF);
575 if (ArgLVal.getType()->isLValueReferenceType()) {
576 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
577 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
578 assert(ArgLVal.getType()->isPointerType());
579 ArgAddr = CGF.EmitLoadOfPointer(
580 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
581 }
582 if (!FO.RegisterCastedArgsOnly) {
583 LocalAddrs.insert(
584 {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
585 }
586 } else if (I->capturesVariableByCopy()) {
587 assert(!FD->getType()->isAnyPointerType() &&
588 "Not expecting a captured pointer.");
589 const VarDecl *Var = I->getCapturedVar();
590 LocalAddrs.insert({Args[Cnt],
591 {Var, FO.UIntPtrCastRequired
592 ? castValueFromUintptr(
593 CGF, I->getLocation(), FD->getType(),
594 Args[Cnt]->getName(), ArgLVal)
595 : ArgLVal.getAddress(CGF)}});
596 } else {
597 // If 'this' is captured, load it into CXXThisValue.
598 assert(I->capturesThis());
599 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
600 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
601 }
602 ++Cnt;
603 ++I;
604 }
605
606 return F;
607}
608
609llvm::Function *
610CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
611 SourceLocation Loc) {
612 assert(
613 CapturedStmtInfo &&
614 "CapturedStmtInfo should be set when generating the captured function");
615 const CapturedDecl *CD = S.getCapturedDecl();
616 // Build the argument list.
617 bool NeedWrapperFunction =
618 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
619 FunctionArgList Args;
620 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
621 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
622 SmallString<256> Buffer;
623 llvm::raw_svector_ostream Out(Buffer);
624 Out << CapturedStmtInfo->getHelperName();
625 if (NeedWrapperFunction)
626 Out << "_debug__";
627 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
628 Out.str(), Loc);
629 llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
630 VLASizes, CXXThisValue, FO);
631 CodeGenFunction::OMPPrivateScope LocalScope(*this);
632 for (const auto &LocalAddrPair : LocalAddrs) {
633 if (LocalAddrPair.second.first) {
634 LocalScope.addPrivate(LocalAddrPair.second.first,
635 LocalAddrPair.second.second);
636 }
637 }
638 (void)LocalScope.Privatize();
639 for (const auto &VLASizePair : VLASizes)
640 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
641 PGO.assignRegionCounters(GlobalDecl(CD), F);
642 CapturedStmtInfo->EmitBody(*this, CD->getBody());
643 (void)LocalScope.ForceCleanup();
644 FinishFunction(CD->getBodyRBrace());
645 if (!NeedWrapperFunction)
646 return F;
647
648 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
649 /*RegisterCastedArgsOnly=*/true,
650 CapturedStmtInfo->getHelperName(), Loc);
651 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
653 Args.clear();
654 LocalAddrs.clear();
655 VLASizes.clear();
656 llvm::Function *WrapperF =
657 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
658 WrapperCGF.CXXThisValue, WrapperFO);
659 llvm::SmallVector<llvm::Value *, 4> CallArgs;
660 auto *PI = F->arg_begin();
661 for (const auto *Arg : Args) {
662 llvm::Value *CallArg;
663 auto I = LocalAddrs.find(Arg);
664 if (I != LocalAddrs.end()) {
665 LValue LV = WrapperCGF.MakeAddrLValue(
666 I->second.second,
667 I->second.first ? I->second.first->getType() : Arg->getType(),
668 AlignmentSource::Decl);
669 if (LV.getType()->isAnyComplexType())
670 LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
671 LV.getAddress(WrapperCGF),
672 PI->getType()->getPointerTo(
673 LV.getAddress(WrapperCGF).getAddressSpace()),
674 PI->getType()));
675 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
676 } else {
677 auto EI = VLASizes.find(Arg);
678 if (EI != VLASizes.end()) {
679 CallArg = EI->second.second;
680 } else {
681 LValue LV =
682 WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
683 Arg->getType(), AlignmentSource::Decl);
684 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
685 }
686 }
687 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
688 ++PI;
689 }
690 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
691 WrapperCGF.FinishFunction();
692 return WrapperF;
693}
694
695//===----------------------------------------------------------------------===//
696// OpenMP Directive Emission
697//===----------------------------------------------------------------------===//
698void CodeGenFunction::EmitOMPAggregateAssign(
699 Address DestAddr, Address SrcAddr, QualType OriginalType,
700 const llvm::function_ref<void(Address, Address)> CopyGen) {
701 // Perform element-by-element initialization.
702 QualType ElementTy;
703
704 // Drill down to the base element type on both arrays.
705 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
706 llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
707 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
708
709 llvm::Value *SrcBegin = SrcAddr.getPointer();
710 llvm::Value *DestBegin = DestAddr.getPointer();
711 // Cast from pointer to array type to pointer to single element.
712 llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
713 DestBegin, NumElements);
714
715 // The basic structure here is a while-do loop.
716 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
717 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
718 llvm::Value *IsEmpty =
719 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
720 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
721
722 // Enter the loop body, making that address the current address.
723 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
724 EmitBlock(BodyBB);
725
726 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
727
728 llvm::PHINode *SrcElementPHI =
729 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
730 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
731 Address SrcElementCurrent =
732 Address(SrcElementPHI, SrcAddr.getElementType(),
733 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
734
735 llvm::PHINode *DestElementPHI = Builder.CreatePHI(
736 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
737 DestElementPHI->addIncoming(DestBegin, EntryBB);
738 Address DestElementCurrent =
739 Address(DestElementPHI, DestAddr.getElementType(),
740 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
741
742 // Emit copy.
743 CopyGen(DestElementCurrent, SrcElementCurrent);
744
745 // Shift the address forward by one element.
746 llvm::Value *DestElementNext =
747 Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
748 /*Idx0=*/1, "omp.arraycpy.dest.element");
749 llvm::Value *SrcElementNext =
750 Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
751 /*Idx0=*/1, "omp.arraycpy.src.element");
752 // Check whether we've reached the end.
753 llvm::Value *Done =
754 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
755 Builder.CreateCondBr(Done, DoneBB, BodyBB);
756 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
757 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
758
759 // Done.
760 EmitBlock(DoneBB, /*IsFinished=*/true);
761}
762
763void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
764 Address SrcAddr, const VarDecl *DestVD,
765 const VarDecl *SrcVD, const Expr *Copy) {
766 if (OriginalType->isArrayType()) {
767 const auto *BO = dyn_cast<BinaryOperator>(Copy);
768 if (BO && BO->getOpcode() == BO_Assign) {
769 // Perform simple memcpy for simple copying.
770 LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
771 LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
772 EmitAggregateAssign(Dest, Src, OriginalType);
773 } else {
774 // For arrays with complex element types perform element by element
775 // copying.
776 EmitOMPAggregateAssign(
777 DestAddr, SrcAddr, OriginalType,
778 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
779 // Working with the single array element, so have to remap
780 // destination and source variables to corresponding array
781 // elements.
782 CodeGenFunction::OMPPrivateScope Remap(*this);
783 Remap.addPrivate(DestVD, DestElement);
784 Remap.addPrivate(SrcVD, SrcElement);
785 (void)Remap.Privatize();
786 EmitIgnoredExpr(Copy);
787 });
788 }
789 } else {
790 // Remap pseudo source variable to private copy.
791 CodeGenFunction::OMPPrivateScope Remap(*this);
792 Remap.addPrivate(SrcVD, SrcAddr);
793 Remap.addPrivate(DestVD, DestAddr);
794 (void)Remap.Privatize();
795 // Emit copying of the whole variable.
796 EmitIgnoredExpr(Copy);
797 }
798}
799
800bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
801 OMPPrivateScope &PrivateScope) {
802 if (!HaveInsertPoint())
803 return false;
804 bool DeviceConstTarget =
805 getLangOpts().OpenMPIsTargetDevice &&
806 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
807 bool FirstprivateIsLastprivate = false;
808 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
809 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
810 for (const auto *D : C->varlists())
811 Lastprivates.try_emplace(
812 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
813 C->getKind());
814 }
815 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
816 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
817 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
818 // Force emission of the firstprivate copy if the directive does not emit
819 // outlined function, like omp for, omp simd, omp distribute etc.
820 bool MustEmitFirstprivateCopy =
821 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
822 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
823 const auto *IRef = C->varlist_begin();
824 const auto *InitsRef = C->inits().begin();
825 for (const Expr *IInit : C->private_copies()) {
826 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
827 bool ThisFirstprivateIsLastprivate =
828 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
829 const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
830 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
831 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
832 !FD->getType()->isReferenceType() &&
833 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
834 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
835 ++IRef;
836 ++InitsRef;
837 continue;
838 }
839 // Do not emit copy for firstprivate constant variables in target regions,
840 // captured by reference.
841 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
842 FD && FD->getType()->isReferenceType() &&
843 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
844 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
845 ++IRef;
846 ++InitsRef;
847 continue;
848 }
849 FirstprivateIsLastprivate =
850 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
851 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
852 const auto *VDInit =
853 cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
854 bool IsRegistered;
855 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
856 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
857 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
858 LValue OriginalLVal;
859 if (!FD) {
860 // Check if the firstprivate variable is just a constant value.
861 ConstantEmission CE = tryEmitAsConstant(&DRE);
862 if (CE && !CE.isReference()) {
863 // Constant value, no need to create a copy.
864 ++IRef;
865 ++InitsRef;
866 continue;
867 }
868 if (CE && CE.isReference()) {
869 OriginalLVal = CE.getReferenceLValue(*this, &DRE);
870 } else {
871 assert(!CE && "Expected non-constant firstprivate.");
872 OriginalLVal = EmitLValue(&DRE);
873 }
874 } else {
875 OriginalLVal = EmitLValue(&DRE);
876 }
877 QualType Type = VD->getType();
878 if (Type->isArrayType()) {
879 // Emit VarDecl with copy init for arrays.
880 // Get the address of the original variable captured in current
881 // captured region.
882 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
883 const Expr *Init = VD->getInit();
884 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
885 // Perform simple memcpy.
886 LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
887 EmitAggregateAssign(Dest, OriginalLVal, Type);
888 } else {
889 EmitOMPAggregateAssign(
890 Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
891 Type,
892 [this, VDInit, Init](Address DestElement, Address SrcElement) {
893 // Clean up any temporaries needed by the
894 // initialization.
895 RunCleanupsScope InitScope(*this);
896 // Emit initialization for single element.
897 setAddrOfLocalVar(VDInit, SrcElement);
898 EmitAnyExprToMem(Init, DestElement,
899 Init->getType().getQualifiers(),
900 /*IsInitializer*/ false);
901 LocalDeclMap.erase(VDInit);
902 });
903 }
904 EmitAutoVarCleanups(Emission);
905 IsRegistered =
906 PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
907 } else {
908 Address OriginalAddr = OriginalLVal.getAddress(*this);
909 // Emit private VarDecl with copy init.
910 // Remap temp VDInit variable to the address of the original
911 // variable (for proper handling of captured global variables).
912 setAddrOfLocalVar(VDInit, OriginalAddr);
913 EmitDecl(*VD);
914 LocalDeclMap.erase(VDInit);
915 Address VDAddr = GetAddrOfLocalVar(VD);
916 if (ThisFirstprivateIsLastprivate &&
917 Lastprivates[OrigVD->getCanonicalDecl()] ==
918 OMPC_LASTPRIVATE_conditional) {
919 // Create/init special variable for lastprivate conditionals.
920 llvm::Value *V =
921 EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
922 AlignmentSource::Decl),
923 (*IRef)->getExprLoc());
924 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
925 *this, OrigVD);
926 EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
927 AlignmentSource::Decl));
928 LocalDeclMap.erase(VD);
929 setAddrOfLocalVar(VD, VDAddr);
930 }
931 IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
932 }
933 assert(IsRegistered &&
934 "firstprivate var already registered as private");
935 // Silence the warning about unused variable.
936 (void)IsRegistered;
937 }
938 ++IRef;
939 ++InitsRef;
940 }
941 }
942 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
943}
944
945void CodeGenFunction::EmitOMPPrivateClause(
946 const OMPExecutableDirective &D,
947 CodeGenFunction::OMPPrivateScope &PrivateScope) {
948 if (!HaveInsertPoint())
949 return;
950 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
951 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
952 auto IRef = C->varlist_begin();
953 for (const Expr *IInit : C->private_copies()) {
954 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
955 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
956 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
957 EmitDecl(*VD);
958 // Emit private VarDecl with copy init.
959 bool IsRegistered =
960 PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
961 assert(IsRegistered && "private var already registered as private");
962 // Silence the warning about unused variable.
963 (void)IsRegistered;
964 }
965 ++IRef;
966 }
967 }
968}
969
970bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
971 if (!HaveInsertPoint())
972 return false;
973 // threadprivate_var1 = master_threadprivate_var1;
974 // operator=(threadprivate_var2, master_threadprivate_var2);
975 // ...
976 // __kmpc_barrier(&loc, global_tid);
977 llvm::DenseSet<const VarDecl *> CopiedVars;
978 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
979 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
980 auto IRef = C->varlist_begin();
981 auto ISrcRef = C->source_exprs().begin();
982 auto IDestRef = C->destination_exprs().begin();
983 for (const Expr *AssignOp : C->assignment_ops()) {
984 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
985 QualType Type = VD->getType();
986 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
987 // Get the address of the master variable. If we are emitting code with
988 // TLS support, the address is passed from the master as field in the
989 // captured declaration.
990 Address MasterAddr = Address::invalid();
991 if (getLangOpts().OpenMPUseTLS &&
992 getContext().getTargetInfo().isTLSSupported()) {
993 assert(CapturedStmtInfo->lookup(VD) &&
994 "Copyin threadprivates should have been captured!");
995 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
996 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
997 MasterAddr = EmitLValue(&DRE).getAddress(*this);
998 LocalDeclMap.erase(VD);
999 } else {
1000 MasterAddr =
1001 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
1002 : CGM.GetAddrOfGlobal(VD),
1003 CGM.getTypes().ConvertTypeForMem(VD->getType()),
1004 getContext().getDeclAlign(VD));
1005 }
1006 // Get the address of the threadprivate variable.
1007 Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
1008 if (CopiedVars.size() == 1) {
1009 // At first check if current thread is a master thread. If it is, no
1010 // need to copy data.
1011 CopyBegin = createBasicBlock("copyin.not.master");
1012 CopyEnd = createBasicBlock("copyin.not.master.end");
1013 // TODO: Avoid ptrtoint conversion.
1014 auto *MasterAddrInt =
1015 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1016 auto *PrivateAddrInt =
1017 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
1018 Builder.CreateCondBr(
1019 Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
1020 CopyEnd);
1021 EmitBlock(CopyBegin);
1022 }
1023 const auto *SrcVD =
1024 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1025 const auto *DestVD =
1026 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1027 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
1028 }
1029 ++IRef;
1030 ++ISrcRef;
1031 ++IDestRef;
1032 }
1033 }
1034 if (CopyEnd) {
1035 // Exit out of copying procedure for non-master thread.
1036 EmitBlock(CopyEnd, /*IsFinished=*/true);
1037 return true;
1038 }
1039 return false;
1040}
1041
1042bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1043 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1044 if (!HaveInsertPoint())
1045 return false;
1046 bool HasAtLeastOneLastprivate = false;
1047 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1048 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1049 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
1050 for (const Expr *C : LoopDirective->counters()) {
1051 SIMDLCVs.insert(
1052 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1053 }
1054 }
1055 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1056 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1057 HasAtLeastOneLastprivate = true;
1058 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
1059 !getLangOpts().OpenMPSimd)
1060 break;
1061 const auto *IRef = C->varlist_begin();
1062 const auto *IDestRef = C->destination_exprs().begin();
1063 for (const Expr *IInit : C->private_copies()) {
1064 // Keep the address of the original variable for future update at the end
1065 // of the loop.
1066 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1067 // Taskloops do not require additional initialization, it is done in
1068 // runtime support library.
1069 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
1070 const auto *DestVD =
1071 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1072 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1073 /*RefersToEnclosingVariableOrCapture=*/
1074 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1075 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1076 PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
1077 // Check if the variable is also a firstprivate: in this case IInit is
1078 // not generated. Initialization of this variable will happen in codegen
1079 // for 'firstprivate' clause.
1080 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
1081 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
1082 Address VDAddr = Address::invalid();
1083 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1084 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1085 *this, OrigVD);
1086 setAddrOfLocalVar(VD, VDAddr);
1087 } else {
1088 // Emit private VarDecl with copy init.
1089 EmitDecl(*VD);
1090 VDAddr = GetAddrOfLocalVar(VD);
1091 }
1092 bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
1093 assert(IsRegistered &&
1094 "lastprivate var already registered as private");
1095 (void)IsRegistered;
1096 }
1097 }
1098 ++IRef;
1099 ++IDestRef;
1100 }
1101 }
1102 return HasAtLeastOneLastprivate;
1103}
1104
1105void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1106 const OMPExecutableDirective &D, bool NoFinals,
1107 llvm::Value *IsLastIterCond) {
1108 if (!HaveInsertPoint())
1109 return;
1110 // Emit following code:
1111 // if (<IsLastIterCond>) {
1112 // orig_var1 = private_orig_var1;
1113 // ...
1114 // orig_varn = private_orig_varn;
1115 // }
1116 llvm::BasicBlock *ThenBB = nullptr;
1117 llvm::BasicBlock *DoneBB = nullptr;
1118 if (IsLastIterCond) {
1119 // Emit implicit barrier if at least one lastprivate conditional is found
1120 // and this is not a simd mode.
1121 if (!getLangOpts().OpenMPSimd &&
1122 llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
1123 [](const OMPLastprivateClause *C) {
1124 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1125 })) {
1126 CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
1127 OMPD_unknown,
1128 /*EmitChecks=*/false,
1129 /*ForceSimpleCall=*/true);
1130 }
1131 ThenBB = createBasicBlock(".omp.lastprivate.then");
1132 DoneBB = createBasicBlock(".omp.lastprivate.done");
1133 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1134 EmitBlock(ThenBB);
1135 }
1136 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1137 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1138 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1139 auto IC = LoopDirective->counters().begin();
1140 for (const Expr *F : LoopDirective->finals()) {
1141 const auto *D =
1142 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1143 if (NoFinals)
1144 AlreadyEmittedVars.insert(D);
1145 else
1146 LoopCountersAndUpdates[D] = F;
1147 ++IC;
1148 }
1149 }
1150 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1151 auto IRef = C->varlist_begin();
1152 auto ISrcRef = C->source_exprs().begin();
1153 auto IDestRef = C->destination_exprs().begin();
1154 for (const Expr *AssignOp : C->assignment_ops()) {
1155 const auto *PrivateVD =
1156 cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1157 QualType Type = PrivateVD->getType();
1158 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1159 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1160 // If lastprivate variable is a loop control variable for loop-based
1161 // directive, update its value before copyin back to original
1162 // variable.
1163 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1164 EmitIgnoredExpr(FinalExpr);
1165 const auto *SrcVD =
1166 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1167 const auto *DestVD =
1168 cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1169 // Get the address of the private variable.
1170 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1171 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1172 PrivateAddr = Address(
1173 Builder.CreateLoad(PrivateAddr),
1174 CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
1175 CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
1176 // Store the last value to the private copy in the last iteration.
1177 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1178 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1179 *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
1180 (*IRef)->getExprLoc());
1181 // Get the address of the original variable.
1182 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1183 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1184 }
1185 ++IRef;
1186 ++ISrcRef;
1187 ++IDestRef;
1188 }
1189 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1190 EmitIgnoredExpr(PostUpdate);
1191 }
1192 if (IsLastIterCond)
1193 EmitBlock(DoneBB, /*IsFinished=*/true);
1194}
1195
1196void CodeGenFunction::EmitOMPReductionClauseInit(
1197 const OMPExecutableDirective &D,
1198 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1199 if (!HaveInsertPoint())
1200 return;
1201 SmallVector<const Expr *, 4> Shareds;
1202 SmallVector<const Expr *, 4> Privates;
1203 SmallVector<const Expr *, 4> ReductionOps;
1204 SmallVector<const Expr *, 4> LHSs;
1205 SmallVector<const Expr *, 4> RHSs;
1206 OMPTaskDataTy Data;
1207 SmallVector<const Expr *, 4> TaskLHSs;
1208 SmallVector<const Expr *, 4> TaskRHSs;
1209 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1210 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1211 continue;
1212 Shareds.append(C->varlist_begin(), C->varlist_end());
1213 Privates.append(C->privates().begin(), C->privates().end());
1214 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1215 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1216 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1217 if (C->getModifier() == OMPC_REDUCTION_task) {
1218 Data.ReductionVars.append(C->privates().begin(), C->privates().end());
1219 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
1220 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
1221 Data.ReductionOps.append(C->reduction_ops().begin(),
1222 C->reduction_ops().end());
1223 TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1224 TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1225 }
1226 }
1227 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1228 unsigned Count = 0;
1229 auto *ILHS = LHSs.begin();
1230 auto *IRHS = RHSs.begin();
1231 auto *IPriv = Privates.begin();
1232 for (const Expr *IRef : Shareds) {
1233 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1234 // Emit private VarDecl with reduction init.
1235 RedCG.emitSharedOrigLValue(*this, Count);
1236 RedCG.emitAggregateType(*this, Count);
1237 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1238 RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
1239 RedCG.getSharedLValue(Count).getAddress(*this),
1240 [&Emission](CodeGenFunction &CGF) {
1241 CGF.EmitAutoVarInit(Emission);
1242 return true;
1243 });
1244 EmitAutoVarCleanups(Emission);
1245 Address BaseAddr = RedCG.adjustPrivateAddress(
1246 *this, Count, Emission.getAllocatedAddress());
1247 bool IsRegistered =
1248 PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
1249 assert(IsRegistered && "private var already registered as private");
1250 // Silence the warning about unused variable.
1251 (void)IsRegistered;
1252
1253 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1254 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1255 QualType Type = PrivateVD->getType();
1256 bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
1257 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1258 // Store the address of the original variable associated with the LHS
1259 // implicit variable.
1260 PrivateScope.addPrivate(LHSVD,
1261 RedCG.getSharedLValue(Count).getAddress(*this));
1262 PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
1263 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1264 isa<ArraySubscriptExpr>(IRef)) {
1265 // Store the address of the original variable associated with the LHS
1266 // implicit variable.
1267 PrivateScope.addPrivate(LHSVD,
1268 RedCG.getSharedLValue(Count).getAddress(*this));
1269 PrivateScope.addPrivate(RHSVD,
1270 GetAddrOfLocalVar(PrivateVD).withElementType(
1271 ConvertTypeForMem(RHSVD->getType())));
1272 } else {
1273 QualType Type = PrivateVD->getType();
1274 bool IsArray = getContext().getAsArrayType(Type) != nullptr;
1275 Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
1276 // Store the address of the original variable associated with the LHS
1277 // implicit variable.
1278 if (IsArray) {
1279 OriginalAddr =
1280 OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
1281 }
1282 PrivateScope.addPrivate(LHSVD, OriginalAddr);
1283 PrivateScope.addPrivate(
1284 RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
1285 ConvertTypeForMem(RHSVD->getType()))
1286 : GetAddrOfLocalVar(PrivateVD));
1287 }
1288 ++ILHS;
1289 ++IRHS;
1290 ++IPriv;
1291 ++Count;
1292 }
1293 if (!Data.ReductionVars.empty()) {
1294 Data.IsReductionWithTaskMod = true;
1295 Data.IsWorksharingReduction =
1296 isOpenMPWorksharingDirective(D.getDirectiveKind());
1297 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1298 *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
1299 const Expr *TaskRedRef = nullptr;
1300 switch (D.getDirectiveKind()) {
1301 case OMPD_parallel:
1302 TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
1303 break;
1304 case OMPD_for:
1305 TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
1306 break;
1307 case OMPD_sections:
1308 TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
1309 break;
1310 case OMPD_parallel_for:
1311 TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
1312 break;
1313 case OMPD_parallel_master:
1314 TaskRedRef =
1315 cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
1316 break;
1317 case OMPD_parallel_sections:
1318 TaskRedRef =
1319 cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
1320 break;
1321 case OMPD_target_parallel:
1322 TaskRedRef =
1323 cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1324 break;
1325 case OMPD_target_parallel_for:
1326 TaskRedRef =
1327 cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
1328 break;
1329 case OMPD_distribute_parallel_for:
1330 TaskRedRef =
1331 cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
1332 break;
1333 case OMPD_teams_distribute_parallel_for:
1334 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
1335 .getTaskReductionRefExpr();
1336 break;
1337 case OMPD_target_teams_distribute_parallel_for:
1338 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
1339 .getTaskReductionRefExpr();
1340 break;
1341 case OMPD_simd:
1342 case OMPD_for_simd:
1343 case OMPD_section:
1344 case OMPD_single:
1345 case OMPD_master:
1346 case OMPD_critical:
1347 case OMPD_parallel_for_simd:
1348 case OMPD_task:
1349 case OMPD_taskyield:
1350 case OMPD_error:
1351 case OMPD_barrier:
1352 case OMPD_taskwait:
1353 case OMPD_taskgroup:
1354 case OMPD_flush:
1355 case OMPD_depobj:
1356 case OMPD_scan:
1357 case OMPD_ordered:
1358 case OMPD_atomic:
1359 case OMPD_teams:
1360 case OMPD_target:
1361 case OMPD_cancellation_point:
1362 case OMPD_cancel:
1363 case OMPD_target_data:
1364 case OMPD_target_enter_data:
1365 case OMPD_target_exit_data:
1366 case OMPD_taskloop:
1367 case OMPD_taskloop_simd:
1368 case OMPD_master_taskloop:
1369 case OMPD_master_taskloop_simd:
1370 case OMPD_parallel_master_taskloop:
1371 case OMPD_parallel_master_taskloop_simd:
1372 case OMPD_distribute:
1373 case OMPD_target_update:
1374 case OMPD_distribute_parallel_for_simd:
1375 case OMPD_distribute_simd:
1376 case OMPD_target_parallel_for_simd:
1377 case OMPD_target_simd:
1378 case OMPD_teams_distribute:
1379 case OMPD_teams_distribute_simd:
1380 case OMPD_teams_distribute_parallel_for_simd:
1381 case OMPD_target_teams:
1382 case OMPD_target_teams_distribute:
1383 case OMPD_target_teams_distribute_parallel_for_simd:
1384 case OMPD_target_teams_distribute_simd:
1385 case OMPD_declare_target:
1386 case OMPD_end_declare_target:
1387 case OMPD_threadprivate:
1388 case OMPD_allocate:
1389 case OMPD_declare_reduction:
1390 case OMPD_declare_mapper:
1391 case OMPD_declare_simd:
1392 case OMPD_requires:
1393 case OMPD_declare_variant:
1394 case OMPD_begin_declare_variant:
1395 case OMPD_end_declare_variant:
1396 case OMPD_unknown:
1397 default:
1398 llvm_unreachable("Enexpected directive with task reductions.");
1399 }
1400
1401 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1402 EmitVarDecl(*VD);
1403 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
1404 /*Volatile=*/false, TaskRedRef->getType());
1405 }
1406}
1407
1408void CodeGenFunction::EmitOMPReductionClauseFinal(
1409 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1410 if (!HaveInsertPoint())
1411 return;
1412 llvm::SmallVector<const Expr *, 8> Privates;
1413 llvm::SmallVector<const Expr *, 8> LHSExprs;
1414 llvm::SmallVector<const Expr *, 8> RHSExprs;
1415 llvm::SmallVector<const Expr *, 8> ReductionOps;
1416 bool HasAtLeastOneReduction = false;
1417 bool IsReductionWithTaskMod = false;
1418 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1419 // Do not emit for inscan reductions.
1420 if (C->getModifier() == OMPC_REDUCTION_inscan)
1421 continue;
1422 HasAtLeastOneReduction = true;
1423 Privates.append(C->privates().begin(), C->privates().end());
1424 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1425 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1426 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1427 IsReductionWithTaskMod =
1428 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1429 }
1430 if (HasAtLeastOneReduction) {
1431 if (IsReductionWithTaskMod) {
1432 CGM.getOpenMPRuntime().emitTaskReductionFini(
1433 *this, D.getBeginLoc(),
1434 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1435 }
1436 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1437 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1438 ReductionKind == OMPD_simd;
1439 bool SimpleReduction = ReductionKind == OMPD_simd;
1440 // Emit nowait reduction if nowait clause is present or directive is a
1441 // parallel directive (it always has implicit barrier).
1442 CGM.getOpenMPRuntime().emitReduction(
1443 *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1444 {WithNowait, SimpleReduction, ReductionKind});
1445 }
1446}
1447
1448static void emitPostUpdateForReductionClause(
1449 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1450 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1451 if (!CGF.HaveInsertPoint())
1452 return;
1453 llvm::BasicBlock *DoneBB = nullptr;
1454 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1455 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1456 if (!DoneBB) {
1457 if (llvm::Value *Cond = CondGen(CGF)) {
1458 // If the first post-update expression is found, emit conditional
1459 // block if it was requested.
1460 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1461 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1462 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1463 CGF.EmitBlock(ThenBB);
1464 }
1465 }
1466 CGF.EmitIgnoredExpr(PostUpdate);
1467 }
1468 }
1469 if (DoneBB)
1470 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1471}
1472
1473namespace {
1474/// Codegen lambda for appending distribute lower and upper bounds to outlined
1475/// parallel function. This is necessary for combined constructs such as
1476/// 'distribute parallel for'
1477typedef llvm::function_ref<void(CodeGenFunction &,
1478 const OMPExecutableDirective &,
1479 llvm::SmallVectorImpl<llvm::Value *> &)>
1480 CodeGenBoundParametersTy;
1481} // anonymous namespace
1482
1483static void
1484checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1485 const OMPExecutableDirective &S) {
1486 if (CGF.getLangOpts().OpenMP < 50)
1487 return;
1488 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1489 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1490 for (const Expr *Ref : C->varlists()) {
1491 if (!Ref->getType()->isScalarType())
1492 continue;
1493 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1494 if (!DRE)
1495 continue;
1496 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1497 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1498 }
1499 }
1500 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1501 for (const Expr *Ref : C->varlists()) {
1502 if (!Ref->getType()->isScalarType())
1503 continue;
1504 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1505 if (!DRE)
1506 continue;
1507 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1508 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1509 }
1510 }
1511 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1512 for (const Expr *Ref : C->varlists()) {
1513 if (!Ref->getType()->isScalarType())
1514 continue;
1515 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1516 if (!DRE)
1517 continue;
1518 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1519 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1520 }
1521 }
1522 // Privates should ne analyzed since they are not captured at all.
1523 // Task reductions may be skipped - tasks are ignored.
1524 // Firstprivates do not return value but may be passed by reference - no need
1525 // to check for updated lastprivate conditional.
1526 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1527 for (const Expr *Ref : C->varlists()) {
1528 if (!Ref->getType()->isScalarType())
1529 continue;
1530 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1531 if (!DRE)
1532 continue;
1533 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1534 }
1535 }
1536 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1537 CGF, S, PrivateDecls);
1538}
1539
1540static void emitCommonOMPParallelDirective(
1541 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1542 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1543 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1544 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1545 llvm::Value *NumThreads = nullptr;
1546 llvm::Function *OutlinedFn =
1547 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1548 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1549 CodeGen);
1550 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1551 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1552 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1553 /*IgnoreResultAssign=*/true);
1554 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1555 CGF, NumThreads, NumThreadsClause->getBeginLoc());
1556 }
1557 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1558 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1559 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1560 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1561 }
1562 const Expr *IfCond = nullptr;
1563 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1564 if (C->getNameModifier() == OMPD_unknown ||
1565 C->getNameModifier() == OMPD_parallel) {
1566 IfCond = C->getCondition();
1567 break;
1568 }
1569 }
1570
1571 OMPParallelScope Scope(CGF, S);
1572 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1573 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1574 // lower and upper bounds with the pragma 'for' chunking mechanism.
1575 // The following lambda takes care of appending the lower and upper bound
1576 // parameters when necessary
1577 CodeGenBoundParameters(CGF, S, CapturedVars);
1578 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1579 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1580 CapturedVars, IfCond, NumThreads);
1581}
1582
1583static bool isAllocatableDecl(const VarDecl *VD) {
1584 const VarDecl *CVD = VD->getCanonicalDecl();
1585 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1586 return false;
1587 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1588 // Use the default allocation.
1589 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1590 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1591 !AA->getAllocator());
1592}
1593
1594static void emitEmptyBoundParameters(CodeGenFunction &,
1595 const OMPExecutableDirective &,
1596 llvm::SmallVectorImpl<llvm::Value *> &) {}
1597
1598static void emitOMPCopyinClause(CodeGenFunction &CGF,
1599 const OMPExecutableDirective &S) {
1600 bool Copyins = CGF.EmitOMPCopyinClause(S);
1601 if (Copyins) {
1602 // Emit implicit barrier to synchronize threads and avoid data races on
1603 // propagation master's thread values of threadprivate variables to local
1604 // instances of that variables of all other implicit threads.
1605 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1606 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1607 /*ForceSimpleCall=*/true);
1608 }
1609}
1610
1611Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1612 CodeGenFunction &CGF, const VarDecl *VD) {
1613 CodeGenModule &CGM = CGF.CGM;
1614 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1615
1616 if (!VD)
1617 return Address::invalid();
1618 const VarDecl *CVD = VD->getCanonicalDecl();
1619 if (!isAllocatableDecl(CVD))
1620 return Address::invalid();
1621 llvm::Value *Size;
1622 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1623 if (CVD->getType()->isVariablyModifiedType()) {
1624 Size = CGF.getTypeSize(CVD->getType());
1625 // Align the size: ((size + align - 1) / align) * align
1626 Size = CGF.Builder.CreateNUWAdd(
1627 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1628 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1629 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1630 } else {
1631 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1632 Size = CGM.getSize(Sz.alignTo(Align));
1633 }
1634
1635 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1636 assert(AA->getAllocator() &&
1637 "Expected allocator expression for non-default allocator.");
1638 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1639 // According to the standard, the original allocator type is a enum (integer).
1640 // Convert to pointer type, if required.
1641 if (Allocator->getType()->isIntegerTy())
1642 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1643 else if (Allocator->getType()->isPointerTy())
1644 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1645 CGM.VoidPtrTy);
1646
1647 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1648 CGF.Builder, Size, Allocator,
1649 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1650 llvm::CallInst *FreeCI =
1651 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1652
1653 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1654 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1655 Addr,
1656 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1657 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1658 return Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
1659}
1660
1661Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1662 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1663 SourceLocation Loc) {
1664 CodeGenModule &CGM = CGF.CGM;
1665 if (CGM.getLangOpts().OpenMPUseTLS &&
1666 CGM.getContext().getTargetInfo().isTLSSupported())
1667 return VDAddr;
1668
1669 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1670
1671 llvm::Type *VarTy = VDAddr.getElementType();
1672 llvm::Value *Data =
1673 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1674 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1675 std::string Suffix = getNameWithSeparators({"cache", ""});
1676 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1677
1678 llvm::CallInst *ThreadPrivateCacheCall =
1679 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1680
1681 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1682}
1683
1684std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1685 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1686 SmallString<128> Buffer;
1687 llvm::raw_svector_ostream OS(Buffer);
1688 StringRef Sep = FirstSeparator;
1689 for (StringRef Part : Parts) {
1690 OS << Sep << Part;
1691 Sep = Separator;
1692 }
1693 return OS.str().str();
1694}
1695
1696void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1697 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1698 InsertPointTy CodeGenIP, Twine RegionName) {
1699 CGBuilderTy &Builder = CGF.Builder;
1700 Builder.restoreIP(CodeGenIP);
1701 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1702 "." + RegionName + ".after");
1703
1704 {
1705 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1706 CGF.EmitStmt(RegionBodyStmt);
1707 }
1708
1709 if (Builder.saveIP().isSet())
1710 Builder.CreateBr(FiniBB);
1711}
1712
1713void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1714 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1715 InsertPointTy CodeGenIP, Twine RegionName) {
1716 CGBuilderTy &Builder = CGF.Builder;
1717 Builder.restoreIP(CodeGenIP);
1718 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1719 "." + RegionName + ".after");
1720
1721 {
1722 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1723 CGF.EmitStmt(RegionBodyStmt);
1724 }
1725
1726 if (Builder.saveIP().isSet())
1727 Builder.CreateBr(FiniBB);
1728}
1729
1730void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1731 if (CGM.getLangOpts().OpenMPIRBuilder) {
1732 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1733 // Check if we have any if clause associated with the directive.
1734 llvm::Value *IfCond = nullptr;
1735 if (const auto *C = S.getSingleClause<OMPIfClause>())
1736 IfCond = EmitScalarExpr(C->getCondition(),
1737 /*IgnoreResultAssign=*/true);
1738
1739 llvm::Value *NumThreads = nullptr;
1740 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1741 NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
1742 /*IgnoreResultAssign=*/true);
1743
1744 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1745 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1746 ProcBind = ProcBindClause->getProcBindKind();
1747
1748 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1749
1750 // The cleanup callback that finalizes all variabels at the given location,
1751 // thus calls destructors etc.
1752 auto FiniCB = [this](InsertPointTy IP) {
1753 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
1754 };
1755
1756 // Privatization callback that performs appropriate action for
1757 // shared/private/firstprivate/lastprivate/copyin/... variables.
1758 //
1759 // TODO: This defaults to shared right now.
1760 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1761 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1762 // The next line is appropriate only for variables (Val) with the
1763 // data-sharing attribute "shared".
1764 ReplVal = &Val;
1765
1766 return CodeGenIP;
1767 };
1768
1769 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1770 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1771
1772 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1773 InsertPointTy CodeGenIP) {
1774 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1775 *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
1776 };
1777
1778 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1779 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1780 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1781 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1782 Builder.restoreIP(
1783 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1784 IfCond, NumThreads, ProcBind, S.hasCancel()));
1785 return;
1786 }
1787
1788 // Emit parallel region as a standalone region.
1789 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1790 Action.Enter(CGF);
1791 OMPPrivateScope PrivateScope(CGF);
1792 emitOMPCopyinClause(CGF, S);
1793 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1794 CGF.EmitOMPPrivateClause(S, PrivateScope);
1795 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1796 (void)PrivateScope.Privatize();
1797 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1798 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1799 };
1800 {
1801 auto LPCRegion =
1802 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1803 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1804 emitEmptyBoundParameters);
1805 emitPostUpdateForReductionClause(*this, S,
1806 [](CodeGenFunction &) { return nullptr; });
1807 }
1808 // Check for outer lastprivate conditional update.
1809 checkForLastprivateConditionalUpdate(*this, S);
1810}
1811
1812void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1813 EmitStmt(S.getIfStmt());
1814}
1815
1816namespace {
1817/// RAII to handle scopes for loop transformation directives.
1818class OMPTransformDirectiveScopeRAII {
1819 OMPLoopScope *Scope = nullptr;
1820 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1821 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1822
1823 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1824 delete;
1825 OMPTransformDirectiveScopeRAII &
1826 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1827
1828public:
1829 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1830 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1831 Scope = new OMPLoopScope(CGF, *Dir);
1832 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1833 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1834 }
1835 }
1836 ~OMPTransformDirectiveScopeRAII() {
1837 if (!Scope)
1838 return;
1839 delete CapInfoRAII;
1840 delete CGSI;
1841 delete Scope;
1842 }
1843};
1844} // namespace
1845
1846static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1847 int MaxLevel, int Level = 0) {
1848 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1849 const Stmt *SimplifiedS = S->IgnoreContainers();
1850 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1851 PrettyStackTraceLoc CrashInfo(
1852 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1853 "LLVM IR generation of compound statement ('{}')");
1854
1855 // Keep track of the current cleanup stack depth, including debug scopes.
1856 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1857 for (const Stmt *CurStmt : CS->body())
1858 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1859 return;
1860 }
1861 if (SimplifiedS == NextLoop) {
1862 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
1863 SimplifiedS = Dir->getTransformedStmt();
1864 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1865 SimplifiedS = CanonLoop->getLoopStmt();
1866 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1867 S = For->getBody();
1868 } else {
1869 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1870 "Expected canonical for loop or range-based for loop.");
1871 const auto *CXXFor = cast<CXXForRangeStmt>(SimplifiedS);
1872 CGF.EmitStmt(CXXFor->getLoopVarStmt());
1873 S = CXXFor->getBody();
1874 }
1875 if (Level + 1 < MaxLevel) {
1876 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1877 S, /*TryImperfectlyNestedLoops=*/true);
1878 emitBody(CGF, S, NextLoop, MaxLevel, Level + 1);
1879 return;
1880 }
1881 }
1882 CGF.EmitStmt(S);
1883}
1884
1885void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1886 JumpDest LoopExit) {
1887 RunCleanupsScope BodyScope(*this);
1888 // Update counters values on current iteration.
1889 for (const Expr *UE : D.updates())
1890 EmitIgnoredExpr(UE);
1891 // Update the linear variables.
1892 // In distribute directives only loop counters may be marked as linear, no
1893 // need to generate the code for them.
1894 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1895 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1896 for (const Expr *UE : C->updates())
1897 EmitIgnoredExpr(UE);
1898 }
1899 }
1900
1901 // On a continue in the body, jump to the end.
1902 JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
1903 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1904 for (const Expr *E : D.finals_conditions()) {
1905 if (!E)
1906 continue;
1907 // Check that loop counter in non-rectangular nest fits into the iteration
1908 // space.
1909 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1910 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1911 getProfileCount(D.getBody()));
1912 EmitBlock(NextBB);
1913 }
1914
1915 OMPPrivateScope InscanScope(*this);
1916 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1917 bool IsInscanRegion = InscanScope.Privatize();
1918 if (IsInscanRegion) {
1919 // Need to remember the block before and after scan directive
1920 // to dispatch them correctly depending on the clause used in
1921 // this directive, inclusive or exclusive. For inclusive scan the natural
1922 // order of the blocks is used, for exclusive clause the blocks must be
1923 // executed in reverse order.
1924 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1925 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1926 // No need to allocate inscan exit block, in simd mode it is selected in the
1927 // codegen for the scan directive.
1928 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1929 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1930 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1931 EmitBranch(OMPScanDispatch);
1932 EmitBlock(OMPBeforeScanBlock);
1933 }
1934
1935 // Emit loop variables for C++ range loops.
1936 const Stmt *Body =
1937 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1938 // Emit loop body.
1939 emitBody(*this, Body,
1940 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1941 Body, /*TryImperfectlyNestedLoops=*/true),
1942 D.getLoopsNumber());
1943
1944 // Jump to the dispatcher at the end of the loop body.
1945 if (IsInscanRegion)
1946 EmitBranch(OMPScanExitBlock);
1947
1948 // The end (updates/cleanups).
1949 EmitBlock(Continue.getBlock());
1950 BreakContinueStack.pop_back();
1951}
1952
1953using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1954
1955/// Emit a captured statement and return the function as well as its captured
1956/// closure context.
1957static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1958 const CapturedStmt *S) {
1959 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1960 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1961 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1962 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1963 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1964 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1965
1966 return {F, CapStruct.getPointer(ParentCGF)};
1967}
1968
1969/// Emit a call to a previously captured closure.
1970static llvm::CallInst *
1971emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1972 llvm::ArrayRef<llvm::Value *> Args) {
1973 // Append the closure context to the argument.
1974 SmallVector<llvm::Value *> EffectiveArgs;
1975 EffectiveArgs.reserve(Args.size() + 1);
1976 llvm::append_range(EffectiveArgs, Args);
1977 EffectiveArgs.push_back(Cap.second);
1978
1979 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1980}
1981
1982llvm::CanonicalLoopInfo *
1983CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1984 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1985
1986 // The caller is processing the loop-associated directive processing the \p
1987 // Depth loops nested in \p S. Put the previous pending loop-associated
1988 // directive to the stack. If the current loop-associated directive is a loop
1989 // transformation directive, it will push its generated loops onto the stack
1990 // such that together with the loops left here they form the combined loop
1991 // nest for the parent loop-associated directive.
1992 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1993 ExpectedOMPLoopDepth = Depth;
1994
1995 EmitStmt(S);
1996 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1997
1998 // The last added loop is the outermost one.
1999 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2000
2001 // Pop the \p Depth loops requested by the call from that stack and restore
2002 // the previous context.
2003 OMPLoopNestStack.pop_back_n(Depth);
2004 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2005
2006 return Result;
2007}
2008
2009void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2010 const Stmt *SyntacticalLoop = S->getLoopStmt();
2011 if (!getLangOpts().OpenMPIRBuilder) {
2012 // Ignore if OpenMPIRBuilder is not enabled.
2013 EmitStmt(SyntacticalLoop);
2014 return;
2015 }
2016
2017 LexicalScope ForScope(*this, S->getSourceRange());
2018
2019 // Emit init statements. The Distance/LoopVar funcs may reference variable
2020 // declarations they contain.
2021 const Stmt *BodyStmt;
2022 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
2023 if (const Stmt *InitStmt = For->getInit())
2024 EmitStmt(InitStmt);
2025 BodyStmt = For->getBody();
2026 } else if (const auto *RangeFor =
2027 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
2028 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2029 EmitStmt(RangeStmt);
2030 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2031 EmitStmt(BeginStmt);
2032 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2033 EmitStmt(EndStmt);
2034 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2035 EmitStmt(LoopVarStmt);
2036 BodyStmt = RangeFor->getBody();
2037 } else
2038 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2039
2040 // Emit closure for later use. By-value captures will be captured here.
2041 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2042 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
2043 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2044 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
2045
2046 // Call the distance function to get the number of iterations of the loop to
2047 // come.
2048 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2049 ->getParam(0)
2050 ->getType()
2051 .getNonReferenceType();
2052 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
2053 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
2054 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
2055
2056 // Emit the loop structure.
2057 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2058 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2059 llvm::Value *IndVar) {
2060 Builder.restoreIP(CodeGenIP);
2061
2062 // Emit the loop body: Convert the logical iteration number to the loop
2063 // variable and emit the body.
2064 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2065 LValue LCVal = EmitLValue(LoopVarRef);
2066 Address LoopVarAddress = LCVal.getAddress(*this);
2067 emitCapturedStmtCall(*this, LoopVarClosure,
2068 {LoopVarAddress.getPointer(), IndVar});
2069
2070 RunCleanupsScope BodyScope(*this);
2071 EmitStmt(BodyStmt);
2072 };
2073 llvm::CanonicalLoopInfo *CL =
2074 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2075
2076 // Finish up the loop.
2077 Builder.restoreIP(CL->getAfterIP());
2078 ForScope.ForceCleanup();
2079
2080 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2081 OMPLoopNestStack.push_back(CL);
2082}
2083
2084void CodeGenFunction::EmitOMPInnerLoop(
2085 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2086 const Expr *IncExpr,
2087 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2088 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2089 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
2090
2091 // Start the loop with a block that tests the condition.
2092 auto CondBlock = createBasicBlock("omp.inner.for.cond");
2093 EmitBlock(CondBlock);
2094 const SourceRange R = S.getSourceRange();
2095
2096 // If attributes are attached, push to the basic block with them.
2097 const auto &OMPED = cast<OMPExecutableDirective>(S);
2098 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2099 const Stmt *SS = ICS->getCapturedStmt();
2100 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2101 OMPLoopNestStack.clear();
2102 if (AS)
2103 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
2104 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
2105 SourceLocToDebugLoc(R.getEnd()));
2106 else
2107 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2108 SourceLocToDebugLoc(R.getEnd()));
2109
2110 // If there are any cleanups between here and the loop-exit scope,
2111 // create a block to stage a loop exit along.
2112 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2113 if (RequiresCleanup)
2114 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
2115
2116 llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
2117
2118 // Emit condition.
2119 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
2120 if (ExitBlock != LoopExit.getBlock()) {
2121 EmitBlock(ExitBlock);
2122 EmitBranchThroughCleanup(LoopExit);
2123 }
2124
2125 EmitBlock(LoopBody);
2126 incrementProfileCounter(&S);
2127
2128 // Create a block for the increment.
2129 JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
2130 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2131
2132 BodyGen(*this);
2133
2134 // Emit "IV = IV + 1" and a back-edge to the condition block.
2135 EmitBlock(Continue.getBlock());
2136 EmitIgnoredExpr(IncExpr);
2137 PostIncGen(*this);
2138 BreakContinueStack.pop_back();
2139 EmitBranch(CondBlock);
2140 LoopStack.pop();
2141 // Emit the fall-through block.
2142 EmitBlock(LoopExit.getBlock());
2143}
2144
2145bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2146 if (!HaveInsertPoint())
2147 return false;
2148 // Emit inits for the linear variables.
2149 bool HasLinears = false;
2150 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2151 for (const Expr *Init : C->inits()) {
2152 HasLinears = true;
2153 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2154 if (const auto *Ref =
2155 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2156 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2157 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2158 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2159 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2160 VD->getInit()->getType(), VK_LValue,
2161 VD->getInit()->getExprLoc());
2162 EmitExprAsInit(
2163 &DRE, VD,
2164 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2165 /*capturedByInit=*/false);
2166 EmitAutoVarCleanups(Emission);
2167 } else {
2168 EmitVarDecl(*VD);
2169 }
2170 }
2171 // Emit the linear steps for the linear clauses.
2172 // If a step is not constant, it is pre-calculated before the loop.
2173 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2174 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2175 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2176 // Emit calculation of the linear step.
2177 EmitIgnoredExpr(CS);
2178 }
2179 }
2180 return HasLinears;
2181}
2182
2183void CodeGenFunction::EmitOMPLinearClauseFinal(
2184 const OMPLoopDirective &D,
2185 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2186 if (!HaveInsertPoint())
2187 return;
2188 llvm::BasicBlock *DoneBB = nullptr;
2189 // Emit the final values of the linear variables.
2190 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2191 auto IC = C->varlist_begin();
2192 for (const Expr *F : C->finals()) {
2193 if (!DoneBB) {
2194 if (llvm::Value *Cond = CondGen(*this)) {
2195 // If the first post-update expression is found, emit conditional
2196 // block if it was requested.
2197 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2198 DoneBB = createBasicBlock(".omp.linear.pu.done");
2199 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2200 EmitBlock(ThenBB);
2201 }
2202 }
2203 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2204 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2205 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2206 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2207 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2208 CodeGenFunction::OMPPrivateScope VarScope(*this);
2209 VarScope.addPrivate(OrigVD, OrigAddr);
2210 (void)VarScope.Privatize();
2211 EmitIgnoredExpr(F);
2212 ++IC;
2213 }
2214 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2215 EmitIgnoredExpr(PostUpdate);
2216 }
2217 if (DoneBB)
2218 EmitBlock(DoneBB, /*IsFinished=*/true);
2219}
2220
2221static void emitAlignedClause(CodeGenFunction &CGF,
2222 const OMPExecutableDirective &D) {
2223 if (!CGF.HaveInsertPoint())
2224 return;
2225 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2226 llvm::APInt ClauseAlignment(64, 0);
2227 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2228 auto *AlignmentCI =
2229 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2230 ClauseAlignment = AlignmentCI->getValue();
2231 }
2232 for (const Expr *E : Clause->varlists()) {
2233 llvm::APInt Alignment(ClauseAlignment);
2234 if (Alignment == 0) {
2235 // OpenMP [2.8.1, Description]
2236 // If no optional parameter is specified, implementation-defined default
2237 // alignments for SIMD instructions on the target platforms are assumed.
2238 Alignment =
2239 CGF.getContext()
2240 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2241 E->getType()->getPointeeType()))
2242 .getQuantity();
2243 }
2244 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2245 "alignment is not power of 2");
2246 if (Alignment != 0) {
2247 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2248 CGF.emitAlignmentAssumption(
2249 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2250 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2251 }
2252 }
2253 }
2254}
2255
2256void CodeGenFunction::EmitOMPPrivateLoopCounters(
2257 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2258 if (!HaveInsertPoint())
2259 return;
2260 auto I = S.private_counters().begin();
2261 for (const Expr *E : S.counters()) {
2262 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2263 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2264 // Emit var without initialization.
2265 AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
2266 EmitAutoVarCleanups(VarEmission);
2267 LocalDeclMap.erase(PrivateVD);
2268 (void)LoopScope.addPrivate(VD, VarEmission.getAllocatedAddress());
2269 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2270 VD->hasGlobalStorage()) {
2271 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2272 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2273 E->getType(), VK_LValue, E->getExprLoc());
2274 (void)LoopScope.addPrivate(PrivateVD, EmitLValue(&DRE).getAddress(*this));
2275 } else {
2276 (void)LoopScope.addPrivate(PrivateVD, VarEmission.getAllocatedAddress());
2277 }
2278 ++I;
2279 }
2280 // Privatize extra loop counters used in loops for ordered(n) clauses.
2281 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2282 if (!C->getNumForLoops())
2283 continue;
2284 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2285 I < E; ++I) {
2286 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2287 const auto *VD = cast<VarDecl>(DRE->getDecl());
2288 // Override only those variables that can be captured to avoid re-emission
2289 // of the variables declared within the loops.
2290 if (DRE->refersToEnclosingVariableOrCapture()) {
2291 (void)LoopScope.addPrivate(
2292 VD, CreateMemTemp(DRE->getType(), VD->getName()));
2293 }
2294 }
2295 }
2296}
2297
2298static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2299 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2300 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2301 if (!CGF.HaveInsertPoint())
2302 return;
2303 {
2304 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2305 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
2306 (void)PreCondScope.Privatize();
2307 // Get initial values of real counters.
2308 for (const Expr *I : S.inits()) {
2309 CGF.EmitIgnoredExpr(I);
2310 }
2311 }
2312 // Create temp loop control variables with their init values to support
2313 // non-rectangular loops.
2314 CodeGenFunction::OMPMapVars PreCondVars;
2315 for (const Expr *E : S.dependent_counters()) {
2316 if (!E)
2317 continue;
2318 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2319 "dependent counter must not be an iterator.");
2320 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2321 Address CounterAddr =
2322 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2323 (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
2324 }
2325 (void)PreCondVars.apply(CGF);
2326 for (const Expr *E : S.dependent_inits()) {
2327 if (!E)
2328 continue;
2329 CGF.EmitIgnoredExpr(E);
2330 }
2331 // Check that loop is executed at least one time.
2332 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2333 PreCondVars.restore(CGF);
2334}
2335
2336void CodeGenFunction::EmitOMPLinearClause(
2337 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2338 if (!HaveInsertPoint())
2339 return;
2340 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2341 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2342 const auto *LoopDirective = cast<OMPLoopDirective>(&D);
2343 for (const Expr *C : LoopDirective->counters()) {
2344 SIMDLCVs.insert(
2345 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
2346 }
2347 }
2348 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2349 auto CurPrivate = C->privates().begin();
2350 for (const Expr *E : C->varlists()) {
2351 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2352 const auto *PrivateVD =
2353 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2354 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2355 // Emit private VarDecl with copy init.
2356 EmitVarDecl(*PrivateVD);
2357 bool IsRegistered =
2358 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2359 assert(IsRegistered && "linear var already registered as private");
2360 // Silence the warning about unused variable.
2361 (void)IsRegistered;
2362 } else {
2363 EmitVarDecl(*PrivateVD);
2364 }
2365 ++CurPrivate;
2366 }
2367 }
2368}
2369
2370static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2371 const OMPExecutableDirective &D) {
2372 if (!CGF.HaveInsertPoint())
2373 return;
2374 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2375 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2376 /*ignoreResult=*/true);
2377 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2378 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2379 // In presence of finite 'safelen', it may be unsafe to mark all
2380 // the memory instructions parallel, because loop-carried
2381 // dependences of 'safelen' iterations are possible.
2382 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2383 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2384 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2385 /*ignoreResult=*/true);
2386 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2387 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2388 // In presence of finite 'safelen', it may be unsafe to mark all
2389 // the memory instructions parallel, because loop-carried
2390 // dependences of 'safelen' iterations are possible.
2391 CGF.LoopStack.setParallel(/*Enable=*/false);
2392 }
2393}
2394
2395void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2396 // Walk clauses and process safelen/lastprivate.
2397 LoopStack.setParallel(/*Enable=*/true);
2398 LoopStack.setVectorizeEnable();
2399 emitSimdlenSafelenClause(*this, D);
2400 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2401 if (C->getKind() == OMPC_ORDER_concurrent)
2402 LoopStack.setParallel(/*Enable=*/true);
2403 if ((D.getDirectiveKind() == OMPD_simd ||
2404 (getLangOpts().OpenMPSimd &&
2405 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2406 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2407 [](const OMPReductionClause *C) {
2408 return C->getModifier() == OMPC_REDUCTION_inscan;
2409 }))
2410 // Disable parallel access in case of prefix sum.
2411 LoopStack.setParallel(/*Enable=*/false);
2412}
2413
2414void CodeGenFunction::EmitOMPSimdFinal(
2415 const OMPLoopDirective &D,
2416 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2417 if (!HaveInsertPoint())
2418 return;
2419 llvm::BasicBlock *DoneBB = nullptr;
2420 auto IC = D.counters().begin();
2421 auto IPC = D.private_counters().begin();
2422 for (const Expr *F : D.finals()) {
2423 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
2424 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
2425 const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
2426 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
2427 OrigVD->hasGlobalStorage() || CED) {
2428 if (!DoneBB) {
2429 if (llvm::Value *Cond = CondGen(*this)) {
2430 // If the first post-update expression is found, emit conditional
2431 // block if it was requested.
2432 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
2433 DoneBB = createBasicBlock(".omp.final.done");
2434 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2435 EmitBlock(ThenBB);
2436 }
2437 }
2438 Address OrigAddr = Address::invalid();
2439 if (CED) {
2440 OrigAddr =
2441 EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2442 } else {
2443 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2444 /*RefersToEnclosingVariableOrCapture=*/false,
2445 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2446 OrigAddr = EmitLValue(&DRE).getAddress(*this);
2447 }
2448 OMPPrivateScope VarScope(*this);
2449 VarScope.addPrivate(OrigVD, OrigAddr);
2450 (void)VarScope.Privatize();
2451 EmitIgnoredExpr(F);
2452 }
2453 ++IC;
2454 ++IPC;
2455 }
2456 if (DoneBB)
2457 EmitBlock(DoneBB, /*IsFinished=*/true);
2458}
2459
2460static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2461 const OMPLoopDirective &S,
2462 CodeGenFunction::JumpDest LoopExit) {
2463 CGF.EmitOMPLoopBody(S, LoopExit);
2464 CGF.EmitStopPoint(&S);
2465}
2466
2467/// Emit a helper variable and return corresponding lvalue.
2468static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2469 const DeclRefExpr *Helper) {
2470 auto VDecl = cast<VarDecl>(Helper->getDecl());
2471 CGF.EmitVarDecl(*VDecl);
2472 return CGF.EmitLValue(Helper);
2473}
2474
2475static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2476 const RegionCodeGenTy &SimdInitGen,
2477 const RegionCodeGenTy &BodyCodeGen) {
2478 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2479 PrePostActionTy &) {
2480 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2481 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2482 SimdInitGen(CGF);
2483
2484 BodyCodeGen(CGF);
2485 };
2486 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2487 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2488 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2489
2490 BodyCodeGen(CGF);
2491 };
2492 const Expr *IfCond = nullptr;
2493 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2494 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2495 if (CGF.getLangOpts().OpenMP >= 50 &&
2496 (C->getNameModifier() == OMPD_unknown ||
2497 C->getNameModifier() == OMPD_simd)) {
2498 IfCond = C->getCondition();
2499 break;
2500 }
2501 }
2502 }
2503 if (IfCond) {
2504 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2505 } else {
2506 RegionCodeGenTy ThenRCG(ThenGen);
2507 ThenRCG(CGF);
2508 }
2509}
2510
2511static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2512 PrePostActionTy &Action) {
2513 Action.Enter(CGF);
2514 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2515 "Expected simd directive");
2516 OMPLoopScope PreInitScope(CGF, S);
2517 // if (PreCond) {
2518 // for (IV in 0..LastIteration) BODY;
2519 // <Final counter/linear vars updates>;
2520 // }
2521 //
2522 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2523 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2524 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2525 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
2526 (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
2527 }
2528
2529 // Emit: if (PreCond) - begin.
2530 // If the condition constant folds and can be elided, avoid emitting the
2531 // whole loop.
2532 bool CondConstant;
2533 llvm::BasicBlock *ContBlock = nullptr;
2534 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2535 if (!CondConstant)
2536 return;
2537 } else {
2538 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
2539 ContBlock = CGF.createBasicBlock("simd.if.end");
2540 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
2541 CGF.getProfileCount(&S));
2542 CGF.EmitBlock(ThenBlock);
2543 CGF.incrementProfileCounter(&S);
2544 }
2545
2546 // Emit the loop iteration variable.
2547 const Expr *IVExpr = S.getIterationVariable();
2548 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
2549 CGF.EmitVarDecl(*IVDecl);
2550 CGF.EmitIgnoredExpr(S.getInit());
2551
2552 // Emit the iterations count variable.
2553 // If it is not a variable, Sema decided to calculate iterations count on
2554 // each iteration (e.g., it is foldable into a constant).
2555 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2556 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2557 // Emit calculation of the iterations count.
2558 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
2559 }
2560
2561 emitAlignedClause(CGF, S);
2562 (void)CGF.EmitOMPLinearClauseInit(S);
2563 {
2564 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2565 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2566 CGF.EmitOMPLinearClause(S, LoopScope);
2567 CGF.EmitOMPPrivateClause(S, LoopScope);
2568 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2569 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2570 CGF, S, CGF.EmitLValue(S.getIterationVariable()));
2571 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2572 (void)LoopScope.Privatize();
2573 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2574 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2575
2576 emitCommonSimdLoop(
2577 CGF, S,
2578 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2579 CGF.EmitOMPSimdInit(S);
2580 },
2581 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2582 CGF.EmitOMPInnerLoop(
2583 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2584 [&S](CodeGenFunction &CGF) {
2585 emitOMPLoopBodyWithStopPoint(CGF, S,
2586 CodeGenFunction::JumpDest());
2587 },
2588 [](CodeGenFunction &) {});
2589 });
2590 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2591 // Emit final copy of the lastprivate variables at the end of loops.
2592 if (HasLastprivateClause)
2593 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2594 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2595 emitPostUpdateForReductionClause(CGF, S,
2596 [](CodeGenFunction &) { return nullptr; });
2597 LoopScope.restoreMap();
2598 CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
2599 }
2600 // Emit: if (PreCond) - end.
2601 if (ContBlock) {
2602 CGF.EmitBranch(ContBlock);
2603 CGF.EmitBlock(ContBlock, true);
2604 }
2605}
2606
2607static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2608 // Check for unsupported clauses
2609 for (OMPClause *C : S.clauses()) {
2610 // Currently only order, simdlen and safelen clauses are supported
2611 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2612 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2613 return false;
2614 }
2615
2616 // Check if we have a statement with the ordered directive.
2617 // Visit the statement hierarchy to find a compound statement
2618 // with a ordered directive in it.
2619 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2620 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2621 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2622 if (!SubStmt)
2623 continue;
2624 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2625 for (const Stmt *CSSubStmt : CS->children()) {
2626 if (!CSSubStmt)
2627 continue;
2628 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2629 return false;
2630 }
2631 }
2632 }
2633 }
2634 }
2635 }
2636 return true;
2637}
2638static llvm::MapVector<llvm::Value *, llvm::Value *>
2639GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2640 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2641 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2642 llvm::APInt ClauseAlignment(64, 0);
2643 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2644 auto *AlignmentCI =
2645 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2646 ClauseAlignment = AlignmentCI->getValue();
2647 }
2648 for (const Expr *E : Clause->varlists()) {
2649 llvm::APInt Alignment(ClauseAlignment);
2650 if (Alignment == 0) {
2651 // OpenMP [2.8.1, Description]
2652 // If no optional parameter is specified, implementation-defined default
2653 // alignments for SIMD instructions on the target platforms are assumed.
2654 Alignment =
2655 CGF.getContext()
2656 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2657 E->getType()->getPointeeType()))
2658 .getQuantity();
2659 }
2660 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2661 "alignment is not power of 2");
2662 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2663 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2664 }
2665 }
2666 return AlignedVars;
2667}
2668
2669void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2670 bool UseOMPIRBuilder =
2671 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2672 if (UseOMPIRBuilder) {
2673 auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2674 PrePostActionTy &) {
2675 // Use the OpenMPIRBuilder if enabled.
2676 if (UseOMPIRBuilder) {
2677 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2678 GetAlignedMapping(S, CGF);
2679 // Emit the associated statement and get its loop representation.
2680 const Stmt *Inner = S.getRawStmt();
2681 llvm::CanonicalLoopInfo *CLI =
2682 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2683
2684 llvm::OpenMPIRBuilder &OMPBuilder =
2685 CGM.getOpenMPRuntime().getOMPBuilder();
2686 // Add SIMD specific metadata
2687 llvm::ConstantInt *Simdlen = nullptr;
2688 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2689 RValue Len =
2690 this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2691 /*ignoreResult=*/true);
2692 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2693 Simdlen = Val;
2694 }
2695 llvm::ConstantInt *Safelen = nullptr;
2696 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2697 RValue Len =
2698 this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2699 /*ignoreResult=*/true);
2700 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2701 Safelen = Val;
2702 }
2703 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2704 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2705 if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
2706 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2707 }
2708 }
2709 // Add simd metadata to the collapsed loop. Do not generate
2710 // another loop for if clause. Support for if clause is done earlier.
2711 OMPBuilder.applySimd(CLI, AlignedVars,
2712 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2713 return;
2714 }
2715 };
2716 {
2717 auto LPCRegion =
2718 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2719 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2720 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2721 CodeGenIRBuilder);
2722 }
2723 return;
2724 }
2725
2726 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2727 OMPFirstScanLoop = true;
2728 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2729 emitOMPSimdRegion(CGF, S, Action);
2730 };
2731 {
2732 auto LPCRegion =
2733 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2734 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2735 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2736 }
2737 // Check for outer lastprivate conditional update.
2738 checkForLastprivateConditionalUpdate(*this, S);
2739}
2740
2741void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2742 // Emit the de-sugared statement.
2743 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2744 EmitStmt(S.getTransformedStmt());
2745}
2746
2747void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2748 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2749
2750 if (UseOMPIRBuilder) {
2751 auto DL = SourceLocToDebugLoc(S.getBeginLoc());
2752 const Stmt *Inner = S.getRawStmt();
2753
2754 // Consume nested loop. Clear the entire remaining loop stack because a
2755 // fully unrolled loop is non-transformable. For partial unrolling the
2756 // generated outer loop is pushed back to the stack.
2757 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
2758 OMPLoopNestStack.clear();
2759
2760 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2761
2762 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2763 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2764
2765 if (S.hasClausesOfKind<OMPFullClause>()) {
2766 assert(ExpectedOMPLoopDepth == 0);
2767 OMPBuilder.unrollLoopFull(DL, CLI);
2768 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2769 uint64_t Factor = 0;
2770 if (Expr *FactorExpr = PartialClause->getFactor()) {
2771 Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2772 assert(Factor >= 1 && "Only positive factors are valid");
2773 }
2774 OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
2775 NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2776 } else {
2777 OMPBuilder.unrollLoopHeuristic(DL, CLI);
2778 }
2779
2780 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2781 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2782 if (UnrolledCLI)
2783 OMPLoopNestStack.push_back(UnrolledCLI);
2784
2785 return;
2786 }
2787
2788 // This function is only called if the unrolled loop is not consumed by any
2789 // other loop-associated construct. Such a loop-associated construct will have
2790 // used the transformed AST.
2791
2792 // Set the unroll metadata for the next emitted loop.
2793 LoopStack.setUnrollState(LoopAttributes::Enable);
2794
2795 if (S.hasClausesOfKind<OMPFullClause>()) {
2796 LoopStack.setUnrollState(LoopAttributes::Full);
2797 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2798 if (Expr *FactorExpr = PartialClause->getFactor()) {
2799 uint64_t Factor =
2800 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2801 assert(Factor >= 1 && "Only positive factors are valid");
2802 LoopStack.setUnrollCount(Factor);
2803 }
2804 }
2805
2806 EmitStmt(S.getAssociatedStmt());
2807}
2808
2809void CodeGenFunction::EmitOMPOuterLoop(
2810 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2811 CodeGenFunction::OMPPrivateScope &LoopScope,
2812 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2813 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2814 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2815 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2816
2817 const Expr *IVExpr = S.getIterationVariable();
2818 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2819 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2820
2821 JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
2822
2823 // Start the loop with a block that tests the condition.
2824 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2825 EmitBlock(CondBlock);
2826 const SourceRange R = S.getSourceRange();
2827 OMPLoopNestStack.clear();
2828 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2829 SourceLocToDebugLoc(R.getEnd()));
2830
2831 llvm::Value *BoolCondVal = nullptr;
2832 if (!DynamicOrOrdered) {
2833 // UB = min(UB, GlobalUB) or
2834 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2835 // 'distribute parallel for')
2836 EmitIgnoredExpr(LoopArgs.EUB);
2837 // IV = LB
2838 EmitIgnoredExpr(LoopArgs.Init);
2839 // IV < UB
2840 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
2841 } else {
2842 BoolCondVal =
2843 RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
2844 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
2845 }
2846
2847 // If there are any cleanups between here and the loop-exit scope,
2848 // create a block to stage a loop exit along.
2849 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2850 if (LoopScope.requiresCleanups())
2851 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
2852
2853 llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
2854 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
2855 if (ExitBlock != LoopExit.getBlock()) {
2856 EmitBlock(ExitBlock);
2857 EmitBranchThroughCleanup(LoopExit);
2858 }
2859 EmitBlock(LoopBody);
2860
2861 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2862 // LB for loop condition and emitted it above).
2863 if (DynamicOrOrdered)
2864 EmitIgnoredExpr(LoopArgs.Init);
2865
2866 // Create a block for the increment.
2867 JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
2868 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
2869
2870 emitCommonSimdLoop(
2871 *this, S,
2872 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2873 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2874 // with dynamic/guided scheduling and without ordered clause.
2875 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2876 CGF.LoopStack.setParallel(!IsMonotonic);
2877 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2878 if (C->getKind() == OMPC_ORDER_concurrent)
2879 CGF.LoopStack.setParallel(/*Enable=*/true);
2880 } else {
2881 CGF.EmitOMPSimdInit(S);
2882 }
2883 },
2884 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2885 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2886 SourceLocation Loc = S.getBeginLoc();
2887 // when 'distribute' is not combined with a 'for':
2888 // while (idx <= UB) { BODY; ++idx; }
2889 // when 'distribute' is combined with a 'for'
2890 // (e.g. 'distribute parallel for')
2891 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2892 CGF.EmitOMPInnerLoop(
2893 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2894 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2895 CodeGenLoop(CGF, S, LoopExit);
2896 },
2897 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2898 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2899 });
2900 });
2901
2902 EmitBlock(Continue.getBlock());
2903 BreakContinueStack.pop_back();
2904 if (!DynamicOrOrdered) {
2905 // Emit "LB = LB + Stride", "UB = UB + Stride".
2906 EmitIgnoredExpr(LoopArgs.NextLB);
2907 EmitIgnoredExpr(LoopArgs.NextUB);
2908 }
2909
2910 EmitBranch(CondBlock);
2911 OMPLoopNestStack.clear();
2912 LoopStack.pop();
2913 // Emit the fall-through block.
2914 EmitBlock(LoopExit.getBlock());
2915
2916 // Tell the runtime we are done.
2917 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2918 if (!DynamicOrOrdered)
2919 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2920 S.getDirectiveKind());
2921 };
2922 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2923}
2924
2925void CodeGenFunction::EmitOMPForOuterLoop(
2926 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2927 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2928 const OMPLoopArguments &LoopArgs,
2929 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2930 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2931
2932 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2933 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
2934
2935 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2936 LoopArgs.Chunk != nullptr)) &&
2937 "static non-chunked schedule does not need outer loop");
2938
2939 // Emit outer loop.
2940 //
2941 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2942 // When schedule(dynamic,chunk_size) is specified, the iterations are
2943 // distributed to threads in the team in chunks as the threads request them.
2944 // Each thread executes a chunk of iterations, then requests another chunk,
2945 // until no chunks remain to be distributed. Each chunk contains chunk_size
2946 // iterations, except for the last chunk to be distributed, which may have
2947 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2948 //
2949 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2950 // to threads in the team in chunks as the executing threads request them.
2951 // Each thread executes a chunk of iterations, then requests another chunk,
2952 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2953 // each chunk is proportional to the number of unassigned iterations divided
2954 // by the number of threads in the team, decreasing to 1. For a chunk_size
2955 // with value k (greater than 1), the size of each chunk is determined in the
2956 // same way, with the restriction that the chunks do not contain fewer than k
2957 // iterations (except for the last chunk to be assigned, which may have fewer
2958 // than k iterations).
2959 //
2960 // When schedule(auto) is specified, the decision regarding scheduling is
2961 // delegated to the compiler and/or runtime system. The programmer gives the
2962 // implementation the freedom to choose any possible mapping of iterations to
2963 // threads in the team.
2964 //
2965 // When schedule(runtime) is specified, the decision regarding scheduling is
2966 // deferred until run time, and the schedule and chunk size are taken from the
2967 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2968 // implementation defined
2969 //
2970 // while(__kmpc_dispatch_next(&LB, &UB)) {
2971 // idx = LB;
2972 // while (idx <= UB) { BODY; ++idx;
2973 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2974 // } // inner loop
2975 // }
2976 //
2977 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2978 // When schedule(static, chunk_size) is specified, iterations are divided into
2979 // chunks of size chunk_size, and the chunks are assigned to the threads in
2980 // the team in a round-robin fashion in the order of the thread number.
2981 //
2982 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2983 // while (idx <= UB) { BODY; ++idx; } // inner loop
2984 // LB = LB + ST;
2985 // UB = UB + ST;
2986 // }
2987 //
2988
2989 const Expr *IVExpr = S.getIterationVariable();
2990 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2991 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2992
2993 if (DynamicOrOrdered) {
2994 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
2995 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2996 llvm::Value *LBVal = DispatchBounds.first;
2997 llvm::Value *UBVal = DispatchBounds.second;
2998 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
2999 LoopArgs.Chunk};
3000 RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
3001 IVSigned, Ordered, DipatchRTInputValues);
3002 } else {
3003 CGOpenMPRuntime::StaticRTInput StaticInit(
3004 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3005 LoopArgs.ST, LoopArgs.Chunk);
3006 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3007 ScheduleKind, StaticInit);
3008 }
3009
3010 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3011 const unsigned IVSize,
3012 const bool IVSigned) {
3013 if (Ordered) {
3014 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3015 IVSigned);
3016 }
3017 };
3018
3019 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3020 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3021 OuterLoopArgs.IncExpr = S.getInc();
3022 OuterLoopArgs.Init = S.getInit();
3023 OuterLoopArgs.Cond = S.getCond();
3024 OuterLoopArgs.NextLB = S.getNextLowerBound();
3025 OuterLoopArgs.NextUB = S.getNextUpperBound();
3026 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
3027 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3028}
3029
3030static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3031 const unsigned IVSize, const bool IVSigned) {}
3032
3033void CodeGenFunction::EmitOMPDistributeOuterLoop(
3034 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3035 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3036 const CodeGenLoopTy &CodeGenLoopContent) {
3037
3038 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3039
3040 // Emit outer loop.
3041 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3042 // dynamic
3043 //
3044
3045 const Expr *IVExpr = S.getIterationVariable();
3046 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3047 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3048
3049 CGOpenMPRuntime::StaticRTInput StaticInit(
3050 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3051 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3052 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
3053
3054 // for combined 'distribute' and 'for' the increment expression of distribute
3055 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3056 Expr *IncExpr;
3057 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3058 IncExpr = S.getDistInc();
3059 else
3060 IncExpr = S.getInc();
3061
3062 // this routine is shared by 'omp distribute parallel for' and
3063 // 'omp distribute': select the right EUB expression depending on the
3064 // directive
3065 OMPLoopArguments OuterLoopArgs;
3066 OuterLoopArgs.LB = LoopArgs.LB;
3067 OuterLoopArgs.UB = LoopArgs.UB;
3068 OuterLoopArgs.ST = LoopArgs.ST;
3069 OuterLoopArgs.IL = LoopArgs.IL;
3070 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3071 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3072 ? S.getCombinedEnsureUpperBound()
3073 : S.getEnsureUpperBound();
3074 OuterLoopArgs.IncExpr = IncExpr;
3075 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3076 ? S.getCombinedInit()
3077 : S.getInit();
3078 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3079 ? S.getCombinedCond()
3080 : S.getCond();
3081 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3082 ? S.getCombinedNextLowerBound()
3083 : S.getNextLowerBound();
3084 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3085 ? S.getCombinedNextUpperBound()
3086 : S.getNextUpperBound();
3087
3088 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3089 LoopScope, OuterLoopArgs, CodeGenLoopContent,
3090 emitEmptyOrdered);
3091}
3092
3093static std::pair<LValue, LValue>
3094emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3095 const OMPExecutableDirective &S) {
3096 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3097 LValue LB =
3098 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3099 LValue UB =
3100 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3101
3102 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3103 // parallel for') we need to use the 'distribute'
3104 // chunk lower and upper bounds rather than the whole loop iteration
3105 // space. These are parameters to the outlined function for 'parallel'
3106 // and we copy the bounds of the previous schedule into the
3107 // the current ones.
3108 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
3109 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
3110 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3111 PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
3112 PrevLBVal = CGF.EmitScalarConversion(
3113 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
3114 LS.getIterationVariable()->getType(),
3115 LS.getPrevLowerBoundVariable()->getExprLoc());
3116 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3117 PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
3118 PrevUBVal = CGF.EmitScalarConversion(
3119 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
3120 LS.getIterationVariable()->getType(),
3121 LS.getPrevUpperBoundVariable()->getExprLoc());
3122
3123 CGF.EmitStoreOfScalar(PrevLBVal, LB);
3124 CGF.EmitStoreOfScalar(PrevUBVal, UB);
3125
3126 return {LB, UB};
3127}
3128
3129/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3130/// we need to use the LB and UB expressions generated by the worksharing
3131/// code generation support, whereas in non combined situations we would
3132/// just emit 0 and the LastIteration expression
3133/// This function is necessary due to the difference of the LB and UB
3134/// types for the RT emission routines for 'for_static_init' and
3135/// 'for_dispatch_init'
3136static std::pair<llvm::Value *, llvm::Value *>
3137emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3138 const OMPExecutableDirective &S,
3139 Address LB, Address UB) {
3140 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
3141 const Expr *IVExpr = LS.getIterationVariable();
3142 // when implementing a dynamic schedule for a 'for' combined with a
3143 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3144 // is not normalized as each team only executes its own assigned
3145 // distribute chunk
3146 QualType IteratorTy = IVExpr->getType();
3147 llvm::Value *LBVal =
3148 CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3149 llvm::Value *UBVal =
3150 CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
3151 return {LBVal, UBVal};
3152}
3153
3154static void emitDistributeParallelForDistributeInnerBoundParams(
3155 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3156 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3157 const auto &Dir = cast<OMPLoopDirective>(S);
3158 LValue LB =
3159 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
3160 llvm::Value *LBCast =
3161 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
3162 CGF.SizeTy, /*isSigned=*/false);
3163 CapturedVars.push_back(LBCast);
3164 LValue UB =
3165 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
3166
3167 llvm::Value *UBCast =
3168 CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
3169 CGF.SizeTy, /*isSigned=*/false);
3170 CapturedVars.push_back(UBCast);
3171}
3172
3173static void
3174emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3175 const OMPLoopDirective &S,
3176 CodeGenFunction::JumpDest LoopExit) {
3177 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3178 PrePostActionTy &Action) {
3179 Action.Enter(CGF);
3180 bool HasCancel = false;
3181 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3182 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
3183 HasCancel = D->hasCancel();
3184 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
3185 HasCancel = D->hasCancel();
3186 else if (const auto *D =
3187 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
3188 HasCancel = D->hasCancel();
3189 }
3190 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3191 HasCancel);
3192 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
3193 emitDistributeParallelForInnerBounds,
3194 emitDistributeParallelForDispatchBounds);
3195 };
3196
3197 emitCommonOMPParallelDirective(
3198 CGF, S,
3199 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3200 CGInlinedWorksharingLoop,
3201 emitDistributeParallelForDistributeInnerBoundParams);
3202}
3203
3204void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3205 const OMPDistributeParallelForDirective &S) {
3206 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3207 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3208 S.getDistInc());
3209 };
3210 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3211 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3212}
3213
3214void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3215 const OMPDistributeParallelForSimdDirective &S) {
3216 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3217 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
3218 S.getDistInc());
3219 };
3220 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3221 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3222}
3223
3224void CodeGenFunction::EmitOMPDistributeSimdDirective(
3225 const OMPDistributeSimdDirective &S) {
3226 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3227 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3228 };
3229 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3230 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3231}
3232
3233void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3234 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3235 // Emit SPMD target parallel for region as a standalone region.
3236 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3237 emitOMPSimdRegion(CGF, S, Action);
3238 };
3239 llvm::Function *Fn;
3240 llvm::Constant *Addr;
3241 // Emit target region as a standalone region.
3242 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3243 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3244 assert(Fn && Addr && "Target device function emission failed.");
3245}
3246
3247void CodeGenFunction::EmitOMPTargetSimdDirective(
3248 const OMPTargetSimdDirective &S) {
3249 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3250 emitOMPSimdRegion(CGF, S, Action);
3251 };
3252 emitCommonOMPTargetDirective(*this, S, CodeGen);
3253}
3254
3255namespace {
3256struct ScheduleKindModifiersTy {
3257 OpenMPScheduleClauseKind Kind;
3258 OpenMPScheduleClauseModifier M1;
3259 OpenMPScheduleClauseModifier M2;
3260 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3261 OpenMPScheduleClauseModifier M1,
3262 OpenMPScheduleClauseModifier M2)
3263 : Kind(Kind), M1(M1), M2(M2) {}
3264};
3265} // namespace
3266
3267bool CodeGenFunction::EmitOMPWorksharingLoop(
3268 const OMPLoopDirective &S, Expr *EUB,
3269 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3270 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3271 // Emit the loop iteration variable.
3272 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3273 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
3274 EmitVarDecl(*IVDecl);
3275
3276 // Emit the iterations count variable.
3277 // If it is not a variable, Sema decided to calculate iterations count on each
3278 // iteration (e.g., it is foldable into a constant).
3279 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3280 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3281 // Emit calculation of the iterations count.
3282 EmitIgnoredExpr(S.getCalcLastIteration());
3283 }
3284
3285 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3286
3287 bool HasLastprivateClause;
3288 // Check pre-condition.
3289 {
3290 OMPLoopScope PreInitScope(*this, S);
3291 // Skip the entire loop if we don't meet the precondition.
3292 // If the condition constant folds and can be elided, avoid emitting the
3293 // whole loop.
3294 bool CondConstant;
3295 llvm::BasicBlock *ContBlock = nullptr;
3296 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3297 if (!CondConstant)
3298 return false;
3299 } else {
3300 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
3301 ContBlock = createBasicBlock("omp.precond.end");
3302 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3303 getProfileCount(&S));
3304 EmitBlock(ThenBlock);
3305 incrementProfileCounter(&S);
3306 }
3307
3308 RunCleanupsScope DoacrossCleanupScope(*this);
3309 bool Ordered = false;
3310 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3311 if (OrderedClause->getNumForLoops())
3312 RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
3313 else
3314 Ordered = true;
3315 }
3316
3317 llvm::DenseSet<const Expr *> EmittedFinals;
3318 emitAlignedClause(*this, S);
3319 bool HasLinears = EmitOMPLinearClauseInit(S);
3320 // Emit helper vars inits.
3321
3322 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3323 LValue LB = Bounds.first;
3324 LValue UB = Bounds.second;
3325 LValue ST =
3326 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3327 LValue IL =
3328 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3329
3330 // Emit 'then' code.
3331 {
3332 OMPPrivateScope LoopScope(*this);
3333 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3334 // Emit implicit barrier to synchronize threads and avoid data races on
3335 // initialization of firstprivate variables and post-update of
3336 // lastprivate variables.
3337 CGM.getOpenMPRuntime().emitBarrierCall(
3338 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3339 /*ForceSimpleCall=*/true);
3340 }
3341 EmitOMPPrivateClause(S, LoopScope);
3342 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3343 *this, S, EmitLValue(S.getIterationVariable()));
3344 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3345 EmitOMPReductionClauseInit(S, LoopScope);
3346 EmitOMPPrivateLoopCounters(S, LoopScope);
3347 EmitOMPLinearClause(S, LoopScope);
3348 (void)LoopScope.Privatize();
3349 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3350 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3351
3352 // Detect the loop schedule kind and chunk.
3353 const Expr *ChunkExpr = nullptr;
3354 OpenMPScheduleTy ScheduleKind;
3355 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3356 ScheduleKind.Schedule = C->getScheduleKind();
3357 ScheduleKind.M1 = C->getFirstScheduleModifier();
3358 ScheduleKind.M2 = C->getSecondScheduleModifier();
3359 ChunkExpr = C->getChunkSize();
3360 } else {
3361 // Default behaviour for schedule clause.
3362 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3363 *this, S, ScheduleKind.Schedule, ChunkExpr);
3364 }
3365 bool HasChunkSizeOne = false;
3366 llvm::Value *Chunk = nullptr;
3367 if (ChunkExpr) {
3368 Chunk = EmitScalarExpr(ChunkExpr);
3369 Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
3370 S.getIterationVariable()->getType(),
3371 S.getBeginLoc());
3372 Expr::EvalResult Result;
3373 if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
3374 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3375 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3376 }
3377 }
3378 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3379 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3380 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3381 // If the static schedule kind is specified or if the ordered clause is
3382 // specified, and if no monotonic modifier is specified, the effect will
3383 // be as if the monotonic modifier was specified.
3384 bool StaticChunkedOne =
3385 RT.isStaticChunked(ScheduleKind.Schedule,
3386 /* Chunked */ Chunk != nullptr) &&
3387 HasChunkSizeOne &&
3388 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3389 bool IsMonotonic =
3390 Ordered ||
3391 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3392 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3393 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3394 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3395 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3396 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3397 /* Chunked */ Chunk != nullptr) ||
3398 StaticChunkedOne) &&
3399 !Ordered) {
3400 JumpDest LoopExit =
3401 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3402 emitCommonSimdLoop(
3403 *this, S,
3404 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3405 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3406 CGF.EmitOMPSimdInit(S);
3407 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3408 if (C->getKind() == OMPC_ORDER_concurrent)
3409 CGF.LoopStack.setParallel(/*Enable=*/true);
3410 }
3411 },
3412 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3413 &S, ScheduleKind, LoopExit,
3414 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3415 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3416 // When no chunk_size is specified, the iteration space is divided
3417 // into chunks that are approximately equal in size, and at most
3418 // one chunk is distributed to each thread. Note that the size of
3419 // the chunks is unspecified in this case.
3420 CGOpenMPRuntime::StaticRTInput StaticInit(
3421 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3422 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3423 StaticChunkedOne ? Chunk : nullptr);
3424 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3425 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3426 StaticInit);
3427 // UB = min(UB, GlobalUB);
3428 if (!StaticChunkedOne)
3429 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
3430 // IV = LB;
3431 CGF.EmitIgnoredExpr(S.getInit());
3432 // For unchunked static schedule generate:
3433 //
3434 // while (idx <= UB) {
3435 // BODY;
3436 // ++idx;
3437 // }
3438 //
3439 // For static schedule with chunk one:
3440 //
3441 // while (IV <= PrevUB) {
3442 // BODY;
3443 // IV += ST;
3444 // }
3445 CGF.EmitOMPInnerLoop(
3446 S, LoopScope.requiresCleanups(),
3447 StaticChunkedOne ? S.getCombinedParForInDistCond()
3448 : S.getCond(),
3449 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3450 [&S, LoopExit](CodeGenFunction &CGF) {
3451 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3452 },
3453 [](CodeGenFunction &) {});
3454 });
3455 EmitBlock(LoopExit.getBlock());
3456 // Tell the runtime we are done.
3457 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3458 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3459 S.getDirectiveKind());
3460 };
3461 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3462 } else {
3463 // Emit the outer loop, which requests its work chunk [LB..UB] from
3464 // runtime and runs the inner loop to process it.
3465 const OMPLoopArguments LoopArguments(
3466 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
3467 IL.getAddress(*this), Chunk, EUB);
3468 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3469 LoopArguments, CGDispatchBounds);
3470 }
3471 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3472 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3473 return CGF.Builder.CreateIsNotNull(
3474 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3475 });
3476 }
3477 EmitOMPReductionClauseFinal(
3478 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3479 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3480 : /*Parallel only*/ OMPD_parallel);
3481 // Emit post-update of the reduction variables if IsLastIter != 0.
3482 emitPostUpdateForReductionClause(
3483 *this, S, [IL, &S](CodeGenFunction &CGF) {
3484 return CGF.Builder.CreateIsNotNull(
3485 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3486 });
3487 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3488 if (HasLastprivateClause)
3489 EmitOMPLastprivateClauseFinal(
3490 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3491 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3492 LoopScope.restoreMap();
3493 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3494 return CGF.Builder.CreateIsNotNull(
3495 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3496 });
3497 }
3498 DoacrossCleanupScope.ForceCleanup();
3499 // We're now done with the loop, so jump to the continuation block.
3500 if (ContBlock) {
3501 EmitBranch(ContBlock);
3502 EmitBlock(ContBlock, /*IsFinished=*/true);
3503 }
3504 }
3505 return HasLastprivateClause;
3506}
3507
3508/// The following two functions generate expressions for the loop lower
3509/// and upper bounds in case of static and dynamic (dispatch) schedule
3510/// of the associated 'for' or 'distribute' loop.
3511static std::pair<LValue, LValue>
3512emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3513 const auto &LS = cast<OMPLoopDirective>(S);
3514 LValue LB =
3515 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
3516 LValue UB =
3517 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
3518 return {LB, UB};
3519}
3520
3521/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3522/// consider the lower and upper bound expressions generated by the
3523/// worksharing loop support, but we use 0 and the iteration space size as
3524/// constants
3525static std::pair<llvm::Value *, llvm::Value *>
3526emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3527 Address LB, Address UB) {
3528 const auto &LS = cast<OMPLoopDirective>(S);
3529 const Expr *IVExpr = LS.getIterationVariable();
3530 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
3531 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
3532 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
3533 return {LBVal, UBVal};
3534}
3535
3536/// Emits internal temp array declarations for the directive with inscan
3537/// reductions.
3538/// The code is the following:
3539/// \code
3540/// size num_iters = <num_iters>;
3541/// <type> buffer[num_iters];
3542/// \endcode
3543static void emitScanBasedDirectiveDecls(
3544 CodeGenFunction &CGF, const OMPLoopDirective &S,
3545 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3546 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3547 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3548 SmallVector<const Expr *, 4> Shareds;
3549 SmallVector<const Expr *, 4> Privates;
3550 SmallVector<const Expr *, 4> ReductionOps;
3551 SmallVector<const Expr *, 4> CopyArrayTemps;
3552 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3553 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3554 "Only inscan reductions are expected.");
3555 Shareds.append(C->varlist_begin(), C->varlist_end());
3556 Privates.append(C->privates().begin(), C->privates().end());
3557 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3558 CopyArrayTemps.append(C->copy_array_temps().begin(),
3559 C->copy_array_temps().end());
3560 }
3561 {
3562 // Emit buffers for each reduction variables.
3563 // ReductionCodeGen is required to emit correctly the code for array
3564 // reductions.
3565 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3566 unsigned Count = 0;
3567 auto *ITA = CopyArrayTemps.begin();
3568 for (const Expr *IRef : Privates) {
3569 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3570 // Emit variably modified arrays, used for arrays/array sections
3571 // reductions.
3572 if (PrivateVD->getType()->isVariablyModifiedType()) {
3573 RedCG.emitSharedOrigLValue(CGF, Count);
3574 RedCG.emitAggregateType(CGF, Count);
3575 }
3576 CodeGenFunction::OpaqueValueMapping DimMapping(
3577 CGF,
3578 cast<OpaqueValueExpr>(
3579 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3580 ->getSizeExpr()),
3581 RValue::get(OMPScanNumIterations));
3582 // Emit temp buffer.
3583 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3584 ++ITA;
3585 ++Count;
3586 }
3587 }
3588}
3589
3590/// Copies final inscan reductions values to the original variables.
3591/// The code is the following:
3592/// \code
3593/// <orig_var> = buffer[num_iters-1];
3594/// \endcode
3595static void emitScanBasedDirectiveFinals(
3596 CodeGenFunction &CGF, const OMPLoopDirective &S,
3597 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3598 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3599 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3600 SmallVector<const Expr *, 4> Shareds;
3601 SmallVector<const Expr *, 4> LHSs;
3602 SmallVector<const Expr *, 4> RHSs;
3603 SmallVector<const Expr *, 4> Privates;
3604 SmallVector<const Expr *, 4> CopyOps;
3605 SmallVector<const Expr *, 4> CopyArrayElems;
3606 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3607 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3608 "Only inscan reductions are expected.");
3609 Shareds.append(C->varlist_begin(), C->varlist_end());
3610 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3611 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3612 Privates.append(C->privates().begin(), C->privates().end());
3613 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3614 CopyArrayElems.append(C->copy_array_elems().begin(),
3615 C->copy_array_elems().end());
3616 }
3617 // Create temp var and copy LHS value to this temp value.
3618 // LHS = TMP[LastIter];
3619 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3620 OMPScanNumIterations,
3621 llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3622 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3623 const Expr *PrivateExpr = Privates[I];
3624 const Expr *OrigExpr = Shareds[I];
3625 const Expr *CopyArrayElem = CopyArrayElems[I];
3626 CodeGenFunction::OpaqueValueMapping IdxMapping(
3627 CGF,
3628 cast<OpaqueValueExpr>(
3629 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3630 RValue::get(OMPLast));
3631 LValue DestLVal = CGF.EmitLValue(OrigExpr);
3632 LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3633 CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
3634 SrcLVal.getAddress(CGF),
3635 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3636 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
3637 CopyOps[I]);
3638 }
3639}
3640
3641/// Emits the code for the directive with inscan reductions.
3642/// The code is the following:
3643/// \code
3644/// #pragma omp ...
3645/// for (i: 0..<num_iters>) {
3646/// <input phase>;
3647/// buffer[i] = red;
3648/// }
3649/// #pragma omp master // in parallel region
3650/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3651/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3652/// buffer[i] op= buffer[i-pow(2,k)];
3653/// #pragma omp barrier // in parallel region
3654/// #pragma omp ...
3655/// for (0..<num_iters>) {
3656/// red = InclusiveScan ? buffer[i] : buffer[i-1];
3657/// <scan phase>;
3658/// }
3659/// \endcode
3660static void emitScanBasedDirective(
3661 CodeGenFunction &CGF, const OMPLoopDirective &S,
3662 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3663 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3664 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3665 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3666 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3667 SmallVector<const Expr *, 4> Privates;
3668 SmallVector<const Expr *, 4> ReductionOps;
3669 SmallVector<const Expr *, 4> LHSs;
3670 SmallVector<const Expr *, 4> RHSs;
3671 SmallVector<const Expr *, 4> CopyArrayElems;
3672 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3673 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3674 "Only inscan reductions are expected.");
3675 Privates.append(C->privates().begin(), C->privates().end());
3676 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3677 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3678 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3679 CopyArrayElems.append(C->copy_array_elems().begin(),
3680 C->copy_array_elems().end());
3681 }
3682 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3683 {
3684 // Emit loop with input phase:
3685 // #pragma omp ...
3686 // for (i: 0..<num_iters>) {
3687 // <input phase>;
3688 // buffer[i] = red;
3689 // }
3690 CGF.OMPFirstScanLoop = true;
3691 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3692 FirstGen(CGF);
3693 }
3694 // #pragma omp barrier // in parallel region
3695 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3696 &ReductionOps,
3697 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3698 Action.Enter(CGF);
3699 // Emit prefix reduction:
3700 // #pragma omp master // in parallel region
3701 // for (int k = 0; k <= ceil(log2(n)); ++k)
3702 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3703 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3704 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
3705 llvm::Function *F =
3706 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3707 llvm::Value *Arg =
3708 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3709 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3710 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3711 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3712 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
3713 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3714 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3715 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3716 CGF.EmitBlock(LoopBB);
3717 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3718 // size pow2k = 1;
3719 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3720 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3721 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3722 // for (size i = n - 1; i >= 2 ^ k; --i)
3723 // tmp[i] op= tmp[i-pow2k];
3724 llvm::BasicBlock *InnerLoopBB =
3725 CGF.createBasicBlock("omp.inner.log.scan.body");
3726 llvm::BasicBlock *InnerExitBB =
3727 CGF.createBasicBlock("omp.inner.log.scan.exit");
3728 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3729 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3730 CGF.EmitBlock(InnerLoopBB);
3731 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3732 IVal->addIncoming(NMin1, LoopBB);
3733 {
3734 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3735 auto *ILHS = LHSs.begin();
3736 auto *IRHS = RHSs.begin();
3737 for (const Expr *CopyArrayElem : CopyArrayElems) {
3738 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3739 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3740 Address LHSAddr = Address::invalid();
3741 {
3742 CodeGenFunction::OpaqueValueMapping IdxMapping(
3743 CGF,
3744 cast<OpaqueValueExpr>(
3745 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3746 RValue::get(IVal));
3747 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3748 }
3749 PrivScope.addPrivate(LHSVD, LHSAddr);
3750 Address RHSAddr = Address::invalid();
3751 {
3752 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3753 CodeGenFunction::OpaqueValueMapping IdxMapping(
3754 CGF,
3755 cast<OpaqueValueExpr>(
3756 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3757 RValue::get(OffsetIVal));
3758 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3759 }
3760 PrivScope.addPrivate(RHSVD, RHSAddr);
3761 ++ILHS;
3762 ++IRHS;
3763 }
3764 PrivScope.Privatize();
3765 CGF.CGM.getOpenMPRuntime().emitReduction(
3766 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3767 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3768 }
3769 llvm::Value *NextIVal =
3770 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3771 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3772 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3773 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3774 CGF.EmitBlock(InnerExitBB);
3775 llvm::Value *Next =
3776 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3777 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3778 // pow2k <<= 1;
3779 llvm::Value *NextPow2K =
3780 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3781 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
3782 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3783 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3784 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3785 CGF.EmitBlock(ExitBB);
3786 };
3787 if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3788 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3789 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3790 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3791 /*ForceSimpleCall=*/true);
3792 } else {
3793 RegionCodeGenTy RCG(CodeGen);
3794 RCG(CGF);
3795 }
3796
3797 CGF.OMPFirstScanLoop = false;
3798 SecondGen(CGF);
3799}
3800
3801static bool emitWorksharingDirective(CodeGenFunction &CGF,
3802 const OMPLoopDirective &S,
3803 bool HasCancel) {
3804 bool HasLastprivates;
3805 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3806 [](const OMPReductionClause *C) {
3807 return C->getModifier() == OMPC_REDUCTION_inscan;
3808 })) {
3809 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3810 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3811 OMPLoopScope LoopScope(CGF, S);
3812 return CGF.EmitScalarExpr(S.getNumIterations());
3813 };
3814 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3815 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3816 CGF, S.getDirectiveKind(), HasCancel);
3817 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3818 emitForLoopBounds,
3819 emitDispatchForLoopBounds);
3820 // Emit an implicit barrier at the end.
3821 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3822 OMPD_for);
3823 };
3824 const auto &&SecondGen = [&S, HasCancel,
3825 &HasLastprivates](CodeGenFunction &CGF) {
3826 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3827 CGF, S.getDirectiveKind(), HasCancel);
3828 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3829 emitForLoopBounds,
3830 emitDispatchForLoopBounds);
3831 };
3832 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3833 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3834 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3835 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3836 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3837 } else {
3838 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3839 HasCancel);
3840 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3841 emitForLoopBounds,
3842 emitDispatchForLoopBounds);
3843 }
3844 return HasLastprivates;
3845}
3846
3847static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3848 if (S.hasCancel())
3849 return false;
3850 for (OMPClause *C : S.clauses()) {
3851 if (isa<OMPNowaitClause>(C))
3852 continue;
3853
3854 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3855 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3856 return false;
3857 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3858 return false;
3859 switch (SC->getScheduleKind()) {
3860 case OMPC_SCHEDULE_auto:
3861 case OMPC_SCHEDULE_dynamic:
3862 case OMPC_SCHEDULE_runtime:
3863 case OMPC_SCHEDULE_guided:
3864 case OMPC_SCHEDULE_static:
3865 continue;
3866 case OMPC_SCHEDULE_unknown:
3867 return false;
3868 }
3869 }
3870
3871 return false;
3872 }
3873
3874 return true;
3875}
3876
3877static llvm::omp::ScheduleKind
3878convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3879 switch (ScheduleClauseKind) {
3880 case OMPC_SCHEDULE_unknown:
3881 return llvm::omp::OMP_SCHEDULE_Default;
3882 case OMPC_SCHEDULE_auto:
3883 return llvm::omp::OMP_SCHEDULE_Auto;
3884 case OMPC_SCHEDULE_dynamic:
3885 return llvm::omp::OMP_SCHEDULE_Dynamic;
3886 case OMPC_SCHEDULE_guided:
3887 return llvm::omp::OMP_SCHEDULE_Guided;
3888 case OMPC_SCHEDULE_runtime:
3889 return llvm::omp::OMP_SCHEDULE_Runtime;
3890 case OMPC_SCHEDULE_static:
3891 return llvm::omp::OMP_SCHEDULE_Static;
3892 }
3893 llvm_unreachable("Unhandled schedule kind");
3894}
3895
3896void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3897 bool HasLastprivates = false;
3898 bool UseOMPIRBuilder =
3899 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3900 auto &&CodeGen = [this, &S, &HasLastprivates,
3901 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3902 // Use the OpenMPIRBuilder if enabled.
3903 if (UseOMPIRBuilder) {
3904 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3905
3906 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
3907 llvm::Value *ChunkSize = nullptr;
3908 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
3909 SchedKind =
3910 convertClauseKindToSchedKind(SchedClause->getScheduleKind());
3911 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
3912 ChunkSize = EmitScalarExpr(ChunkSizeExpr);
3913 }
3914
3915 // Emit the associated statement and get its loop representation.
3916 const Stmt *Inner = S.getRawStmt();
3917 llvm::CanonicalLoopInfo *CLI =
3918 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3919
3920 llvm::OpenMPIRBuilder &OMPBuilder =
3921 CGM.getOpenMPRuntime().getOMPBuilder();
3922 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3923 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3924 OMPBuilder.applyWorkshareLoop(
3925 Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
3926 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
3927 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3928 /*HasOrderedClause=*/false);
3929 return;
3930 }
3931
3932 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3933 };
3934 {
3935 auto LPCRegion =
3936 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3937 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3938 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3939 S.hasCancel());
3940 }
3941
3942 if (!UseOMPIRBuilder) {
3943 // Emit an implicit barrier at the end.
3944 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3945 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3946 }
3947 // Check for outer lastprivate conditional update.
3948 checkForLastprivateConditionalUpdate(*this, S);
3949}
3950
3951void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3952 bool HasLastprivates = false;
3953 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3954 PrePostActionTy &) {
3955 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3956 };
3957 {
3958 auto LPCRegion =
3959 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3960 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3961 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3962 }
3963
3964 // Emit an implicit barrier at the end.
3965 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3966 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3967 // Check for outer lastprivate conditional update.
3968 checkForLastprivateConditionalUpdate(*this, S);
3969}
3970
3971static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3972 const Twine &Name,
3973 llvm::Value *Init = nullptr) {
3974 LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
3975 if (Init)
3976 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
3977 return LVal;
3978}
3979
3980void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
3981 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3982 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3983 bool HasLastprivates = false;
3984 auto &&CodeGen = [&S, CapturedStmt, CS,
3985 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
3986 const ASTContext &C = CGF.getContext();
3987 QualType KmpInt32Ty =
3988 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3989 // Emit helper vars inits.
3990 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
3991 CGF.Builder.getInt32(0));
3992 llvm::ConstantInt *GlobalUBVal = CS != nullptr
3993 ? CGF.Builder.getInt32(CS->size() - 1)
3994 : CGF.Builder.getInt32(0);
3995 LValue UB =
3996 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
3997 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
3998 CGF.Builder.getInt32(1));
3999 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
4000 CGF.Builder.getInt32(0));
4001 // Loop counter.
4002 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
4003 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4004 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4005 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4006 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4007 // Generate condition for loop.
4008 BinaryOperator *Cond = BinaryOperator::Create(
4009 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
4010 S.getBeginLoc(), FPOptionsOverride());
4011 // Increment for loop counter.
4012 UnaryOperator *Inc = UnaryOperator::Create(
4013 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
4014 S.getBeginLoc(), true, FPOptionsOverride());
4015 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4016 // Iterate through all sections and emit a switch construct:
4017 // switch (IV) {
4018 // case 0:
4019 // <SectionStmt[0]>;
4020 // break;
4021 // ...
4022 // case <NumSection> - 1:
4023 // <SectionStmt[<NumSection> - 1]>;
4024 // break;
4025 // }
4026 // .omp.sections.exit:
4027 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
4028 llvm::SwitchInst *SwitchStmt =
4029 CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
4030 ExitBB, CS == nullptr ? 1 : CS->size());
4031 if (CS) {
4032 unsigned CaseNumber = 0;
4033 for (const Stmt *SubStmt : CS->children()) {
4034 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
4035 CGF.EmitBlock(CaseBB);
4036 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
4037 CGF.EmitStmt(SubStmt);
4038 CGF.EmitBranch(ExitBB);
4039 ++CaseNumber;
4040 }
4041 } else {
4042 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
4043 CGF.EmitBlock(CaseBB);
4044 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
4045 CGF.EmitStmt(CapturedStmt);
4046 CGF.EmitBranch(ExitBB);
4047 }
4048 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
4049 };
4050
4051 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4052 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
4053 // Emit implicit barrier to synchronize threads and avoid data races on
4054 // initialization of firstprivate variables and post-update of lastprivate
4055 // variables.
4056 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4057 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
4058 /*ForceSimpleCall=*/true);
4059 }
4060 CGF.EmitOMPPrivateClause(S, LoopScope);
4061 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4062 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4063 CGF.EmitOMPReductionClauseInit(S, LoopScope);
4064 (void)LoopScope.Privatize();
4065 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4066 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4067
4068 // Emit static non-chunked loop.
4069 OpenMPScheduleTy ScheduleKind;
4070 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4071 CGOpenMPRuntime::StaticRTInput StaticInit(
4072 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF),
4073 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF));
4074 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
4075 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
4076 // UB = min(UB, GlobalUB);
4077 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
4078 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4079 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
4080 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
4081 // IV = LB;
4082 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
4083 // while (idx <= UB) { BODY; ++idx; }
4084 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
4085 [](CodeGenFunction &) {});
4086 // Tell the runtime we are done.
4087 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4088 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4089 S.getDirectiveKind());
4090 };
4091 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
4092 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4093 // Emit post-update of the reduction variables if IsLastIter != 0.
4094 emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
4095 return CGF.Builder.CreateIsNotNull(
4096 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
4097 });
4098
4099 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4100 if (HasLastprivates)
4101 CGF.EmitOMPLastprivateClauseFinal(
4102 S, /*NoFinals=*/false,
4103 CGF.Builder.CreateIsNotNull(
4104 CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
4105 };
4106
4107 bool HasCancel = false;
4108 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
4109 HasCancel = OSD->hasCancel();
4110 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
4111 HasCancel = OPSD->hasCancel();
4112 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
4113 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
4114 HasCancel);
4115 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4116 // clause. Otherwise the barrier will be generated by the codegen for the
4117 // directive.
4118 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4119 // Emit implicit barrier to synchronize threads and avoid data races on
4120 // initialization of firstprivate variables.
4121 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4122 OMPD_unknown);
4123 }
4124}
4125
4126void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4127 if (CGM.getLangOpts().OpenMPIRBuilder) {
4128 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4129 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4130 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4131
4132 auto FiniCB = [this](InsertPointTy IP) {
4133 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4134 };
4135
4136 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4137 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4138 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
4139 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4140 if (CS) {
4141 for (const Stmt *SubStmt : CS->children()) {
4142 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4143 InsertPointTy CodeGenIP) {
4144 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4145 *this, SubStmt, AllocaIP, CodeGenIP, "section");
4146 };
4147 SectionCBVector.push_back(SectionCB);
4148 }
4149 } else {
4150 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4151 InsertPointTy CodeGenIP) {
4152 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4153 *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
4154 };
4155 SectionCBVector.push_back(SectionCB);
4156 }
4157
4158 // Privatization callback that performs appropriate action for
4159 // shared/private/firstprivate/lastprivate/copyin/... variables.
4160 //
4161 // TODO: This defaults to shared right now.
4162 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4163 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4164 // The next line is appropriate only for variables (Val) with the
4165 // data-sharing attribute "shared".
4166 ReplVal = &Val;
4167
4168 return CodeGenIP;
4169 };
4170
4171 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4172 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4173 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4174 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4175 Builder.restoreIP(OMPBuilder.createSections(
4176 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
4177 S.getSingleClause<OMPNowaitClause>()));
4178 return;
4179 }
4180 {
4181 auto LPCRegion =
4182 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4183 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4184 EmitSections(S);
4185 }
4186 // Emit an implicit barrier at the end.
4187 if (!S.getSingleClause<OMPNowaitClause>()) {
4188 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
4189 OMPD_sections);
4190 }
4191 // Check for outer lastprivate conditional update.
4192 checkForLastprivateConditionalUpdate(*this, S);
4193}
4194
4195void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4196 if (CGM.getLangOpts().OpenMPIRBuilder) {
4197 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4198 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4199
4200 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4201 auto FiniCB = [this](InsertPointTy IP) {
4202 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4203 };
4204
4205 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4206 InsertPointTy CodeGenIP) {
4207 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4208 *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
4209 };
4210
4211 LexicalScope Scope(*this, S.getSourceRange());
4212 EmitStopPoint(&S);
4213 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
4214
4215 return;
4216 }
4217 LexicalScope Scope(*this, S.getSourceRange());
4218 EmitStopPoint(&S);
4219 EmitStmt(S.getAssociatedStmt());
4220}
4221
4222void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4223 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4224 llvm::SmallVector<const Expr *, 8> DestExprs;
4225 llvm::SmallVector<const Expr *, 8> SrcExprs;
4226 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4227 // Check if there are any 'copyprivate' clauses associated with this
4228 // 'single' construct.
4229 // Build a list of copyprivate variables along with helper expressions
4230 // (<source>, <destination>, <destination>=<source> expressions)
4231 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4232 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
4233 DestExprs.append(C->destination_exprs().begin(),
4234 C->destination_exprs().end());
4235 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
4236 AssignmentOps.append(C->assignment_ops().begin(),
4237 C->assignment_ops().end());
4238 }
4239 // Emit code for 'single' region along with 'copyprivate' clauses
4240 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4241 Action.Enter(CGF);
4242 OMPPrivateScope SingleScope(CGF);
4243 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
4244 CGF.EmitOMPPrivateClause(S, SingleScope);
4245 (void)SingleScope.Privatize();
4246 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
4247 };
4248 {
4249 auto LPCRegion =
4250 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4251 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4252 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
4253 CopyprivateVars, DestExprs,
4254 SrcExprs, AssignmentOps);
4255 }
4256 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4257 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4258 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4259 CGM.getOpenMPRuntime().emitBarrierCall(
4260 *this, S.getBeginLoc(),
4261 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4262 }
4263 // Check for outer lastprivate conditional update.
4264 checkForLastprivateConditionalUpdate(*this, S);
4265}
4266
4267static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4268 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4269 Action.Enter(CGF);
4270 CGF.EmitStmt(S.getRawStmt());
4271 };
4272 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
4273}
4274
4275void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4276 if (CGM.getLangOpts().OpenMPIRBuilder) {
4277 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4278 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4279
4280 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4281
4282 auto FiniCB = [this](InsertPointTy IP) {
4283 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4284 };
4285
4286 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4287 InsertPointTy CodeGenIP) {
4288 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4289 *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
4290 };
4291
4292 LexicalScope Scope(*this, S.getSourceRange());
4293 EmitStopPoint(&S);
4294 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
4295
4296 return;
4297 }
4298 LexicalScope Scope(*this, S.getSourceRange());
4299 EmitStopPoint(&S);
4300 emitMaster(*this, S);
4301}
4302
4303static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4304 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4305 Action.Enter(CGF);
4306 CGF.EmitStmt(S.getRawStmt());
4307 };
4308 Expr *Filter = nullptr;
4309 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4310 Filter = FilterClause->getThreadID();
4311 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
4312 Filter);
4313}
4314
4315void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4316 if (CGM.getLangOpts().OpenMPIRBuilder) {
4317 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4318 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4319
4320 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4321 const Expr *Filter = nullptr;
4322 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4323 Filter = FilterClause->getThreadID();
4324 llvm::Value *FilterVal = Filter
4325 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4326 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4327
4328 auto FiniCB = [this](InsertPointTy IP) {
4329 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4330 };
4331
4332 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4333 InsertPointTy CodeGenIP) {
4334 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4335 *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
4336 };
4337
4338 LexicalScope Scope(*this, S.getSourceRange());
4339 EmitStopPoint(&S);
4340 Builder.restoreIP(
4341 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4342
4343 return;
4344 }
4345 LexicalScope Scope(*this, S.getSourceRange());
4346 EmitStopPoint(&S);
4347 emitMasked(*this, S);
4348}
4349
4350void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4351 if (CGM.getLangOpts().OpenMPIRBuilder) {
4352 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4353 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4354
4355 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4356 const Expr *Hint = nullptr;
4357 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4358 Hint = HintClause->getHint();
4359
4360 // TODO: This is slightly different from what's currently being done in
4361 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4362 // about typing is final.
4363 llvm::Value *HintInst = nullptr;
4364 if (Hint)
4365 HintInst =
4366 Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
4367
4368 auto FiniCB = [this](InsertPointTy IP) {
4369 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4370 };
4371
4372 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4373 InsertPointTy CodeGenIP) {
4374 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4375 *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
4376 };
4377
4378 LexicalScope Scope(*this, S.getSourceRange());
4379 EmitStopPoint(&S);
4380 Builder.restoreIP(OMPBuilder.createCritical(
4381 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
4382 HintInst));
4383
4384 return;
4385 }
4386
4387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4388 Action.Enter(CGF);
4389 CGF.EmitStmt(S.getAssociatedStmt());
4390 };
4391 const Expr *Hint = nullptr;
4392 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4393 Hint = HintClause->getHint();
4394 LexicalScope Scope(*this, S.getSourceRange());
4395 EmitStopPoint(&S);
4396 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
4397 S.getDirectiveName().getAsString(),
4398 CodeGen, S.getBeginLoc(), Hint);
4399}
4400
4401void CodeGenFunction::EmitOMPParallelForDirective(
4402 const OMPParallelForDirective &S) {
4403 // Emit directive as a combined directive that consists of two implicit
4404 // directives: 'parallel' with 'for' directive.
4405 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4406 Action.Enter(CGF);
4407 emitOMPCopyinClause(CGF, S);
4408 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
4409 };
4410 {
4411 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4412 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4413 CGCapturedStmtInfo CGSI(CR_OpenMP);
4414 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4415 OMPLoopScope LoopScope(CGF, S);
4416 return CGF.EmitScalarExpr(S.getNumIterations());
4417 };
4418 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4419 [](const OMPReductionClause *C) {
4420 return C->getModifier() == OMPC_REDUCTION_inscan;
4421 });
4422 if (IsInscan)
4423 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4424 auto LPCRegion =
4425 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4426 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
4427 emitEmptyBoundParameters);
4428 if (IsInscan)
4429 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4430 }
4431 // Check for outer lastprivate conditional update.
4432 checkForLastprivateConditionalUpdate(*this, S);
4433}
4434
4435void CodeGenFunction::EmitOMPParallelForSimdDirective(
4436 const OMPParallelForSimdDirective &S) {
4437 // Emit directive as a combined directive that consists of two implicit
4438 // directives: 'parallel' with 'for' directive.
4439 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4440 Action.Enter(CGF);
4441 emitOMPCopyinClause(CGF, S);
4442 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4443 };
4444 {
4445 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4446 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4447 CGCapturedStmtInfo CGSI(CR_OpenMP);
4448 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4449 OMPLoopScope LoopScope(CGF, S);
4450 return CGF.EmitScalarExpr(S.getNumIterations());
4451 };
4452 bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4453 [](const OMPReductionClause *C) {
4454 return C->getModifier() == OMPC_REDUCTION_inscan;
4455 });
4456 if (IsInscan)
4457 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4458 auto LPCRegion =
4459 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4460 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
4461 emitEmptyBoundParameters);
4462 if (IsInscan)
4463 emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
4464 }
4465 // Check for outer lastprivate conditional update.
4466 checkForLastprivateConditionalUpdate(*this, S);
4467}
4468
4469void CodeGenFunction::EmitOMPParallelMasterDirective(
4470 const OMPParallelMasterDirective &S) {
4471 // Emit directive as a combined directive that consists of two implicit
4472 // directives: 'parallel' with 'master' directive.
4473 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4474 Action.Enter(CGF);
4475 OMPPrivateScope PrivateScope(CGF);
4476 emitOMPCopyinClause(CGF, S);
4477 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4478 CGF.EmitOMPPrivateClause(S, PrivateScope);
4479 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4480 (void)PrivateScope.Privatize();
4481 emitMaster(CGF, S);
4482 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4483 };
4484 {
4485 auto LPCRegion =
4486 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4487 emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
4488 emitEmptyBoundParameters);
4489 emitPostUpdateForReductionClause(*this, S,
4490 [](CodeGenFunction &) { return nullptr; });
4491 }
4492 // Check for outer lastprivate conditional update.
4493 checkForLastprivateConditionalUpdate(*this, S);
4494}
4495
4496void CodeGenFunction::EmitOMPParallelMaskedDirective(
4497 const OMPParallelMaskedDirective &S) {
4498 // Emit directive as a combined directive that consists of two implicit
4499 // directives: 'parallel' with 'masked' directive.
4500 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4501 Action.Enter(CGF);
4502 OMPPrivateScope PrivateScope(CGF);
4503 emitOMPCopyinClause(CGF, S);
4504 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4505 CGF.EmitOMPPrivateClause(S, PrivateScope);
4506 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4507 (void)PrivateScope.Privatize();
4508 emitMasked(CGF, S);
4509 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4510 };
4511 {
4512 auto LPCRegion =
4513 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4514 emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
4515 emitEmptyBoundParameters);
4516 emitPostUpdateForReductionClause(*this, S,
4517 [](CodeGenFunction &) { return nullptr; });
4518 }
4519 // Check for outer lastprivate conditional update.
4520 checkForLastprivateConditionalUpdate(*this, S);
4521}
4522
4523void CodeGenFunction::EmitOMPParallelSectionsDirective(
4524 const OMPParallelSectionsDirective &S) {
4525 // Emit directive as a combined directive that consists of two implicit
4526 // directives: 'parallel' with 'sections' directive.
4527 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4528 Action.Enter(CGF);
4529 emitOMPCopyinClause(CGF, S);
4530 CGF.EmitSections(S);
4531 };
4532 {
4533 auto LPCRegion =
4534 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
4535 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
4536 emitEmptyBoundParameters);
4537 }
4538 // Check for outer lastprivate conditional update.
4539 checkForLastprivateConditionalUpdate(*this, S);
4540}
4541
4542namespace {
4543/// Get the list of variables declared in the context of the untied tasks.
4544class CheckVarsEscapingUntiedTaskDeclContext final
4545 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4546 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4547
4548public:
4549 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4550 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4551 void VisitDeclStmt(const DeclStmt *S) {
4552 if (!S)
4553 return;
4554 // Need to privatize only local vars, static locals can be processed as is.
4555 for (const Decl *D : S->decls()) {
4556 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4557 if (VD->hasLocalStorage())
4558 PrivateDecls.push_back(VD);
4559 }
4560 }
4561 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4562 void VisitCapturedStmt(const CapturedStmt *) {}
4563 void VisitLambdaExpr(const LambdaExpr *) {}
4564 void VisitBlockExpr(const BlockExpr *) {}
4565 void VisitStmt(const Stmt *S) {
4566 if (!S)
4567 return;
4568 for (const Stmt *Child : S->children())
4569 if (Child)
4570 Visit(Child);
4571 }
4572
4573 /// Swaps list of vars with the provided one.
4574 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4575};
4576} // anonymous namespace
4577
4578static void buildDependences(const OMPExecutableDirective &S,
4579 OMPTaskDataTy &Data) {
4580
4581 // First look for 'omp_all_memory' and add this first.
4582 bool OmpAllMemory = false;
4583 if (llvm::any_of(
4584 S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
4585 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4586 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4587 })) {
4588 OmpAllMemory = true;
4589 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4590 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4591 // simplify.
4592 OMPTaskDataTy::DependData &DD =
4593 Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
4594 /*IteratorExpr=*/nullptr);
4595 // Add a nullptr Expr to simplify the codegen in emitDependData.
4596 DD.DepExprs.push_back(nullptr);
4597 }
4598 // Add remaining dependences skipping any 'out' or 'inout' if they are
4599 // overridden by 'omp_all_memory'.
4600 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4601 OpenMPDependClauseKind Kind = C->getDependencyKind();
4602 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4603 continue;
4604 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4605 continue;
4606 OMPTaskDataTy::DependData &DD =
4607 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
4608 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
4609 }
4610}
4611
4612void CodeGenFunction::EmitOMPTaskBasedDirective(
4613 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4614 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4615 OMPTaskDataTy &Data) {
4616 // Emit outlined function for task construct.
4617 const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
4618 auto I = CS->getCapturedDecl()->param_begin();
4619 auto PartId = std::next(I);
4620 auto TaskT = std::next(I, 4);
4621 // Check if the task is final
4622 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4623 // If the condition constant folds and can be elided, try to avoid emitting
4624 // the condition and the dead arm of the if/else.
4625 const Expr *Cond = Clause->getCondition();
4626 bool CondConstant;
4627 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
4628 Data.Final.setInt(CondConstant);
4629 else
4630 Data.Final.setPointer(EvaluateExprAsBool(Cond));
4631 } else {
4632 // By default the task is not final.
4633 Data.Final.setInt(/*IntVal=*/false);
4634 }
4635 // Check if the task has 'priority' clause.
4636 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4637 const Expr *Prio = Clause->getPriority();
4638 Data.Priority.setInt(/*IntVal=*/true);
4639 Data.Priority.setPointer(EmitScalarConversion(
4640 EmitScalarExpr(Prio), Prio->getType(),
4641 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4642 Prio->getExprLoc()));
4643 }
4644 // The first function argument for tasks is a thread id, the second one is a
4645 // part id (0 for tied tasks, >=0 for untied task).
4646 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4647 // Get list of private variables.
4648 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4649 auto IRef = C->varlist_begin();
4650 for (const Expr *IInit : C->private_copies()) {
4651 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4652 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4653 Data.PrivateVars.push_back(*IRef);
4654 Data.PrivateCopies.push_back(IInit);
4655 }
4656 ++IRef;
4657 }
4658 }
4659 EmittedAsPrivate.clear();
4660 // Get list of firstprivate variables.
4661 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4662 auto IRef = C->varlist_begin();
4663 auto IElemInitRef = C->inits().begin();
4664 for (const Expr *IInit : C->private_copies()) {
4665 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4666 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4667 Data.FirstprivateVars.push_back(*IRef);
4668 Data.FirstprivateCopies.push_back(IInit);
4669 Data.FirstprivateInits.push_back(*IElemInitRef);
4670 }
4671 ++IRef;
4672 ++IElemInitRef;
4673 }
4674 }
4675 // Get list of lastprivate variables (for taskloops).
4676 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4677 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4678 auto IRef = C->varlist_begin();
4679 auto ID = C->destination_exprs().begin();
4680 for (const Expr *IInit : C->private_copies()) {
4681 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
4682 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
4683 Data.LastprivateVars.push_back(*IRef);
4684 Data.LastprivateCopies.push_back(IInit);
4685 }
4686 LastprivateDstsOrigs.insert(
4687 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
4688 cast<DeclRefExpr>(*IRef)));
4689 ++IRef;
4690 ++ID;
4691 }
4692 }
4693 SmallVector<const Expr *, 4> LHSs;
4694 SmallVector<const Expr *, 4> RHSs;
4695 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4696 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
4697 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
4698 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
4699 Data.ReductionOps.append(C->reduction_ops().begin(),
4700 C->reduction_ops().end());
4701 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4702 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4703 }
4704 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4705 *this, S.getBeginLoc(), LHSs, RHSs, Data);
4706 // Build list of dependences.
4707 buildDependences(S, Data);
4708 // Get list of local vars for untied tasks.
4709 if (!Data.Tied) {
4710 CheckVarsEscapingUntiedTaskDeclContext Checker;
4711 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4712 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4713 Checker.getPrivateDecls().end());
4714 }
4715 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4716 CapturedRegion](CodeGenFunction &CGF,
4717 PrePostActionTy &Action) {
4718 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4719 std::pair<Address, Address>>
4720 UntiedLocalVars;
4721 // Set proper addresses for generated private copies.
4722 OMPPrivateScope Scope(CGF);
4723 // Generate debug info for variables present in shared clause.
4724 if (auto *DI = CGF.getDebugInfo()) {
4725 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4726 CGF.CapturedStmtInfo->getCaptureFields();
4727 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4728 if (CaptureFields.size() && ContextValue) {
4729 unsigned CharWidth = CGF.getContext().getCharWidth();
4730 // The shared variables are packed together as members of structure.
4731 // So the address of each shared variable can be computed by adding
4732 // offset of it (within record) to the base address of record. For each
4733 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4734 // appropriate expressions (DIExpression).
4735 // Ex:
4736 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4737 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4738 // metadata !svar1,
4739 // metadata !DIExpression(DW_OP_deref))
4740 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4741 // metadata !svar2,
4742 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4743 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4744 const VarDecl *SharedVar = It->first;
4745 RecordDecl *CaptureRecord = It->second->getParent();
4746 const ASTRecordLayout &Layout =
4747 CGF.getContext().getASTRecordLayout(CaptureRecord);
4748 unsigned Offset =
4749 Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
4750 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4751 (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
4752 CGF.Builder, false);
4753 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
4754 // Get the call dbg.declare instruction we just created and update
4755 // its DIExpression to add offset to base address.
4756 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last)) {
4757 SmallVector<uint64_t, 8> Ops;
4758 // Add offset to the base address if non zero.
4759 if (Offset) {
4760 Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
4761 Ops.push_back(Offset);
4762 }
4763 Ops.push_back(llvm::dwarf::DW_OP_deref);
4764 auto &Ctx = DDI->getContext();
4765 llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops);
4766 Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr));
4767 }
4768 }
4769 }
4770 }
4771 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
4772 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
4773 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
4774 enum { PrivatesParam = 2, CopyFnParam = 3 };
4775 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
4776 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
4777 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
4778 CS->getCapturedDecl()->getParam(PrivatesParam)));
4779 // Map privates.
4780 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
4781 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4782 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
4783 CallArgs.push_back(PrivatesPtr);
4784 ParamTypes.push_back(PrivatesPtr->getType());
4785 for (const Expr *E : Data.PrivateVars) {
4786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4787 Address PrivatePtr = CGF.CreateMemTemp(
4788 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
4789 PrivatePtrs.emplace_back(VD, PrivatePtr);
4790 CallArgs.push_back(PrivatePtr.getPointer());
4791 ParamTypes.push_back(PrivatePtr.getType());
4792 }
4793 for (const Expr *E : Data.FirstprivateVars) {
4794 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4795 Address PrivatePtr =
4796 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4797 ".firstpriv.ptr.addr");
4798 PrivatePtrs.emplace_back(VD, PrivatePtr);
4799 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
4800 CallArgs.push_back(PrivatePtr.getPointer());
4801 ParamTypes.push_back(PrivatePtr.getType());
4802 }
4803 for (const Expr *E : Data.LastprivateVars) {
4804 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4805 Address PrivatePtr =
4806 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
4807 ".lastpriv.ptr.addr");
4808 PrivatePtrs.emplace_back(VD, PrivatePtr);
4809 CallArgs.push_back(PrivatePtr.getPointer());
4810 ParamTypes.push_back(PrivatePtr.getType());
4811 }
4812 for (const VarDecl *VD : Data.PrivateLocals) {
4813 QualType Ty = VD->getType().getNonReferenceType();
4814 if (VD->getType()->isLValueReferenceType())
4815 Ty = CGF.getContext().getPointerType(Ty);
4816 if (isAllocatableDecl(VD))
4817 Ty = CGF.getContext().getPointerType(Ty);
4818 Address PrivatePtr = CGF.CreateMemTemp(
4819 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4820 auto Result = UntiedLocalVars.insert(
4821 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4822 // If key exists update in place.
4823 if (Result.second == false)
4824 *Result.first = std::make_pair(
4825 VD, std::make_pair(PrivatePtr, Address::invalid()));
4826 CallArgs.push_back(PrivatePtr.getPointer());
4827 ParamTypes.push_back(PrivatePtr.getType());
4828 }
4829 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4830 ParamTypes, /*isVarArg=*/false);
4831 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832 CopyFn, CopyFnTy->getPointerTo());
4833 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
4834 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
4835 for (const auto &Pair : LastprivateDstsOrigs) {
4836 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
4837 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
4838 /*RefersToEnclosingVariableOrCapture=*/
4839 CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
4840 Pair.second->getType(), VK_LValue,
4841 Pair.second->getExprLoc());
4842 Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
4843 }
4844 for (const auto &Pair : PrivatePtrs) {
4845 Address Replacement = Address(
4846 CGF.Builder.CreateLoad(Pair.second),
4847 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4848 CGF.getContext().getDeclAlign(Pair.first));
4849 Scope.addPrivate(Pair.first, Replacement);
4850 if (auto *DI = CGF.getDebugInfo())
4851 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4852 (void)DI->EmitDeclareOfAutoVariable(
4853 Pair.first, Pair.second.getPointer(), CGF.Builder,
4854 /*UsePointerValue*/ true);
4855 }
4856 // Adjust mapping for internal locals by mapping actual memory instead of
4857 // a pointer to this memory.
4858 for (auto &Pair : UntiedLocalVars) {
4859 QualType VDType = Pair.first->getType().getNonReferenceType();
4860 if (Pair.first->getType()->isLValueReferenceType())
4861 VDType = CGF.getContext().getPointerType(VDType);
4862 if (isAllocatableDecl(Pair.first)) {
4863 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4864 Address Replacement(
4865 Ptr,
4866 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
4867 CGF.getPointerAlign());
4868 Pair.second.first = Replacement;
4869 Ptr = CGF.Builder.CreateLoad(Replacement);
4870 Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
4871 CGF.getContext().getDeclAlign(Pair.first));
4872 Pair.second.second = Replacement;
4873 } else {
4874 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4875 Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
4876 CGF.getContext().getDeclAlign(Pair.first));
4877 Pair.second.first = Replacement;
4878 }
4879 }
4880 }
4881 if (Data.Reductions) {
4882 OMPPrivateScope FirstprivateScope(CGF);
4883 for (const auto &Pair : FirstprivatePtrs) {
4884 Address Replacement(
4885 CGF.Builder.CreateLoad(Pair.second),
4886 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
4887 CGF.getContext().getDeclAlign(Pair.first));
4888 FirstprivateScope.addPrivate(Pair.first, Replacement);
4889 }
4890 (void)FirstprivateScope.Privatize();
4891 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
4892 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
4893 Data.ReductionCopies, Data.ReductionOps);
4894 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
4895 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
4896 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
4897 RedCG.emitSharedOrigLValue(CGF, Cnt);
4898 RedCG.emitAggregateType(CGF, Cnt);
4899 // FIXME: This must removed once the runtime library is fixed.
4900 // Emit required threadprivate variables for
4901 // initializer/combiner/finalizer.
4902 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4903 RedCG, Cnt);
4904 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4905 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4906 Replacement =
4907 Address(CGF.EmitScalarConversion(
4908 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4909 CGF.getContext().getPointerType(
4910 Data.ReductionCopies[Cnt]->getType()),
4911 Data.ReductionCopies[Cnt]->getExprLoc()),
4912 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
4913 Replacement.getAlignment());
4914 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4915 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4916 }
4917 }
4918 // Privatize all private variables except for in_reduction items.
4919 (void)Scope.Privatize();
4920 SmallVector<const Expr *, 4> InRedVars;
4921 SmallVector<const Expr *, 4> InRedPrivs;
4922 SmallVector<const Expr *, 4> InRedOps;
4923 SmallVector<const Expr *, 4> TaskgroupDescriptors;
4924 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
4925 auto IPriv = C->privates().begin();
4926 auto IRed = C->reduction_ops().begin();
4927 auto ITD = C->taskgroup_descriptors().begin();
4928 for (const Expr *Ref : C->varlists()) {
4929 InRedVars.emplace_back(Ref);
4930 InRedPrivs.emplace_back(*IPriv);
4931 InRedOps.emplace_back(*IRed);
4932 TaskgroupDescriptors.emplace_back(*ITD);
4933 std::advance(IPriv, 1);
4934 std::advance(IRed, 1);
4935 std::advance(ITD, 1);
4936 }
4937 }
4938 // Privatize in_reduction items here, because taskgroup descriptors must be
4939 // privatized earlier.
4940 OMPPrivateScope InRedScope(CGF);
4941 if (!InRedVars.empty()) {
4942 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
4943 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
4944 RedCG.emitSharedOrigLValue(CGF, Cnt);
4945 RedCG.emitAggregateType(CGF, Cnt);
4946 // The taskgroup descriptor variable is always implicit firstprivate and
4947 // privatized already during processing of the firstprivates.
4948 // FIXME: This must removed once the runtime library is fixed.
4949 // Emit required threadprivate variables for
4950 // initializer/combiner/finalizer.
4951 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
4952 RedCG, Cnt);
4953 llvm::Value *ReductionsPtr;
4954 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
4955 ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
4956 TRExpr->getExprLoc());
4957 } else {
4958 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4959 }
4960 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
4961 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
4962 Replacement = Address(
4963 CGF.EmitScalarConversion(
4964 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
4965 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
4966 InRedPrivs[Cnt]->getExprLoc()),
4967 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
4968 Replacement.getAlignment());
4969 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
4970 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
4971 }
4972 }
4973 (void)InRedScope.Privatize();
4974
4975 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4976 UntiedLocalVars);
4977 Action.Enter(CGF);
4978 BodyGen(CGF);
4979 };
4980 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
4981 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
4982 Data.NumberOfParts);
4983 OMPLexicalScope Scope(*this, S, std::nullopt,
4984 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
4985 !isOpenMPSimdDirective(S.getDirectiveKind()));
4986 TaskGen(*this, OutlinedFn, Data);
4987}
4988
4989static ImplicitParamDecl *
4990createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
4991 QualType Ty, CapturedDecl *CD,
4992 SourceLocation Loc) {
4993 auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4994 ImplicitParamDecl::Other);
4995 auto *OrigRef = DeclRefExpr::Create(
4996 C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
4997 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
4998 auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
4999 ImplicitParamDecl::Other);
5000 auto *PrivateRef = DeclRefExpr::Create(
5001 C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
5002 /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
5003 QualType ElemType = C.getBaseElementType(Ty);
5004 auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
5005 ImplicitParamDecl::Other);
5006 auto *InitRef = DeclRefExpr::Create(
5007 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
5008 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
5009 PrivateVD->setInitStyle(VarDecl::CInit);
5010 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
5011 InitRef, /*BasePath=*/nullptr,
5012 VK_PRValue, FPOptionsOverride()));
5013 Data.FirstprivateVars.emplace_back(OrigRef);
5014 Data.FirstprivateCopies.emplace_back(PrivateRef);
5015 Data.FirstprivateInits.emplace_back(InitRef);
5016 return OrigVD;
5017}
5018
5019void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5020 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5021 OMPTargetDataInfo &InputInfo) {
5022 // Emit outlined function for task construct.
5023 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5024 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5025 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5026 auto I = CS->getCapturedDecl()->param_begin();
5027 auto PartId = std::next(I);
5028 auto TaskT = std::next(I, 4);
5029 OMPTaskDataTy Data;
5030 // The task is not final.
5031 Data.Final.setInt(/*IntVal=*/false);
5032 // Get list of firstprivate variables.
5033 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5034 auto IRef = C->varlist_begin();
5035 auto IElemInitRef = C->inits().begin();
5036 for (auto *IInit : C->private_copies()) {
5037 Data.FirstprivateVars.push_back(*IRef);
5038 Data.FirstprivateCopies.push_back(IInit);
5039 Data.FirstprivateInits.push_back(*IElemInitRef);
5040 ++IRef;
5041 ++IElemInitRef;
5042 }
5043 }
5044 SmallVector<const Expr *, 4> LHSs;
5045 SmallVector<const Expr *, 4> RHSs;
5046 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5047 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5048 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5049 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5050 Data.ReductionOps.append(C->reduction_ops().begin(),
5051 C->reduction_ops().end());
5052 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5053 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5054 }
5055 OMPPrivateScope TargetScope(*this);
5056 VarDecl *BPVD = nullptr;
5057 VarDecl *PVD = nullptr;
5058 VarDecl *SVD = nullptr;
5059 VarDecl *MVD = nullptr;
5060 if (InputInfo.NumberOfTargetItems > 0) {
5061 auto *CD = CapturedDecl::Create(
5062 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5063 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5064 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5065 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
5066 /*IndexTypeQuals=*/0);
5067 BPVD = createImplicitFirstprivateForType(
5068 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5069 PVD = createImplicitFirstprivateForType(
5070 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5071 QualType SizesType = getContext().getConstantArrayType(
5072 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5073 ArrSize, nullptr, ArrayType::Normal,
5074 /*IndexTypeQuals=*/0);
5075 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
5076 S.getBeginLoc());
5077 TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
5078 TargetScope.addPrivate(PVD, InputInfo.PointersArray);
5079 TargetScope.addPrivate(SVD, InputInfo.SizesArray);
5080 // If there is no user-defined mapper, the mapper array will be nullptr. In
5081 // this case, we don't need to privatize it.
5082 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5083 InputInfo.MappersArray.getPointer())) {
5084 MVD = createImplicitFirstprivateForType(
5085 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
5086 TargetScope.addPrivate(MVD, InputInfo.MappersArray);
5087 }
5088 }
5089 (void)TargetScope.Privatize();
5090 buildDependences(S, Data);
5091 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5092 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5093 // Set proper addresses for generated private copies.
5094 OMPPrivateScope Scope(CGF);
5095 if (!Data.FirstprivateVars.empty()) {
5096 enum { PrivatesParam = 2, CopyFnParam = 3 };
5097 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5098 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
5099 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
5100 CS->getCapturedDecl()->getParam(PrivatesParam)));
5101 // Map privates.
5102 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5103 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5104 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5105 CallArgs.push_back(PrivatesPtr);
5106 ParamTypes.push_back(PrivatesPtr->getType());
5107 for (const Expr *E : Data.FirstprivateVars) {
5108 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5109 Address PrivatePtr =
5110 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
5111 ".firstpriv.ptr.addr");
5112 PrivatePtrs.emplace_back(VD, PrivatePtr);
5113 CallArgs.push_back(PrivatePtr.getPointer());
5114 ParamTypes.push_back(PrivatePtr.getType());
5115 }
5116 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
5117 ParamTypes, /*isVarArg=*/false);
5118 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5119 CopyFn, CopyFnTy->getPointerTo());
5120 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5121 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
5122 for (const auto &Pair : PrivatePtrs) {
5123 Address Replacement(
5124 CGF.Builder.CreateLoad(Pair.second),
5125 CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
5126 CGF.getContext().getDeclAlign(Pair.first));
5127 Scope.addPrivate(Pair.first, Replacement);
5128 }
5129 }
5130 CGF.processInReduction(S, Data, CGF, CS, Scope);
5131 if (InputInfo.NumberOfTargetItems > 0) {
5132 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5133 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
5134 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5135 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
5136 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5137 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
5138 // If MVD is nullptr, the mapper array is not privatized
5139 if (MVD)
5140 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5141 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
5142 }
5143
5144 Action.Enter(CGF);
5145 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5146 BodyGen(CGF);
5147 };
5148 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5149 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5150 Data.NumberOfParts);
5151 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5152 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5153 getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
5154 SourceLocation());
5155 CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
5156 SharedsTy, CapturedStruct, &IfCond, Data);
5157}
5158
5159void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5160 OMPTaskDataTy &Data,
5161 CodeGenFunction &CGF,
5162 const CapturedStmt *CS,
5163 OMPPrivateScope &Scope) {
5164 if (Data.Reductions) {
5165 OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5166 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5167 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5168 Data.ReductionCopies, Data.ReductionOps);
5169 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5170 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
5171 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5172 RedCG.emitSharedOrigLValue(CGF, Cnt);
5173 RedCG.emitAggregateType(CGF, Cnt);
5174 // FIXME: This must removed once the runtime library is fixed.
5175 // Emit required threadprivate variables for
5176 // initializer/combiner/finalizer.
5177 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5178 RedCG, Cnt);
5179 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5180 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5181 Replacement =
5182 Address(CGF.EmitScalarConversion(
5183 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5184 CGF.getContext().getPointerType(
5185 Data.ReductionCopies[Cnt]->getType()),
5186 Data.ReductionCopies[Cnt]->getExprLoc()),
5187 CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
5188 Replacement.getAlignment());
5189 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5190 Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5191 }
5192 }
5193 (void)Scope.Privatize();
5194 SmallVector<const Expr *, 4> InRedVars;
5195 SmallVector<const Expr *, 4> InRedPrivs;
5196 SmallVector<const Expr *, 4> InRedOps;
5197 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5198 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5199 auto IPriv = C->privates().begin();
5200 auto IRed = C->reduction_ops().begin();
5201 auto ITD = C->taskgroup_descriptors().begin();
5202 for (const Expr *Ref : C->varlists()) {
5203 InRedVars.emplace_back(Ref);
5204 InRedPrivs.emplace_back(*IPriv);
5205 InRedOps.emplace_back(*IRed);
5206 TaskgroupDescriptors.emplace_back(*ITD);
5207 std::advance(IPriv, 1);
5208 std::advance(IRed, 1);
5209 std::advance(ITD, 1);
5210 }
5211 }
5212 OMPPrivateScope InRedScope(CGF);
5213 if (!InRedVars.empty()) {
5214 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5215 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5216 RedCG.emitSharedOrigLValue(CGF, Cnt);
5217 RedCG.emitAggregateType(CGF, Cnt);
5218 // FIXME: This must removed once the runtime library is fixed.
5219 // Emit required threadprivate variables for
5220 // initializer/combiner/finalizer.
5221 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
5222 RedCG, Cnt);
5223 llvm::Value *ReductionsPtr;
5224 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5225 ReductionsPtr =
5226 CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
5227 } else {
5228 ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5229 }
5230 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5231 CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
5232 Replacement = Address(
5233 CGF.EmitScalarConversion(
5234 Replacement.getPointer(), CGF.getContext().VoidPtrTy,
5235 CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
5236 InRedPrivs[Cnt]->getExprLoc()),
5237 CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
5238 Replacement.getAlignment());
5239 Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
5240 InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
5241 }
5242 }
5243 (void)InRedScope.Privatize();
5244}
5245
5246void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5247 // Emit outlined function for task construct.
5248 const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
5249 Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
5250 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
5251 const Expr *IfCond = nullptr;
5252 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5253 if (C->getNameModifier() == OMPD_unknown ||
5254 C->getNameModifier() == OMPD_task) {
5255 IfCond = C->getCondition();
5256 break;
5257 }
5258 }
5259
5260 OMPTaskDataTy Data;
5261 // Check if we should emit tied or untied task.
5262 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5263 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5264 CGF.EmitStmt(CS->getCapturedStmt());
5265 };
5266 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5267 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5268 const OMPTaskDataTy &Data) {
5269 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
5270 SharedsTy, CapturedStruct, IfCond,
5271 Data);
5272 };
5273 auto LPCRegion =
5274 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
5275 EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
5276}
5277
5278void CodeGenFunction::EmitOMPTaskyieldDirective(
5279 const OMPTaskyieldDirective &S) {
5280 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
5281}
5282
5283void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5284 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5285 Expr *ME = MC ? MC->getMessageString() : nullptr;
5286 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5287 bool IsFatal = false;
5288 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5289 IsFatal = true;
5290 CGM.getOpenMPRuntime().emitErrorCall(*this, S.getBeginLoc(), ME, IsFatal);
5291}
5292
5293void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5294 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5295}
5296
5297void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5298 OMPTaskDataTy Data;
5299 // Build list of dependences
5300 buildDependences(S, Data);
5301 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5302 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
5303}
5304
5305bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5306 return T.clauses().empty();
5307}
5308
5309void CodeGenFunction::EmitOMPTaskgroupDirective(
5310 const OMPTaskgroupDirective &S) {
5311 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5312 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S)) {
5313 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5314 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5315 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5316 AllocaInsertPt->getIterator());
5317
5318 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5319 InsertPointTy CodeGenIP) {
5320 Builder.restoreIP(CodeGenIP);
5321 EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5322 };
5323 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5324 if (!CapturedStmtInfo)
5325 CapturedStmtInfo = &CapStmtInfo;
5326 Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
5327 return;
5328 }
5329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5330 Action.Enter(CGF);
5331 if (const Expr *E = S.getReductionRef()) {
5332 SmallVector<const Expr *, 4> LHSs;
5333 SmallVector<const Expr *, 4> RHSs;
5334 OMPTaskDataTy Data;
5335 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5336 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5337 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5338 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5339 Data.ReductionOps.append(C->reduction_ops().begin(),
5340 C->reduction_ops().end());
5341 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5342 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5343 }
5344 llvm::Value *ReductionDesc =
5345 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
5346 LHSs, RHSs, Data);
5347 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5348 CGF.EmitVarDecl(*VD);
5349 CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
5350 /*Volatile=*/false, E->getType());
5351 }
5352 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
5353 };
5354 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
5355}
5356
5357void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5358 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5359 ? llvm::AtomicOrdering::NotAtomic
5360 : llvm::AtomicOrdering::AcquireRelease;
5361 CGM.getOpenMPRuntime().emitFlush(
5362 *this,
5363 [&S]() -> ArrayRef<const Expr *> {
5364 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5365 return llvm::ArrayRef(FlushClause->varlist_begin(),
5366 FlushClause->varlist_end());
5367 return std::nullopt;
5368 }(),
5369 S.getBeginLoc(), AO);
5370}
5371
5372void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5373 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5374 LValue DOLVal = EmitLValue(DO->getDepobj());
5375 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5376 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5377 DC->getModifier());
5378 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5379 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5380 *this, Dependencies, DC->getBeginLoc());
5381 EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
5382 return;
5383 }
5384 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5385 CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
5386 return;
5387 }
5388 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5389 CGM.getOpenMPRuntime().emitUpdateClause(
5390 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
5391 return;
5392 }
5393}
5394
5395void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5396 if (!OMPParentLoopDirectiveForScan)
5397 return;
5398 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5399 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5400 SmallVector<const Expr *, 4> Shareds;
5401 SmallVector<const Expr *, 4> Privates;
5402 SmallVector<const Expr *, 4> LHSs;
5403 SmallVector<const Expr *, 4> RHSs;
5404 SmallVector<const Expr *, 4> ReductionOps;
5405 SmallVector<const Expr *, 4> CopyOps;
5406 SmallVector<const Expr *, 4> CopyArrayTemps;
5407 SmallVector<const Expr *, 4> CopyArrayElems;
5408 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5409 if (C->getModifier() != OMPC_REDUCTION_inscan)
5410 continue;
5411 Shareds.append(C->varlist_begin(), C->varlist_end());
5412 Privates.append(C->privates().begin(), C->privates().end());
5413 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5414 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5415 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
5416 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
5417 CopyArrayTemps.append(C->copy_array_temps().begin(),
5418 C->copy_array_temps().end());
5419 CopyArrayElems.append(C->copy_array_elems().begin(),
5420 C->copy_array_elems().end());
5421 }
5422 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5423 (getLangOpts().OpenMPSimd &&
5424 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5425 // For simd directive and simd-based directives in simd only mode, use the
5426 // following codegen:
5427 // int x = 0;
5428 // #pragma omp simd reduction(inscan, +: x)
5429 // for (..) {
5430 // <first part>
5431 // #pragma omp scan inclusive(x)
5432 // <second part>
5433 // }
5434 // is transformed to:
5435 // int x = 0;
5436 // for (..) {
5437 // int x_priv = 0;
5438 // <first part>
5439 // x = x_priv + x;
5440 // x_priv = x;
5441 // <second part>
5442 // }
5443 // and
5444 // int x = 0;
5445 // #pragma omp simd reduction(inscan, +: x)
5446 // for (..) {
5447 // <first part>
5448 // #pragma omp scan exclusive(x)
5449 // <second part>
5450 // }
5451 // to
5452 // int x = 0;
5453 // for (..) {
5454 // int x_priv = 0;
5455 // <second part>
5456 // int temp = x;
5457 // x = x_priv + x;
5458 // x_priv = temp;
5459 // <first part>
5460 // }
5461 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
5462 EmitBranch(IsInclusive
5463 ? OMPScanReduce
5464 : BreakContinueStack.back().ContinueBlock.getBlock());
5465 EmitBlock(OMPScanDispatch);
5466 {
5467 // New scope for correct construction/destruction of temp variables for
5468 // exclusive scan.
5469 LexicalScope Scope(*this, S.getSourceRange());
5470 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5471 EmitBlock(OMPScanReduce);
5472 if (!IsInclusive) {
5473 // Create temp var and copy LHS value to this temp value.
5474 // TMP = LHS;
5475 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5476 const Expr *PrivateExpr = Privates[I];
5477 const Expr *TempExpr = CopyArrayTemps[I];
5478 EmitAutoVarDecl(
5479 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
5480 LValue DestLVal = EmitLValue(TempExpr);
5481 LValue SrcLVal = EmitLValue(LHSs[I]);
5482 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5483 SrcLVal.getAddress(*this),
5484 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5485 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5486 CopyOps[I]);
5487 }
5488 }
5489 CGM.getOpenMPRuntime().emitReduction(
5490 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5491 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5492 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5493 const Expr *PrivateExpr = Privates[I];
5494 LValue DestLVal;
5495 LValue SrcLVal;
5496 if (IsInclusive) {
5497 DestLVal = EmitLValue(RHSs[I]);
5498 SrcLVal = EmitLValue(LHSs[I]);
5499 } else {
5500 const Expr *TempExpr = CopyArrayTemps[I];
5501 DestLVal = EmitLValue(RHSs[I]);
5502 SrcLVal = EmitLValue(TempExpr);
5503 }
5504 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5505 SrcLVal.getAddress(*this),
5506 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5507 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5508 CopyOps[I]);
5509 }
5510 }
5511 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5512 OMPScanExitBlock = IsInclusive
5513 ? BreakContinueStack.back().ContinueBlock.getBlock()
5514 : OMPScanReduce;
5515 EmitBlock(OMPAfterScanBlock);
5516 return;
5517 }
5518 if (!IsInclusive) {
5519 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5520 EmitBlock(OMPScanExitBlock);
5521 }
5522 if (OMPFirstScanLoop) {
5523 // Emit buffer[i] = red; at the end of the input phase.
5524 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5525 .getIterationVariable()
5526 ->IgnoreParenImpCasts();
5527 LValue IdxLVal = EmitLValue(IVExpr);
5528 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5529 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5530 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5531 const Expr *PrivateExpr = Privates[I];
5532 const Expr *OrigExpr = Shareds[I];
5533 const Expr *CopyArrayElem = CopyArrayElems[I];
5534 OpaqueValueMapping IdxMapping(
5535 *this,
5536 cast<OpaqueValueExpr>(
5537 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5538 RValue::get(IdxVal));
5539 LValue DestLVal = EmitLValue(CopyArrayElem);
5540 LValue SrcLVal = EmitLValue(OrigExpr);
5541 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5542 SrcLVal.getAddress(*this),
5543 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5544 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5545 CopyOps[I]);
5546 }
5547 }
5548 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5549 if (IsInclusive) {
5550 EmitBlock(OMPScanExitBlock);
5551 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
5552 }
5553 EmitBlock(OMPScanDispatch);
5554 if (!OMPFirstScanLoop) {
5555 // Emit red = buffer[i]; at the entrance to the scan phase.
5556 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
5557 .getIterationVariable()
5558 ->IgnoreParenImpCasts();
5559 LValue IdxLVal = EmitLValue(IVExpr);
5560 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
5561 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5562 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5563 if (!IsInclusive) {
5564 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5565 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5566 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5567 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5568 EmitBlock(ContBB);
5569 // Use idx - 1 iteration for exclusive scan.
5570 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5571 }
5572 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5573 const Expr *PrivateExpr = Privates[I];
5574 const Expr *OrigExpr = Shareds[I];
5575 const Expr *CopyArrayElem = CopyArrayElems[I];
5576 OpaqueValueMapping IdxMapping(
5577 *this,
5578 cast<OpaqueValueExpr>(
5579 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5580 RValue::get(IdxVal));
5581 LValue SrcLVal = EmitLValue(CopyArrayElem);
5582 LValue DestLVal = EmitLValue(OrigExpr);
5583 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5584 SrcLVal.getAddress(*this),
5585 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5586 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5587 CopyOps[I]);
5588 }
5589 if (!IsInclusive) {
5590 EmitBlock(ExclusiveExitBB);
5591 }
5592 }
5593 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5594 : OMPAfterScanBlock);
5595 EmitBlock(OMPAfterScanBlock);
5596}
5597
5598void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5599 const CodeGenLoopTy &CodeGenLoop,
5600 Expr *IncExpr) {
5601 // Emit the loop iteration variable.
5602 const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
5603 const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
5604 EmitVarDecl(*IVDecl);
5605
5606 // Emit the iterations count variable.
5607 // If it is not a variable, Sema decided to calculate iterations count on each
5608 // iteration (e.g., it is foldable into a constant).
5609 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
5610 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
5611 // Emit calculation of the iterations count.
5612 EmitIgnoredExpr(S.getCalcLastIteration());
5613 }
5614
5615 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5616
5617 bool HasLastprivateClause = false;
5618 // Check pre-condition.
5619 {
5620 OMPLoopScope PreInitScope(*this, S);
5621 // Skip the entire loop if we don't meet the precondition.
5622 // If the condition constant folds and can be elided, avoid emitting the
5623 // whole loop.
5624 bool CondConstant;
5625 llvm::BasicBlock *ContBlock = nullptr;
5626 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
5627 if (!CondConstant)
5628 return;
5629 } else {
5630 llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
5631 ContBlock = createBasicBlock("omp.precond.end");
5632 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
5633 getProfileCount(&S));
5634 EmitBlock(ThenBlock);
5635 incrementProfileCounter(&S);
5636 }
5637
5638 emitAlignedClause(*this, S);
5639 // Emit 'then' code.
5640 {
5641 // Emit helper vars inits.
5642
5643 LValue LB = EmitOMPHelperVar(
5644 *this, cast<DeclRefExpr>(
5645 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5646 ? S.getCombinedLowerBoundVariable()
5647 : S.getLowerBoundVariable())));
5648 LValue UB = EmitOMPHelperVar(
5649 *this, cast<DeclRefExpr>(
5650 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5651 ? S.getCombinedUpperBoundVariable()
5652 : S.getUpperBoundVariable())));
5653 LValue ST =
5654 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
5655 LValue IL =
5656 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
5657
5658 OMPPrivateScope LoopScope(*this);
5659 if (EmitOMPFirstprivateClause(S, LoopScope)) {
5660 // Emit implicit barrier to synchronize threads and avoid data races
5661 // on initialization of firstprivate variables and post-update of
5662 // lastprivate variables.
5663 CGM.getOpenMPRuntime().emitBarrierCall(
5664 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5665 /*ForceSimpleCall=*/true);
5666 }
5667 EmitOMPPrivateClause(S, LoopScope);
5668 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5669 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5670 !isOpenMPTeamsDirective(S.getDirectiveKind()))
5671 EmitOMPReductionClauseInit(S, LoopScope);
5672 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5673 EmitOMPPrivateLoopCounters(S, LoopScope);
5674 (void)LoopScope.Privatize();
5675 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5676 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5677
5678 // Detect the distribute schedule kind and chunk.
5679 llvm::Value *Chunk = nullptr;
5680 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5681 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5682 ScheduleKind = C->getDistScheduleKind();
5683 if (const Expr *Ch = C->getChunkSize()) {
5684 Chunk = EmitScalarExpr(Ch);
5685 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
5686 S.getIterationVariable()->getType(),
5687 S.getBeginLoc());
5688 }
5689 } else {
5690 // Default behaviour for dist_schedule clause.
5691 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5692 *this, S, ScheduleKind, Chunk);
5693 }
5694 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5695 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5696
5697 // OpenMP [2.10.8, distribute Construct, Description]
5698 // If dist_schedule is specified, kind must be static. If specified,
5699 // iterations are divided into chunks of size chunk_size, chunks are
5700 // assigned to the teams of the league in a round-robin fashion in the
5701 // order of the team number. When no chunk_size is specified, the
5702 // iteration space is divided into chunks that are approximately equal
5703 // in size, and at most one chunk is distributed to each team of the
5704 // league. The size of the chunks is unspecified in this case.
5705 bool StaticChunked =
5706 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5707 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5708 if (RT.isStaticNonchunked(ScheduleKind,
5709 /* Chunked */ Chunk != nullptr) ||
5710 StaticChunked) {
5711 CGOpenMPRuntime::StaticRTInput StaticInit(
5712 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(*this),
5713 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5714 StaticChunked ? Chunk : nullptr);
5715 RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
5716 StaticInit);
5717 JumpDest LoopExit =
5718 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
5719 // UB = min(UB, GlobalUB);
5720 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5721 ? S.getCombinedEnsureUpperBound()
5722 : S.getEnsureUpperBound());
5723 // IV = LB;
5724 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5725 ? S.getCombinedInit()
5726 : S.getInit());
5727
5728 const Expr *Cond =
5729 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5730 ? S.getCombinedCond()
5731 : S.getCond();
5732
5733 if (StaticChunked)
5734 Cond = S.getCombinedDistCond();
5735
5736 // For static unchunked schedules generate:
5737 //
5738 // 1. For distribute alone, codegen
5739 // while (idx <= UB) {
5740 // BODY;
5741 // ++idx;
5742 // }
5743 //
5744 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5745 // while (idx <= UB) {
5746 // <CodeGen rest of pragma>(LB, UB);
5747 // idx += ST;
5748 // }
5749 //
5750 // For static chunk one schedule generate:
5751 //
5752 // while (IV <= GlobalUB) {
5753 // <CodeGen rest of pragma>(LB, UB);
5754 // LB += ST;
5755 // UB += ST;
5756 // UB = min(UB, GlobalUB);
5757 // IV = LB;
5758 // }
5759 //
5760 emitCommonSimdLoop(
5761 *this, S,
5762 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5763 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5764 CGF.EmitOMPSimdInit(S);
5765 },
5766 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5767 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5768 CGF.EmitOMPInnerLoop(
5769 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5770 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5771 CodeGenLoop(CGF, S, LoopExit);
5772 },
5773 [&S, StaticChunked](CodeGenFunction &CGF) {
5774 if (StaticChunked) {
5775 CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
5776 CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
5777 CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
5778 CGF.EmitIgnoredExpr(S.getCombinedInit());
5779 }
5780 });
5781 });
5782 EmitBlock(LoopExit.getBlock());
5783 // Tell the runtime we are done.
5784 RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5785 } else {
5786 // Emit the outer loop, which requests its work chunk [LB..UB] from
5787 // runtime and runs the inner loop to process it.
5788 const OMPLoopArguments LoopArguments = {
5789 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
5790 IL.getAddress(*this), Chunk};
5791 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
5792 CodeGenLoop);
5793 }
5794 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5795 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5796 return CGF.Builder.CreateIsNotNull(
5797 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5798 });
5799 }
5800 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5801 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5802 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5803 EmitOMPReductionClauseFinal(S, OMPD_simd);
5804 // Emit post-update of the reduction variables if IsLastIter != 0.
5805 emitPostUpdateForReductionClause(
5806 *this, S, [IL, &S](CodeGenFunction &CGF) {
5807 return CGF.Builder.CreateIsNotNull(
5808 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5809 });
5810 }
5811 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5812 if (HasLastprivateClause) {
5813 EmitOMPLastprivateClauseFinal(
5814 S, /*NoFinals=*/false,
5815 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
5816 }
5817 }
5818
5819 // We're now done with the loop, so jump to the continuation block.
5820 if (ContBlock) {
5821 EmitBranch(ContBlock);
5822 EmitBlock(ContBlock, true);
5823 }
5824 }
5825}
5826
5827void CodeGenFunction::EmitOMPDistributeDirective(
5828 const OMPDistributeDirective &S) {
5829 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5830 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
5831 };
5832 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5833 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5834}
5835
5836static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5837 const CapturedStmt *S,
5838 SourceLocation Loc) {
5839 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5840 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5841 CGF.CapturedStmtInfo = &CapStmtInfo;
5842 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
5843 Fn->setDoesNotRecurse();
5844 return Fn;
5845}
5846
5847template <typename T>
5848static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5849 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5850 llvm::OpenMPIRBuilder &OMPBuilder) {
5851
5852 unsigned NumLoops = C->getNumLoops();
5853 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5854 /*DestWidth=*/64, /*Signed=*/1);
5855 llvm::SmallVector<llvm::Value *> StoreValues;
5856 for (unsigned I = 0; I < NumLoops; I++) {
5857 const Expr *CounterVal = C->getLoopData(I);
5858 assert(CounterVal);
5859 llvm::Value *StoreValue = CGF.EmitScalarConversion(
5860 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
5861 CounterVal->getExprLoc());
5862 StoreValues.emplace_back(StoreValue);
5863 }
5864 OMPDoacrossKind<T> ODK;
5865 bool IsDependSource = ODK.isSource(C);
5866 CGF.Builder.restoreIP(
5867 OMPBuilder.createOrderedDepend(CGF.Builder, AllocaIP, NumLoops,
5868 StoreValues, ".cnt.addr", IsDependSource));
5869}
5870
5871void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5872 if (CGM.getLangOpts().OpenMPIRBuilder) {
5873 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5874 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5875
5876 if (S.hasClausesOfKind<OMPDependClause>() ||
5877 S.hasClausesOfKind<OMPDoacrossClause>()) {
5878 // The ordered directive with depend clause.
5879 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
5880 "ordered depend|doacross construct.");
5881 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5882 AllocaInsertPt->getIterator());
5883 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5884 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5885 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5886 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5887 } else {
5888 // The ordered directive with threads or simd clause, or without clause.
5889 // Without clause, it behaves as if the threads clause is specified.
5890 const auto *C = S.getSingleClause<OMPSIMDClause>();
5891
5892 auto FiniCB = [this](InsertPointTy IP) {
5893 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
5894 };
5895
5896 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5897 InsertPointTy CodeGenIP) {
5898 Builder.restoreIP(CodeGenIP);
5899
5900 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5901 if (C) {
5902 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5903 Builder, /*CreateBranch=*/false, ".ordered.after");
5904 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5905 GenerateOpenMPCapturedVars(*CS, CapturedVars);
5906 llvm::Function *OutlinedFn =
5907 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5908 assert(S.getBeginLoc().isValid() &&
5909 "Outlined function call location must be valid.");
5910 ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
5911 OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, *FiniBB,
5912 OutlinedFn, CapturedVars);
5913 } else {
5914 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5915 *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
5916 }
5917 };
5918
5919 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5920 Builder.restoreIP(
5921 OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
5922 }
5923 return;
5924 }
5925
5926 if (S.hasClausesOfKind<OMPDependClause>()) {
5927 assert(!S.hasAssociatedStmt() &&
5928 "No associated statement must be in ordered depend construct.");
5929 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5930 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5931 return;
5932 }
5933 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
5934 assert(!S.hasAssociatedStmt() &&
5935 "No associated statement must be in ordered doacross construct.");
5936 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5937 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5938 return;
5939 }
5940 const auto *C = S.getSingleClause<OMPSIMDClause>();
5941 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5942 PrePostActionTy &Action) {
5943 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5944 if (C) {
5945 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5946 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
5947 llvm::Function *OutlinedFn =
5948 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5949 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
5950 OutlinedFn, CapturedVars);
5951 } else {
5952 Action.Enter(CGF);
5953 CGF.EmitStmt(CS->getCapturedStmt());
5954 }
5955 };
5956 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5957 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
5958}
5959
5960static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5961 QualType SrcType, QualType DestType,
5962 SourceLocation Loc) {
5963 assert(CGF.hasScalarEvaluationKind(DestType) &&
5964 "DestType must have scalar evaluation kind.");
5965 assert(!Val.isAggregate() && "Must be a scalar or complex.");
5966 return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
5967 DestType, Loc)
5968 : CGF.EmitComplexToScalarConversion(
5969 Val.getComplexVal(), SrcType, DestType, Loc);
5970}
5971
5972static CodeGenFunction::ComplexPairTy
5973convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
5974 QualType DestType, SourceLocation Loc) {
5975 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
5976 "DestType must have complex evaluation kind.");
5977 CodeGenFunction::ComplexPairTy ComplexVal;
5978 if (Val.isScalar()) {
5979 // Convert the input element to the element type of the complex.
5980 QualType DestElementType =
5981 DestType->castAs<ComplexType>()->getElementType();
5982 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
5983 Val.getScalarVal(), SrcType, DestElementType, Loc);
5984 ComplexVal = CodeGenFunction::ComplexPairTy(
5985 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
5986 } else {
5987 assert(Val.isComplex() && "Must be a scalar or complex.");
5988 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
5989 QualType DestElementType =
5990 DestType->castAs<ComplexType>()->getElementType();
5991 ComplexVal.first = CGF.EmitScalarConversion(
5992 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
5993 ComplexVal.second = CGF.EmitScalarConversion(
5994 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
5995 }
5996 return ComplexVal;
5997}
5998
5999static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6000 LValue LVal, RValue RVal) {
6001 if (LVal.isGlobalReg())
6002 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
6003 else
6004 CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
6005}
6006
6007static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6008 llvm::AtomicOrdering AO, LValue LVal,
6009 SourceLocation Loc) {
6010 if (LVal.isGlobalReg())
6011 return CGF.EmitLoadOfLValue(LVal, Loc);
6012 return CGF.EmitAtomicLoad(
6013 LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
6014 LVal.isVolatile());
6015}
6016
6017void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6018 QualType RValTy, SourceLocation Loc) {
6019 switch (getEvaluationKind(LVal.getType())) {
6020 case TEK_Scalar:
6021 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
6022 *this, RVal, RValTy, LVal.getType(), Loc)),
6023 LVal);
6024 break;
6025 case TEK_Complex:
6026 EmitStoreOfComplex(
6027 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
6028 /*isInit=*/false);
6029 break;
6030 case TEK_Aggregate:
6031 llvm_unreachable("Must be a scalar or complex.");
6032 }
6033}
6034
6035static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6036 const Expr *X, const Expr *V,
6037 SourceLocation Loc) {
6038 // v = x;
6039 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6040 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6041 LValue XLValue = CGF.EmitLValue(X);
6042 LValue VLValue = CGF.EmitLValue(V);
6043 RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
6044 // OpenMP, 2.17.7, atomic Construct
6045 // If the read or capture clause is specified and the acquire, acq_rel, or
6046 // seq_cst clause is specified then the strong flush on exit from the atomic
6047 // operation is also an acquire flush.
6048 switch (AO) {
6049 case llvm::AtomicOrdering::Acquire:
6050 case llvm::AtomicOrdering::AcquireRelease:
6051 case llvm::AtomicOrdering::SequentiallyConsistent:
6052 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6053 llvm::AtomicOrdering::Acquire);
6054 break;
6055 case llvm::AtomicOrdering::Monotonic:
6056 case llvm::AtomicOrdering::Release:
6057 break;
6058 case llvm::AtomicOrdering::NotAtomic:
6059 case llvm::AtomicOrdering::Unordered:
6060 llvm_unreachable("Unexpected ordering.");
6061 }
6062 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
6063 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6064}
6065
6066static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6067 llvm::AtomicOrdering AO, const Expr *X,
6068 const Expr *E, SourceLocation Loc) {
6069 // x = expr;
6070 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6071 emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
6072 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6073 // OpenMP, 2.17.7, atomic Construct
6074 // If the write, update, or capture clause is specified and the release,
6075 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6076 // the atomic operation is also a release flush.
6077 switch (AO) {
6078 case llvm::AtomicOrdering::Release:
6079 case llvm::AtomicOrdering::AcquireRelease:
6080 case llvm::AtomicOrdering::SequentiallyConsistent:
6081 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6082 llvm::AtomicOrdering::Release);
6083 break;
6084 case llvm::AtomicOrdering::Acquire:
6085 case llvm::AtomicOrdering::Monotonic:
6086 break;
6087 case llvm::AtomicOrdering::NotAtomic:
6088 case llvm::AtomicOrdering::Unordered:
6089 llvm_unreachable("Unexpected ordering.");
6090 }
6091}
6092
6093static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6094 RValue Update,
6095 BinaryOperatorKind BO,
6096 llvm::AtomicOrdering AO,
6097 bool IsXLHSInRHSPart) {
6098 ASTContext &Context = CGF.getContext();
6099 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6100 // expression is simple and atomic is allowed for the given type for the
6101 // target platform.
6102 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6103 (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
6104 (Update.getScalarVal()->getType() !=
6105 X.getAddress(CGF).getElementType())) ||
6106 !Context.getTargetInfo().hasBuiltinAtomic(
6107 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
6108 return std::make_pair(false, RValue::get(nullptr));
6109
6110 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6111 if (T->isIntegerTy())
6112 return true;
6113
6114 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6115 return llvm::isPowerOf2_64(CGF.CGM.getDataLayout().getTypeStoreSize(T));
6116
6117 return false;
6118 };
6119
6120 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6121 !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
6122 return std::make_pair(false, RValue::get(nullptr));
6123
6124 bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
6125 llvm::AtomicRMWInst::BinOp RMWOp;
6126 switch (BO) {
6127 case BO_Add:
6128 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6129 break;
6130 case BO_Sub:
6131 if (!IsXLHSInRHSPart)
6132 return std::make_pair(false, RValue::get(nullptr));
6133 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6134 break;
6135 case BO_And:
6136 RMWOp = llvm::AtomicRMWInst::And;
6137 break;
6138 case BO_Or:
6139 RMWOp = llvm::AtomicRMWInst::Or;
6140 break;
6141 case BO_Xor:
6142 RMWOp = llvm::AtomicRMWInst::Xor;
6143 break;
6144 case BO_LT:
6145 if (IsInteger)
6146 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6147 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6148 : llvm::AtomicRMWInst::Max)
6149 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6150 : llvm::AtomicRMWInst::UMax);
6151 else
6152 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6153 : llvm::AtomicRMWInst::FMax;
6154 break;
6155 case BO_GT:
6156 if (IsInteger)
6157 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6158 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6159 : llvm::AtomicRMWInst::Min)
6160 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6161 : llvm::AtomicRMWInst::UMin);
6162 else
6163 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6164 : llvm::AtomicRMWInst::FMin;
6165 break;
6166 case BO_Assign:
6167 RMWOp = llvm::AtomicRMWInst::Xchg;
6168 break;
6169 case BO_Mul:
6170 case BO_Div:
6171 case BO_Rem:
6172 case BO_Shl:
6173 case BO_Shr:
6174 case BO_LAnd:
6175 case BO_LOr:
6176 return std::make_pair(false, RValue::get(nullptr));
6177 case BO_PtrMemD:
6178 case BO_PtrMemI:
6179 case BO_LE:
6180 case BO_GE:
6181 case BO_EQ:
6182 case BO_NE:
6183 case BO_Cmp:
6184 case BO_AddAssign:
6185 case BO_SubAssign:
6186 case BO_AndAssign:
6187 case BO_OrAssign:
6188 case BO_XorAssign:
6189 case BO_MulAssign:
6190 case BO_DivAssign:
6191 case BO_RemAssign:
6192 case BO_ShlAssign:
6193 case BO_ShrAssign:
6194 case BO_Comma:
6195 llvm_unreachable("Unsupported atomic update operation");
6196 }
6197 llvm::Value *UpdateVal = Update.getScalarVal();
6198 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
6199 if (IsInteger)
6200 UpdateVal = CGF.Builder.CreateIntCast(
6201 IC, X.getAddress(CGF).getElementType(),
6202 X.getType()->hasSignedIntegerRepresentation());
6203 else
6204 UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
6205 X.getAddress(CGF).getElementType());
6206 }
6207 llvm::Value *Res =
6208 CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
6209 return std::make_pair(true, RValue::get(Res));
6210}
6211
6212std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6213 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6214 llvm::AtomicOrdering AO, SourceLocation Loc,
6215 const llvm::function_ref<RValue(RValue)> CommonGen) {
6216 // Update expressions are allowed to have the following forms:
6217 // x binop= expr; -> xrval + expr;
6218 // x++, ++x -> xrval + 1;
6219 // x--, --x -> xrval - 1;
6220 // x = x binop expr; -> xrval binop expr
6221 // x = expr Op x; - > expr binop xrval;
6222 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
6223 if (!Res.first) {
6224 if (X.isGlobalReg()) {
6225 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6226 // 'xrval'.
6227 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
6228 } else {
6229 // Perform compare-and-swap procedure.
6230 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
6231 }
6232 }
6233 return Res;
6234}
6235
6236static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6237 llvm::AtomicOrdering AO, const Expr *X,
6238 const Expr *E, const Expr *UE,
6239 bool IsXLHSInRHSPart, SourceLocation Loc) {
6240 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6241 "Update expr in 'atomic update' must be a binary operator.");
6242 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6243 // Update expressions are allowed to have the following forms:
6244 // x binop= expr; -> xrval + expr;
6245 // x++, ++x -> xrval + 1;
6246 // x--, --x -> xrval - 1;
6247 // x = x binop expr; -> xrval binop expr
6248 // x = expr Op x; - > expr binop xrval;
6249 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6250 LValue XLValue = CGF.EmitLValue(X);
6251 RValue ExprRValue = CGF.EmitAnyExpr(E);
6252 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6253 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6254 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6255 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6256 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6257 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6258 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6259 return CGF.EmitAnyExpr(UE);
6260 };
6261 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6262 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6263 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6264 // OpenMP, 2.17.7, atomic Construct
6265 // If the write, update, or capture clause is specified and the release,
6266 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6267 // the atomic operation is also a release flush.
6268 switch (AO) {
6269 case llvm::AtomicOrdering::Release:
6270 case llvm::AtomicOrdering::AcquireRelease:
6271 case llvm::AtomicOrdering::SequentiallyConsistent:
6272 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6273 llvm::AtomicOrdering::Release);
6274 break;
6275 case llvm::AtomicOrdering::Acquire:
6276 case llvm::AtomicOrdering::Monotonic:
6277 break;
6278 case llvm::AtomicOrdering::NotAtomic:
6279 case llvm::AtomicOrdering::Unordered:
6280 llvm_unreachable("Unexpected ordering.");
6281 }
6282}
6283
6284static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6285 QualType SourceType, QualType ResType,
6286 SourceLocation Loc) {
6287 switch (CGF.getEvaluationKind(ResType)) {
6288 case TEK_Scalar:
6289 return RValue::get(
6290 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
6291 case TEK_Complex: {
6292 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
6293 return RValue::getComplex(Res.first, Res.second);
6294 }
6295 case TEK_Aggregate:
6296 break;
6297 }
6298 llvm_unreachable("Must be a scalar or complex.");
6299}
6300
6301static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6302 llvm::AtomicOrdering AO,
6303 bool IsPostfixUpdate, const Expr *V,
6304 const Expr *X, const Expr *E,
6305 const Expr *UE, bool IsXLHSInRHSPart,
6306 SourceLocation Loc) {
6307 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6308 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6309 RValue NewVVal;
6310 LValue VLValue = CGF.EmitLValue(V);
6311 LValue XLValue = CGF.EmitLValue(X);
6312 RValue ExprRValue = CGF.EmitAnyExpr(E);
6313 QualType NewVValType;
6314 if (UE) {
6315 // 'x' is updated with some additional value.
6316 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6317 "Update expr in 'atomic capture' must be a binary operator.");
6318 const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
6319 // Update expressions are allowed to have the following forms:
6320 // x binop= expr; -> xrval + expr;
6321 // x++, ++x -> xrval + 1;
6322 // x--, --x -> xrval - 1;
6323 // x = x binop expr; -> xrval binop expr
6324 // x = expr Op x; - > expr binop xrval;
6325 const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
6326 const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
6327 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6328 NewVValType = XRValExpr->getType();
6329 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6330 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6331 IsPostfixUpdate](RValue XRValue) {
6332 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6333 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6334 RValue Res = CGF.EmitAnyExpr(UE);
6335 NewVVal = IsPostfixUpdate ? XRValue : Res;
6336 return Res;
6337 };
6338 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6339 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
6340 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6341 if (Res.first) {
6342 // 'atomicrmw' instruction was generated.
6343 if (IsPostfixUpdate) {
6344 // Use old value from 'atomicrmw'.
6345 NewVVal = Res.second;
6346 } else {
6347 // 'atomicrmw' does not provide new value, so evaluate it using old
6348 // value of 'x'.
6349 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6350 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6351 NewVVal = CGF.EmitAnyExpr(UE);
6352 }
6353 }
6354 } else {
6355 // 'x' is simply rewritten with some 'expr'.
6356 NewVValType = X->getType().getNonReferenceType();
6357 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
6358 X->getType().getNonReferenceType(), Loc);
6359 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6360 NewVVal = XRValue;
6361 return ExprRValue;
6362 };
6363 // Try to perform atomicrmw xchg, otherwise simple exchange.
6364 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6365 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6366 Loc, Gen);
6367 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
6368 if (Res.first) {
6369 // 'atomicrmw' instruction was generated.
6370 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6371 }
6372 }
6373 // Emit post-update store to 'v' of old/new 'x' value.
6374 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
6375 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
6376 // OpenMP 5.1 removes the required flush for capture clause.
6377 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6378 // OpenMP, 2.17.7, atomic Construct
6379 // If the write, update, or capture clause is specified and the release,
6380 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6381 // the atomic operation is also a release flush.
6382 // If the read or capture clause is specified and the acquire, acq_rel, or
6383 // seq_cst clause is specified then the strong flush on exit from the atomic
6384 // operation is also an acquire flush.
6385 switch (AO) {
6386 case llvm::AtomicOrdering::Release:
6387 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6388 llvm::AtomicOrdering::Release);
6389 break;
6390 case llvm::AtomicOrdering::Acquire:
6391 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, std::nullopt, Loc,
6392 llvm::AtomicOrdering::Acquire);
6393 break;
6394 case llvm::AtomicOrdering::AcquireRelease:
6395 case llvm::AtomicOrdering::SequentiallyConsistent:
6396 CGF.CGM.getOpenMPRuntime().emitFlush(
6397 CGF, std::nullopt, Loc, llvm::AtomicOrdering::AcquireRelease);
6398 break;
6399 case llvm::AtomicOrdering::Monotonic:
6400 break;
6401 case llvm::AtomicOrdering::NotAtomic:
6402 case llvm::AtomicOrdering::Unordered:
6403 llvm_unreachable("Unexpected ordering.");
6404 }
6405 }
6406}
6407
6408static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
6409 llvm::AtomicOrdering AO, const Expr *X,
6410 const Expr *V, const Expr *R,
6411 const Expr *E, const Expr *D,
6412 const Expr *CE, bool IsXBinopExpr,
6413 bool IsPostfixUpdate, bool IsFailOnly,
6414 SourceLocation Loc) {
6415 llvm::OpenMPIRBuilder &OMPBuilder =
6416 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6417
6418 OMPAtomicCompareOp Op;
6419 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6420 switch (cast<BinaryOperator>(CE)->getOpcode()) {
6421 case BO_EQ:
6422 Op = OMPAtomicCompareOp::EQ;
6423 break;
6424 case BO_LT:
6425 Op = OMPAtomicCompareOp::MIN;
6426 break;
6427 case BO_GT:
6428 Op = OMPAtomicCompareOp::MAX;
6429 break;
6430 default:
6431 llvm_unreachable("unsupported atomic compare binary operator");
6432 }
6433
6434 LValue XLVal = CGF.EmitLValue(X);
6435 Address XAddr = XLVal.getAddress(CGF);
6436
6437 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6438 if (X->getType() == E->getType())
6439 return CGF.EmitScalarExpr(E);
6440 const Expr *NewE = E->IgnoreImplicitAsWritten();
6441 llvm::Value *V = CGF.EmitScalarExpr(NewE);
6442 if (NewE->getType() == X->getType())
6443 return V;
6444 return CGF.EmitScalarConversion(V, NewE->getType(), X->getType(), Loc);
6445 };
6446
6447 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6448 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6449 if (auto *CI = dyn_cast<llvm::ConstantInt>(EVal))
6450 EVal = CGF.Builder.CreateIntCast(
6451 CI, XLVal.getAddress(CGF).getElementType(),
6452 E->getType()->hasSignedIntegerRepresentation());
6453 if (DVal)
6454 if (auto *CI = dyn_cast<llvm::ConstantInt>(DVal))
6455 DVal = CGF.Builder.CreateIntCast(
6456 CI, XLVal.getAddress(CGF).getElementType(),
6457 D->getType()->hasSignedIntegerRepresentation());
6458
6459 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6460 XAddr.getPointer(), XAddr.getElementType(),
6461 X->getType()->hasSignedIntegerRepresentation(),
6462 X->getType().isVolatileQualified()};
6463 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6464 if (V) {
6465 LValue LV = CGF.EmitLValue(V);
6466 Address Addr = LV.getAddress(CGF);
6467 VOpVal = {Addr.getPointer(), Addr.getElementType(),
6468 V->getType()->hasSignedIntegerRepresentation(),
6469 V->getType().isVolatileQualified()};
6470 }
6471 if (R) {
6472 LValue LV = CGF.EmitLValue(R);
6473 Address Addr = LV.getAddress(CGF);
6474 ROpVal = {Addr.getPointer(), Addr.getElementType(),
6475 R->getType()->hasSignedIntegerRepresentation(),
6476 R->getType().isVolatileQualified()};
6477 }
6478
6479 CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
6480 CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
6481 IsPostfixUpdate, IsFailOnly));
6482}
6483
6484static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6485 llvm::AtomicOrdering AO, bool IsPostfixUpdate,
6486 const Expr *X, const Expr *V, const Expr *R,
6487 const Expr *E, const Expr *UE, const Expr *D,
6488 const Expr *CE, bool IsXLHSInRHSPart,
6489 bool IsFailOnly, SourceLocation Loc) {
6490 switch (Kind) {
6491 case OMPC_read:
6492 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6493 break;
6494 case OMPC_write:
6495 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6496 break;
6497 case OMPC_unknown:
6498 case OMPC_update:
6499 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6500 break;
6501 case OMPC_capture:
6502 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6503 IsXLHSInRHSPart, Loc);
6504 break;
6505 case OMPC_compare: {
6506 emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
6507 IsPostfixUpdate, IsFailOnly, Loc);
6508 break;
6509 }
6510 default:
6511 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6512 }
6513}
6514
6515void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6516 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
6517 bool MemOrderingSpecified = false;
6518 if (S.getSingleClause<OMPSeqCstClause>()) {
6519 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6520 MemOrderingSpecified = true;
6521 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6522 AO = llvm::AtomicOrdering::AcquireRelease;
6523 MemOrderingSpecified = true;
6524 } else if (S.getSingleClause<OMPAcquireClause>()) {
6525 AO = llvm::AtomicOrdering::Acquire;
6526 MemOrderingSpecified = true;
6527 } else if (S.getSingleClause<OMPReleaseClause>()) {
6528 AO = llvm::AtomicOrdering::Release;
6529 MemOrderingSpecified = true;
6530 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6531 AO = llvm::AtomicOrdering::Monotonic;
6532 MemOrderingSpecified = true;
6533 }
6534 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6535 OpenMPClauseKind Kind = OMPC_unknown;
6536 for (const OMPClause *C : S.clauses()) {
6537 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6538 // if it is first).
6539 OpenMPClauseKind K = C->getClauseKind();
6540 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6541 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6542 continue;
6543 Kind = K;
6544 KindsEncountered.insert(K);
6545 }
6546 // We just need to correct Kind here. No need to set a bool saying it is
6547 // actually compare capture because we can tell from whether V and R are
6548 // nullptr.
6549 if (KindsEncountered.contains(OMPC_compare) &&
6550 KindsEncountered.contains(OMPC_capture))
6551 Kind = OMPC_compare;
6552 if (!MemOrderingSpecified) {
6553 llvm::AtomicOrdering DefaultOrder =
6554 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6555 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6556 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6557 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6558 Kind == OMPC_capture)) {
6559 AO = DefaultOrder;
6560 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6561 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6562 AO = llvm::AtomicOrdering::Release;
6563 } else if (Kind == OMPC_read) {
6564 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6565 AO = llvm::AtomicOrdering::Acquire;
6566 }
6567 }
6568 }
6569
6570 LexicalScope Scope(*this, S.getSourceRange());
6571 EmitStopPoint(S.getAssociatedStmt());
6572 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
6573 S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
6574 S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
6575 S.getBeginLoc());
6576}
6577
6578static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6579 const OMPExecutableDirective &S,
6580 const RegionCodeGenTy &CodeGen) {
6581 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6582 CodeGenModule &CGM = CGF.CGM;
6583
6584 // On device emit this construct as inlined code.
6585 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6586 OMPLexicalScope Scope(CGF, S, OMPD_target);
6587 CGM.getOpenMPRuntime().emitInlinedDirective(
6588 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6589 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6590 });
6591 return;
6592 }
6593
6594 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6595 llvm::Function *Fn = nullptr;
6596 llvm::Constant *FnID = nullptr;
6597
6598 const Expr *IfCond = nullptr;
6599 // Check for the at most one if clause associated with the target region.
6600 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6601 if (C->getNameModifier() == OMPD_unknown ||
6602 C->getNameModifier() == OMPD_target) {
6603 IfCond = C->getCondition();
6604 break;
6605 }
6606 }
6607
6608 // Check if we have any device clause associated with the directive.
6609 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6610 nullptr, OMPC_DEVICE_unknown);
6611 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6612 Device.setPointerAndInt(C->getDevice(), C->getModifier());
6613
6614 // Check if we have an if clause whose conditional always evaluates to false
6615 // or if we do not have any targets specified. If so the target region is not
6616 // an offload entry point.
6617 bool IsOffloadEntry = true;
6618 if (IfCond) {
6619 bool Val;
6620 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6621 IsOffloadEntry = false;
6622 }
6623 if (CGM.getLangOpts().OMPTargetTriples.empty())
6624 IsOffloadEntry = false;
6625
6626 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6627 unsigned DiagID = CGM.getDiags().getCustomDiagID(
6628 DiagnosticsEngine::Error,
6629 "No offloading entry generated while offloading is mandatory.");
6630 CGM.getDiags().Report(DiagID);
6631 }
6632
6633 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6634 StringRef ParentName;
6635 // In case we have Ctors/Dtors we use the complete type variant to produce
6636 // the mangling of the device outlined kernel.
6637 if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
6638 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
6639 else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
6640 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
6641 else
6642 ParentName =
6643 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6644
6645 // Emit target region as a standalone region.
6646 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
6647 IsOffloadEntry, CodeGen);
6648 OMPLexicalScope Scope(CGF, S, OMPD_task);
6649 auto &&SizeEmitter =
6650 [IsOffloadEntry](CodeGenFunction &CGF,
6651 const OMPLoopDirective &D) -> llvm::Value * {
6652 if (IsOffloadEntry) {
6653 OMPLoopScope(CGF, D);
6654 // Emit calculation of the iterations count.
6655 llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
6656 NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
6657 /*isSigned=*/false);
6658 return NumIterations;
6659 }
6660 return nullptr;
6661 };
6662 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
6663 SizeEmitter);
6664}
6665
6666static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6667 PrePostActionTy &Action) {
6668 Action.Enter(CGF);
6669 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6670 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6671 CGF.EmitOMPPrivateClause(S, PrivateScope);
6672 (void)PrivateScope.Privatize();
6673 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6674 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6675
6676 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6677 CGF.EnsureInsertPoint();
6678}
6679
6680void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6681 StringRef ParentName,
6682 const OMPTargetDirective &S) {
6683 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6684 emitTargetRegion(CGF, S, Action);
6685 };
6686 llvm::Function *Fn;
6687 llvm::Constant *Addr;
6688 // Emit target region as a standalone region.
6689 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6690 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6691 assert(Fn && Addr && "Target device function emission failed.");
6692}
6693
6694void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6695 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6696 emitTargetRegion(CGF, S, Action);
6697 };
6698 emitCommonOMPTargetDirective(*this, S, CodeGen);
6699}
6700
6701static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6702 const OMPExecutableDirective &S,
6703 OpenMPDirectiveKind InnermostKind,
6704 const RegionCodeGenTy &CodeGen) {
6705 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6706 llvm::Function *OutlinedFn =
6707 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6708 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
6709 CodeGen);
6710
6711 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6712 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6713 if (NT || TL) {
6714 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6715 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6716
6717 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6718 S.getBeginLoc());
6719 }
6720
6721 OMPTeamsScope Scope(CGF, S);
6722 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6723 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
6724 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6725 CapturedVars);
6726}
6727
6728void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6729 // Emit teams region as a standalone region.
6730 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6731 Action.Enter(CGF);
6732 OMPPrivateScope PrivateScope(CGF);
6733 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6734 CGF.EmitOMPPrivateClause(S, PrivateScope);
6735 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6736 (void)PrivateScope.Privatize();
6737 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6738 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6739 };
6740 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6741 emitPostUpdateForReductionClause(*this, S,
6742 [](CodeGenFunction &) { return nullptr; });
6743}
6744
6745static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6746 const OMPTargetTeamsDirective &S) {
6747 auto *CS = S.getCapturedStmt(OMPD_teams);
6748 Action.Enter(CGF);
6749 // Emit teams region as a standalone region.
6750 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6751 Action.Enter(CGF);
6752 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6753 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6754 CGF.EmitOMPPrivateClause(S, PrivateScope);
6755 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6756 (void)PrivateScope.Privatize();
6757 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6758 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6759 CGF.EmitStmt(CS->getCapturedStmt());
6760 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6761 };
6762 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6763 emitPostUpdateForReductionClause(CGF, S,
6764 [](CodeGenFunction &) { return nullptr; });
6765}
6766
6767void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6768 CodeGenModule &CGM, StringRef ParentName,
6769 const OMPTargetTeamsDirective &S) {
6770 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6771 emitTargetTeamsRegion(CGF, Action, S);
6772 };
6773 llvm::Function *Fn;
6774 llvm::Constant *Addr;
6775 // Emit target region as a standalone region.
6776 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6777 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6778 assert(Fn && Addr && "Target device function emission failed.");
6779}
6780
6781void CodeGenFunction::EmitOMPTargetTeamsDirective(
6782 const OMPTargetTeamsDirective &S) {
6783 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6784 emitTargetTeamsRegion(CGF, Action, S);
6785 };
6786 emitCommonOMPTargetDirective(*this, S, CodeGen);
6787}
6788
6789static void
6790emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6791 const OMPTargetTeamsDistributeDirective &S) {
6792 Action.Enter(CGF);
6793 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6794 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6795 };
6796
6797 // Emit teams region as a standalone region.
6798 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6799 PrePostActionTy &Action) {
6800 Action.Enter(CGF);
6801 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6802 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6803 (void)PrivateScope.Privatize();
6804 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6805 CodeGenDistribute);
6806 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6807 };
6808 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6809 emitPostUpdateForReductionClause(CGF, S,
6810 [](CodeGenFunction &) { return nullptr; });
6811}
6812
6813void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6814 CodeGenModule &CGM, StringRef ParentName,
6815 const OMPTargetTeamsDistributeDirective &S) {
6816 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6817 emitTargetTeamsDistributeRegion(CGF, Action, S);
6818 };
6819 llvm::Function *Fn;
6820 llvm::Constant *Addr;
6821 // Emit target region as a standalone region.
6822 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6823 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6824 assert(Fn && Addr && "Target device function emission failed.");
6825}
6826
6827void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6828 const OMPTargetTeamsDistributeDirective &S) {
6829 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6830 emitTargetTeamsDistributeRegion(CGF, Action, S);
6831 };
6832 emitCommonOMPTargetDirective(*this, S, CodeGen);
6833}
6834
6835static void emitTargetTeamsDistributeSimdRegion(
6836 CodeGenFunction &CGF, PrePostActionTy &Action,
6837 const OMPTargetTeamsDistributeSimdDirective &S) {
6838 Action.Enter(CGF);
6839 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6840 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6841 };
6842
6843 // Emit teams region as a standalone region.
6844 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6845 PrePostActionTy &Action) {
6846 Action.Enter(CGF);
6847 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6848 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6849 (void)PrivateScope.Privatize();
6850 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6851 CodeGenDistribute);
6852 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6853 };
6854 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6855 emitPostUpdateForReductionClause(CGF, S,
6856 [](CodeGenFunction &) { return nullptr; });
6857}
6858
6859void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6860 CodeGenModule &CGM, StringRef ParentName,
6861 const OMPTargetTeamsDistributeSimdDirective &S) {
6862 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6863 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6864 };
6865 llvm::Function *Fn;
6866 llvm::Constant *Addr;
6867 // Emit target region as a standalone region.
6868 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6869 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6870 assert(Fn && Addr && "Target device function emission failed.");
6871}
6872
6873void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6874 const OMPTargetTeamsDistributeSimdDirective &S) {
6875 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6876 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6877 };
6878 emitCommonOMPTargetDirective(*this, S, CodeGen);
6879}
6880
6881void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6882 const OMPTeamsDistributeDirective &S) {
6883
6884 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6885 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6886 };
6887
6888 // Emit teams region as a standalone region.
6889 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6890 PrePostActionTy &Action) {
6891 Action.Enter(CGF);
6892 OMPPrivateScope PrivateScope(CGF);
6893 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6894 (void)PrivateScope.Privatize();
6895 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6896 CodeGenDistribute);
6897 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6898 };
6899 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6900 emitPostUpdateForReductionClause(*this, S,
6901 [](CodeGenFunction &) { return nullptr; });
6902}
6903
6904void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6905 const OMPTeamsDistributeSimdDirective &S) {
6906 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6907 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6908 };
6909
6910 // Emit teams region as a standalone region.
6911 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6912 PrePostActionTy &Action) {
6913 Action.Enter(CGF);
6914 OMPPrivateScope PrivateScope(CGF);
6915 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6916 (void)PrivateScope.Privatize();
6917 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6918 CodeGenDistribute);
6919 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6920 };
6921 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6922 emitPostUpdateForReductionClause(*this, S,
6923 [](CodeGenFunction &) { return nullptr; });
6924}
6925
6926void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6927 const OMPTeamsDistributeParallelForDirective &S) {
6928 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6929 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6930 S.getDistInc());
6931 };
6932
6933 // Emit teams region as a standalone region.
6934 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6935 PrePostActionTy &Action) {
6936 Action.Enter(CGF);
6937 OMPPrivateScope PrivateScope(CGF);
6938 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6939 (void)PrivateScope.Privatize();
6940 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6941 CodeGenDistribute);
6942 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6943 };
6944 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6945 emitPostUpdateForReductionClause(*this, S,
6946 [](CodeGenFunction &) { return nullptr; });
6947}
6948
6949void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
6950 const OMPTeamsDistributeParallelForSimdDirective &S) {
6951 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6952 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
6953 S.getDistInc());
6954 };
6955
6956 // Emit teams region as a standalone region.
6957 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6958 PrePostActionTy &Action) {
6959 Action.Enter(CGF);
6960 OMPPrivateScope PrivateScope(CGF);
6961 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6962 (void)PrivateScope.Privatize();
6963 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
6964 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
6965 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6966 };
6967 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
6968 CodeGen);
6969 emitPostUpdateForReductionClause(*this, S,
6970 [](CodeGenFunction &) { return nullptr; });
6971}
6972
6973void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
6974 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6975 llvm::Value *Device = nullptr;
6976 llvm::Value *NumDependences = nullptr;
6977 llvm::Value *DependenceList = nullptr;
6978
6979 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
6980 Device = EmitScalarExpr(C->getDevice());
6981
6982 // Build list and emit dependences
6983 OMPTaskDataTy Data;
6984 buildDependences(S, Data);
6985 if (!Data.Dependences.empty()) {
6986 Address DependenciesArray = Address::invalid();
6987 std::tie(NumDependences, DependenciesArray) =
6988 CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
6989 S.getBeginLoc());
6990 DependenceList = DependenciesArray.getPointer();
6991 }
6992 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
6993
6994 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
6995 S.getSingleClause<OMPDestroyClause>() ||
6996 S.getSingleClause<OMPUseClause>())) &&
6997 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
6998
6999 if (const auto *C = S.getSingleClause<OMPInitClause>()) {
7000 llvm::Value *InteropvarPtr =
7001 EmitLValue(C->getInteropVar()).getPointer(*this);
7002 llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
7003 if (C->getIsTarget()) {
7004 InteropType = llvm::omp::OMPInteropType::Target;
7005 } else {
7006 assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
7007 InteropType = llvm::omp::OMPInteropType::TargetSync;
7008 }
7009 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
7010 NumDependences, DependenceList,
7011 Data.HasNowaitClause);
7012 } else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
7013 llvm::Value *InteropvarPtr =
7014 EmitLValue(C->getInteropVar()).getPointer(*this);
7015 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
7016 NumDependences, DependenceList,
7017 Data.HasNowaitClause);
7018 } else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
7019 llvm::Value *InteropvarPtr =
7020 EmitLValue(C->getInteropVar()).getPointer(*this);
7021 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
7022 NumDependences, DependenceList,
7023 Data.HasNowaitClause);
7024 }
7025}
7026
7027static void emitTargetTeamsDistributeParallelForRegion(
7028 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7029 PrePostActionTy &Action) {
7030 Action.Enter(CGF);
7031 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7032 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7033 S.getDistInc());
7034 };
7035
7036 // Emit teams region as a standalone region.
7037 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7038 PrePostActionTy &Action) {
7039 Action.Enter(CGF);
7040 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7041 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7042 (void)PrivateScope.Privatize();
7043 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7044 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7045 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7046 };
7047
7048 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7049 CodeGenTeams);
7050 emitPostUpdateForReductionClause(CGF, S,
7051 [](CodeGenFunction &) { return nullptr; });
7052}
7053
7054void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7055 CodeGenModule &CGM, StringRef ParentName,
7056 const OMPTargetTeamsDistributeParallelForDirective &S) {
7057 // Emit SPMD target teams distribute parallel for region as a standalone
7058 // region.
7059 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7060 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7061 };
7062 llvm::Function *Fn;
7063 llvm::Constant *Addr;
7064 // Emit target region as a standalone region.
7065 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7066 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7067 assert(Fn && Addr && "Target device function emission failed.");
7068}
7069
7070void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7071 const OMPTargetTeamsDistributeParallelForDirective &S) {
7072 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7073 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7074 };
7075 emitCommonOMPTargetDirective(*this, S, CodeGen);
7076}
7077
7078static void emitTargetTeamsDistributeParallelForSimdRegion(
7079 CodeGenFunction &CGF,
7080 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7081 PrePostActionTy &Action) {
7082 Action.Enter(CGF);
7083 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7084 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7085 S.getDistInc());
7086 };
7087
7088 // Emit teams region as a standalone region.
7089 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7090 PrePostActionTy &Action) {
7091 Action.Enter(CGF);
7092 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7093 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7094 (void)PrivateScope.Privatize();
7095 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7096 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7097 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7098 };
7099
7100 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7101 CodeGenTeams);
7102 emitPostUpdateForReductionClause(CGF, S,
7103 [](CodeGenFunction &) { return nullptr; });
7104}
7105
7106void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7107 CodeGenModule &CGM, StringRef ParentName,
7108 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7109 // Emit SPMD target teams distribute parallel for simd region as a standalone
7110 // region.
7111 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7112 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7113 };
7114 llvm::Function *Fn;
7115 llvm::Constant *Addr;
7116 // Emit target region as a standalone region.
7117 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7118 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7119 assert(Fn && Addr && "Target device function emission failed.");
7120}
7121
7122void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7123 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7124 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7125 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7126 };
7127 emitCommonOMPTargetDirective(*this, S, CodeGen);
7128}
7129
7130void CodeGenFunction::EmitOMPCancellationPointDirective(
7131 const OMPCancellationPointDirective &S) {
7132 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7133 S.getCancelRegion());
7134}
7135
7136void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7137 const Expr *IfCond = nullptr;
7138 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7139 if (C->getNameModifier() == OMPD_unknown ||
7140 C->getNameModifier() == OMPD_cancel) {
7141 IfCond = C->getCondition();
7142 break;
7143 }
7144 }
7145 if (CGM.getLangOpts().OpenMPIRBuilder) {
7146 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7147 // TODO: This check is necessary as we only generate `omp parallel` through
7148 // the OpenMPIRBuilder for now.
7149 if (S.getCancelRegion() == OMPD_parallel ||
7150 S.getCancelRegion() == OMPD_sections ||
7151 S.getCancelRegion() == OMPD_section) {
7152 llvm::Value *IfCondition = nullptr;
7153 if (IfCond)
7154 IfCondition = EmitScalarExpr(IfCond,
7155 /*IgnoreResultAssign=*/true);
7156 return Builder.restoreIP(
7157 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7158 }
7159 }
7160
7161 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7162 S.getCancelRegion());
7163}
7164
7165CodeGenFunction::JumpDest
7166CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7167 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7168 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7169 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7170 return ReturnBlock;
7171 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7172 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7173 Kind == OMPD_distribute_parallel_for ||
7174 Kind == OMPD_target_parallel_for ||
7175 Kind == OMPD_teams_distribute_parallel_for ||
7176 Kind == OMPD_target_teams_distribute_parallel_for);
7177 return OMPCancelStack.getExitBlock();
7178}
7179
7180void CodeGenFunction::EmitOMPUseDevicePtrClause(
7181 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7182 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7183 CaptureDeviceAddrMap) {
7184 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7185 for (const Expr *OrigVarIt : C.varlists()) {
7186 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7187 if (!Processed.insert(OrigVD).second)
7188 continue;
7189
7190 // In order to identify the right initializer we need to match the
7191 // declaration used by the mapping logic. In some cases we may get
7192 // OMPCapturedExprDecl that refers to the original declaration.
7193 const ValueDecl *MatchingVD = OrigVD;
7194 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7195 // OMPCapturedExprDecl are used to privative fields of the current
7196 // structure.
7197 const auto *ME = cast<MemberExpr>(OED->getInit());
7198 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7199 "Base should be the current struct!");
7200 MatchingVD = ME->getMemberDecl();
7201 }
7202
7203 // If we don't have information about the current list item, move on to
7204 // the next one.
7205 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7206 if (InitAddrIt == CaptureDeviceAddrMap.end())
7207 continue;
7208
7209 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7210
7211 // Return the address of the private variable.
7212 bool IsRegistered = PrivateScope.addPrivate(
7213 OrigVD,
7214 Address(InitAddrIt->second, Ty,
7215 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7216 assert(IsRegistered && "firstprivate var already registered as private");
7217 // Silence the warning about unused variable.
7218 (void)IsRegistered;
7219 }
7220}
7221
7222static const VarDecl *getBaseDecl(const Expr *Ref) {
7223 const Expr *Base = Ref->IgnoreParenImpCasts();
7224 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
7225 Base = OASE->getBase()->IgnoreParenImpCasts();
7226 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
7227 Base = ASE->getBase()->IgnoreParenImpCasts();
7228 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7229}
7230
7231void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7232 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7233 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7234 CaptureDeviceAddrMap) {
7235 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7236 for (const Expr *Ref : C.varlists()) {
7237 const VarDecl *OrigVD = getBaseDecl(Ref);
7238 if (!Processed.insert(OrigVD).second)
7239 continue;
7240 // In order to identify the right initializer we need to match the
7241 // declaration used by the mapping logic. In some cases we may get
7242 // OMPCapturedExprDecl that refers to the original declaration.
7243 const ValueDecl *MatchingVD = OrigVD;
7244 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
7245 // OMPCapturedExprDecl are used to privative fields of the current
7246 // structure.
7247 const auto *ME = cast<MemberExpr>(OED->getInit());
7248 assert(isa<CXXThisExpr>(ME->getBase()) &&
7249 "Base should be the current struct!");
7250 MatchingVD = ME->getMemberDecl();
7251 }
7252
7253 // If we don't have information about the current list item, move on to
7254 // the next one.
7255 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7256 if (InitAddrIt == CaptureDeviceAddrMap.end())
7257 continue;
7258
7259 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7260
7261 Address PrivAddr =
7262 Address(InitAddrIt->second, Ty,
7263 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
7264 // For declrefs and variable length array need to load the pointer for
7265 // correct mapping, since the pointer to the data was passed to the runtime.
7266 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7267 MatchingVD->getType()->isArrayType()) {
7268 QualType PtrTy = getContext().getPointerType(
7269 OrigVD->getType().getNonReferenceType());
7270 PrivAddr =
7271 EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
7272 PtrTy->castAs<PointerType>());
7273 }
7274
7275 (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7276 }
7277}
7278
7279// Generate the instructions for '#pragma omp target data' directive.
7280void CodeGenFunction::EmitOMPTargetDataDirective(
7281 const OMPTargetDataDirective &S) {
7282 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7283 /*SeparateBeginEndCalls=*/true);
7284
7285 // Create a pre/post action to signal the privatization of the device pointer.
7286 // This action can be replaced by the OpenMP runtime code generation to
7287 // deactivate privatization.
7288 bool PrivatizeDevicePointers = false;
7289 class DevicePointerPrivActionTy : public PrePostActionTy {
7290 bool &PrivatizeDevicePointers;
7291
7292 public:
7293 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7294 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7295 void Enter(CodeGenFunction &CGF) override {
7296 PrivatizeDevicePointers = true;
7297 }
7298 };
7299 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7300
7301 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7302 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7303 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7304 };
7305
7306 // Codegen that selects whether to generate the privatization code or not.
7307 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7308 RegionCodeGenTy RCG(InnermostCodeGen);
7309 PrivatizeDevicePointers = false;
7310
7311 // Call the pre-action to change the status of PrivatizeDevicePointers if
7312 // needed.
7313 Action.Enter(CGF);
7314
7315 if (PrivatizeDevicePointers) {
7316 OMPPrivateScope PrivateScope(CGF);
7317 // Emit all instances of the use_device_ptr clause.
7318 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7319 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7320 Info.CaptureDeviceAddrMap);
7321 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7322 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7323 Info.CaptureDeviceAddrMap);
7324 (void)PrivateScope.Privatize();
7325 RCG(CGF);
7326 } else {
7327 // If we don't have target devices, don't bother emitting the data
7328 // mapping code.
7329 std::optional<OpenMPDirectiveKind> CaptureRegion;
7330 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7331 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7332 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7333 for (const Expr *E : C->varlists()) {
7334 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
7335 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7336 CGF.EmitVarDecl(*OED);
7337 }
7338 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7339 for (const Expr *E : C->varlists()) {
7340 const Decl *D = getBaseDecl(E);
7341 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7342 CGF.EmitVarDecl(*OED);
7343 }
7344 } else {
7345 CaptureRegion = OMPD_unknown;
7346 }
7347
7348 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7349 RCG(CGF);
7350 }
7351 };
7352
7353 // Forward the provided action to the privatization codegen.
7354 RegionCodeGenTy PrivRCG(PrivCodeGen);
7355 PrivRCG.setAction(Action);
7356
7357 // Notwithstanding the body of the region is emitted as inlined directive,
7358 // we don't use an inline scope as changes in the references inside the
7359 // region are expected to be visible outside, so we do not privative them.
7360 OMPLexicalScope Scope(CGF, S);
7361 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7362 PrivRCG);
7363 };
7364
7365 RegionCodeGenTy RCG(CodeGen);
7366
7367 // If we don't have target devices, don't bother emitting the data mapping
7368 // code.
7369 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7370 RCG(*this);
7371 return;
7372 }
7373
7374 // Check if we have any if clause associated with the directive.
7375 const Expr *IfCond = nullptr;
7376 if (const auto *C = S.getSingleClause<OMPIfClause>())
7377 IfCond = C->getCondition();
7378
7379 // Check if we have any device clause associated with the directive.
7380 const Expr *Device = nullptr;
7381 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7382 Device = C->getDevice();
7383
7384 // Set the action to signal privatization of device pointers.
7385 RCG.setAction(PrivAction);
7386
7387 // Emit region code.
7388 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7389 Info);
7390}
7391
7392void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7393 const OMPTargetEnterDataDirective &S) {
7394 // If we don't have target devices, don't bother emitting the data mapping
7395 // code.
7396 if (CGM.getLangOpts().OMPTargetTriples.empty())
7397 return;
7398
7399 // Check if we have any if clause associated with the directive.
7400 const Expr *IfCond = nullptr;
7401 if (const auto *C = S.getSingleClause<OMPIfClause>())
7402 IfCond = C->getCondition();
7403
7404 // Check if we have any device clause associated with the directive.
7405 const Expr *Device = nullptr;
7406 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7407 Device = C->getDevice();
7408
7409 OMPLexicalScope Scope(*this, S, OMPD_task);
7410 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7411}
7412
7413void CodeGenFunction::EmitOMPTargetExitDataDirective(
7414 const OMPTargetExitDataDirective &S) {
7415 // If we don't have target devices, don't bother emitting the data mapping
7416 // code.
7417 if (CGM.getLangOpts().OMPTargetTriples.empty())
7418 return;
7419
7420 // Check if we have any if clause associated with the directive.
7421 const Expr *IfCond = nullptr;
7422 if (const auto *C = S.getSingleClause<OMPIfClause>())
7423 IfCond = C->getCondition();
7424
7425 // Check if we have any device clause associated with the directive.
7426 const Expr *Device = nullptr;
7427 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7428 Device = C->getDevice();
7429
7430 OMPLexicalScope Scope(*this, S, OMPD_task);
7431 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7432}
7433
7434static void emitTargetParallelRegion(CodeGenFunction &CGF,
7435 const OMPTargetParallelDirective &S,
7436 PrePostActionTy &Action) {
7437 // Get the captured statement associated with the 'parallel' region.
7438 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7439 Action.Enter(CGF);
7440 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7441 Action.Enter(CGF);
7442 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7443 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7444 CGF.EmitOMPPrivateClause(S, PrivateScope);
7445 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7446 (void)PrivateScope.Privatize();
7447 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7448 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7449 // TODO: Add support for clauses.
7450 CGF.EmitStmt(CS->getCapturedStmt());
7451 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7452 };
7453 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7454 emitEmptyBoundParameters);
7455 emitPostUpdateForReductionClause(CGF, S,
7456 [](CodeGenFunction &) { return nullptr; });
7457}
7458
7459void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7460 CodeGenModule &CGM, StringRef ParentName,
7461 const OMPTargetParallelDirective &S) {
7462 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7463 emitTargetParallelRegion(CGF, S, Action);
7464 };
7465 llvm::Function *Fn;
7466 llvm::Constant *Addr;
7467 // Emit target region as a standalone region.
7468 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7469 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7470 assert(Fn && Addr && "Target device function emission failed.");
7471}
7472
7473void CodeGenFunction::EmitOMPTargetParallelDirective(
7474 const OMPTargetParallelDirective &S) {
7475 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7476 emitTargetParallelRegion(CGF, S, Action);
7477 };
7478 emitCommonOMPTargetDirective(*this, S, CodeGen);
7479}
7480
7481static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7482 const OMPTargetParallelForDirective &S,
7483 PrePostActionTy &Action) {
7484 Action.Enter(CGF);
7485 // Emit directive as a combined directive that consists of two implicit
7486 // directives: 'parallel' with 'for' directive.
7487 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7488 Action.Enter(CGF);
7489 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7490 CGF, OMPD_target_parallel_for, S.hasCancel());
7491 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7492 emitDispatchForLoopBounds);
7493 };
7494 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7495 emitEmptyBoundParameters);
7496}
7497
7498void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7499 CodeGenModule &CGM, StringRef ParentName,
7500 const OMPTargetParallelForDirective &S) {
7501 // Emit SPMD target parallel for region as a standalone region.
7502 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7503 emitTargetParallelForRegion(CGF, S, Action);
7504 };
7505 llvm::Function *Fn;
7506 llvm::Constant *Addr;
7507 // Emit target region as a standalone region.
7508 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7509 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7510 assert(Fn && Addr && "Target device function emission failed.");
7511}
7512
7513void CodeGenFunction::EmitOMPTargetParallelForDirective(
7514 const OMPTargetParallelForDirective &S) {
7515 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7516 emitTargetParallelForRegion(CGF, S, Action);
7517 };
7518 emitCommonOMPTargetDirective(*this, S, CodeGen);
7519}
7520
7521static void
7522emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7523 const OMPTargetParallelForSimdDirective &S,
7524 PrePostActionTy &Action) {
7525 Action.Enter(CGF);
7526 // Emit directive as a combined directive that consists of two implicit
7527 // directives: 'parallel' with 'for' directive.
7528 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7529 Action.Enter(CGF);
7530 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7531 emitDispatchForLoopBounds);
7532 };
7533 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7534 emitEmptyBoundParameters);
7535}
7536
7537void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7538 CodeGenModule &CGM, StringRef ParentName,
7539 const OMPTargetParallelForSimdDirective &S) {
7540 // Emit SPMD target parallel for region as a standalone region.
7541 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7542 emitTargetParallelForSimdRegion(CGF, S, Action);
7543 };
7544 llvm::Function *Fn;
7545 llvm::Constant *Addr;
7546 // Emit target region as a standalone region.
7547 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7548 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7549 assert(Fn && Addr && "Target device function emission failed.");
7550}
7551
7552void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7553 const OMPTargetParallelForSimdDirective &S) {
7554 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7555 emitTargetParallelForSimdRegion(CGF, S, Action);
7556 };
7557 emitCommonOMPTargetDirective(*this, S, CodeGen);
7558}
7559
7560/// Emit a helper variable and return corresponding lvalue.
7561static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7562 const ImplicitParamDecl *PVD,
7563 CodeGenFunction::OMPPrivateScope &Privates) {
7564 const auto *VDecl = cast<VarDecl>(Helper->getDecl());
7565 Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7566}
7567
7568void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7569 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7570 // Emit outlined function for task construct.
7571 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7572 Address CapturedStruct = Address::invalid();
7573 {
7574 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7575 CapturedStruct = GenerateCapturedStmtArgument(*CS);
7576 }
7577 QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7578 const Expr *IfCond = nullptr;
7579 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7580 if (C->getNameModifier() == OMPD_unknown ||
7581 C->getNameModifier() == OMPD_taskloop) {
7582 IfCond = C->getCondition();
7583 break;
7584 }
7585 }
7586
7587 OMPTaskDataTy Data;
7588 // Check if taskloop must be emitted without taskgroup.
7589 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7590 // TODO: Check if we should emit tied or untied task.
7591 Data.Tied = true;
7592 // Set scheduling for taskloop
7593 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7594 // grainsize clause
7595 Data.Schedule.setInt(/*IntVal=*/false);
7596 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7597 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7598 // num_tasks clause
7599 Data.Schedule.setInt(/*IntVal=*/true);
7600 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7601 }
7602
7603 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7604 // if (PreCond) {
7605 // for (IV in 0..LastIteration) BODY;
7606 // <Final counter/linear vars updates>;
7607 // }
7608 //
7609
7610 // Emit: if (PreCond) - begin.
7611 // If the condition constant folds and can be elided, avoid emitting the
7612 // whole loop.
7613 bool CondConstant;
7614 llvm::BasicBlock *ContBlock = nullptr;
7615 OMPLoopScope PreInitScope(CGF, S);
7616 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7617 if (!CondConstant)
7618 return;
7619 } else {
7620 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
7621 ContBlock = CGF.createBasicBlock("taskloop.if.end");
7622 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
7623 CGF.getProfileCount(&S));
7624 CGF.EmitBlock(ThenBlock);
7625 CGF.incrementProfileCounter(&S);
7626 }
7627
7628 (void)CGF.EmitOMPLinearClauseInit(S);
7629
7630 OMPPrivateScope LoopScope(CGF);
7631 // Emit helper vars inits.
7632 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7633 auto *I = CS->getCapturedDecl()->param_begin();
7634 auto *LBP = std::next(I, LowerBound);
7635 auto *UBP = std::next(I, UpperBound);
7636 auto *STP = std::next(I, Stride);
7637 auto *LIP = std::next(I, LastIter);
7638 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
7639 LoopScope);
7640 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
7641 LoopScope);
7642 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
7643 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
7644 LoopScope);
7645 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7646 CGF.EmitOMPLinearClause(S, LoopScope);
7647 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7648 (void)LoopScope.Privatize();
7649 // Emit the loop iteration variable.
7650 const Expr *IVExpr = S.getIterationVariable();
7651 const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
7652 CGF.EmitVarDecl(*IVDecl);
7653 CGF.EmitIgnoredExpr(S.getInit());
7654
7655 // Emit the iterations count variable.
7656 // If it is not a variable, Sema decided to calculate iterations count on
7657 // each iteration (e.g., it is foldable into a constant).
7658 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
7659 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
7660 // Emit calculation of the iterations count.
7661 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7662 }
7663
7664 {
7665 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7666 emitCommonSimdLoop(
7667 CGF, S,
7668 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7669 if (isOpenMPSimdDirective(S.getDirectiveKind()))
7670 CGF.EmitOMPSimdInit(S);
7671 },
7672 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7673 CGF.EmitOMPInnerLoop(
7674 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
7675 [&S](CodeGenFunction &CGF) {
7676 emitOMPLoopBodyWithStopPoint(CGF, S,
7677 CodeGenFunction::JumpDest());
7678 },
7679 [](CodeGenFunction &) {});
7680 });
7681 }
7682 // Emit: if (PreCond) - end.
7683 if (ContBlock) {
7684 CGF.EmitBranch(ContBlock);
7685 CGF.EmitBlock(ContBlock, true);
7686 }
7687 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7688 if (HasLastprivateClause) {
7689 CGF.EmitOMPLastprivateClauseFinal(
7690 S, isOpenMPSimdDirective(S.getDirectiveKind()),
7691 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7692 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7693 (*LIP)->getType(), S.getBeginLoc())));
7694 }
7695 LoopScope.restoreMap();
7696 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7697 return CGF.Builder.CreateIsNotNull(
7698 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7699 (*LIP)->getType(), S.getBeginLoc()));
7700 });
7701 };
7702 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7703 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7704 const OMPTaskDataTy &Data) {
7705 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7706 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7707 OMPLoopScope PreInitScope(CGF, S);
7708 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
7709 OutlinedFn, SharedsTy,
7710 CapturedStruct, IfCond, Data);
7711 };
7712 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7713 CodeGen);
7714 };
7715 if (Data.Nogroup) {
7716 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7717 } else {
7718 CGM.getOpenMPRuntime().emitTaskgroupRegion(
7719 *this,
7720 [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7721 PrePostActionTy &Action) {
7722 Action.Enter(CGF);
7723 CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
7724 Data);
7725 },
7726 S.getBeginLoc());
7727 }
7728}
7729
7730void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7731 auto LPCRegion =
7732 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7733 EmitOMPTaskLoopBasedDirective(S);
7734}
7735
7736void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7737 const OMPTaskLoopSimdDirective &S) {
7738 auto LPCRegion =
7739 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7740 OMPLexicalScope Scope(*this, S);
7741 EmitOMPTaskLoopBasedDirective(S);
7742}
7743
7744void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7745 const OMPMasterTaskLoopDirective &S) {
7746 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7747 Action.Enter(CGF);
7748 EmitOMPTaskLoopBasedDirective(S);
7749 };
7750 auto LPCRegion =
7751 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7752 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
7753 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7754}
7755
7756void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7757 const OMPMasterTaskLoopSimdDirective &S) {
7758 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7759 Action.Enter(CGF);
7760 EmitOMPTaskLoopBasedDirective(S);
7761 };
7762 auto LPCRegion =
7763 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7764 OMPLexicalScope Scope(*this, S);
7765 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7766}
7767
7768void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7769 const OMPParallelMasterTaskLoopDirective &S) {
7770 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7771 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7772 PrePostActionTy &Action) {
7773 Action.Enter(CGF);
7774 CGF.EmitOMPTaskLoopBasedDirective(S);
7775 };
7776 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7777 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7778 S.getBeginLoc());
7779 };
7780 auto LPCRegion =
7781 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7782 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7783 emitEmptyBoundParameters);
7784}
7785
7786void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7787 const OMPParallelMasterTaskLoopSimdDirective &S) {
7788 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7789 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7790 PrePostActionTy &Action) {
7791 Action.Enter(CGF);
7792 CGF.EmitOMPTaskLoopBasedDirective(S);
7793 };
7794 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
7795 CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
7796 S.getBeginLoc());
7797 };
7798 auto LPCRegion =
7799 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7800 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7801 emitEmptyBoundParameters);
7802}
7803
7804// Generate the instructions for '#pragma omp target update' directive.
7805void CodeGenFunction::EmitOMPTargetUpdateDirective(
7806 const OMPTargetUpdateDirective &S) {
7807 // If we don't have target devices, don't bother emitting the data mapping
7808 // code.
7809 if (CGM.getLangOpts().OMPTargetTriples.empty())
7810 return;
7811
7812 // Check if we have any if clause associated with the directive.
7813 const Expr *IfCond = nullptr;
7814 if (const auto *C = S.getSingleClause<OMPIfClause>())
7815 IfCond = C->getCondition();
7816
7817 // Check if we have any device clause associated with the directive.
7818 const Expr *Device = nullptr;
7819 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7820 Device = C->getDevice();
7821
7822 OMPLexicalScope Scope(*this, S, OMPD_task);
7823 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7824}
7825
7826void CodeGenFunction::EmitOMPGenericLoopDirective(
7827 const OMPGenericLoopDirective &S) {
7828 // Unimplemented, just inline the underlying statement for now.
7829 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7830 // Emit the loop iteration variable.
7831 const Stmt *CS =
7832 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
7833 const auto *ForS = dyn_cast<ForStmt>(CS);
7834 if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7835 OMPPrivateScope LoopScope(CGF);
7836 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7837 (void)LoopScope.Privatize();
7838 CGF.EmitStmt(CS);
7839 LoopScope.restoreMap();
7840 } else {
7841 CGF.EmitStmt(CS);
7842 }
7843 };
7844 OMPLexicalScope Scope(*this, S, OMPD_unknown);
7845 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7846}
7847
7848void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
7849 const OMPLoopDirective &S) {
7850 // Emit combined directive as if its consituent constructs are 'parallel'
7851 // and 'for'.
7852 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7853 Action.Enter(CGF);
7854 emitOMPCopyinClause(CGF, S);
7855 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
7856 };
7857 {
7858 auto LPCRegion =
7859 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7860 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
7861 emitEmptyBoundParameters);
7862 }
7863 // Check for outer lastprivate conditional update.
7864 checkForLastprivateConditionalUpdate(*this, S);
7865}
7866
7867void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
7868 const OMPTeamsGenericLoopDirective &S) {
7869 // To be consistent with current behavior of 'target teams loop', emit
7870 // 'teams loop' as if its constituent constructs are 'distribute,
7871 // 'parallel, and 'for'.
7872 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7873 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7874 S.getDistInc());
7875 };
7876
7877 // Emit teams region as a standalone region.
7878 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7879 PrePostActionTy &Action) {
7880 Action.Enter(CGF);
7881 OMPPrivateScope PrivateScope(CGF);
7882 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7883 (void)PrivateScope.Privatize();
7884 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7885 CodeGenDistribute);
7886 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7887 };
7888 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
7889 emitPostUpdateForReductionClause(*this, S,
7890 [](CodeGenFunction &) { return nullptr; });
7891}
7892
7893static void
7894emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
7895 const OMPTargetTeamsGenericLoopDirective &S,
7896 PrePostActionTy &Action) {
7897 Action.Enter(CGF);
7898 // Emit 'teams loop' as if its constituent constructs are 'distribute,
7899 // 'parallel, and 'for'.
7900 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7901 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
7902 S.getDistInc());
7903 };
7904
7905 // Emit teams region as a standalone region.
7906 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7907 PrePostActionTy &Action) {
7908 Action.Enter(CGF);
7909 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7910 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7911 (void)PrivateScope.Privatize();
7912 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7913 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7914 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7915 };
7916
7917 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7918 CodeGenTeams);
7919 emitPostUpdateForReductionClause(CGF, S,
7920 [](CodeGenFunction &) { return nullptr; });
7921}
7922
7923/// Emit combined directive 'target teams loop' as if its constituent
7924/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'.
7925void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
7926 const OMPTargetTeamsGenericLoopDirective &S) {
7927 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7928 emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7929 };
7930 emitCommonOMPTargetDirective(*this, S, CodeGen);
7931}
7932
7933void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
7934 CodeGenModule &CGM, StringRef ParentName,
7935 const OMPTargetTeamsGenericLoopDirective &S) {
7936 // Emit SPMD target parallel loop region as a standalone region.
7937 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7938 emitTargetTeamsGenericLoopRegion(CGF, S, Action);
7939 };
7940 llvm::Function *Fn;
7941 llvm::Constant *Addr;
7942 // Emit target region as a standalone region.
7943 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7944 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7945 assert(Fn && Addr &&
7946 "Target device function emission failed for 'target teams loop'.");
7947}
7948
7949static void emitTargetParallelGenericLoopRegion(
7950 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
7951 PrePostActionTy &Action) {
7952 Action.Enter(CGF);
7953 // Emit as 'parallel for'.
7954 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7955 Action.Enter(CGF);
7956 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7957 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
7958 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
7959 emitDispatchForLoopBounds);
7960 };
7961 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7962 emitEmptyBoundParameters);
7963}
7964
7965void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
7966 CodeGenModule &CGM, StringRef ParentName,
7967 const OMPTargetParallelGenericLoopDirective &S) {
7968 // Emit target parallel loop region as a standalone region.
7969 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7970 emitTargetParallelGenericLoopRegion(CGF, S, Action);
7971 };
7972 llvm::Function *Fn;
7973 llvm::Constant *Addr;
7974 // Emit target region as a standalone region.
7975 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7976 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7977 assert(Fn && Addr && "Target device function emission failed.");
7978}
7979
7980/// Emit combined directive 'target parallel loop' as if its constituent
7981/// constructs are 'target', 'parallel', and 'for'.
7982void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
7983 const OMPTargetParallelGenericLoopDirective &S) {
7984 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7985 emitTargetParallelGenericLoopRegion(CGF, S, Action);
7986 };
7987 emitCommonOMPTargetDirective(*this, S, CodeGen);
7988}
7989
7990void CodeGenFunction::EmitSimpleOMPExecutableDirective(
7991 const OMPExecutableDirective &D) {
7992 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
7993 EmitOMPScanDirective(*SD);
7994 return;
7995 }
7996 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
7997 return;
7998 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
7999 OMPPrivateScope GlobalsScope(CGF);
8000 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
8001 // Capture global firstprivates to avoid crash.
8002 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8003 for (const Expr *Ref : C->varlists()) {
8004 const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
8005 if (!DRE)
8006 continue;
8007 const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
8008 if (!VD || VD->hasLocalStorage())
8009 continue;
8010 if (!CGF.LocalDeclMap.count(VD)) {
8011 LValue GlobLVal = CGF.EmitLValue(Ref);
8012 GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8013 }
8014 }
8015 }
8016 }
8017 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
8018 (void)GlobalsScope.Privatize();
8019 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8020 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
8021 } else {
8022 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
8023 for (const Expr *E : LD->counters()) {
8024 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
8025 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
8026 LValue GlobLVal = CGF.EmitLValue(E);
8027 GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
8028 }
8029 if (isa<OMPCapturedExprDecl>(VD)) {
8030 // Emit only those that were not explicitly referenced in clauses.
8031 if (!CGF.LocalDeclMap.count(VD))
8032 CGF.EmitVarDecl(*VD);
8033 }
8034 }
8035 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8036 if (!C->getNumForLoops())
8037 continue;
8038 for (unsigned I = LD->getLoopsNumber(),
8039 E = C->getLoopNumIterations().size();
8040 I < E; ++I) {
8041 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8042 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
8043 // Emit only those that were not explicitly referenced in clauses.
8044 if (!CGF.LocalDeclMap.count(VD))
8045 CGF.EmitVarDecl(*VD);
8046 }
8047 }
8048 }
8049 }
8050 (void)GlobalsScope.Privatize();
8051 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
8052 }
8053 };
8054 if (D.getDirectiveKind() == OMPD_atomic ||
8055 D.getDirectiveKind() == OMPD_critical ||
8056 D.getDirectiveKind() == OMPD_section ||
8057 D.getDirectiveKind() == OMPD_master ||
8058 D.getDirectiveKind() == OMPD_masked) {
8059 EmitStmt(D.getAssociatedStmt());
8060 } else {
8061 auto LPCRegion =
8062 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
8063 OMPSimdLexicalScope Scope(*this, D);
8064 CGM.getOpenMPRuntime().emitInlinedDirective(
8065 *this,
8066 isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
8067 : D.getDirectiveKind(),
8068 CodeGen);
8069 }
8070 // Check for outer lastprivate conditional update.
8071 checkForLastprivateConditionalUpdate(*this, D);
8072}
8073