clang: lib/CodeGen/CGOpenMPRuntimeGPU.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

23#include "llvm/ADT/SmallPtrSet.h"

24#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"

25#include "llvm/Frontend/OpenMP/OMPGridValues.h"

26

27using namespace clang;

29using namespace llvm::omp;

30

31namespace {

32

34 llvm::FunctionCallee EnterCallee = nullptr;

35 ArrayRef<llvm::Value *> EnterArgs;

36 llvm::FunctionCallee ExitCallee = nullptr;

37 ArrayRef<llvm::Value *> ExitArgs;

38 bool Conditional = false;

39 llvm::BasicBlock *ContBlock = nullptr;

40

41public:

/// Builds an action from an "enter" runtime callee and an "exit" runtime
/// callee, each with its own argument list.
///
/// \param EnterCallee  Runtime function called from Enter().
/// \param EnterArgs    Arguments passed to \p EnterCallee.
/// \param ExitCallee   Stored in the ExitCallee member.
/// \param ExitArgs     Stored in the ExitArgs member.
/// \param Conditional  When true, Enter() tests the result of the enter call
///                     for non-null and emits a conditional branch on it.
///
/// The argument lists are kept as ArrayRef members.
/// NOTE(review): the caller presumably has to keep the underlying arrays
/// alive for as long as this action is used - confirm.
42 NVPTXActionTy(llvm::FunctionCallee EnterCallee,

43 ArrayRef<llvm::Value *> EnterArgs,

44 llvm::FunctionCallee ExitCallee,

45 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)

46 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

47 ExitArgs(ExitArgs), Conditional(Conditional) {}

48 void Enter(CodeGenFunction &CGF) override {

49 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);

50 if (Conditional) {

51 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);

54

55 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);

57 }

58 }

59 void Done(CodeGenFunction &CGF) {

60

63 }

64 void Exit(CodeGenFunction &CGF) override {

66 }

67};

68

69

70

71

72

/// Scoped override of an execution-mode variable: the constructor records the
/// current value and installs a new one, and the destructor puts the recorded
/// value back.
///
/// NOTE(review): the member declarations and the constructor signature are
/// not visible in this listing. ExecMode is presumably held by reference so
/// that the writes below reach the caller's variable - confirm.
73class ExecutionRuntimeModesRAII {

74private:

78

79public:

82 : ExecMode(ExecMode) {

// Remember the mode that was active on entry ...
83 SavedExecMode = ExecMode;

// ... and switch to the mode requested for this scope.
84 ExecMode = EntryMode;

85 }

// Restore the mode recorded by the constructor.
86 ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }

87};

88

91 if (const auto *ASE = dyn_cast(RefExpr)) {

92 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();

93 while (const auto *TempASE = dyn_cast(Base))

94 Base = TempASE->getBase()->IgnoreParenImpCasts();

95 RefExpr = Base;

96 } else if (auto *OASE = dyn_cast(RefExpr)) {

97 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();

98 while (const auto *TempOASE = dyn_cast(Base))

99 Base = TempOASE->getBase()->IgnoreParenImpCasts();

100 while (const auto *TempASE = dyn_cast(Base))

101 Base = TempASE->getBase()->IgnoreParenImpCasts();

102 RefExpr = Base;

103 }

105 if (const auto *DE = dyn_cast(RefExpr))

106 return cast(DE->getDecl()->getCanonicalDecl());

108 return cast(ME->getMemberDecl()->getCanonicalDecl());

109}

110

111static RecordDecl *buildRecordForGlobalizedVars(

114 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

115 &MappedDeclsFields,

116 int BufSize) {

117 using VarsDataTy = std::pair<CharUnits , const ValueDecl *>;

118 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())

119 return nullptr;

121 for (const ValueDecl *D : EscapedDecls)

122 GlobalizedVars.emplace_back(C.getDeclAlign(D), D);

123 for (const ValueDecl *D : EscapedDeclsForTeams)

124 GlobalizedVars.emplace_back(C.getDeclAlign(D), D);

125

126

127

128

129

130 RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");

133 EscapedDeclsForTeams);

134 for (const auto &Pair : GlobalizedVars) {

135 const ValueDecl *VD = Pair.second;

138 Type = C.getPointerType(Type.getNonReferenceType());

139 else

140 Type = Type.getNonReferenceType();

143 if (SingleEscaped.count(VD)) {

147 nullptr, false,

153 I != E; ++I)

154 Field->addAttr(*I);

155 }

156 } else {

157 if (BufSize > 1) {

158 llvm::APInt ArraySize(32, BufSize);

159 Type = C.getConstantArrayType(Type, ArraySize, nullptr,

161 }

165 nullptr, false,

168 llvm::APInt Align(32, Pair.first.getQuantity());

169 Field->addAttr(AlignedAttr::CreateImplicit(

170 C, true,

172 C.getIntTypeForBitwidth(32, 0),

174 {}, AlignedAttr::GNU_aligned));

175 }

176 GlobalizedRD->addDecl(Field);

177 MappedDeclsFields.try_emplace(VD, Field);

178 }

180 return GlobalizedRD;

181}

182

183

184class CheckVarsEscapingDeclContext final

186 CodeGenFunction &CGF;

187 llvm::SetVector<const ValueDecl *> EscapedDecls;

188 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;

189 llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;

190 llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;

191 RecordDecl *GlobalizedRD = nullptr;

192 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

193 bool AllEscaped = false;

194 bool IsForCombinedParallelRegion = false;

195

196 void markAsEscaped(const ValueDecl *VD) {

197

199 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))

200 return;

202

203 if (VD->hasAttrs() && VD->hasAttr())

204 return;

205

206 bool IsCaptured = false;

207 if (auto *CSI = CGF.CapturedStmtInfo) {

208 if (const FieldDecl *FD = CSI->lookup(cast(VD))) {

209

210

211 IsCaptured = true;

212 if (!IsForCombinedParallelRegion) {

213 if (!FD->hasAttrs())

214 return;

215 const auto *Attr = FD->getAttr();

216 if (!Attr)

217 return;

218 if (((Attr->getCaptureKind() != OMPC_map) &&

220 ((Attr->getCaptureKind() == OMPC_map) &&

221 !FD->getType()->isAnyPointerType()))

222 return;

223 }

224 if (!FD->getType()->isReferenceType()) {

226 "Parameter captured by value with variably modified type");

227 EscapedParameters.insert(VD);

228 } else if (!IsForCombinedParallelRegion) {

229 return;

230 }

231 }

232 }

233 if ((!CGF.CapturedStmtInfo ||

234 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&

236

237 return;

239

240

241 if (IsCaptured)

242 EscapedVariableLengthDecls.insert(VD);

243 else

244 DelayedVariableLengthDecls.insert(VD);

245 } else

246 EscapedDecls.insert(VD);

247 }

248

249 void VisitValueDecl(const ValueDecl *VD) {

251 markAsEscaped(VD);

252 if (const auto *VarD = dyn_cast(VD)) {

254 const bool SavedAllEscaped = AllEscaped;

256 Visit(VarD->getInit());

257 AllEscaped = SavedAllEscaped;

258 }

259 }

260 }

261 void VisitOpenMPCapturedStmt(const CapturedStmt *S,

262 ArrayRef<OMPClause *> Clauses,

263 bool IsCombinedParallelRegion) {

264 if (!S)

265 return;

266 for (const CapturedStmt::Capture &C : S->captures()) {

267 if (C.capturesVariable() && C.capturesVariableByCopy()) {

268 const ValueDecl *VD = C.getCapturedVar();

269 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;

270 if (IsCombinedParallelRegion) {

271

272

273

274 IsForCombinedParallelRegion = false;

275 for (const OMPClause *C : Clauses) {

277 C->getClauseKind() == OMPC_reduction ||

278 C->getClauseKind() == OMPC_linear ||

279 C->getClauseKind() == OMPC_private)

280 continue;

281 ArrayRef<const Expr *> Vars;

282 if (const auto *PC = dyn_cast(C))

283 Vars = PC->getVarRefs();

284 else if (const auto *PC = dyn_cast(C))

285 Vars = PC->getVarRefs();

286 else

287 llvm_unreachable("Unexpected clause.");

288 for (const auto *E : Vars) {

289 const Decl *D =

292 IsForCombinedParallelRegion = true;

293 break;

294 }

295 }

296 if (IsForCombinedParallelRegion)

297 break;

298 }

299 }

300 markAsEscaped(VD);

302 VisitValueDecl(VD);

303 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

304 }

305 }

306 }

307

/// Builds the record for the escaped declarations collected so far and stores
/// it in GlobalizedRD; MappedDeclsFields is passed along to be filled.
///
/// \param IsInTTDRegion  Selects which list the escaped declarations are
///                       passed as: the "teams" list when true, the
///                       "parallel" list otherwise. The other list is left
///                       empty.
///
/// Must be called at most once while GlobalizedRD is still null (asserted).
308 void buildRecordForGlobalizedVars(bool IsInTTDRegion) {

309 assert(!GlobalizedRD &&

310 "Record for globalized variables is built already.");

311 ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;

// The target's warp size is forwarded as the last argument (BufSize) of the
// file-level helper.
312 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;

313 if (IsInTTDRegion)

314 EscapedDeclsForTeams = EscapedDecls.getArrayRef();

315 else

316 EscapedDeclsForParallel = EscapedDecls.getArrayRef();

// `::` selects the file-level function of the same name rather than this
// member.
317 GlobalizedRD = ::buildRecordForGlobalizedVars(

318 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,

319 MappedDeclsFields, WarpSize);

320 }

321

322public:

/// Creates the checker for the function being emitted by \p CGF.
///
/// \param CGF              Code generation state consulted while visiting.
/// \param TeamsReductions  Declarations used to pre-populate EscapedDecls,
///                         so they are in the escaped set before any
///                         statement is visited.
323 CheckVarsEscapingDeclContext(CodeGenFunction &CGF,

324 ArrayRef<const ValueDecl *> TeamsReductions)

325 : CGF(CGF), EscapedDecls(llvm::from_range, TeamsReductions) {}

326 ~CheckVarsEscapingDeclContext() = default;

327 void VisitDeclStmt(const DeclStmt *S) {

328 if (!S)

329 return;

331 if (const auto *VD = dyn_cast_or_null(D))

332 VisitValueDecl(VD);

333 }

334 void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {

335 if (!D)

336 return;

337 if (!D->hasAssociatedStmt())

338 return;

339 if (const auto *S =

340 dyn_cast_or_null(D->getAssociatedStmt())) {

341

342

343 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;

345 if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {

346 VisitStmt(S->getCapturedStmt());

347 return;

348 }

349 VisitOpenMPCapturedStmt(

350 S, D->clauses(),

351 CaptureRegions.back() == OMPD_parallel &&

353 }

354 }

/// Walks the captures of a captured statement and marks the selected captured
/// variables as escaped. A null \p S is ignored.
355 void VisitCapturedStmt(const CapturedStmt *S) {

356 if (!S)

357 return;

358 for (const CapturedStmt::Capture &C : S->captures()) {

// NOTE(review): as listed, this selects variables captured BY COPY, whereas
// VisitLambdaExpr (LCK_ByRef) and VisitBlockExpr (isByRef) select
// by-reference captures. A leading `!` on capturesVariableByCopy() may have
// been lost when this listing was produced - verify against the real file.
359 if (C.capturesVariable() && C.capturesVariableByCopy()) {

360 const ValueDecl *VD = C.getCapturedVar();

361 markAsEscaped(VD);

// NOTE(review): one source line between these two calls is missing from the
// listing; this call is presumably guarded by a condition - confirm.
363 VisitValueDecl(VD);

364 }

365 }

366 }

/// Marks every variable that a lambda captures by reference as escaped.
/// Other capture kinds are skipped. A null \p E is ignored.
367 void VisitLambdaExpr(const LambdaExpr *E) {

368 if (!E)

369 return;

370 for (const LambdaCapture &C : E->captures()) {

371 if (C.capturesVariable()) {

// Only by-reference captures are treated as escaping here.
372 if (C.getCaptureKind() == LCK_ByRef) {

373 const ValueDecl *VD = C.getCapturedVar();

374 markAsEscaped(VD);

// NOTE(review): one source line between these two calls is missing from the
// listing; this call is presumably guarded by a condition - confirm.
376 VisitValueDecl(VD);

377 }

378 }

379 }

380 }

381 void VisitBlockExpr(const BlockExpr *E) {

382 if (!E)

383 return;

385 if (C.isByRef()) {

386 const VarDecl *VD = C.getVariable();

387 markAsEscaped(VD);

389 VisitValueDecl(VD);

390 }

391 }

392 }

/// Visits the arguments of a call. While an lvalue argument is being visited
/// AllEscaped is forced to true; other arguments are visited with AllEscaped
/// unchanged. Null arguments and a null \p E are ignored.
393 void VisitCallExpr(const CallExpr *E) {

394 if (!E)

395 return;

396 for (const Expr *Arg : E->arguments()) {

397 if (!Arg)

398 continue;

399 if (Arg->isLValue()) {

// Save/force/restore: the flag is only raised for the duration of this one
// argument's traversal.
400 const bool SavedAllEscaped = AllEscaped;

401 AllEscaped = true;

402 Visit(Arg);

403 AllEscaped = SavedAllEscaped;

404 } else {

405 Visit(Arg);

406 }

407 }

// NOTE(review): one source line after the loop is missing from this listing.
409 }

410 void VisitDeclRefExpr(const DeclRefExpr *E) {

411 if (!E)

412 return;

413 const ValueDecl *VD = E->getDecl();

414 if (AllEscaped)

415 markAsEscaped(VD);

417 VisitValueDecl(VD);

419 VisitValueDecl(VD);

420 }

421 void VisitUnaryOperator(const UnaryOperator *E) {

422 if (!E)

423 return;

424 if (E->getOpcode() == UO_AddrOf) {

425 const bool SavedAllEscaped = AllEscaped;

426 AllEscaped = true;

428 AllEscaped = SavedAllEscaped;

429 } else {

431 }

432 }

433 void VisitImplicitCastExpr(const ImplicitCastExpr *E) {

434 if (!E)

435 return;

436 if (E->getCastKind() == CK_ArrayToPointerDecay) {

437 const bool SavedAllEscaped = AllEscaped;

438 AllEscaped = true;

440 AllEscaped = SavedAllEscaped;

441 } else {

443 }

444 }

/// Generic expression handler: visits all non-null children and restores
/// AllEscaped to its incoming value afterwards. A null \p E is ignored.
445 void VisitExpr(const Expr *E) {

446 if (!E)

447 return;

448 bool SavedAllEscaped = AllEscaped;

// NOTE(review): the source line directly before this assignment is missing
// from the listing; the reset to false may be conditional - confirm.
450 AllEscaped = false;

451 for (const Stmt *Child : E->children())

452 if (Child)

453 Visit(Child);

454 AllEscaped = SavedAllEscaped;

455 }

/// Generic statement handler: visits every non-null child of \p S.
/// A null \p S is ignored. AllEscaped is not modified here.
456 void VisitStmt(const Stmt *S) {

457 if (!S)

458 return;

459 for (const Stmt *Child : S->children())

460 if (Child)

461 Visit(Child);

462 }

463

464

465

/// Returns the record for the globalized variables, building it on demand.
///
/// \param IsInTTDRegion  Forwarded to buildRecordForGlobalizedVars() when the
///                       record has not been built yet; unused otherwise.
/// \returns GlobalizedRD, which is still null if the build produced no
///          record. In that case the next call attempts the build again.
466 const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {

467 if (!GlobalizedRD)

468 buildRecordForGlobalizedVars(IsInTTDRegion);

469 return GlobalizedRD;

470 }

471

472

/// Returns the field recorded in MappedDeclsFields for \p VD.
///
/// Requires that the record has already been built (asserted on
/// GlobalizedRD).
/// NOTE(review): lookup() presumably yields a null pointer when \p VD has no
/// entry - confirm against the DenseMap documentation.
473 const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {

474 assert(GlobalizedRD &&

475 "Record for globalized variables must be generated already.");

476 return MappedDeclsFields.lookup(VD);

477 }

478

479

/// Returns a view of EscapedDecls: the declarations inserted by
/// markAsEscaped() plus those supplied to the constructor.
480 ArrayRef<const ValueDecl *> getEscapedDecls() const {

481 return EscapedDecls.getArrayRef();

482 }

483

484

485

/// Returns the set of declarations that markAsEscaped() recorded in
/// EscapedParameters (captured fields whose type is not a reference type).
486 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {

487 return EscapedParameters;

488 }

489

490

491

/// Returns a view of EscapedVariableLengthDecls, which markAsEscaped() fills
/// on its IsCaptured path.
492 ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const {

493 return EscapedVariableLengthDecls.getArrayRef();

494 }

495

496

497

/// Returns a view of DelayedVariableLengthDecls, which markAsEscaped() fills
/// when the declaration was not found among the captures (IsCaptured false).
498 ArrayRef<const ValueDecl *> getDelayedVariableLengthDecls() const {

499 return DelayedVariableLengthDecls.getArrayRef();

500 }

501};

502}

503

/// Returns the current value of CurrentExecutionMode.
/// NOTE(review): the return-type line of this definition is missing from the
/// listing.
505CGOpenMPRuntimeGPU::getExecutionMode() const {

506 return CurrentExecutionMode;

507}

508

/// Returns the current value of CurrentDataSharingMode.
/// NOTE(review): the return-type line of this definition is missing from the
/// listing.
510CGOpenMPRuntimeGPU::getDataSharingMode() const {

511 return CurrentDataSharingMode;

512}

513

514

517 const auto *CS = D.getInnermostCapturedStmt();

518 const auto *Body =

519 CS->getCapturedStmt()->IgnoreContainers(true);

521

522 if (const auto *NestedDir =

523 dyn_cast_or_null(ChildStmt)) {

525 switch (D.getDirectiveKind()) {

526 case OMPD_target:

528 return true;

529 if (DKind == OMPD_teams) {

530 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(

531 true);

532 if (!Body)

533 return false;

535 if (const auto *NND =

536 dyn_cast_or_null(ChildStmt)) {

537 DKind = NND->getDirectiveKind();

539 return true;

540 }

541 }

542 return false;

543 case OMPD_target_teams:

545 case OMPD_target_simd:

546 case OMPD_target_parallel:

547 case OMPD_target_parallel_for:

548 case OMPD_target_parallel_for_simd:

549 case OMPD_target_teams_distribute:

550 case OMPD_target_teams_distribute_simd:

551 case OMPD_target_teams_distribute_parallel_for:

552 case OMPD_target_teams_distribute_parallel_for_simd:

553 case OMPD_parallel:

554 case OMPD_for:

555 case OMPD_parallel_for:

556 case OMPD_parallel_master:

557 case OMPD_parallel_sections:

558 case OMPD_for_simd:

559 case OMPD_parallel_for_simd:

560 case OMPD_cancel:

561 case OMPD_cancellation_point:

562 case OMPD_ordered:

563 case OMPD_threadprivate:

564 case OMPD_allocate:

565 case OMPD_task:

566 case OMPD_simd:

567 case OMPD_sections:

568 case OMPD_section:

569 case OMPD_single:

570 case OMPD_master:

571 case OMPD_critical:

572 case OMPD_taskyield:

573 case OMPD_barrier:

574 case OMPD_taskwait:

575 case OMPD_taskgroup:

576 case OMPD_atomic:

577 case OMPD_flush:

578 case OMPD_depobj:

579 case OMPD_scan:

580 case OMPD_teams:

581 case OMPD_target_data:

582 case OMPD_target_exit_data:

583 case OMPD_target_enter_data:

584 case OMPD_distribute:

585 case OMPD_distribute_simd:

586 case OMPD_distribute_parallel_for:

587 case OMPD_distribute_parallel_for_simd:

588 case OMPD_teams_distribute:

589 case OMPD_teams_distribute_simd:

590 case OMPD_teams_distribute_parallel_for:

591 case OMPD_teams_distribute_parallel_for_simd:

592 case OMPD_target_update:

593 case OMPD_declare_simd:

594 case OMPD_declare_variant:

595 case OMPD_begin_declare_variant:

596 case OMPD_end_declare_variant:

597 case OMPD_declare_target:

598 case OMPD_end_declare_target:

599 case OMPD_declare_reduction:

600 case OMPD_declare_mapper:

601 case OMPD_taskloop:

602 case OMPD_taskloop_simd:

603 case OMPD_master_taskloop:

604 case OMPD_master_taskloop_simd:

605 case OMPD_parallel_master_taskloop:

606 case OMPD_parallel_master_taskloop_simd:

607 case OMPD_requires:

608 case OMPD_unknown:

609 default:

610 llvm_unreachable("Unexpected directive.");

611 }

612 }

613

614 return false;

615}

616

620 switch (DirectiveKind) {

621 case OMPD_target:

622 case OMPD_target_teams:

624 case OMPD_target_parallel_loop:

625 case OMPD_target_parallel:

626 case OMPD_target_parallel_for:

627 case OMPD_target_parallel_for_simd:

628 case OMPD_target_teams_distribute_parallel_for:

629 case OMPD_target_teams_distribute_parallel_for_simd:

630 case OMPD_target_simd:

631 case OMPD_target_teams_distribute_simd:

632 return true;

633 case OMPD_target_teams_distribute:

634 return false;

635 case OMPD_target_teams_loop:

636

637

638 if (auto *TTLD = dyn_cast(&D))

639 return TTLD->canBeParallelFor();

640 return false;

641 case OMPD_parallel:

642 case OMPD_for:

643 case OMPD_parallel_for:

644 case OMPD_parallel_master:

645 case OMPD_parallel_sections:

646 case OMPD_for_simd:

647 case OMPD_parallel_for_simd:

648 case OMPD_cancel:

649 case OMPD_cancellation_point:

650 case OMPD_ordered:

651 case OMPD_threadprivate:

652 case OMPD_allocate:

653 case OMPD_task:

654 case OMPD_simd:

655 case OMPD_sections:

656 case OMPD_section:

657 case OMPD_single:

658 case OMPD_master:

659 case OMPD_critical:

660 case OMPD_taskyield:

661 case OMPD_barrier:

662 case OMPD_taskwait:

663 case OMPD_taskgroup:

664 case OMPD_atomic:

665 case OMPD_flush:

666 case OMPD_depobj:

667 case OMPD_scan:

668 case OMPD_teams:

669 case OMPD_target_data:

670 case OMPD_target_exit_data:

671 case OMPD_target_enter_data:

672 case OMPD_distribute:

673 case OMPD_distribute_simd:

674 case OMPD_distribute_parallel_for:

675 case OMPD_distribute_parallel_for_simd:

676 case OMPD_teams_distribute:

677 case OMPD_teams_distribute_simd:

678 case OMPD_teams_distribute_parallel_for:

679 case OMPD_teams_distribute_parallel_for_simd:

680 case OMPD_target_update:

681 case OMPD_declare_simd:

682 case OMPD_declare_variant:

683 case OMPD_begin_declare_variant:

684 case OMPD_end_declare_variant:

685 case OMPD_declare_target:

686 case OMPD_end_declare_target:

687 case OMPD_declare_reduction:

688 case OMPD_declare_mapper:

689 case OMPD_taskloop:

690 case OMPD_taskloop_simd:

691 case OMPD_master_taskloop:

692 case OMPD_master_taskloop_simd:

693 case OMPD_parallel_master_taskloop:

694 case OMPD_parallel_master_taskloop_simd:

695 case OMPD_requires:

696 case OMPD_unknown:

697 default:

698 break;

699 }

700 llvm_unreachable(

701 "Unknown programming model for OpenMP directive on NVPTX target.");

702}

703

704void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,

705 StringRef ParentName,

706 llvm::Function *&OutlinedFn,

707 llvm::Constant *&OutlinedFnID,

708 bool IsOffloadEntry,

710 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);

711 EntryFunctionState EST;

712 WrapperFunctionsMap.clear();

713

714 [[maybe_unused]] bool IsBareKernel = D.getSingleClause();

715 assert(!IsBareKernel && "bare kernel should not be at generic mode");

716

717

718 class NVPTXPrePostActionTy : public PrePostActionTy {

719 CGOpenMPRuntimeGPU::EntryFunctionState &EST;

720 const OMPExecutableDirective &D;

721

722 public:

723 NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,

724 const OMPExecutableDirective &D)

725 : EST(EST), D(D) {}

726 void Enter(CodeGenFunction &CGF) override {

727 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

728 RT.emitKernelInit(D, CGF, EST, false);

729

730 RT.setLocThreadIdInsertPt(CGF, true);

731 }

732 void Exit(CodeGenFunction &CGF) override {

733 auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());

734 RT.clearLocThreadIdInsertPt(CGF);

735 RT.emitKernelDeinit(CGF, EST, false);

736 }

737 } Action(EST, D);

739 IsInTTDRegion = true;

741 IsOffloadEntry, CodeGen);

742 IsInTTDRegion = false;

743}

744

/// Emits the kernel initialization sequence at the current insertion point.
///
/// \param D       The directive being lowered (its use is on a line that is
///                missing from this listing).
/// \param EST     Entry-function state; only EST.Loc is read in the visible
///                code.
/// \param IsSPMD  Selects the SPMD exec-mode flag when true, the generic one
///                otherwise. In the non-SPMD case the generic variables
///                prolog is emitted as well.
///
/// NOTE(review): two source lines (a parameter line and one statement after
/// the ExecFlags assignment) are missing from this listing.
745void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D,

747 EntryFunctionState &EST, bool IsSPMD) {

748 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;

749 Attrs.ExecFlags =

750 IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD

751 : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;

753

754 CGBuilderTy &Bld = CGF.Builder;

// createTargetInit returns an insertion point; continue emitting from there.
755 Bld.restoreIP(OMPBuilder.createTargetInit(Bld, Attrs));

756 if (!IsSPMD)

757 emitGenericVarsProlog(CGF, EST.Loc);

758}

759

760void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,

761 EntryFunctionState &EST,

762 bool IsSPMD) {

763 if (!IsSPMD)

764 emitGenericVarsEpilog(CGF);

765

766

767 ASTContext &C = CGM.getContext();

768 RecordDecl *StaticRD = C.buildImplicitRecord(

769 "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);

771 for (const RecordDecl *TeamReductionRec : TeamsReductions) {

772 CanQualType RecTy = C.getCanonicalTagType(TeamReductionRec);

774 C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,

775 C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),

776 nullptr, false,

779 StaticRD->addDecl(Field);

780 }

782 CanQualType StaticTy = C.getCanonicalTagType(StaticRD);

783 llvm::Type *LLVMReductionsBufferTy =

784 CGM.getTypes().ConvertTypeForMem(StaticTy);

785 const auto &DL = CGM.getModule().getDataLayout();

787 TeamsReductions.empty()

788 ? 0

789 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();

790 CGBuilderTy &Bld = CGF.Builder;

791 OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,

792 C.getLangOpts().OpenMPCUDAReductionBufNum);

793 TeamsReductions.clear();

794}

795

796void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,

797 StringRef ParentName,

798 llvm::Function *&OutlinedFn,

799 llvm::Constant *&OutlinedFnID,

800 bool IsOffloadEntry,

802 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);

803 EntryFunctionState EST;

804

805 bool IsBareKernel = D.getSingleClause();

806

807

808 class NVPTXPrePostActionTy : public PrePostActionTy {

809 CGOpenMPRuntimeGPU &RT;

810 CGOpenMPRuntimeGPU::EntryFunctionState &EST;

811 bool IsBareKernel;

812 DataSharingMode Mode;

813 const OMPExecutableDirective &D;

814

815 public:

816 NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,

817 CGOpenMPRuntimeGPU::EntryFunctionState &EST,

818 bool IsBareKernel, const OMPExecutableDirective &D)

819 : RT(RT), EST(EST), IsBareKernel(IsBareKernel),

820 Mode(RT.CurrentDataSharingMode), D(D) {}

821 void Enter(CodeGenFunction &CGF) override {

822 if (IsBareKernel) {

823 RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;

824 return;

825 }

826 RT.emitKernelInit(D, CGF, EST, true);

827

828 RT.setLocThreadIdInsertPt(CGF, true);

829 }

830 void Exit(CodeGenFunction &CGF) override {

831 if (IsBareKernel) {

832 RT.CurrentDataSharingMode = Mode;

833 return;

834 }

835 RT.clearLocThreadIdInsertPt(CGF);

836 RT.emitKernelDeinit(CGF, EST, true);

837 }

838 } Action(*this, EST, IsBareKernel, D);

840 IsInTTDRegion = true;

842 IsOffloadEntry, CodeGen);

843 IsInTTDRegion = false;

844}

845

/// Emits the outlined function for a target region by dispatching to either
/// the SPMD or the non-SPMD kernel emitter.
///
/// Does nothing unless IsOffloadEntry is true. \p ParentName must be
/// non-empty (asserted). The SPMD emitter is chosen when Mode is true or the
/// directive is a bare kernel; otherwise the non-SPMD emitter is used.
///
/// NOTE(review): the parameter line declaring IsOffloadEntry/CodeGen and the
/// line that defines Mode are missing from this listing; the template
/// argument of getSingleClause also appears to have been stripped (compare
/// getSingleClause<OMPXBareClause>() elsewhere in the file).
846void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(

847 const OMPExecutableDirective &D, StringRef ParentName,

848 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,

850 if (!IsOffloadEntry)

851 return;

852

853 assert(!ParentName.empty() && "Invalid target region parent name!");

854

856 bool IsBareKernel = D.getSingleClause();

// Bare kernels always go through the SPMD path, regardless of Mode.
857 if (Mode || IsBareKernel)

858 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

859 CodeGen);

860 else

861 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

862 CodeGen);

863}

864

867 llvm::OpenMPIRBuilderConfig Config(

868 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),

869 CGM.getLangOpts().OpenMPOffloadMandatory,

870 false, false,

872 Config.setDefaultTargetAS(

873 CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

874 Config.setRuntimeCC(CGM.getRuntimeCC());

875

877

878 if (CGM.getLangOpts().OpenMPIsTargetDevice)

879 llvm_unreachable("OpenMP can only handle device code.");

880

881 if (CGM.getLangOpts().OpenMPCUDAMode)

883

885 if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())

886 return;

887

888 OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,

889 "__omp_rtl_debug_kind");

890 OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,

891 "__omp_rtl_assume_teams_oversubscription");

892 OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,

893 "__omp_rtl_assume_threads_oversubscription");

894 OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,

895 "__omp_rtl_assume_no_thread_state");

896 OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,

897 "__omp_rtl_assume_no_nested_parallelism");

898}

899

901 ProcBindKind ProcBind,

903

904}

905

907 const Expr *Message,

909 CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)

910 << getOpenMPClauseName(OMPC_message);

911 return nullptr;

912}

913

914llvm::Value *

917 CGM.getDiags().Report(Loc, diag::warn_omp_gpu_unsupported_clause)

918 << getOpenMPClauseName(OMPC_severity);

919 return nullptr;

920}

921

927 if (Modifier == OMPC_NUMTHREADS_strict) {

928 CGM.getDiags().Report(Loc,

929 diag::warn_omp_gpu_unsupported_modifier_for_clause)

930 << "strict" << getOpenMPClauseName(OMPC_num_threads);

931 return;

932 }

933

934

935}

936

938 const Expr *NumTeams,

939 const Expr *ThreadLimit,

941

946

947 bool PrevIsInTTDRegion = IsInTTDRegion;

948 IsInTTDRegion = false;

949 auto *OutlinedFun =

951 CGF, D, ThreadIDVar, InnermostKind, CodeGen));

952 IsInTTDRegion = PrevIsInTTDRegion;

954 llvm::Function *WrapperFun =

955 createParallelDataSharingWrapper(OutlinedFun, D);

956 WrapperFunctionsMap[OutlinedFun] = WrapperFun;

957 }

958

959 return OutlinedFun;

960}

961

962

963

964static void

968 "expected teams directive.");

972 Ctx,

973 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(

974 true))) {

975 Dir = dyn_cast_or_null(S);

977 Dir = nullptr;

978 }

979 }

980 if (!Dir)

981 return;

983 for (const Expr *E : C->getVarRefs())

985 }

986}

987

988

989static void

993 "expected teams directive.");

995 for (const Expr *E : C->privates())

997 }

998}

999

1005

1006 const RecordDecl *GlobalizedRD = nullptr;

1008 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

1009 unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;

1010

1015 if (!LastPrivatesReductions.empty()) {

1016 GlobalizedRD = ::buildRecordForGlobalizedVars(

1017 CGM.getContext(), {}, LastPrivatesReductions, MappedDeclsFields,

1018 WarpSize);

1019 }

1020 } else if (!LastPrivatesReductions.empty()) {

1021 assert(!TeamAndReductions.first &&

1022 "Previous team declaration is not expected.");

1023 TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();

1024 std::swap(TeamAndReductions.second, LastPrivatesReductions);

1025 }

1026

1027

1031 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1032 &MappedDeclsFields;

1033

1034 public:

1035 NVPTXPrePostActionTy(

1037 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1038 &MappedDeclsFields)

1039 : Loc(Loc), GlobalizedRD(GlobalizedRD),

1040 MappedDeclsFields(MappedDeclsFields) {}

1042 auto &Rt =

1044 if (GlobalizedRD) {

1045 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

1046 I->getSecond().MappedParams =

1047 std::make_uniqueCodeGenFunction::OMPMapVars();

1048 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

1049 for (const auto &Pair : MappedDeclsFields) {

1050 assert(Pair.getFirst()->isCanonicalDecl() &&

1051 "Expected canonical declaration");

1052 Data.try_emplace(Pair.getFirst());

1053 }

1054 }

1055 Rt.emitGenericVarsProlog(CGF, Loc);

1056 }

1059 .emitGenericVarsEpilog(CGF);

1060 }

1061 } Action(Loc, GlobalizedRD, MappedDeclsFields);

1062 CodeGen.setAction(Action);

1064 CGF, D, ThreadIDVar, InnermostKind, CodeGen);

1065

1066 return OutlinedFun;

1067}

1068

1069void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,

1072 return;

1073

1075

1076 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1077 if (I == FunctionGlobalizedDecls.end())

1078 return;

1079

1080 for (auto &Rec : I->getSecond().LocalVarData) {

1082 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);

1084

1085

1086 llvm::Value *ParValue;

1087 if (EscapedParam) {

1091 }

1092

1093

1095 llvm::CallBase *VoidPtr =

1097 CGM.getModule(), OMPRTL___kmpc_alloc_shared),

1098 AllocArgs, VD->getName());

1099

1100 VoidPtr->addRetAttr(llvm::Attribute::get(

1103

1104

1106 VoidPtr, Bld.getPtrTy(0), VD->getName() + "_on_stack");

1107 LValue VarAddr =

1109 Rec.second.PrivateAddr = VarAddr.getAddress();

1110 Rec.second.GlobalizedVal = VoidPtr;

1111

1112

1113 if (EscapedParam) {

1115 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());

1116 }

1118 VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation()));

1119 }

1120

1121 for (const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {

1123 std::pair<llvm::Value *, llvm::Value *> AddrSizePair =

1125 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);

1127 CGM.getContext().getDeclAlign(VD),

1129 I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress());

1130 }

1131 I->getSecond().MappedParams->apply(CGF);

1132}

1133

1135 const VarDecl *VD) const {

1136 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1137 if (I == FunctionGlobalizedDecls.end())

1138 return false;

1139

1140

1141 return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);

1142}

1143

1144std::pair<llvm::Value *, llvm::Value *>

1148

1149

1151 CharUnits Align = CGM.getContext().getDeclAlign(VD);

1152 Size = Bld.CreateNUWAdd(

1153 Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));

1154 llvm::Value *AlignVal =

1156 Size = Bld.CreateUDiv(Size, AlignVal);

1157 Size = Bld.CreateNUWMul(Size, AlignVal);

1158

1159

1160 llvm::Value *AllocArgs[] = {Size};

1161 llvm::CallBase *VoidPtr =

1163 CGM.getModule(), OMPRTL___kmpc_alloc_shared),

1164 AllocArgs, VD->getName());

1165 VoidPtr->addRetAttr(llvm::Attribute::get(

1166 CGM.getLLVMContext(), llvm::Attribute::Alignment, Align.getQuantity()));

1167

1168 return std::make_pair(VoidPtr, Size);

1169}

1170

1173 const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {

1174

1176 CGM.getModule(), OMPRTL___kmpc_free_shared),

1177 {AddrSizePair.first, AddrSizePair.second});

1178}

1179

1180void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {

1182 return;

1183

1184 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1185 if (I != FunctionGlobalizedDecls.end()) {

1186

1187

1188 for (const auto &AddrSizePair :

1189 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {

1191 CGM.getModule(), OMPRTL___kmpc_free_shared),

1192 {AddrSizePair.first, AddrSizePair.second});

1193 }

1194

1195 for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {

1197 I->getSecond().MappedParams->restore(CGF);

1198

1199 llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

1202 CGM.getModule(), OMPRTL___kmpc_free_shared),

1203 FreeArgs);

1204 }

1205 }

1206}

1207

1211 llvm::Function *OutlinedFn,

1214 return;

1215

1216 bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

1217

1219 ".zero.addr");

1222

1223

1224 if (IsBareKernel)

1225 OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));

1226 else

1227 OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).emitRawPointer(CGF));

1228 OutlinedFnArgs.push_back(ZeroAddr.getPointer());

1229 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

1231}

1232

1239 return;

1240

1241 auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,

1245 llvm::Value *NumThreadsVal = NumThreads;

1246 llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];

1247 llvm::PointerType *FnPtrTy = llvm::PointerType::get(

1248 CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());

1249

1250 llvm::Value *ID = llvm::ConstantPointerNull::get(FnPtrTy);

1251 if (WFn)

1252 ID = Bld.CreateBitOrPointerCast(WFn, FnPtrTy);

1253

1254 llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);

1255

1256

1257

1258

1260

1262 llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),

1263 "captured_vars_addrs");

1264

1265 if (!CapturedVars.empty()) {

1266

1268 unsigned Idx = 0;

1269 for (llvm::Value *V : CapturedVars) {

1271 llvm::Value *PtrV;

1272 if (V->getType()->isIntegerTy())

1273 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);

1274 else

1278 ++Idx;

1279 }

1280 }

1281

1282 llvm::Value *IfCondVal = nullptr;

1283 if (IfCond)

1285 false);

1286 else

1287 IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);

1288

1289 if (!NumThreadsVal)

1290 NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);

1291 else

1292 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);

1293

1294

1295 llvm::Value *StrictNumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, 0);

1296

1297 assert(IfCondVal && "Expected a value");

1299 llvm::Value *Args[] = {

1300 RTLoc,

1302 IfCondVal,

1303 NumThreadsVal,

1304 llvm::ConstantInt::get(CGF.Int32Ty, -1),

1305 FnPtr,

1306 ID,

1307 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),

1309 llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size()),

1310 StrictNumThreadsVal};

1311

1313 CGM.getModule(), OMPRTL___kmpc_parallel_60),

1314 Args);

1315 };

1316

1318 RCG(CGF);

1319}

1320

1321void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {

1322

1324 return;

1325

1326

1327 llvm::Value *Args[] = {

1328 llvm::ConstantPointerNull::get(

1330 llvm::ConstantInt::get(CGF.Int32Ty, 0, true)};

1332 CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),

1333 Args);

1334}

1335

1339 bool) {

1340

1342 return;

1343

1347

1349 CGM.getModule(), OMPRTL___kmpc_barrier),

1350 Args);

1351}

1352

1356 const Expr *Hint) {

1357 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");

1358 llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");

1359 llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");

1360 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");

1361 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");

1362

1364

1365

1367 CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));

1368

1369 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);

1370

1371

1372 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);

1373

1374

1379 CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal,

1380 true);

1381

1382

1384 llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc);

1385 llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);

1386 CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);

1387

1388

1389

1392 llvm::Value *CmpThreadToCounter =

1393 CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);

1394 CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);

1395

1396

1398

1399

1401 Hint);

1402

1403

1404

1405

1406

1408

1410 CGM.getModule(), OMPRTL___kmpc_syncwarp),

1411 Mask);

1412

1413 llvm::Value *IncCounterVal =

1414 CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));

1417

1418

1419 CGF.EmitBlock(ExitBB, true);

1420}

1421

1422

1427 "Cast type must sized.");

1429 "Val type must sized.");

1431 if (ValTy == CastTy)

1432 return Val;

1435 return CGF.Builder.CreateBitCast(Val, LLVMCastTy);

1437 return CGF.Builder.CreateIntCast(Val, LLVMCastTy,

1441 CGF.EmitStoreOfScalar(Val, ValCastItem, false, ValTy,

1444 return CGF.EmitLoadOfScalar(CastItem, false, CastTy, Loc,

1447}

1448

1449

1450

1451

1452

1453

1454

1455

1456

1457

1458

1459

1460

1461

1462

1463

1464

1465

1466

1467

1468

1469

1470

1471

1472

1473

1474

1475

1476

1477

1478

1479

1480

1481

1482

1483

1484

1485

1486

1487

1488

1489

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509

1510

1511

1512

1513

1514

1515

1516

1517

1518

1519

1520

1521

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537

1538

1539

1540

1541

1542

1543

1544

1545

1546

1547

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565

1566

1567

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580

1581

1582

1583

1584

1585

1586

1587

1588

1589

1590

1591

1592

1593

1594

1595

1596

1597

1598

1599

1600

1601

1602

1603

1604

1605

1606

1607

1608

1609

1610

1611

1612

1613

1614

1615

1616

1617

1618

1619

1620

1621

1622

1623

1624

1625

1626

1627

1628

1629

1630

1631

1632

1633

1634

1635

1636

1637

1638

1639

1640

1641

1642

1643

1644

1645

1646

1647

1648

1649

1650

1651

1652

1653

1654

1655

1656

1657

1658

1659

1660

1661

1662

1663

1664

1665

1666

1667

1668

1669

1670

1671

1672

1673

1674

1675

1676

1677

1678

1679

1680

1681

1682

1683

1684

1685

1686

1687

1688

1689

1690

1696 return;

1697

1700

1702

1704 assert(!TeamsReduction && !ParallelReduction &&

1705 "Invalid reduction selection in emitReduction.");

1706 (void)ParallelReduction;

1708 ReductionOps, Options);

1709 return;

1710 }

1711

1712 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;

1714 int Cnt = 0;

1715 for (const Expr *DRE : Privates) {

1717 ++Cnt;

1718 }

1719 const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(

1720 CGM.getContext(), PrivatesReductions, {}, VarFieldMap, 1);

1721

1722 if (TeamsReduction)

1723 TeamsReductions.push_back(ReductionRec);

1724

1725

1727

1728 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

1729 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),

1731 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),

1732 CGF.Builder.GetInsertPoint());

1733 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(

1736

1738 unsigned Idx = 0;

1740 llvm::Type *ElementType;

1741 llvm::Value *Variable;

1742 llvm::Value *PrivateVariable;

1743 llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen = nullptr;

1745 const auto *RHSVar =

1748 const auto *LHSVar =

1751 llvm::OpenMPIRBuilder::EvalKind EvalKind;

1754 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;

1755 break;

1757 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;

1758 break;

1760 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;

1761 break;

1762 }

1763 auto ReductionGen = [&](InsertPointTy CodeGenIP, unsigned I,

1764 llvm::Value **LHSPtr, llvm::Value **RHSPtr,

1765 llvm::Function *NewFunc) {

1766 CGF.Builder.restoreIP(CodeGenIP);

1767 auto *CurFn = CGF.CurFn;

1768 CGF.CurFn = NewFunc;

1769

1776

1780

1781 CGF.CurFn = CurFn;

1782

1783 return InsertPointTy(CGF.Builder.GetInsertBlock(),

1784 CGF.Builder.GetInsertPoint());

1785 };

1786 ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(

1787 ElementType, Variable, PrivateVariable, EvalKind,

1788 nullptr, ReductionGen, AtomicReductionGen,

1789 nullptr));

1790 Idx++;

1791 }

1792

1793 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =

1794 cantFail(OMPBuilder.createReductionsGPU(

1795 OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, {}, false,

1796 TeamsReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,

1798 C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc));

1799 CGF.Builder.restoreIP(AfterIP);

1800}

1801

1804 const VarDecl *NativeParam) const {

1806 return NativeParam;

1811 if (const auto *Attr = FD->getAttr()) {

1812 if (Attr->getCaptureKind() == OMPC_map) {

1813 PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,

1815 }

1816 }

1817 ArgType = CGM.getContext().getPointerType(PointeeTy);

1819 enum { NVPTX_local_addr = 5 };

1824 CGM.getContext(), nullptr, NativeParam->getLocation(),

1827 CGM.getContext(),

1831 nullptr, SC_None, nullptr);

1832}

1833

1836 const VarDecl *NativeParam,

1837 const VarDecl *TargetParam) const {

1838 assert(NativeParam != TargetParam &&

1840 "Native arg must not be the same as target arg.");

1844 const Type *NonQualTy = QC.strip(NativeParamType);

1846 unsigned NativePointeeAddrSpace =

1849 llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, false,

1851

1853 TargetAddr,

1854 llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));

1856 CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, false,

1857 NativeParamType);

1858 return NativeParamAddr;

1859}

1860

1865 TargetArgs.reserve(Args.size());

1866 auto *FnType = OutlinedFn.getFunctionType();

1867 for (unsigned I = 0, E = Args.size(); I < E; ++I) {

1868 if (FnType->isVarArg() && FnType->getNumParams() <= I) {

1869 TargetArgs.append(std::next(Args.begin(), I), Args.end());

1870 break;

1871 }

1872 llvm::Type *TargetType = FnType->getParamType(I);

1873 llvm::Value *NativeArg = Args[I];

1874 if (!TargetType->isPointerTy()) {

1875 TargetArgs.emplace_back(NativeArg);

1876 continue;

1877 }

1878 TargetArgs.emplace_back(

1880 }

1882}

1883

1884

1885

1886

1887

1888llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

1891 const auto &CS = *D.getCapturedStmt(OMPD_parallel);

1892

1893

1899 ImplicitParamDecl ParallelLevelArg(Ctx, nullptr, D.getBeginLoc(),

1900 nullptr, Int16QTy,

1902 ImplicitParamDecl WrapperArg(Ctx, nullptr, D.getBeginLoc(),

1903 nullptr, Int32QTy,

1905 WrapperArgs.emplace_back(&ParallelLevelArg);

1906 WrapperArgs.emplace_back(&WrapperArg);

1907

1910

1911 auto *Fn = llvm::Function::Create(

1913 Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());

1914

1915

1916

1917

1918

1919

1920

1921 Fn->addFnAttr(llvm::Attribute::NoInline);

1922

1924 Fn->setLinkage(llvm::GlobalValue::InternalLinkage);

1925 Fn->setDoesNotRecurse();

1926

1929 D.getBeginLoc(), D.getBeginLoc());

1930

1931 const auto *RD = CS.getCapturedRecordDecl();

1932 auto CurField = RD->field_begin();

1933

1935 ".zero.addr");

1937

1939

1942

1944 auto CI = CS.capture_begin();

1945

1946

1947

1950 llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();

1951 llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};

1953 CGM.getModule(), OMPRTL___kmpc_get_shared_variables),

1954 DataSharingArgs);

1955

1956

1957

1959 if (CS.capture_size() > 0 ||

1965 }

1966 unsigned Idx = 0;

1970 Src, Bld.getPtrTy(0), CGF.SizeTy);

1972 TypedAddress,

1973 false,

1976 Args.emplace_back(LB);

1977 ++Idx;

1982 TypedAddress,

1983 false,

1986 Args.emplace_back(UB);

1987 ++Idx;

1988 }

1989 if (CS.capture_size() > 0) {

1990 ASTContext &CGFContext = CGF.getContext();

1991 for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {

1992 QualType ElemTy = CurField->getType();

1998 false,

2000 CI->getLocation());

2001 if (CI->capturesVariableByCopy() &&

2002 !CI->getCapturedVar()->getType()->isAnyPointerType()) {

2004 CI->getLocation());

2005 }

2006 Args.emplace_back(Arg);

2007 }

2008 }

2009

2012 return Fn;

2013}

2014

2016 const Decl *D) {

2018 return;

2019

2020 assert(D && "Expected function or captured|block decl.");

2021 assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&

2022 "Function is registered already.");

2023 assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&

2024 "Team is set but not processed.");

2025 const Stmt *Body = nullptr;

2026 bool NeedToDelayGlobalization = false;

2027 if (const auto *FD = dyn_cast(D)) {

2028 Body = FD->getBody();

2029 } else if (const auto *BD = dyn_cast(D)) {

2030 Body = BD->getBody();

2031 } else if (const auto *CD = dyn_cast(D)) {

2032 Body = CD->getBody();

2034 if (NeedToDelayGlobalization &&

2036 return;

2037 }

2038 if (!Body)

2039 return;

2040 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);

2041 VarChecker.Visit(Body);

2042 const RecordDecl *GlobalizedVarsRecord =

2043 VarChecker.getGlobalizedRecord(IsInTTDRegion);

2044 TeamAndReductions.first = nullptr;

2045 TeamAndReductions.second.clear();

2047 VarChecker.getEscapedVariableLengthDecls();

2049 VarChecker.getDelayedVariableLengthDecls();

2050 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&

2051 DelayedVariableLengthDecls.empty())

2052 return;

2053 auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

2054 I->getSecond().MappedParams =

2055 std::make_uniqueCodeGenFunction::OMPMapVars();

2056 I->getSecond().EscapedParameters.insert(

2057 VarChecker.getEscapedParameters().begin(),

2058 VarChecker.getEscapedParameters().end());

2059 I->getSecond().EscapedVariableLengthDecls.append(

2060 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());

2061 I->getSecond().DelayedVariableLengthDecls.append(

2062 DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());

2063 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

2064 for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {

2065 assert(VD->isCanonicalDecl() && "Expected canonical declaration");

2066 Data.try_emplace(VD);

2067 }

2068 if (!NeedToDelayGlobalization) {

2069 emitGenericVarsProlog(CGF, D->getBeginLoc());

2070 struct GlobalizationScope final : EHScopeStack::Cleanup {

2071 GlobalizationScope() = default;

2072

2075 .emitGenericVarsEpilog(CGF);

2076 }

2077 };

2079 }

2080}

2081

2084 if (VD && VD->hasAttr()) {

2085 const auto *A = VD->getAttr();

2087 switch (A->getAllocatorType()) {

2088 case OMPAllocateDeclAttr::OMPNullMemAlloc:

2089 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2090 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2091 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2092 break;

2093 case OMPAllocateDeclAttr::OMPThreadMemAlloc:

2095 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2096

2098 case OMPAllocateDeclAttr::OMPConstMemAlloc:

2100 break;

2101 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2103 break;

2104 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2105 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2106 break;

2107 }

2109 auto *GV = new llvm::GlobalVariable(

2110 CGM.getModule(), VarTy, false,

2111 llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),

2113 nullptr, llvm::GlobalValue::NotThreadLocal,

2114 CGM.getContext().getTargetAddressSpace(AS));

2115 CharUnits Align = CGM.getContext().getDeclAlign(VD);

2116 GV->setAlignment(Align.getAsAlign());

2119 GV, CGF.Builder.getPtrTy(CGM.getContext().getTargetAddressSpace(

2121 VarTy, Align);

2122 }

2123

2126

2128 auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

2129 if (I == FunctionGlobalizedDecls.end())

2131 auto VDI = I->getSecond().LocalVarData.find(VD);

2132 if (VDI != I->getSecond().LocalVarData.end())

2133 return VDI->second.PrivateAddr;

2137 IT != E; ++IT) {

2138 auto VDI = I->getSecond().LocalVarData.find(

2140 ->getCanonicalDecl());

2141 if (VDI != I->getSecond().LocalVarData.end())

2142 return VDI->second.PrivateAddr;

2143 }

2144 }

2145

2147}

2148

2150 FunctionGlobalizedDecls.erase(CGF.CurFn);

2152}

2153

2157 llvm::Value *&Chunk) const {

2160 ScheduleKind = OMPC_DIST_SCHEDULE_static;

2162 RT.getGPUNumThreads(CGF),

2164 S.getIterationVariable()->getType(), S.getBeginLoc());

2165 return;

2166 }

2168 CGF, S, ScheduleKind, Chunk);

2169}

2170

2174 const Expr *&ChunkExpr) const {

2175 ScheduleKind = OMPC_SCHEDULE_static;

2176

2177 llvm::APInt ChunkSize(32, 1);

2181}

2182

2186 " Expected target-based directive.");

2187 const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);

2189

2190

2191 if (C.capturesVariable())

2192 continue;

2193 const VarDecl *VD = C.getCapturedVar();

2194 const auto *RD = VD->getType()

2198 if (!RD || !RD->isLambda())

2199 continue;

2204 else

2207 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;

2208 FieldDecl *ThisCapture = nullptr;

2209 RD->getCaptureFields(Captures, ThisCapture);

2215 }

2216 for (const LambdaCapture &LC : RD->captures()) {

2217 if (LC.getCaptureKind() != LCK_ByRef)

2218 continue;

2219 const ValueDecl *VD = LC.getCapturedVar();

2220

2221

2223 continue;

2224 auto It = Captures.find(VD);

2225 assert(It != Captures.end() && "Found lambda capture without field.");

2233 }

2234 }

2235}

2236

2239 if (!VD || !VD->hasAttr())

2240 return false;

2241 const auto *A = VD->getAttr();

2242 switch(A->getAllocatorType()) {

2243 case OMPAllocateDeclAttr::OMPNullMemAlloc:

2244 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2245

2246 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2247 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2248 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2249 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2250 case OMPAllocateDeclAttr::OMPThreadMemAlloc:

2252 return true;

2253 case OMPAllocateDeclAttr::OMPConstMemAlloc:

2255 return true;

2256 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2258 return true;

2259 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2260 llvm_unreachable("Expected predefined allocator for the variables with the "

2261 "static storage.");

2262 }

2263 return false;

2264}

2265

2266

2271 if (Feature.getValue()) {

2274 return Arch;

2275 }

2276 }

2278}

2279

2280

2281

2284 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {

2286 switch (Arch) {

2297 llvm::raw_svector_ostream Out(Buffer);

2299 << " does not support unified addressing";

2300 CGM.Error(Clause->getBeginLoc(), Out.str());

2301 return;

2302 }

2387 break;

2389 llvm_unreachable("Unexpected GPU arch.");

2390 }

2391 }

2392 }

2394}

2395

2399 const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";

2400 llvm::Function *F = M->getFunction(LocSize);

2401 if (!F) {

2402 F = llvm::Function::Create(llvm::FunctionType::get(CGF.Int32Ty, {}, false),

2403 llvm::GlobalVariable::ExternalLinkage, LocSize,

2405 }

2406 return Bld.CreateCall(F, {}, "nvptx_num_threads");

2407}

2408

2412 OMPBuilder.getOrCreateRuntimeFunction(

2413 CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),

2414 Args);

2415}

static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of reduction variables from the teams ... directives.

Definition CGOpenMPRuntimeGPU.cpp:990

static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)

Cast value to the specified type.

Definition CGOpenMPRuntimeGPU.cpp:1423

static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of lastprivate variables from the teams distribute ... or teams {distribute ....

Definition CGOpenMPRuntimeGPU.cpp:965

static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)

Check for inner (nested) SPMD construct, if any.

Definition CGOpenMPRuntimeGPU.cpp:515

static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)

Definition CGOpenMPRuntimeGPU.cpp:617

static OffloadArch getOffloadArch(CodeGenModule &CGM)

Definition CGOpenMPRuntimeGPU.cpp:2267

This file defines OpenMP nodes for declarative directives.

This file defines OpenMP AST classes for clauses.

static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, bool AllowAssumedSizeArray=false, StringRef DiagType="")

This file defines OpenMP AST classes for executable directives and clauses.

static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)

Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...

QualType getPointerType(QualType T) const

Return the uniqued reference to the type for a pointer to the specified type.

QualType getUIntPtrType() const

Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.

QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const

getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...

CharUnits getTypeSizeInChars(QualType T) const

Return the size of the specified (complete) type T, in characters.

QualType getSizeType() const

Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.

const TargetInfo & getTargetInfo() const

Attr - This represents one attribute.

ArrayRef< Capture > captures() const

const BlockDecl * getBlockDecl() const

Describes the capture of either a variable, or 'this', or variable-length array type.

This captures a statement into a function.

bool capturesVariable(const VarDecl *Var) const

True if this variable has been captured.

CastKind getCastKind() const

CharUnits - This is an opaque type for sizes expressed in character units.

bool isZero() const

isZero - Test whether the quantity equals zero.

llvm::Align getAsAlign() const

getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...

QuantityType getQuantity() const

getQuantity - Get the raw integer representation of this quantity.

Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...

llvm::Value * emitRawPointer(CodeGenFunction &CGF) const

Return the pointer contained in this class after authenticating it and adding offset to it if necessa...

Address withElementType(llvm::Type *ElemTy) const

Return address with different element type, but same pointer and alignment.

llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)

Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")

Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...

Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...

CGFunctionInfo - Class to encapsulate the information about a function definition.

llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP teams.

Definition CGOpenMPRuntimeGPU.cpp:1000

void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override

Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...

Definition CGOpenMPRuntimeGPU.cpp:900

void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override

Emit a code for reduction clause.

Definition CGOpenMPRuntimeGPU.cpp:1691

DataSharingMode

Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...

@ DS_CUDA

CUDA data sharing mode.

@ DS_Generic

Generic data-sharing mode.

void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override

Choose a default value for the dist_schedule clause.

Definition CGOpenMPRuntimeGPU.cpp:2154

Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override

Gets the OpenMP-specific address of the local variable.

Definition CGOpenMPRuntimeGPU.cpp:2082

void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override

Emits OpenMP-specific function prolog.

Definition CGOpenMPRuntimeGPU.cpp:2015

void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override

Choose a default value for the schedule clause.

Definition CGOpenMPRuntimeGPU.cpp:2171

void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override

This function ought to emit, in the general case, a call to.

Definition CGOpenMPRuntimeGPU.cpp:937

void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override

Emits a critical region.

Definition CGOpenMPRuntimeGPU.cpp:1353

void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override

Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...

Definition CGOpenMPRuntimeGPU.cpp:1208

bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override

Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...

Definition CGOpenMPRuntimeGPU.cpp:2237

void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override

Get call to __kmpc_free_shared.

Definition CGOpenMPRuntimeGPU.cpp:1171

CGOpenMPRuntimeGPU(CodeGenModule &CGM)

Definition CGOpenMPRuntimeGPU.cpp:865

llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP parallel.

Definition CGOpenMPRuntimeGPU.cpp:942

void functionFinished(CodeGenFunction &CGF) override

Cleans up references to the objects in finished function.

Definition CGOpenMPRuntimeGPU.cpp:2149

llvm::Value * getGPUThreadID(CodeGenFunction &CGF)

Get the id of the current thread on the GPU.

Definition CGOpenMPRuntimeGPU.cpp:2409

void processRequiresDirective(const OMPRequiresDecl *D) override

Perform check on requires decl to ensure that target architecture supports unified addressing.

Definition CGOpenMPRuntimeGPU.cpp:2282

bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override

Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...

Definition CGOpenMPRuntimeGPU.cpp:1134

void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const override

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

Definition CGOpenMPRuntimeGPU.cpp:1861

Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override

Gets the address of the native argument based on the address of the target-specific parameter.

Definition CGOpenMPRuntimeGPU.cpp:1835

ExecutionMode

Defines the execution mode.

@ EM_NonSPMD

Non-SPMD execution mode (1 master thread, others are workers).

@ EM_Unknown

Unknown execution mode (orphaned directive).

@ EM_SPMD

SPMD execution mode (all threads are worker threads).

void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override

Emit an implicit/explicit barrier for OpenMP threads.

Definition CGOpenMPRuntimeGPU.cpp:1336

llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)

Get the maximum number of threads in a block of the GPU.

Definition CGOpenMPRuntimeGPU.cpp:2396

const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override

Translates the native parameter of outlined function if this is required for target.

Definition CGOpenMPRuntimeGPU.cpp:1803

std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override

Get call to __kmpc_alloc_shared.

Definition CGOpenMPRuntimeGPU.cpp:1145

bool isGPU() const override

Returns true if the current target is a GPU.

llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc) override

Definition CGOpenMPRuntimeGPU.cpp:915

llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc) override

Definition CGOpenMPRuntimeGPU.cpp:906

void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override

Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...

Definition CGOpenMPRuntimeGPU.cpp:1233

void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override

Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...

Definition CGOpenMPRuntimeGPU.cpp:922

void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override

Adjust some parameters for the target-based directives, like addresses of the variables captured by r...

Definition CGOpenMPRuntimeGPU.cpp:2183

virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)

Emits address of the word in a memory where current thread id is stored.

CGOpenMPRuntime(CodeGenModule &CGM)

static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)

Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...

llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)

Emits object of ident_t type with info for source location.

virtual void functionFinished(CodeGenFunction &CGF)

Cleans up references to the objects in finished function.

virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP teams directive D.

llvm::OpenMPIRBuilder OMPBuilder

An OpenMP-IR-Builder instance.

virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)

Helper to emit outlined function for 'target' directive.

bool hasRequiresUnifiedSharedMemory() const

Return whether the unified_shared_memory has been specified.

virtual void processRequiresDirective(const OMPRequiresDecl *D)

Perform check on requires decl to ensure that target architecture supports unified addressing.

llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)

Gets thread id value for the current thread.

void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)

Helper to determine the min/max number of threads/teams for D.

static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)

Returns default flags for the barriers depending on the directive, for which this barrier is going to ...

virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP parallel directive D.

virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const

Choose default schedule type and chunk value for the dist_schedule clause.

llvm::Type * getIdentTyPointerTy()

Returns pointer to ident_t type.

void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)

Emits single reduction combiner.

llvm::OpenMPIRBuilder & getOMPBuilder()

virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)

Emits a critical region.

virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)

Emit a code for reduction clause.

CapturedRegionKind getKind() const

bool isCXXThisExprCaptured() const

The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...

CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...

LValue EmitLoadOfReferenceLValue(LValue RefLVal)

CGCapturedStmtInfo * CapturedStmtInfo

llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)

createBasicBlock - Create an LLVM basic block.

Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)

Load a pointer with type PtrTy stored at address Ptr.

LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)

Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.

llvm::AssertingVH< llvm::Instruction > AllocaInsertPt

AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...

llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)

Converts Location to a DebugLoc, if debug information is enabled.

RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")

CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...

const TargetInfo & getTarget() const

void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())

Emit code for the start of a function.

llvm::Value * EvaluateExprAsBool(const Expr *E)

EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...

bool HaveInsertPoint() const

HaveInsertPoint - True if an insertion point is defined.

CGDebugInfo * getDebugInfo()

llvm::Value * getTypeSize(QualType Ty)

Returns calculated size of the specified type.

LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)

EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...

ASTContext & getContext() const

llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)

EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...

llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")

llvm::Type * ConvertTypeForMem(QualType T)

CodeGenTypes & getTypes() const

static TypeEvaluationKind getEvaluationKind(QualType T)

getEvaluationKind - Return the TypeEvaluationKind of QualType T.

void EmitBranch(llvm::BasicBlock *Block)

EmitBranch - Emit a branch to the specified basic block from the current insert block,...

RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)

CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...

LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)

void FinishFunction(SourceLocation EndLoc=SourceLocation())

FinishFunction - Complete IR generation of the current function.

llvm::Value * LoadCXXThis()

LoadCXXThis - Load the value of 'this'.

Address GetAddrOfLocalVar(const VarDecl *VD)

GetAddrOfLocalVar - Return the address of a local variable.

llvm::LLVMContext & getLLVMContext()

llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)

Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...

void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)

EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...

void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)

EmitBlock - Emit the given block.

This class organizes the cross-function state that is used while generating LLVM code.

void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)

Set the attributes on the LLVM function for the given decl and function info.

llvm::Module & getModule() const

CodeGenTypes & getTypes()

const TargetInfo & getTarget() const

CGOpenMPRuntime & getOpenMPRuntime()

Return a reference to the configured OpenMP runtime.

ASTContext & getContext() const

llvm::LLVMContext & getLLVMContext()

llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)

GetFunctionType - Get the LLVM function type for Info.

const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)

A builtin function is a freestanding function using the default C conventions.

unsigned getTargetAddressSpace(QualType T) const

FunctionArgList - Type for representing both the decl and type of parameters to a function.

LValue - This represents an lvalue reference.

Address getAddress() const

A basic class for pre|post-action for advanced codegen sequence for OpenMP region.

An abstract representation of an aligned address.

llvm::Value * getPointer() const

Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...

void setAction(PrePostActionTy &Action) const

ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.

DeclContext - This is used only as base class of specific decl types that can act as declaration cont...

void addDecl(Decl *D)

Add the declaration D into this context.

attr_iterator attr_end() const

bool isCanonicalDecl() const

Whether this particular Decl is a canonical one.

attr_iterator attr_begin() const

SourceLocation getLocation() const

DeclContext * getDeclContext()

SourceLocation getBeginLoc() const LLVM_READONLY

virtual Decl * getCanonicalDecl()

Retrieves the "canonical" declaration of the given declaration.

SourceLocation getBeginLoc() const LLVM_READONLY

This represents one expression.

Expr * IgnoreParenImpCasts() LLVM_READONLY

Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...

Expr * IgnoreParens() LLVM_READONLY

Skip past any parentheses which might surround this expression until reaching a fixed point.

bool isLValue() const

isLValue - True if this expression is an "l-value" according to the rules of the current language.

Represents a member of a struct/union/class.

static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)

GlobalDecl - represents a global declaration.

static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)

Create implicit parameter.

static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)

Returns a new integer literal with value 'V' and type 'type'.

Describes the capture of a variable or of this, or of a C++1y init-capture.

bool isInitCapture(const LambdaCapture *Capture) const

Determine whether one of this lambda's captures is an init-capture.

capture_range captures() const

Retrieve this lambda's captures.

IdentifierInfo * getIdentifier() const

Get the identifier that names this declaration, if there is one.

StringRef getName() const

Get the name of identifier for this declaration as a StringRef.

This is a basic class for representing single OpenMP clause.

This represents clause 'lastprivate' in the 'pragma omp ...' directives.

This represents clause 'reduction' in the 'pragma omp ...' directives.

This represents 'pragma omp requires...' directive.

clauselist_range clauselists()

This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.

static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)

PointerType - C99 6.7.5.1 - Pointer Declarators.

A (possibly-)qualified type.

LangAS getAddressSpace() const

Return the address space of this type.

QualType getNonReferenceType() const

If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...

QualType getCanonicalType() const

A qualifier set is used to build a set of qualifiers.

const Type * strip(QualType type)

Collect any qualifiers on the given type and return an unqualified type.

QualType apply(const ASTContext &Context, QualType QT) const

Apply the collected qualifiers to the given type.

void addAddressSpace(LangAS space)

Represents a struct/union/class.

virtual void completeDefinition()

Note that the definition of this type is now complete.

Scope - A scope is a transient data structure that is used while parsing the program.

Encodes a location in the source.

Stmt - This represents one statement.

void startDefinition()

Starts the definition of this tag declaration.

unsigned getNewAlign() const

Return the largest alignment for which a suitably-sized allocation with 'operator new(size_t)' is gua...

TargetOptions & getTargetOpts() const

Retrieve the target options.

virtual const llvm::omp::GV & getGridValue() const

virtual bool hasFeature(StringRef Feature) const

Determine whether the given target has the given feature.

llvm::StringMap< bool > FeatureMap

The map of which features have been enabled or disabled based on the command line.

The base class of the type hierarchy.

CXXRecordDecl * getAsCXXRecordDecl() const

Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...

bool isIntegerType() const

isIntegerType() does not include complex integers (a GCC extension).

bool isReferenceType() const

bool isLValueReferenceType() const

bool hasSignedIntegerRepresentation() const

Determine whether this type has a signed integer representation of some sort, e.g....

bool isVariablyModifiedType() const

Whether this type is a variably-modified type (C99 6.7.5).

Expr * getSubExpr() const

Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

Represents a variable declaration or definition.

VarDecl * getCanonicalDecl() override

Retrieves the "canonical" declaration of the given declaration.

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...

@ Type

The l-value was considered opaque, so the alignment was determined from a type.

@ Decl

The l-value was an access to a declared entity or something equivalently strong, like the address of ...

The JSON file list parser is used to communicate input to InstallAPI.

CanQual< Type > CanQualType

Represents a canonical, potentially-qualified type.

bool isa(CodeGen::Address addr)

@ ICIS_NoInit

No in-class initializer.

bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a distribute directive.

@ LCK_ByRef

Capturing by reference.

@ Private

'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...

bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a parallel-kind directive.

bool isOpenMPPrivate(OpenMPClauseKind Kind)

Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...

OpenMPDistScheduleClauseKind

OpenMP attributes for 'dist_schedule' clause.

bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a target code offload directive.

bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a teams-kind directive.

OffloadArch StringToOffloadArch(llvm::StringRef S)

OpenMPSeverityClauseKind

OpenMP attributes for 'severity' clause.

bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)

Checks if the specified directive kind is one of the composite or combined directives that need loop ...

LangAS

Defines the address space values used by the address space qualifier of QualType.

const char * OffloadArchToString(OffloadArch A)

llvm::omp::Directive OpenMPDirectiveKind

OpenMP directives.

void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)

Return the captured regions of an OpenMP directive.

OpenMPNumThreadsClauseModifier

U cast(CodeGen::Address addr)

LangAS getLangASFromTargetAS(unsigned TargetAS)

@ CXXThis

Parameter for C++ 'this' argument.

@ Other

Other implicit parameter.

OpenMPScheduleClauseKind

OpenMP attributes for 'schedule' clause.

OpenMPDirectiveKind ReductionKind

llvm::PointerType * VoidPtrTy

llvm::IntegerType * SizeTy

llvm::PointerType * VoidPtrPtrTy

llvm::IntegerType * Int32Ty