clang: lib/CodeGen/CGOpenMPRuntimeGPU.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

22#include "llvm/ADT/SmallPtrSet.h"

23#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"

24#include "llvm/Frontend/OpenMP/OMPGridValues.h"

25

26using namespace clang;

27using namespace CodeGen;

28using namespace llvm::omp;

29

30namespace {

31

33 llvm::FunctionCallee EnterCallee = nullptr;

35 llvm::FunctionCallee ExitCallee = nullptr;

38 llvm::BasicBlock *ContBlock = nullptr;

39

40public:

41 NVPTXActionTy(llvm::FunctionCallee EnterCallee,

43 llvm::FunctionCallee ExitCallee,

45 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

48 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);

49 if (Conditional) {

50 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);

53

54 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);

56 }

57 }

59

62 }

65 }

66};

67

68

69

70

71

72class ExecutionRuntimeModesRAII {

73private:

77

78public:

81 : ExecMode(ExecMode) {

82 SavedExecMode = ExecMode;

83 ExecMode = EntryMode;

84 }

85 ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }

86};

87

90 if (const auto *ASE = dyn_cast(RefExpr)) {

91 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();

92 while (const auto *TempASE = dyn_cast(Base))

93 Base = TempASE->getBase()->IgnoreParenImpCasts();

94 RefExpr = Base;

95 } else if (auto *OASE = dyn_cast(RefExpr)) {

96 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();

97 while (const auto *TempOASE = dyn_cast(Base))

98 Base = TempOASE->getBase()->IgnoreParenImpCasts();

99 while (const auto *TempASE = dyn_cast(Base))

100 Base = TempASE->getBase()->IgnoreParenImpCasts();

101 RefExpr = Base;

102 }

104 if (const auto *DE = dyn_cast(RefExpr))

105 return cast(DE->getDecl()->getCanonicalDecl());

106 const auto *ME = cast(RefExpr);

107 return cast(ME->getMemberDecl()->getCanonicalDecl());

108}

109

110static RecordDecl *buildRecordForGlobalizedVars(

113 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

114 &MappedDeclsFields,

115 int BufSize) {

116 using VarsDataTy = std::pair<CharUnits , const ValueDecl *>;

117 if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())

118 return nullptr;

120 for (const ValueDecl *D : EscapedDecls)

121 GlobalizedVars.emplace_back(C.getDeclAlign(D), D);

122 for (const ValueDecl *D : EscapedDeclsForTeams)

123 GlobalizedVars.emplace_back(C.getDeclAlign(D), D);

124

125

126

127

128

129 RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");

132 EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());

133 for (const auto &Pair : GlobalizedVars) {

134 const ValueDecl *VD = Pair.second;

137 Type = C.getPointerType(Type.getNonReferenceType());

138 else

139 Type = Type.getNonReferenceType();

142 if (SingleEscaped.count(VD)) {

146 nullptr, false,

152 I != E; ++I)

153 Field->addAttr(*I);

154 }

155 } else {

156 if (BufSize > 1) {

157 llvm::APInt ArraySize(32, BufSize);

158 Type = C.getConstantArrayType(Type, ArraySize, nullptr,

159 ArraySizeModifier::Normal, 0);

160 }

164 nullptr, false,

167 llvm::APInt Align(32, Pair.first.getQuantity());

168 Field->addAttr(AlignedAttr::CreateImplicit(

169 C, true,

171 C.getIntTypeForBitwidth(32, 0),

173 {}, AlignedAttr::GNU_aligned));

174 }

175 GlobalizedRD->addDecl(Field);

176 MappedDeclsFields.try_emplace(VD, Field);

177 }

179 return GlobalizedRD;

180}

181

182

183class CheckVarsEscapingDeclContext final

186 llvm::SetVector<const ValueDecl *> EscapedDecls;

187 llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;

188 llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;

191 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

192 bool AllEscaped = false;

193 bool IsForCombinedParallelRegion = false;

194

195 void markAsEscaped(const ValueDecl *VD) {

196

197 if (!isa(VD) ||

198 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))

199 return;

201

202 if (VD->hasAttrs() && VD->hasAttr())

203 return;

204

205 bool IsCaptured = false;

206 if (auto *CSI = CGF.CapturedStmtInfo) {

207 if (const FieldDecl *FD = CSI->lookup(cast(VD))) {

208

209

210 IsCaptured = true;

211 if (!IsForCombinedParallelRegion) {

212 if (!FD->hasAttrs())

213 return;

214 const auto *Attr = FD->getAttr();

216 return;

217 if (((Attr->getCaptureKind() != OMPC_map) &&

219 ((Attr->getCaptureKind() == OMPC_map) &&

220 !FD->getType()->isAnyPointerType()))

221 return;

222 }

223 if (!FD->getType()->isReferenceType()) {

225 "Parameter captured by value with variably modified type");

226 EscapedParameters.insert(VD);

227 } else if (!IsForCombinedParallelRegion) {

228 return;

229 }

230 }

231 }

232 if ((!CGF.CapturedStmtInfo ||

233 (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&

235

236 return;

238

239

240 if (IsCaptured)

241 EscapedVariableLengthDecls.insert(VD);

242 else

243 DelayedVariableLengthDecls.insert(VD);

244 } else

245 EscapedDecls.insert(VD);

246 }

247

248 void VisitValueDecl(const ValueDecl *VD) {

250 markAsEscaped(VD);

251 if (const auto *VarD = dyn_cast(VD)) {

252 if (!isa(VarD) && VarD->hasInit()) {

253 const bool SavedAllEscaped = AllEscaped;

255 Visit(VarD->getInit());

256 AllEscaped = SavedAllEscaped;

257 }

258 }

259 }

260 void VisitOpenMPCapturedStmt(const CapturedStmt *S,

262 bool IsCombinedParallelRegion) {

263 if (!S)

264 return;

266 if (C.capturesVariable() && C.capturesVariableByCopy()) {

267 const ValueDecl *VD = C.getCapturedVar();

268 bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;

269 if (IsCombinedParallelRegion) {

270

271

272

273 IsForCombinedParallelRegion = false;

276 C->getClauseKind() == OMPC_reduction ||

277 C->getClauseKind() == OMPC_linear ||

278 C->getClauseKind() == OMPC_private)

279 continue;

281 if (const auto *PC = dyn_cast(C))

282 Vars = PC->getVarRefs();

283 else if (const auto *PC = dyn_cast(C))

284 Vars = PC->getVarRefs();

285 else

286 llvm_unreachable("Unexpected clause.");

287 for (const auto *E : Vars) {

291 IsForCombinedParallelRegion = true;

292 break;

293 }

294 }

295 if (IsForCombinedParallelRegion)

296 break;

297 }

298 }

299 markAsEscaped(VD);

300 if (isa(VD))

301 VisitValueDecl(VD);

302 IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

303 }

304 }

305 }

306

307 void buildRecordForGlobalizedVars(bool IsInTTDRegion) {

308 assert(!GlobalizedRD &&

309 "Record for globalized variables is built already.");

311 unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;

312 if (IsInTTDRegion)

313 EscapedDeclsForTeams = EscapedDecls.getArrayRef();

314 else

315 EscapedDeclsForParallel = EscapedDecls.getArrayRef();

316 GlobalizedRD = ::buildRecordForGlobalizedVars(

317 CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,

318 MappedDeclsFields, WarpSize);

319 }

320

321public:

324 : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {

325 }

326 virtual ~CheckVarsEscapingDeclContext() = default;

327 void VisitDeclStmt(const DeclStmt *S) {

328 if (!S)

329 return;

330 for (const Decl *D : S->decls())

331 if (const auto *VD = dyn_cast_or_null(D))

332 VisitValueDecl(VD);

333 }

335 if (D)

336 return;

337 if (D->hasAssociatedStmt())

338 return;

339 if (const auto *S =

340 dyn_cast_or_null(D->getAssociatedStmt())) {

341

342

345 if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {

346 VisitStmt(S->getCapturedStmt());

347 return;

348 }

349 VisitOpenMPCapturedStmt(

350 S, D->clauses(),

351 CaptureRegions.back() == OMPD_parallel &&

353 }

354 }

355 void VisitCapturedStmt(const CapturedStmt *S) {

356 if (!S)

357 return;

359 if (C.capturesVariable() && C.capturesVariableByCopy()) {

360 const ValueDecl *VD = C.getCapturedVar();

361 markAsEscaped(VD);

362 if (isa(VD))

363 VisitValueDecl(VD);

364 }

365 }

366 }

367 void VisitLambdaExpr(const LambdaExpr *E) {

368 if (E)

369 return;

371 if (C.capturesVariable()) {

372 if (C.getCaptureKind() == LCK_ByRef) {

373 const ValueDecl *VD = C.getCapturedVar();

374 markAsEscaped(VD);

375 if (E->isInitCapture(&C) || isa(VD))

376 VisitValueDecl(VD);

377 }

378 }

379 }

380 }

381 void VisitBlockExpr(const BlockExpr *E) {

382 if (E)

383 return;

385 if (C.isByRef()) {

386 const VarDecl *VD = C.getVariable();

387 markAsEscaped(VD);

388 if (isa(VD) || VD->isInitCapture())

389 VisitValueDecl(VD);

390 }

391 }

392 }

393 void VisitCallExpr(const CallExpr *E) {

394 if (E)

395 return;

396 for (const Expr *Arg : E->arguments()) {

397 if (!Arg)

398 continue;

399 if (Arg->isLValue()) {

400 const bool SavedAllEscaped = AllEscaped;

401 AllEscaped = true;

403 AllEscaped = SavedAllEscaped;

404 } else {

406 }

407 }

408 Visit(E->getCallee());

409 }

410 void VisitDeclRefExpr(const DeclRefExpr *E) {

411 if (E)

412 return;

414 if (AllEscaped)

415 markAsEscaped(VD);

416 if (isa(VD))

417 VisitValueDecl(VD);

419 VisitValueDecl(VD);

420 }

422 if (E)

423 return;

424 if (E->getOpcode() == UO_AddrOf) {

425 const bool SavedAllEscaped = AllEscaped;

426 AllEscaped = true;

427 Visit(E->getSubExpr());

428 AllEscaped = SavedAllEscaped;

429 } else {

430 Visit(E->getSubExpr());

431 }

432 }

434 if (E)

435 return;

436 if (E->getCastKind() == CK_ArrayToPointerDecay) {

437 const bool SavedAllEscaped = AllEscaped;

438 AllEscaped = true;

439 Visit(E->getSubExpr());

440 AllEscaped = SavedAllEscaped;

441 } else {

442 Visit(E->getSubExpr());

443 }

444 }

445 void VisitExpr(const Expr *E) {

446 if (E)

447 return;

448 bool SavedAllEscaped = AllEscaped;

450 AllEscaped = false;

452 if (Child)

454 AllEscaped = SavedAllEscaped;

455 }

456 void VisitStmt(const Stmt *S) {

457 if (!S)

458 return;

459 for (const Stmt *Child : S->children())

460 if (Child)

462 }

463

464

465

466 const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {

467 if (!GlobalizedRD)

468 buildRecordForGlobalizedVars(IsInTTDRegion);

469 return GlobalizedRD;

470 }

471

472

473 const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {

474 assert(GlobalizedRD &&

475 "Record for globalized variables must be generated already.");

476 return MappedDeclsFields.lookup(VD);

477 }

478

479

481 return EscapedDecls.getArrayRef();

482 }

483

484

485

486 const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {

487 return EscapedParameters;

488 }

489

490

491

493 return EscapedVariableLengthDecls.getArrayRef();

494 }

495

496

497

499 return DelayedVariableLengthDecls.getArrayRef();

500 }

501};

502}

503

505CGOpenMPRuntimeGPU::getExecutionMode() const {

506 return CurrentExecutionMode;

507}

508

510CGOpenMPRuntimeGPU::getDataSharingMode() const {

511 return CurrentDataSharingMode;

512}

513

514

517 const auto *CS = D.getInnermostCapturedStmt();

518 const auto *Body =

519 CS->getCapturedStmt()->IgnoreContainers(true);

521

522 if (const auto *NestedDir =

523 dyn_cast_or_null(ChildStmt)) {

525 switch (D.getDirectiveKind()) {

526 case OMPD_target:

528 return true;

529 if (DKind == OMPD_teams) {

530 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(

531 true);

532 if (!Body)

533 return false;

535 if (const auto *NND =

536 dyn_cast_or_null(ChildStmt)) {

537 DKind = NND->getDirectiveKind();

539 return true;

540 }

541 }

542 return false;

543 case OMPD_target_teams:

545 case OMPD_target_simd:

546 case OMPD_target_parallel:

547 case OMPD_target_parallel_for:

548 case OMPD_target_parallel_for_simd:

549 case OMPD_target_teams_distribute:

550 case OMPD_target_teams_distribute_simd:

551 case OMPD_target_teams_distribute_parallel_for:

552 case OMPD_target_teams_distribute_parallel_for_simd:

553 case OMPD_parallel:

554 case OMPD_for:

555 case OMPD_parallel_for:

556 case OMPD_parallel_master:

557 case OMPD_parallel_sections:

558 case OMPD_for_simd:

559 case OMPD_parallel_for_simd:

560 case OMPD_cancel:

561 case OMPD_cancellation_point:

562 case OMPD_ordered:

563 case OMPD_threadprivate:

564 case OMPD_allocate:

565 case OMPD_task:

566 case OMPD_simd:

567 case OMPD_sections:

568 case OMPD_section:

569 case OMPD_single:

570 case OMPD_master:

571 case OMPD_critical:

572 case OMPD_taskyield:

573 case OMPD_barrier:

574 case OMPD_taskwait:

575 case OMPD_taskgroup:

576 case OMPD_atomic:

577 case OMPD_flush:

578 case OMPD_depobj:

579 case OMPD_scan:

580 case OMPD_teams:

581 case OMPD_target_data:

582 case OMPD_target_exit_data:

583 case OMPD_target_enter_data:

584 case OMPD_distribute:

585 case OMPD_distribute_simd:

586 case OMPD_distribute_parallel_for:

587 case OMPD_distribute_parallel_for_simd:

588 case OMPD_teams_distribute:

589 case OMPD_teams_distribute_simd:

590 case OMPD_teams_distribute_parallel_for:

591 case OMPD_teams_distribute_parallel_for_simd:

592 case OMPD_target_update:

593 case OMPD_declare_simd:

594 case OMPD_declare_variant:

595 case OMPD_begin_declare_variant:

596 case OMPD_end_declare_variant:

597 case OMPD_declare_target:

598 case OMPD_end_declare_target:

599 case OMPD_declare_reduction:

600 case OMPD_declare_mapper:

601 case OMPD_taskloop:

602 case OMPD_taskloop_simd:

603 case OMPD_master_taskloop:

604 case OMPD_master_taskloop_simd:

605 case OMPD_parallel_master_taskloop:

606 case OMPD_parallel_master_taskloop_simd:

607 case OMPD_requires:

608 case OMPD_unknown:

609 default:

610 llvm_unreachable("Unexpected directive.");

611 }

612 }

613

614 return false;

615}

616

620 switch (DirectiveKind) {

621 case OMPD_target:

622 case OMPD_target_teams:

624 case OMPD_target_parallel_loop:

625 case OMPD_target_parallel:

626 case OMPD_target_parallel_for:

627 case OMPD_target_parallel_for_simd:

628 case OMPD_target_teams_distribute_parallel_for:

629 case OMPD_target_teams_distribute_parallel_for_simd:

630 case OMPD_target_simd:

631 case OMPD_target_teams_distribute_simd:

632 return true;

633 case OMPD_target_teams_distribute:

634 return false;

635 case OMPD_target_teams_loop:

636

637

638 if (auto *TTLD = dyn_cast(&D))

639 return TTLD->canBeParallelFor();

640 return false;

641 case OMPD_parallel:

642 case OMPD_for:

643 case OMPD_parallel_for:

644 case OMPD_parallel_master:

645 case OMPD_parallel_sections:

646 case OMPD_for_simd:

647 case OMPD_parallel_for_simd:

648 case OMPD_cancel:

649 case OMPD_cancellation_point:

650 case OMPD_ordered:

651 case OMPD_threadprivate:

652 case OMPD_allocate:

653 case OMPD_task:

654 case OMPD_simd:

655 case OMPD_sections:

656 case OMPD_section:

657 case OMPD_single:

658 case OMPD_master:

659 case OMPD_critical:

660 case OMPD_taskyield:

661 case OMPD_barrier:

662 case OMPD_taskwait:

663 case OMPD_taskgroup:

664 case OMPD_atomic:

665 case OMPD_flush:

666 case OMPD_depobj:

667 case OMPD_scan:

668 case OMPD_teams:

669 case OMPD_target_data:

670 case OMPD_target_exit_data:

671 case OMPD_target_enter_data:

672 case OMPD_distribute:

673 case OMPD_distribute_simd:

674 case OMPD_distribute_parallel_for:

675 case OMPD_distribute_parallel_for_simd:

676 case OMPD_teams_distribute:

677 case OMPD_teams_distribute_simd:

678 case OMPD_teams_distribute_parallel_for:

679 case OMPD_teams_distribute_parallel_for_simd:

680 case OMPD_target_update:

681 case OMPD_declare_simd:

682 case OMPD_declare_variant:

683 case OMPD_begin_declare_variant:

684 case OMPD_end_declare_variant:

685 case OMPD_declare_target:

686 case OMPD_end_declare_target:

687 case OMPD_declare_reduction:

688 case OMPD_declare_mapper:

689 case OMPD_taskloop:

690 case OMPD_taskloop_simd:

691 case OMPD_master_taskloop:

692 case OMPD_master_taskloop_simd:

693 case OMPD_parallel_master_taskloop:

694 case OMPD_parallel_master_taskloop_simd:

695 case OMPD_requires:

696 case OMPD_unknown:

697 default:

698 break;

699 }

700 llvm_unreachable(

701 "Unknown programming model for OpenMP directive on NVPTX target.");

702}

703

705 StringRef ParentName,

706 llvm::Function *&OutlinedFn,

707 llvm::Constant *&OutlinedFnID,

708 bool IsOffloadEntry,

710 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);

711 EntryFunctionState EST;

712 WrapperFunctionsMap.clear();

713

714 [[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

715 assert(!IsBareKernel && "bare kernel should not be at generic mode");

716

717

719 CGOpenMPRuntimeGPU::EntryFunctionState &EST;

721

722 public:

723 NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,

725 : EST(EST), D(D) {}

728 RT.emitKernelInit(D, CGF, EST, false);

729

730 RT.setLocThreadIdInsertPt(CGF, true);

731 }

735 RT.emitKernelDeinit(CGF, EST, false);

736 }

737 } Action(EST, D);

739 IsInTTDRegion = true;

741 IsOffloadEntry, CodeGen);

742 IsInTTDRegion = false;

743}

744

747 EntryFunctionState &EST, bool IsSPMD) {

748 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;

749 Attrs.ExecFlags =

750 IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD

751 : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;

753

755 Bld.restoreIP(OMPBuilder.createTargetInit(Bld, Attrs));

756 if (!IsSPMD)

757 emitGenericVarsProlog(CGF, EST.Loc);

758}

759

760void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,

761 EntryFunctionState &EST,

762 bool IsSPMD) {

763 if (!IsSPMD)

764 emitGenericVarsEpilog(CGF);

765

766

768 RecordDecl *StaticRD = C.buildImplicitRecord(

769 "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);

771 for (const RecordDecl *TeamReductionRec : TeamsReductions) {

772 QualType RecTy = C.getRecordType(TeamReductionRec);

776 nullptr, false,

779 StaticRD->addDecl(Field);

780 }

782 QualType StaticTy = C.getRecordType(StaticRD);

783 llvm::Type *LLVMReductionsBufferTy =

785 const auto &DL = CGM.getModule().getDataLayout();

787 TeamsReductions.empty()

788 ? 0

789 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();

791 OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,

792 C.getLangOpts().OpenMPCUDAReductionBufNum);

793 TeamsReductions.clear();

794}

795

797 StringRef ParentName,

798 llvm::Function *&OutlinedFn,

799 llvm::Constant *&OutlinedFnID,

800 bool IsOffloadEntry,

802 ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);

803 EntryFunctionState EST;

804

806

807

810 CGOpenMPRuntimeGPU::EntryFunctionState &EST;

811 bool IsBareKernel;

812 DataSharingMode Mode;

814

815 public:

817 CGOpenMPRuntimeGPU::EntryFunctionState &EST,

819 : RT(RT), EST(EST), IsBareKernel(IsBareKernel),

820 Mode(RT.CurrentDataSharingMode), D(D) {}

822 if (IsBareKernel) {

823 RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;

824 return;

825 }

826 RT.emitKernelInit(D, CGF, EST, true);

827

828 RT.setLocThreadIdInsertPt(CGF, true);

829 }

831 if (IsBareKernel) {

832 RT.CurrentDataSharingMode = Mode;

833 return;

834 }

835 RT.clearLocThreadIdInsertPt(CGF);

836 RT.emitKernelDeinit(CGF, EST, true);

837 }

838 } Action(*this, EST, IsBareKernel, D);

840 IsInTTDRegion = true;

842 IsOffloadEntry, CodeGen);

843 IsInTTDRegion = false;

844}

845

846void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(

848 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,

850 if (!IsOffloadEntry)

851 return;

852

853 assert(!ParentName.empty() && "Invalid target region parent name!");

854

857 if (Mode || IsBareKernel)

858 emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

859 CodeGen);

860 else

861 emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

862 CodeGen);

863}

864

867 llvm::OpenMPIRBuilderConfig Config(

870 false, false,

873

875 llvm_unreachable("OpenMP can only handle device code.");

876

879

882 return;

883

885 "__omp_rtl_debug_kind");

887 "__omp_rtl_assume_teams_oversubscription");

889 "__omp_rtl_assume_threads_oversubscription");

891 "__omp_rtl_assume_no_thread_state");

893 "__omp_rtl_assume_no_nested_parallelism");

894}

895

897 ProcBindKind ProcBind,

899

900}

901

903 llvm::Value *NumThreads,

905

906}

907

909 const Expr *NumTeams,

910 const Expr *ThreadLimit,

912

917

918 bool PrevIsInTTDRegion = IsInTTDRegion;

919 IsInTTDRegion = false;

920 auto *OutlinedFun =

922 CGF, D, ThreadIDVar, InnermostKind, CodeGen));

923 IsInTTDRegion = PrevIsInTTDRegion;

925 llvm::Function *WrapperFun =

926 createParallelDataSharingWrapper(OutlinedFun, D);

927 WrapperFunctionsMap[OutlinedFun] = WrapperFun;

928 }

929

930 return OutlinedFun;

931}

932

933

934

935static void

939 "expected teams directive.");

943 Ctx,

944 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(

945 true))) {

946 Dir = dyn_cast_or_null(S);

948 Dir = nullptr;

949 }

950 }

951 if (!Dir)

952 return;

954 for (const Expr *E : C->getVarRefs())

956 }

957}

958

959

960static void

964 "expected teams directive.");

966 for (const Expr *E : C->privates())

968 }

969}

970

976

977 const RecordDecl *GlobalizedRD = nullptr;

979 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

981

986 if (!LastPrivatesReductions.empty()) {

987 GlobalizedRD = ::buildRecordForGlobalizedVars(

988 CGM.getContext(), {}, LastPrivatesReductions, MappedDeclsFields,

989 WarpSize);

990 }

991 } else if (!LastPrivatesReductions.empty()) {

992 assert(!TeamAndReductions.first &&

993 "Previous team declaration is not expected.");

994 TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();

995 std::swap(TeamAndReductions.second, LastPrivatesReductions);

996 }

997

998

1002 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1003 &MappedDeclsFields;

1004

1005 public:

1006 NVPTXPrePostActionTy(

1008 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1009 &MappedDeclsFields)

1010 : Loc(Loc), GlobalizedRD(GlobalizedRD),

1011 MappedDeclsFields(MappedDeclsFields) {}

1013 auto &Rt =

1015 if (GlobalizedRD) {

1016 auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

1017 I->getSecond().MappedParams =

1018 std::make_uniqueCodeGenFunction::OMPMapVars();

1019 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

1020 for (const auto &Pair : MappedDeclsFields) {

1021 assert(Pair.getFirst()->isCanonicalDecl() &&

1022 "Expected canonical declaration");

1023 Data.insert(std::make_pair(Pair.getFirst(), MappedVarData()));

1024 }

1025 }

1026 Rt.emitGenericVarsProlog(CGF, Loc);

1027 }

1030 .emitGenericVarsEpilog(CGF);

1031 }

1032 } Action(Loc, GlobalizedRD, MappedDeclsFields);

1035 CGF, D, ThreadIDVar, InnermostKind, CodeGen);

1036

1037 return OutlinedFun;

1038}

1039

1040void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,

1043 return;

1044

1046

1047 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1048 if (I == FunctionGlobalizedDecls.end())

1049 return;

1050

1051 for (auto &Rec : I->getSecond().LocalVarData) {

1052 const auto *VD = cast(Rec.first);

1053 bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);

1055

1056

1057 llvm::Value *ParValue;

1058 if (EscapedParam) {

1062 }

1063

1064

1066 llvm::CallBase *VoidPtr =

1068 CGM.getModule(), OMPRTL___kmpc_alloc_shared),

1069 AllocArgs, VD->getName());

1070

1071 VoidPtr->addRetAttr(llvm::Attribute::get(

1074

1075

1077 VoidPtr, Bld.getPtrTy(0), VD->getName() + "_on_stack");

1080 Rec.second.PrivateAddr = VarAddr.getAddress();

1081 Rec.second.GlobalizedVal = VoidPtr;

1082

1083

1084 if (EscapedParam) {

1086 I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());

1087 }

1089 VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation()));

1090 }

1091

1092 for (const auto *ValueD : I->getSecond().EscapedVariableLengthDecls) {

1093 const auto *VD = cast(ValueD);

1094 std::pair<llvm::Value *, llvm::Value *> AddrSizePair =

1096 I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);

1100 I->getSecond().MappedParams->setVarAddr(CGF, VD, Base.getAddress());

1101 }

1102 I->getSecond().MappedParams->apply(CGF);

1103}

1104

1106 const VarDecl *VD) const {

1107 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1108 if (I == FunctionGlobalizedDecls.end())

1109 return false;

1110

1111

1112 return llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);

1113}

1114

1115std::pair<llvm::Value *, llvm::Value *>

1119

1120

1123 Size = Bld.CreateNUWAdd(

1124 Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1));

1125 llvm::Value *AlignVal =

1127 Size = Bld.CreateUDiv(Size, AlignVal);

1128 Size = Bld.CreateNUWMul(Size, AlignVal);

1129

1130

1131 llvm::Value *AllocArgs[] = {Size};

1132 llvm::CallBase *VoidPtr =

1134 CGM.getModule(), OMPRTL___kmpc_alloc_shared),

1135 AllocArgs, VD->getName());

1136 VoidPtr->addRetAttr(llvm::Attribute::get(

1138

1139 return std::make_pair(VoidPtr, Size);

1140}

1141

1144 const std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {

1145

1147 CGM.getModule(), OMPRTL___kmpc_free_shared),

1148 {AddrSizePair.first, AddrSizePair.second});

1149}

1150

1151void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {

1153 return;

1154

1155 const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

1156 if (I != FunctionGlobalizedDecls.end()) {

1157

1158

1159 for (const auto &AddrSizePair :

1160 llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {

1162 CGM.getModule(), OMPRTL___kmpc_free_shared),

1163 {AddrSizePair.first, AddrSizePair.second});

1164 }

1165

1166 for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) {

1167 const auto *VD = cast(Rec.first);

1168 I->getSecond().MappedParams->restore(CGF);

1169

1170 llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

1173 CGM.getModule(), OMPRTL___kmpc_free_shared),

1174 FreeArgs);

1175 }

1176 }

1177}

1178

1182 llvm::Function *OutlinedFn,

1185 return;

1186

1187 bool IsBareKernel = D.getSingleClause<OMPXBareClause>();

1188

1190 ".zero.addr");

1193

1194

1195 if (IsBareKernel)

1196 OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));

1197 else

1199 OutlinedFnArgs.push_back(ZeroAddr.getPointer());

1200 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

1202}

1203

1206 llvm::Function *OutlinedFn,

1208 const Expr *IfCond,

1209 llvm::Value *NumThreads) {

1211 return;

1212

1213 auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,

1217 llvm::Value *NumThreadsVal = NumThreads;

1218 llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];

1219 llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);

1220 if (WFn)

1221 ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);

1222 llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);

1223

1224

1225

1226

1228

1230 llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),

1231 "captured_vars_addrs");

1232

1233 if (!CapturedVars.empty()) {

1234

1236 unsigned Idx = 0;

1237 for (llvm::Value *V : CapturedVars) {

1239 llvm::Value *PtrV;

1240 if (V->getType()->isIntegerTy())

1241 PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);

1242 else

1246 ++Idx;

1247 }

1248 }

1249

1250 llvm::Value *IfCondVal = nullptr;

1251 if (IfCond)

1253 false);

1254 else

1255 IfCondVal = llvm::ConstantInt::get(CGF.Int32Ty, 1);

1256

1257 if (!NumThreadsVal)

1258 NumThreadsVal = llvm::ConstantInt::get(CGF.Int32Ty, -1);

1259 else

1260 NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),

1261

1262 assert(IfCondVal && "Expected a value");

1264 llvm::Value *Args[] = {

1265 RTLoc,

1267 IfCondVal,

1268 NumThreadsVal,

1269 llvm::ConstantInt::get(CGF.Int32Ty, -1),

1270 FnPtr,

1271 ID,

1272 Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),

1274 llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};

1276 CGM.getModule(), OMPRTL___kmpc_parallel_51),

1277 Args);

1278 };

1279

1281 RCG(CGF);

1282}

1283

1284void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {

1285

1287 return;

1288

1289

1290 llvm::Value *Args[] = {

1291 llvm::ConstantPointerNull::get(

1293 llvm::ConstantInt::get(CGF.Int32Ty, 0, true)};

1295 CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),

1296 Args);

1297}

1298

1302 bool) {

1303

1305 return;

1306

1310

1313 Args);

1314}

1315

1319 const Expr *Hint) {

1320 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop");

1321 llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test");

1322 llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync");

1323 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");

1324 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");

1325

1327

1328

1330 CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));

1331

1332 llvm::Value *ThreadID = RT.getGPUThreadID(CGF);

1333

1334

1335 llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);

1336

1337

1343 true);

1344

1345

1348 llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth);

1349 CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB);

1350

1351

1352

1355 llvm::Value *CmpThreadToCounter =

1356 CGF.Builder.CreateICmpEQ(ThreadID, CounterVal);

1357 CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);

1358

1359

1361

1362

1364 Hint);

1365

1366

1367

1368

1369

1371

1374 Mask);

1375

1376 llvm::Value *IncCounterVal =

1377 CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));

1380

1381

1382 CGF.EmitBlock(ExitBB, true);

1383}

1384

1385

1390 "Cast type must sized.");

1392 "Val type must sized.");

1394 if (ValTy == CastTy)

1395 return Val;

1398 return CGF.Builder.CreateBitCast(Val, LLVMCastTy);

1400 return CGF.Builder.CreateIntCast(Val, LLVMCastTy,

1404 CGF.EmitStoreOfScalar(Val, ValCastItem, false, ValTy,

1410}

1411

1412

1413

1414

1415

1416

1417

1418

1419

1420

1421

1422

1423

1424

1425

1426

1427

1428

1429

1430

1431

1432

1433

1434

1435

1436

1437

1438

1439

1440

1441

1442

1443

1444

1445

1446

1447

1448

1449

1450

1451

1452

1453

1454

1455

1456

1457

1458

1459

1460

1461

1462

1463

1464

1465

1466

1467

1468

1469

1470

1471

1472

1473

1474

1475

1476

1477

1478

1479

1480

1481

1482

1483

1484

1485

1486

1487

1488

1489

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509

1510

1511

1512

1513

1514

1515

1516

1517

1518

1519

1520

1521

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537

1538

1539

1540

1541

1542

1543

1544

1545

1546

1547

1548

1549

1550

1551

1552

1553

1554

1555

1556

1557

1558

1559

1560

1561

1562

1563

1564

1565

1566

1567

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580

1581

1582

1583

1584

1585

1586

1587

1588

1589

1590

1591

1592

1593

1594

1595

1596

1597

1598

1599

1600

1601

1602

1603

1604

1605

1606

1607

1608

1609

1610

1611

1612

1613

1614

1615

1616

1617

1618

1619

1620

1621

1622

1623

1624

1625

1626

1627

1628

1629

1630

1631

1632

1633

1634

1635

1636

1637

1638

1639

1640

1641

1642

1643

1644

1645

1646

1647

1648

1649

1650

1651

1652

1653

1659 return;

1660

1664

1666

1667 if (Options.SimpleReduction) {

1668 assert(!TeamsReduction && !ParallelReduction &&

1669 "Invalid reduction selection in emitReduction.");

1670 (void)ParallelReduction;

1672 ReductionOps, Options);

1673 return;

1674 }

1675

1676 llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;

1678 int Cnt = 0;

1679 for (const Expr *DRE : Privates) {

1680 PrivatesReductions[Cnt] = cast(DRE)->getDecl();

1681 ++Cnt;

1682 }

1683 const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(

1684 CGM.getContext(), PrivatesReductions, {}, VarFieldMap, 1);

1685

1686 if (TeamsReduction)

1687 TeamsReductions.push_back(ReductionRec);

1688

1689

1691

1692 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

1693 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),

1695 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),

1696 CGF.Builder.GetInsertPoint());

1697 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(

1700

1702 unsigned Idx = 0;

1704 llvm::Type *ElementType;

1706 llvm::Value *PrivateVariable;

1707 llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen = nullptr;

1709 const auto *RHSVar =

1710 cast(cast(RHSExprs[Idx])->getDecl());

1712 const auto *LHSVar =

1713 cast(cast(LHSExprs[Idx])->getDecl());

1715 llvm::OpenMPIRBuilder::EvalKind EvalKind;

1718 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;

1719 break;

1721 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;

1722 break;

1724 EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;

1725 break;

1726 }

1727 auto ReductionGen = [&](InsertPointTy CodeGenIP, unsigned I,

1728 llvm::Value **LHSPtr, llvm::Value **RHSPtr,

1729 llvm::Function *NewFunc) {

1730 CGF.Builder.restoreIP(CodeGenIP);

1731 auto *CurFn = CGF.CurFn;

1732 CGF.CurFn = NewFunc;

1733

1735 cast(cast(LHSExprs[I])->getDecl()))

1736 .emitRawPointer(CGF);

1738 cast(cast(RHSExprs[I])->getDecl()))

1739 .emitRawPointer(CGF);

1740

1742 cast(LHSExprs[I]),

1743 cast(RHSExprs[I]));

1744

1745 CGF.CurFn = CurFn;

1746

1747 return InsertPointTy(CGF.Builder.GetInsertBlock(),

1748 CGF.Builder.GetInsertPoint());

1749 };

1750 ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(

1751 ElementType, Variable, PrivateVariable, EvalKind,

1752 nullptr, ReductionGen, AtomicReductionGen));

1753 Idx++;

1754 }

1755

1756 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =

1757 cantFail(OMPBuilder.createReductionsGPU(

1758 OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,

1759 DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,

1761 C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc));

1762 CGF.Builder.restoreIP(AfterIP);

1763 return;

1764}

1765

1768 const VarDecl *NativeParam) const {

1770 return NativeParam;

1773 const Type *NonQualTy = QC.strip(ArgType);

1775 if (const auto *Attr = FD->getAttr()) {

1776 if (Attr->getCaptureKind() == OMPC_map) {

1779 }

1780 }

1783 enum { NVPTX_local_addr = 5 };

1786 if (isa(NativeParam))

1795 nullptr, SC_None, nullptr);

1796}

1797

1800 const VarDecl *NativeParam,

1801 const VarDecl *TargetParam) const {

1802 assert(NativeParam != TargetParam &&

1804 "Native arg must not be the same as target arg.");

1808 const Type *NonQualTy = QC.strip(NativeParamType);

1810 unsigned NativePointeeAddrSpace =

1813 llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, false,

1815

1817 TargetAddr,

1818 llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));

1820 CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, false,

1821 NativeParamType);

1822 return NativeParamAddr;

1823}

1824

1829 TargetArgs.reserve(Args.size());

1830 auto *FnType = OutlinedFn.getFunctionType();

1831 for (unsigned I = 0, E = Args.size(); I < E; ++I) {

1832 if (FnType->isVarArg() && FnType->getNumParams() <= I) {

1833 TargetArgs.append(std::next(Args.begin(), I), Args.end());

1834 break;

1835 }

1836 llvm::Type *TargetType = FnType->getParamType(I);

1837 llvm::Value *NativeArg = Args[I];

1838 if (!TargetType->isPointerTy()) {

1839 TargetArgs.emplace_back(NativeArg);

1840 continue;

1841 }

1842 TargetArgs.emplace_back(

1844 }

1846}

1847

1848

1849

1850

1851

1852llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

1855 const auto &CS = *D.getCapturedStmt(OMPD_parallel);

1856

1857

1864 nullptr, Int16QTy,

1867 nullptr, Int32QTy,

1869 WrapperArgs.emplace_back(&ParallelLevelArg);

1870 WrapperArgs.emplace_back(&WrapperArg);

1871

1874

1875 auto *Fn = llvm::Function::Create(

1877 Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());

1878

1879

1880

1881

1882

1883

1884

1885 Fn->addFnAttr(llvm::Attribute::NoInline);

1886

1888 Fn->setLinkage(llvm::GlobalValue::InternalLinkage);

1889 Fn->setDoesNotRecurse();

1890

1894

1895 const auto *RD = CS.getCapturedRecordDecl();

1896 auto CurField = RD->field_begin();

1897

1899 ".zero.addr");

1901

1903

1906

1908 auto CI = CS.capture_begin();

1909

1910

1911

1914 llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();

1915 llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};

1917 CGM.getModule(), OMPRTL___kmpc_get_shared_variables),

1918 DataSharingArgs);

1919

1920

1921

1923 if (CS.capture_size() > 0 ||

1929 }

1930 unsigned Idx = 0;

1934 Src, Bld.getPtrTy(0), CGF.SizeTy);

1936 TypedAddress,

1937 false,

1939 cast(D).getLowerBoundVariable()->getExprLoc());

1940 Args.emplace_back(LB);

1941 ++Idx;

1946 TypedAddress,

1947 false,

1949 cast(D).getUpperBoundVariable()->getExprLoc());

1950 Args.emplace_back(UB);

1951 ++Idx;

1952 }

1953 if (CS.capture_size() > 0) {

1955 for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {

1956 QualType ElemTy = CurField->getType();

1962 false,

1964 CI->getLocation());

1965 if (CI->capturesVariableByCopy() &&

1966 !CI->getCapturedVar()->getType()->isAnyPointerType()) {

1968 CI->getLocation());

1969 }

1970 Args.emplace_back(Arg);

1971 }

1972 }

1973

1976 return Fn;

1977}

1978

1980 const Decl *D) {

1982 return;

1983

1984 assert(D && "Expected function or captured|block decl.");

1985 assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&

1986 "Function is registered already.");

1987 assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&

1988 "Team is set but not processed.");

1989 const Stmt *Body = nullptr;

1990 bool NeedToDelayGlobalization = false;

1991 if (const auto *FD = dyn_cast(D)) {

1992 Body = FD->getBody();

1993 } else if (const auto *BD = dyn_cast(D)) {

1994 Body = BD->getBody();

1995 } else if (const auto *CD = dyn_cast(D)) {

1996 Body = CD->getBody();

1998 if (NeedToDelayGlobalization &&

2000 return;

2001 }

2002 if (!Body)

2003 return;

2004 CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);

2005 VarChecker.Visit(Body);

2006 const RecordDecl *GlobalizedVarsRecord =

2007 VarChecker.getGlobalizedRecord(IsInTTDRegion);

2008 TeamAndReductions.first = nullptr;

2009 TeamAndReductions.second.clear();

2011 VarChecker.getEscapedVariableLengthDecls();

2013 VarChecker.getDelayedVariableLengthDecls();

2014 if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&

2015 DelayedVariableLengthDecls.empty())

2016 return;

2017 auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;

2018 I->getSecond().MappedParams =

2019 std::make_uniqueCodeGenFunction::OMPMapVars();

2020 I->getSecond().EscapedParameters.insert(

2021 VarChecker.getEscapedParameters().begin(),

2022 VarChecker.getEscapedParameters().end());

2023 I->getSecond().EscapedVariableLengthDecls.append(

2024 EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());

2025 I->getSecond().DelayedVariableLengthDecls.append(

2026 DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());

2027 DeclToAddrMapTy &Data = I->getSecond().LocalVarData;

2028 for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {

2029 assert(VD->isCanonicalDecl() && "Expected canonical declaration");

2030 Data.insert(std::make_pair(VD, MappedVarData()));

2031 }

2032 if (!NeedToDelayGlobalization) {

2033 emitGenericVarsProlog(CGF, D->getBeginLoc());

2035 GlobalizationScope() = default;

2036

2039 .emitGenericVarsEpilog(CGF);

2040 }

2041 };

2043 }

2044}

2045

2048 if (VD && VD->hasAttr()) {

2049 const auto *A = VD->getAttr();

2051 switch (A->getAllocatorType()) {

2052 case OMPAllocateDeclAttr::OMPNullMemAlloc:

2053 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2054 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2055 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2056 break;

2057 case OMPAllocateDeclAttr::OMPThreadMemAlloc:

2059 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2060

2062 case OMPAllocateDeclAttr::OMPConstMemAlloc:

2064 break;

2065 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2067 break;

2068 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2069 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2070 break;

2071 }

2073 auto *GV = new llvm::GlobalVariable(

2074 CGM.getModule(), VarTy, false,

2075 llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),

2077 nullptr, llvm::GlobalValue::NotThreadLocal,

2080 GV->setAlignment(Align.getAsAlign());

2085 VarTy, Align);

2086 }

2087

2090

2092 auto I = FunctionGlobalizedDecls.find(CGF.CurFn);

2093 if (I == FunctionGlobalizedDecls.end())

2095 auto VDI = I->getSecond().LocalVarData.find(VD);

2096 if (VDI != I->getSecond().LocalVarData.end())

2097 return VDI->second.PrivateAddr;

2101 IT != E; ++IT) {

2102 auto VDI = I->getSecond().LocalVarData.find(

2103 cast(cast(IT->getRef())->getDecl())

2104 ->getCanonicalDecl());

2105 if (VDI != I->getSecond().LocalVarData.end())

2106 return VDI->second.PrivateAddr;

2107 }

2108 }

2109

2111}

2112

2114 FunctionGlobalizedDecls.erase(CGF.CurFn);

2116}

2117

2121 llvm::Value *&Chunk) const {

2124 ScheduleKind = OMPC_DIST_SCHEDULE_static;

2126 RT.getGPUNumThreads(CGF),

2128 S.getIterationVariable()->getType(), S.getBeginLoc());

2129 return;

2130 }

2132 CGF, S, ScheduleKind, Chunk);

2133}

2134

2138 const Expr *&ChunkExpr) const {

2139 ScheduleKind = OMPC_SCHEDULE_static;

2140

2141 llvm::APInt ChunkSize(32, 1);

2145}

2146

2150 " Expected target-based directive.");

2151 const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);

2153

2154

2155 if (C.capturesVariable())

2156 continue;

2157 const VarDecl *VD = C.getCapturedVar();

2158 const auto *RD = VD->getType()

2162 if (!RD || !RD->isLambda())

2163 continue;

2168 else

2171 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;

2172 FieldDecl *ThisCapture = nullptr;

2173 RD->getCaptureFields(Captures, ThisCapture);

2179 }

2180 for (const LambdaCapture &LC : RD->captures()) {

2181 if (LC.getCaptureKind() != LCK_ByRef)

2182 continue;

2183 const ValueDecl *VD = LC.getCapturedVar();

2184

2185

2187 continue;

2188 auto It = Captures.find(VD);

2189 assert(It != Captures.end() && "Found lambda capture without field.");

2197 }

2198 }

2199}

2200

2203 if (!VD || !VD->hasAttr())

2204 return false;

2205 const auto *A = VD->getAttr();

2206 switch(A->getAllocatorType()) {

2207 case OMPAllocateDeclAttr::OMPNullMemAlloc:

2208 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2209

2210 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2211 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2212 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2213 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2214 case OMPAllocateDeclAttr::OMPThreadMemAlloc:

2216 return true;

2217 case OMPAllocateDeclAttr::OMPConstMemAlloc:

2219 return true;

2220 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2222 return true;

2223 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2224 llvm_unreachable("Expected predefined allocator for the variables with the "

2225 "static storage.");

2226 }

2227 return false;

2228}

2229

2230

2235 if (Feature.getValue()) {

2238 return Arch;

2239 }

2240 }

2242}

2243

2244

2245

2247 for (const OMPClause *Clause : D->clauselists()) {

2248 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {

2250 switch (Arch) {

2261 llvm::raw_svector_ostream Out(Buffer);

2263 << " does not support unified addressing";

2264 CGM.Error(Clause->getBeginLoc(), Out.str());

2265 return;

2266 }

2338 break;

2340 llvm_unreachable("Unexpected GPU arch.");

2341 }

2342 }

2343 }

2345}

2346

2350 const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";

2351 llvm::Function *F = M->getFunction(LocSize);

2352 if (!F) {

2353 F = llvm::Function::Create(llvm::FunctionType::get(CGF.Int32Ty, {}, false),

2354 llvm::GlobalVariable::ExternalLinkage, LocSize,

2356 }

2357 return Bld.CreateCall(F, {}, "nvptx_num_threads");

2358}

2359

2363 OMPBuilder.getOrCreateRuntimeFunction(

2364 CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),

2365 Args);

2366}

static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of reduction variables from the teams ... directives.

static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)

Cast value to the specified type.

static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of lastprivate variables from the teams distribute ... or teams {distribute ....

static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)

Check for inner (nested) SPMD construct, if any.

static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)

static OffloadArch getOffloadArch(CodeGenModule &CGM)

This file defines OpenMP nodes for declarative directives.

This file defines OpenMP AST classes for clauses.

static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")

This file defines OpenMP AST classes for executable directives and clauses.

Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...

QualType getPointerType(QualType T) const

Return the uniqued reference to the type for a pointer to the specified type.

QualType getUIntPtrType() const

Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.

QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const

getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...

CanQualType getSizeType() const

Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.

CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const

Return a conservative estimate of the alignment of the specified decl D.

CharUnits getTypeSizeInChars(QualType T) const

Return the size of the specified (complete) type T, in characters.

const TargetInfo & getTargetInfo() const

QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const

Return the uniqued reference to the type for an address space qualified type with the specified type ...

unsigned getTargetAddressSpace(LangAS AS) const

Attr - This represents one attribute.

A class which contains all the information about a particular captured value.

BlockExpr - Adaptor class for mixing a BlockDecl with expressions.

CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).

Describes the capture of either a variable, or 'this', or variable-length array type.

This captures a statement into a function.

bool capturesVariable(const VarDecl *Var) const

True if this variable has been captured.

CharUnits - This is an opaque type for sizes expressed in character units.

bool isZero() const

isZero - Test whether the quantity equals zero.

llvm::Align getAsAlign() const

getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...

QuantityType getQuantity() const

getQuantity - Get the raw integer representation of this quantity.

Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...

llvm::Value * emitRawPointer(CodeGenFunction &CGF) const

Return the pointer contained in this class after authenticating it and adding offset to it if necessa...

Address withElementType(llvm::Type *ElemTy) const

Return address with different element type, but same pointer and alignment.

llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)

Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")

Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...

Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...

CGFunctionInfo - Class to encapsulate the information about a function definition.

void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override

Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...

llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP teams.

void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override

Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...

void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override

Emit a code for reduction clause.

DataSharingMode

Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...

@ DS_CUDA

CUDA data sharing mode.

@ DS_Generic

Generic data-sharing mode.

void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override

Choose a default value for the dist_schedule clause.

Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override

Gets the OpenMP-specific address of the local variable.

void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override

Emits OpenMP-specific function prolog.

void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override

Choose a default value for the schedule clause.

void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override

This function ought to emit, in the general case, a call to.

void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override

Emits a critical region.

void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override

Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...

bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override

Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...

void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override

Get call to __kmpc_free_shared.

CGOpenMPRuntimeGPU(CodeGenModule &CGM)

llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP parallel.

void functionFinished(CodeGenFunction &CGF) override

Cleans up references to the objects in finished function.

llvm::Value * getGPUThreadID(CodeGenFunction &CGF)

Get the id of the current thread on the GPU.

void processRequiresDirective(const OMPRequiresDecl *D) override

Perform check on requires decl to ensure that target architecture supports unified addressing.

bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override

Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...

void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const override

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override

Gets the address of the native argument basing on the address of the target-specific parameter.

ExecutionMode

Defines the execution mode.

@ EM_NonSPMD

Non-SPMD execution mode (1 master thread, others are workers).

@ EM_Unknown

Unknown execution mode (orphaned directive).

@ EM_SPMD

SPMD execution mode (all threads are worker threads).

void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override

Emit an implicit/explicit barrier for OpenMP threads.

llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)

Get the maximum number of threads in a block of the GPU.

const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override

Translates the native parameter of outlined function if this is required for target.

std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override

Get call to __kmpc_alloc_shared.

bool isGPU() const override

Returns true if the current target is a GPU.

void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override

Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...

void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override

Adjust some parameters for the target-based directives, like addresses of the variables captured by r...

virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)

Emits address of the word in a memory where current thread id is stored.

static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)

Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...

llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)

Emits object of ident_t type with info for source location.

virtual void functionFinished(CodeGenFunction &CGF)

Cleans up references to the objects in finished function.

virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP teams directive D.

llvm::OpenMPIRBuilder OMPBuilder

An OpenMP-IR-Builder instance.

virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)

Helper to emit outlined function for 'target' directive.

bool hasRequiresUnifiedSharedMemory() const

Return whether the unified_shared_memory has been specified.

virtual void processRequiresDirective(const OMPRequiresDecl *D)

Perform check on requires decl to ensure that target architecture supports unified addressing.

llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)

Gets thread id value for the current thread.

void clearLocThreadIdInsertPt(CodeGenFunction &CGF)

void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)

Helper to determine the min/max number of threads/teams for D.

static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)

Returns default flags for the barriers depending on the directive, for which this barier is going to ...

virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP parallel directive D.

virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const

Choose default schedule type and chunk value for the dist_schedule clause.

llvm::Type * getIdentTyPointerTy()

Returns pointer to ident_t type.

void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)

Emits single reduction combiner.

llvm::OpenMPIRBuilder & getOMPBuilder()

virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)

Emits a critical region.

virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)

Emit a code for reduction clause.

CapturedRegionKind getKind() const

bool isCXXThisExprCaptured() const

The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...

CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...

void FinishFunction(SourceLocation EndLoc=SourceLocation())

FinishFunction - Complete IR generation of the current function.

static TypeEvaluationKind getEvaluationKind(QualType T)

getEvaluationKind - Return the TypeEvaluationKind of QualType T.

CGCapturedStmtInfo * CapturedStmtInfo

LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)

Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.

Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)

Load a pointer with type PtrTy stored at address Ptr.

RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")

CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...

llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)

createBasicBlock - Create an LLVM basic block.

LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)

EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...

void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)

EmitBlock - Emit the given block.

llvm::Type * ConvertTypeForMem(QualType T)

llvm::AssertingVH< llvm::Instruction > AllocaInsertPt

AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...

RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)

CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...

const TargetInfo & getTarget() const

llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)

Converts Location to a DebugLoc, if debug information is enabled.

llvm::Value * getTypeSize(QualType Ty)

Returns calculated size of the specified type.

void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())

Emit code for the start of a function.

bool HaveInsertPoint() const

HaveInsertPoint - True if an insertion point is defined.

CGDebugInfo * getDebugInfo()

void EmitBranch(llvm::BasicBlock *Block)

EmitBranch - Emit a branch to the specified basic block from the current insert block,...

ASTContext & getContext() const

llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)

EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...

llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")

CodeGenTypes & getTypes() const

llvm::Value * EvaluateExprAsBool(const Expr *E)

EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...

LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)

llvm::Value * LoadCXXThis()

LoadCXXThis - Load the value of 'this'.

LValue EmitLoadOfReferenceLValue(LValue RefLVal)

Address GetAddrOfLocalVar(const VarDecl *VD)

GetAddrOfLocalVar - Return the address of a local variable.

llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)

Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...

llvm::LLVMContext & getLLVMContext()

void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)

EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...

This class organizes the cross-function state that is used while generating LLVM code.

void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)

Set the attributes on the LLVM function for the given decl and function info.

llvm::Module & getModule() const

const LangOptions & getLangOpts() const

CodeGenTypes & getTypes()

const TargetInfo & getTarget() const

void Error(SourceLocation loc, StringRef error)

Emit a general error that something can't be done.

CGOpenMPRuntime & getOpenMPRuntime()

Return a reference to the configured OpenMP runtime.

ASTContext & getContext() const

llvm::LLVMContext & getLLVMContext()

llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)

GetFunctionType - Get the LLVM function type for.

const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)

A builtin function is a freestanding function using the default C conventions.

unsigned getTargetAddressSpace(QualType T) const

llvm::Type * ConvertTypeForMem(QualType T)

ConvertTypeForMem - Convert type T into a llvm::Type.

Information for lazily generating a cleanup.

FunctionArgList - Type for representing both the decl and type of parameters to a function.

LValue - This represents an lvalue references.

Address getAddress() const

A basic class for pre|post-action for advanced codegen sequence for OpenMP region.

An abstract representation of an aligned address.

llvm::Value * getPointer() const

Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...

void setAction(PrePostActionTy &Action) const

ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.

DeclContext - This is used only as base class of specific decl types that can act as declaration cont...

void addDecl(Decl *D)

Add the declaration D into this context.

A reference to a declared variable, function, enum, etc.

DeclStmt - Adaptor class for mixing declarations with statements and expressions.

Decl - This represents one declaration (or definition), e.g.

attr_iterator attr_end() const

bool isCanonicalDecl() const

Whether this particular Decl is a canonical one.

attr_iterator attr_begin() const

SourceLocation getLocation() const

DeclContext * getDeclContext()

SourceLocation getBeginLoc() const LLVM_READONLY

virtual Decl * getCanonicalDecl()

Retrieves the "canonical" declaration of the given declaration.

SourceLocation getBeginLoc() const LLVM_READONLY

This represents one expression.

Expr * IgnoreParenImpCasts() LLVM_READONLY

Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...

Expr * IgnoreParens() LLVM_READONLY

Skip past any parentheses which might surround this expression until reaching a fixed point.

bool isLValue() const

isLValue - True if this expression is an "l-value" according to the rules of the current language.

Represents a member of a struct/union/class.

static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)

GlobalDecl - represents a global declaration.

ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...

static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)

Create implicit parameter.

static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)

Returns a new integer literal with value 'V' and type 'type'.

Describes the capture of a variable or of this, or of a C++1y init-capture.

A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...

std::string OMPHostIRFile

Name of the IR file that contains the result of the OpenMP target host code generation.

IdentifierInfo * getIdentifier() const

Get the identifier that names this declaration, if there is one.

StringRef getName() const

Get the name of identifier for this declaration as a StringRef.

This is a basic class for representing single OpenMP clause.

This is a basic class for representing single OpenMP executable directive.

OpenMPDirectiveKind getDirectiveKind() const

static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)

This represents clause 'lastprivate' in the '#pragma omp ...' directives.

This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....

This represents clause 'reduction' in the '#pragma omp ...' directives.

This represents '#pragma omp requires...' directive.

This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.

static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)

PointerType - C99 6.7.5.1 - Pointer Declarators.

A (possibly-)qualified type.

LangAS getAddressSpace() const

Return the address space of this type.

QualType getNonReferenceType() const

If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...

QualType getCanonicalType() const

A qualifier set is used to build a set of qualifiers.

const Type * strip(QualType type)

Collect any qualifiers on the given type and return an unqualified type.

QualType apply(const ASTContext &Context, QualType QT) const

Apply the collected qualifiers to the given type.

void addAddressSpace(LangAS space)

Represents a struct/union/class.

virtual void completeDefinition()

Note that the definition of this type is now complete.

Scope - A scope is a transient data structure that is used while parsing the program.

Encodes a location in the source.

RetTy Visit(PTR(Stmt) S, ParamTys... P)

Stmt - This represents one statement.

void startDefinition()

Starts the definition of this tag declaration.

unsigned getNewAlign() const

Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is g...

TargetOptions & getTargetOpts() const

Retrieve the target options.

virtual const llvm::omp::GV & getGridValue() const

virtual bool hasFeature(StringRef Feature) const

Determine whether the given target has the given feature.

llvm::StringMap< bool > FeatureMap

The map of which features have been enabled or disabled based on the command line.

The base class of the type hierarchy.

CXXRecordDecl * getAsCXXRecordDecl() const

Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...

bool isIntegerType() const

isIntegerType() does not include complex integers (a GCC extension).

bool isReferenceType() const

QualType getPointeeType() const

If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.

bool isLValueReferenceType() const

bool hasSignedIntegerRepresentation() const

Determine whether this type has a signed integer representation of some sort, e.g....

bool isVariablyModifiedType() const

Whether this type is a variably-modified type (C99 6.7.5).

UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...

Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

Represents a variable declaration or definition.

VarDecl * getCanonicalDecl() override

Retrieves the "canonical" declaration of the given declaration.

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...

@ Type

The l-value was considered opaque, so the alignment was determined from a type.

@ Decl

The l-value was an access to a declared entity or something equivalently strong, like the address of ...

The JSON file list parser is used to communicate input to InstallAPI.

llvm::omp::Directive OpenMPDirectiveKind

OpenMP directives.

@ ICIS_NoInit

No in-class initializer.

bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a distribute directive.

@ LCK_ByRef

Capturing by reference.

@ Private

'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...

bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a parallel-kind directive.

bool isOpenMPPrivate(OpenMPClauseKind Kind)

Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...

OpenMPDistScheduleClauseKind

OpenMP attributes for 'dist_schedule' clause.

bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a target code offload directive.

bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a teams-kind directive.

OffloadArch StringToOffloadArch(llvm::StringRef S)

bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)

Checks if the specified directive kind is one of the composite or combined directives that need loop ...

LangAS

Defines the address space values used by the address space qualifier of QualType.

const char * OffloadArchToString(OffloadArch A)

void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)

Return the captured regions of an OpenMP directive.

LangAS getLangASFromTargetAS(unsigned TargetAS)

@ CXXThis

Parameter for C++ 'this' argument.

@ Other

Other implicit parameter.

OpenMPScheduleClauseKind

OpenMP attributes for 'schedule' clause.

llvm::PointerType * VoidPtrTy

llvm::IntegerType * SizeTy

llvm::PointerType * VoidPtrPtrTy

llvm::IntegerType * Int32Ty

llvm::PointerType * Int8PtrTy