LLVM: lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

42#include "llvm/IR/IntrinsicsHexagon.h"

63#include

64#include

65#include

66#include

67#include

68#include

69#include

70#include

71#include

72#include

73#include

74#include

75

76#define DEBUG_TYPE "hexagon-lir"

77

78using namespace llvm;

79

82 cl::desc("Disable generation of memcpy in loop idiom recognition"));

83

86 cl::desc("Disable generation of memmove in loop idiom recognition"));

87

90 "check guarding the memmove."));

91

94 cl::desc("Threshold (in bytes) to perform the transformation, if the "

95 "runtime loop count (mem transfer size) is known at compile-time."));

96

99 cl::desc("Only enable generating memmove in non-nested loops"));

100

103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));

104

106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));

107

109 = "hexagon_memcpy_forward_vp4cp4n2";

110

111

112namespace llvm {

113

116

117}

118

119namespace {

120

121class HexagonLoopIdiomRecognize {

122public:

126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}

127

129

130private:

135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);

137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,

139 bool runOnCountableLoop(Loop *L);

140

147 bool HasMemcpy, HasMemmove;

148};

149

150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {

151public:

152 static char ID;

153

154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {

157 }

158

160 return "Recognize Hexagon-specific loop idioms";

161 }

162

172 }

173

175};

176

177struct Simplifier {

178 struct Rule {

182 FuncType Fn;

183 };

184

185 void addRule(StringRef N, const Rule::FuncType &F) {

186 Rules.push_back(Rule(N, F));

187 }

188

189private:

190 struct WorkListType {

191 WorkListType() = default;

192

193 void push_back(Value *V) {

194

195 if (S.insert(V).second)

196 Q.push_back(V);

197 }

198

199 Value *pop_front_val() {

200 Value *V = Q.front();

201 Q.pop_front();

202 S.erase(V);

203 return V;

204 }

205

206 bool empty() const { return Q.empty(); }

207

208 private:

209 std::deque<Value *> Q;

210 std::set<Value *> S;

211 };

212

213 using ValueSetType = std::set<Value *>;

214

215 std::vector Rules;

216

217public:

218 struct Context {

220

222 ValueSetType Used;

223 ValueSetType Clones;

225

229 }

230

231 ~Context() { cleanup(); }

232

235

236 private:

237 friend struct Simplifier;

238

241

242 template void traverse(Value *V, FuncT F);

243 void record(Value *V);

245 void unuse(Value *V);

246

252 };

253

255};

256

257 struct PE {

258 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}

259

260 const Simplifier::Context &C;

262 };

263

266 P.C.print(OS, P.V ? P.V : P.C.Root);

267 return OS;

268 }

269

270}

271

272char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;

273

275 "Recognize Hexagon-specific loop idioms", false, false)

285

286template

287void Simplifier::Context::traverse(Value *V, FuncT F) {

288 WorkListType Q;

289 Q.push_back(V);

290

291 while (!Q.empty()) {

292 Instruction *U = dyn_cast(Q.pop_front_val());

293 if (!U || U->getParent())

294 continue;

295 if (F(U))

296 continue;

297 for (Value *Op : U->operands())

298 Q.push_back(Op);

299 }

300}

301

302void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {

303 const auto *U = dyn_cast(V);

304 if (!U) {

305 OS << V << '(' << *V << ')';

306 return;

307 }

308

309 if (U->getParent()) {

310 OS << U << '(';

311 U->printAsOperand(OS, true);

312 OS << ')';

313 return;

314 }

315

316 unsigned N = U->getNumOperands();

317 if (N != 0)

318 OS << U << '(';

319 OS << U->getOpcodeName();

320 for (const Value *Op : U->operands()) {

321 OS << ' ';

323 }

324 if (N != 0)

325 OS << ')';

326}

327

328void Simplifier::Context::initialize(Instruction *Exp) {

329

330

331

332 ValueMapType M;

334 WorkListType Q;

335 Q.push_back(Exp);

336

337 while (!Q.empty()) {

338 Value *V = Q.pop_front_val();

339 if (M.contains(V))

340 continue;

341 if (Instruction *U = dyn_cast(V)) {

342 if (isa(U) || U->getParent() != Block)

343 continue;

344 for (Value *Op : U->operands())

345 Q.push_back(Op);

346 M.insert({U, U->clone()});

347 }

348 }

349

350 for (std::pair<Value*,Value*> P : M) {

352 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {

353 auto F = M.find(U->getOperand(i));

354 if (F != M.end())

355 U->setOperand(i, F->second);

356 }

357 }

358

359 auto R = M.find(Exp);

361 Root = R->second;

362

363 record(Root);

364 use(Root);

365}

366

367void Simplifier::Context::record(Value *V) {

369 Clones.insert(U);

370 return true;

371 };

373}

374

375void Simplifier::Context::use(Value *V) {

377 Used.insert(U);

378 return true;

379 };

380 traverse(V, Use);

381}

382

383void Simplifier::Context::unuse(Value *V) {

384 if (!isa(V) || cast(V)->getParent() != nullptr)

385 return;

386

387 auto Unuse = [this](Instruction *U) -> bool {

388 if (U->use_empty())

389 return false;

390 Used.erase(U);

391 return true;

392 };

393 traverse(V, Unuse);

394}

395

397 if (Tree == OldV)

398 return NewV;

399 if (OldV == NewV)

400 return Tree;

401

402 WorkListType Q;

403 Q.push_back(Tree);

404 while (!Q.empty()) {

405 Instruction *U = dyn_cast(Q.pop_front_val());

406

407 if (!U || U->getParent())

408 continue;

409 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {

410 Value *Op = U->getOperand(i);

411 if (Op == OldV) {

412 U->setOperand(i, NewV);

413 unuse(OldV);

414 } else {

415 Q.push_back(Op);

416 }

417 }

418 }

419 return Tree;

420}

421

422void Simplifier::Context::replace(Value *OldV, Value *NewV) {

423 if (Root == OldV) {

424 Root = NewV;

425 use(Root);

426 return;

427 }

428

429

430

431

432

433

434

435 WorkListType Q;

436 Q.push_back(NewV);

437 while (!Q.empty()) {

438 Value *V = Q.pop_front_val();

440 if (!U || U->getParent())

441 continue;

442 if (Value *DupV = find(Root, V)) {

443 if (DupV != V)

444 NewV = subst(NewV, V, DupV);

445 } else {

446 for (Value *Op : U->operands())

447 Q.push_back(Op);

448 }

449 }

450

451

452 Root = subst(Root, OldV, NewV);

453 use(Root);

454}

455

456void Simplifier::Context::cleanup() {

457 for (Value *V : Clones) {

459 if (U->getParent())

460 U->dropAllReferences();

461 }

462

463 for (Value *V : Clones) {

465 if (U->getParent())

466 U->deleteValue();

467 }

468}

469

470bool Simplifier::Context::equal(const Instruction *I,

472 if (I == J)

473 return true;

474 if (I->isSameOperationAs(J))

475 return false;

476 if (isa(I))

477 return I->isIdenticalTo(J);

478

479 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {

481 if (OpI == OpJ)

482 continue;

483 auto *InI = dyn_cast(OpI);

484 auto *InJ = dyn_cast(OpJ);

485 if (InI && InJ) {

486 if (equal(InI, InJ))

487 return false;

488 } else if (InI != InJ || !InI)

489 return false;

490 }

491 return true;

492}

493

494Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {

495 Instruction *SubI = dyn_cast(Sub);

496 WorkListType Q;

497 Q.push_back(Tree);

498

499 while (!Q.empty()) {

500 Value *V = Q.pop_front_val();

501 if (V == Sub)

502 return V;

504 if (!U || U->getParent())

505 continue;

506 if (SubI && equal(SubI, U))

507 return U;

508 assert(!isa(U));

509 for (Value *Op : U->operands())

510 Q.push_back(Op);

511 }

512 return nullptr;

513}

514

517 if (I->getParent())

518 return;

519

520 for (Value *Op : I->operands()) {

521 if (Instruction *OpI = dyn_cast(Op))

522 link(OpI, B, At);

523 }

524

525 I->insertInto(B, At);

526}

527

530 if (Instruction *RootI = dyn_cast(Root))

531 link(RootI, B, At);

532 return Root;

533}

534

535Value *Simplifier::simplify(Context &C) {

536 WorkListType Q;

537 Q.push_back(C.Root);

538 unsigned Count = 0;

540

541 while (!Q.empty()) {

542 if (Count++ >= Limit)

543 break;

544 Instruction *U = dyn_cast(Q.pop_front_val());

545 if (!U || U->getParent() || C.Used.count(U))

546 continue;

547 bool Changed = false;

548 for (Rule &R : Rules) {

550 if (!W)

551 continue;

552 Changed = true;

553 C.record(W);

554 C.replace(U, W);

555 Q.push_back(C.Root);

556 break;

557 }

558 if (!Changed) {

559 for (Value *Op : U->operands())

560 Q.push_back(Op);

561 }

562 }

563 return Count < Limit ? C.Root : nullptr;

564}

565

566

567

568

569

570

571

572namespace {

573

574 class PolynomialMultiplyRecognize {

575 public:

576 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,

579 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}

580

581 bool recognize();

582

583 private:

585

587 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();

589 }

590

594

597 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,

598 ValueSeq &Late);

599 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);

601 bool highBitsAreZero(Value *V, unsigned IterCount);

602 bool keepsHighBitsZero(Value *V, unsigned IterCount);

605 unsigned IterCount);

606 void cleanupLoopBody(BasicBlock *LoopB);

607

608 struct ParsedValues {

609 ParsedValues() = default;

610

613 Value *Q = nullptr;

617 unsigned IterCount = 0;

618 bool Left = false;

619 bool Inv = false;

620 };

621

622 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);

623 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);

625 Value *CIV, ParsedValues &PV, bool PreScan);

626 unsigned getInverseMxN(unsigned QP);

628

629 void setupPreSimplifier(Simplifier &S);

630 void setupPostSimplifier(Simplifier &S);

631

632 Loop *CurLoop;

637 };

638

639}

640

641Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {

643 if (std::distance(PI, PE) != 2)

644 return nullptr;

645 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;

646

647 for (auto I = BB->begin(), E = BB->end(); I != E && isa(I); ++I) {

648 auto *PN = cast(I);

649 Value *InitV = PN->getIncomingValueForBlock(PB);

650 if (!isa(InitV) || !cast(InitV)->isZero())

651 continue;

652 Value *IterV = PN->getIncomingValueForBlock(BB);

653 auto *BO = dyn_cast(IterV);

654 if (!BO)

655 continue;

656 if (BO->getOpcode() != Instruction::Add)

657 continue;

658 Value *IncV = nullptr;

659 if (BO->getOperand(0) == PN)

660 IncV = BO->getOperand(1);

661 else if (BO->getOperand(1) == PN)

662 IncV = BO->getOperand(0);

663 if (IncV == nullptr)

664 continue;

665

666 if (auto *T = dyn_cast(IncV))

667 if (T->isOne())

668 return PN;

669 }

670 return nullptr;

671}

672

674 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {

675 Use &TheUse = UI.getUse();

676 ++UI;

677 if (auto *II = dyn_cast(TheUse.getUser()))

678 if (BB == II->getParent())

679 II->replaceUsesOfWith(I, J);

680 }

681}

682

683bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,

684 Value *CIV, ParsedValues &PV) {

685

686

687

688

689

690

691

695

696 using namespace PatternMatch;

697

699 Value *A = nullptr, *B = nullptr, *C = nullptr;

700

703 return false;

705 return false;

706

707

708

709 Value *X = nullptr, *Sh1 = nullptr;

710

712 Sh1 = A;

713 X = B;

715 Sh1 = B;

716 X = A;

717 } else {

718

719

720 return false;

721 }

722

723 bool TrueIfZero;

724

725

728 else if (C == Sh1)

730 else

731 return false;

732

733

734

735

736

737 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;

738 if (TrueIfZero) {

739 ShouldSameV = TrueV;

740 ShouldXoredV = FalseV;

741 } else {

742 ShouldSameV = FalseV;

743 ShouldXoredV = TrueV;

744 }

745

746 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;

749

750

751

752 if (ShouldSameV == Y)

753 T = Z;

754 else if (ShouldSameV == Z)

755 T = Y;

756 else

757 return false;

758 R = ShouldSameV;

759

760

761

762

763 } else if (match(ShouldSameV, m_Zero())) {

764

765

767 return false;

768 T = ShouldXoredV;

769

770

771

774 return false;

775

776

777 } else

778 return false;

779

780

781

782

783

786 return false;

787

788

789

790 PV.X = X;

791 PV.Q = Q;

792 PV.R = R;

793 PV.Left = true;

794 return true;

795}

796

797bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,

798 ParsedValues &PV) {

799

800

801

802

803

804

805

809

810 using namespace PatternMatch;

811

814 bool TrueIfZero;

815

818 return false;

819

820

824 return false;

825

826

828 } else

829 return false;

830

833 return false;

834

835

836

837 Value *R = nullptr, *Q = nullptr;

838 if (TrueIfZero) {

839

840

842 return false;

843

845 return false;

846

847

848 } else {

849

850

852 return false;

853

855 return false;

856

857

858 }

859

860 PV.X = X;

861 PV.Q = Q;

862 PV.R = R;

863 PV.Left = false;

864 return true;

865}

866

867bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,

869 bool PreScan) {

870 using namespace PatternMatch;

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909 if (matchLeftShift(SelI, CIV, PV)) {

910

911 if (PreScan)

912 return true;

913

914

915 auto *RPhi = dyn_cast(PV.R);

916 if (!RPhi)

917 return false;

918 if (SelI != RPhi->getIncomingValueForBlock(LoopB))

919 return false;

920 PV.Res = SelI;

921

922

923

924 if (CurLoop->isLoopInvariant(PV.X)) {

925 PV.P = PV.X;

926 PV.Inv = false;

927 } else {

928

929

930

931

932 PV.Inv = true;

933 if (PV.X != PV.R) {

934 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;

936 return false;

937 auto *I1 = dyn_cast(X1);

938 auto *I2 = dyn_cast(X2);

939 if (!I1 || I1->getParent() != LoopB) {

940 Var = X2;

941 Inv = X1;

942 } else if (!I2 || I2->getParent() != LoopB) {

943 Var = X1;

944 Inv = X2;

945 } else

946 return false;

947 if (Var != PV.R)

948 return false;

949 PV.M = Inv;

950 }

951

952

953 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);

954 PV.P = EntryP;

955 }

956

957 return true;

958 }

959

960 if (matchRightShift(SelI, PV)) {

961

962

963 if (PV.Inv && !isa(PV.Q))

964 return false;

965 if (PreScan)

966 return true;

967

968 return false;

969 }

970

971 return false;

972}

973

974bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,

977 if (T || T->getBitWidth() > DestTy->getBitWidth())

978 return false;

979 if (T->getBitWidth() == DestTy->getBitWidth())

980 return true;

981

982

983

984

985

986

988 if (!In)

989 return true;

990

991

992 switch (In->getOpcode()) {

993 case Instruction::PHI:

994 case Instruction::ZExt:

995 case Instruction::And:

996 case Instruction::Or:

997 case Instruction::Xor:

998 case Instruction::LShr:

999 case Instruction::Select:

1000 case Instruction::Trunc:

1001 return true;

1002 case Instruction::ICmp:

1003 if (CmpInst *CI = cast(In))

1004 return CI->isEquality() || CI->isUnsigned();

1006 case Instruction::Add:

1007 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();

1008 }

1009 return false;

1010}

1011

1012void PolynomialMultiplyRecognize::promoteTo(Instruction *In,

1014 Type *OrigTy = In->getType();

1015 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote");

1016

1017

1018 if (In->getType()->isIntegerTy(1))

1019 In->mutateType(DestTy);

1020 unsigned DestBW = DestTy->getBitWidth();

1021

1022

1023 if (PHINode *P = dyn_cast(In)) {

1024 unsigned N = P->getNumIncomingValues();

1025 for (unsigned i = 0; i != N; ++i) {

1026 BasicBlock *InB = P->getIncomingBlock(i);

1027 if (InB == LoopB)

1028 continue;

1029 Value *InV = P->getIncomingValue(i);

1031

1032 if (Ty != P->getType()) {

1033

1034

1037 P->setIncomingValue(i, InV);

1038 }

1039 }

1040 } else if (ZExtInst *Z = dyn_cast(In)) {

1041 Value *Op = Z->getOperand(0);

1042 if (Op->getType() == Z->getType())

1043 Z->replaceAllUsesWith(Op);

1044 Z->eraseFromParent();

1045 return;

1046 }

1047 if (TruncInst *T = dyn_cast(In)) {

1048 IntegerType *TruncTy = cast(OrigTy);

1051 T->replaceAllUsesWith(And);

1052 T->eraseFromParent();

1053 return;

1054 }

1055

1056

1057 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {

1058 if (ConstantInt *CI = dyn_cast(In->getOperand(i)))

1059 if (CI->getBitWidth() < DestBW)

1060 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));

1061 }

1062}

1063

1064bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,

1067

1068

1069

1070

1072 return false;

1074

1075

1076 unsigned DestBW = DestTy->getBitWidth();

1078 if (P.getNumIncomingValues() != 1)

1079 return false;

1080 assert(P.getIncomingBlock(0) == LoopB);

1081 IntegerType *T = dyn_cast(P.getType());

1082 if (T || T->getBitWidth() > DestBW)

1083 return false;

1084 }

1085

1086

1088 if (In.isTerminator() && !isPromotableTo(&In, DestTy))

1089 return false;

1090

1091

1092 std::vector<Instruction*> LoopIns;

1093 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),

1096 if (In->isTerminator())

1097 promoteTo(In, DestTy, LoopB);

1098

1099

1102 for (auto I = ExitB->begin(); I != End; ++I) {

1103 PHINode *P = dyn_cast(I);

1104 if (P)

1105 break;

1106 Type *Ty0 = P->getIncomingValue(0)->getType();

1107 Type *PTy = P->getType();

1108 if (PTy != Ty0) {

1109 assert(Ty0 == DestTy);

1110

1111 P->mutateType(Ty0);

1113

1114 P->mutateType(PTy);

1115 P->replaceAllUsesWith(T);

1116

1117 P->mutateType(Ty0);

1118 cast(T)->setOperand(0, P);

1119 }

1120 }

1121

1122 return true;

1123}

1124

1125bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,

1126 ValueSeq &Cycle) {

1127

1128 if (Out == In)

1129 return true;

1130

1131 auto *BB = cast(Out)->getParent();

1132 bool HadPhi = false;

1133

1134 for (auto *U : Out->users()) {

1135 auto *I = dyn_cast(&*U);

1136 if (I == nullptr || I->getParent() != BB)

1137 continue;

1138

1139

1140

1141

1142

1143 bool IsPhi = isa(I);

1144 if (IsPhi && HadPhi)

1145 return false;

1146 HadPhi |= IsPhi;

1147 if (Cycle.insert(I))

1148 return false;

1149 if (findCycle(I, In, Cycle))

1150 break;

1152 }

1153 return Cycle.empty();

1154}

1155

1156void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,

1157 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {

1158

1159

1160

1161

1162 bool IsE = true;

1163 unsigned I, N = Cycle.size();

1164 for (I = 0; I < N; ++I) {

1166 if (DivI == V)

1167 IsE = false;

1168 else if (!isa(V))

1169 continue;

1170

1171 break;

1172 }

1173

1174

1175 ValueSeq &First = !IsE ? Early : Late;

1176 for (unsigned J = 0; J < I; ++J)

1178

1179 ValueSeq &Second = IsE ? Early : Late;

1180 Second.insert(Cycle[I]);

1181 for (++I; I < N; ++I) {

1183 if (DivI == V || isa(V))

1184 break;

1185 Second.insert(V);

1186 }

1187

1188 for (; I < N; ++I)

1190}

1191

1192bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,

1193 ValueSeq &Early, ValueSeq &Late) {

1194

1195

1196

1197 if (UseI->getOpcode() == Instruction::Select) {

1199 if (Early.count(TV) || Early.count(FV)) {

1200 if (Late.count(TV) || Late.count(FV))

1201 return false;

1202 Early.insert(UseI);

1203 } else if (Late.count(TV) || Late.count(FV)) {

1204 if (Early.count(TV) || Early.count(FV))

1205 return false;

1206 Late.insert(UseI);

1207 }

1208 return true;

1209 }

1210

1211

1212

1214 return true;

1215

1216 bool AE = true, AL = true;

1217 for (auto &I : UseI->operands()) {

1218 if (Early.count(&*I))

1219 AL = false;

1220 else if (Late.count(&*I))

1221 AE = false;

1222 }

1223

1224

1225

1226 if (AE && AL)

1227 return true;

1228

1229

1230

1231 if (!AE && !AL)

1232 return false;

1233

1234

1236

1237 if (AE)

1238 Early.insert(UseI);

1239 else

1240 Late.insert(UseI);

1241 return true;

1242}

1243

1244bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {

1245 switch (I->getOpcode()) {

1246 case Instruction::And:

1247 case Instruction::Or:

1248 case Instruction::Xor:

1249 case Instruction::LShr:

1250 case Instruction::Shl:

1251 case Instruction::Select:

1252 case Instruction::ICmp:

1253 case Instruction::PHI:

1254 break;

1255 default:

1256 return false;

1257 }

1258 return true;

1259}

1260

1261bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,

1262 unsigned IterCount) {

1263 auto *T = dyn_cast(V->getType());

1264 if (T)

1265 return false;

1266

1269 return Known.countMinLeadingZeros() >= IterCount;

1270}

1271

1272bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,

1273 unsigned IterCount) {

1274

1275

1276 if (auto *C = dyn_cast(V))

1277 return C->getValue().countl_zero() >= IterCount;

1278

1279 if (auto *I = dyn_cast(V)) {

1280 switch (I->getOpcode()) {

1281 case Instruction::And:

1282 case Instruction::Or:

1283 case Instruction::Xor:

1284 case Instruction::LShr:

1285 case Instruction::Select:

1286 case Instruction::ICmp:

1287 case Instruction::PHI:

1288 case Instruction::ZExt:

1289 return true;

1290 }

1291 }

1292

1293 return false;

1294}

1295

1296bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {

1297 unsigned Opc = I->getOpcode();

1298 if (Opc == Instruction::Shl || Opc == Instruction::LShr)

1299 return Op != I->getOperand(1);

1300 return true;

1301}

1302

1303bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,

1304 BasicBlock *ExitB, unsigned IterCount) {

1305 Value *CIV = getCountIV(LoopB);

1306 if (CIV == nullptr)

1307 return false;

1308 auto *CIVTy = dyn_cast(CIV->getType());

1309 if (CIVTy == nullptr)

1310 return false;

1311

1312 ValueSeq RShifts;

1313 ValueSeq Early, Late, Cycled;

1314

1315

1317 using namespace PatternMatch;

1318

1321 continue;

1322 ValueSeq C;

1323 if (!findCycle(&I, V, C))

1324 continue;

1325

1326

1327 C.insert(&I);

1328 classifyCycle(&I, C, Early, Late);

1329 Cycled.insert(C.begin(), C.end());

1330 RShifts.insert(&I);

1331 }

1332

1333

1334

1335 ValueSeq Users(Cycled.begin(), Cycled.end());

1336 for (unsigned i = 0; i < Users.size(); ++i) {

1338 if (!isa(V->getType()))

1339 return false;

1340 auto *R = cast(V);

1341

1342

1343 if (!commutesWithShift(R))

1344 return false;

1345 for (User *U : R->users()) {

1346 auto *T = cast(U);

1347

1348

1349

1350 if (T->getParent() != LoopB || RShifts.count(T) || isa(T))

1351 continue;

1352

1354 if (!classifyInst(T, Early, Late))

1355 return false;

1356 }

1357 }

1358

1359 if (Users.empty())

1360 return false;

1361

1362

1364 ValueSeq Inputs;

1365 for (unsigned i = 0; i < Internal.size(); ++i) {

1366 auto *R = dyn_cast(Internal[i]);

1367 if (!R)

1368 continue;

1369 for (Value *Op : R->operands()) {

1370 auto *T = dyn_cast(Op);

1371 if (T && T->getParent() != LoopB)

1372 Inputs.insert(Op);

1373 else

1375 }

1376 }

1377 for (Value *V : Inputs)

1378 if (!highBitsAreZero(V, IterCount))

1379 return false;

1380 for (Value *V : Internal)

1381 if (!keepsHighBitsZero(V, IterCount))

1382 return false;

1383

1384

1386 std::map<Value*,Value*> ShiftMap;

1387

1388 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;

1389

1390 CastMapType CastMap;

1391

1394 auto H = CM.find(std::make_pair(V, Ty));

1395 if (H != CM.end())

1396 return H->second;

1397 Value *CV = IRB.CreateIntCast(V, Ty, false);

1398 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));

1399 return CV;

1400 };

1401

1402 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {

1403 using namespace PatternMatch;

1404

1405 if (isa(I) || Users.count(&*I))

1406 continue;

1407

1408

1412 continue;

1413 }

1414

1415

1416 for (auto &J : I->operands()) {

1418 if (!isOperandShifted(&*I, Op))

1419 continue;

1421 continue;

1422

1423 if (isa(Op) && cast(Op)->isZero())

1424 continue;

1425

1426 auto F = ShiftMap.find(Op);

1427 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;

1428 if (W == nullptr) {

1429 IRB.SetInsertPoint(&*I);

1430

1431

1432

1433 Value *ShAmt = CIV, *ShVal = Op;

1434 auto *VTy = cast(ShVal->getType());

1435 auto *ATy = cast(ShAmt->getType());

1436 if (Late.count(&*I))

1437 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));

1438

1439

1440 if (VTy != ATy) {

1441 if (VTy->getBitWidth() < ATy->getBitWidth())

1442 ShVal = upcast(CastMap, IRB, ShVal, ATy);

1443 else

1444 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);

1445 }

1446

1447 W = IRB.CreateShl(ShVal, ShAmt);

1448 ShiftMap.insert(std::make_pair(Op, W));

1449 }

1450 I->replaceUsesOfWith(Op, W);

1451 }

1452 }

1453

1454

1455

1456

1457

1458

1460 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {

1461 if (!isa(P))

1462 break;

1463 auto *PN = cast(P);

1464 Value *U = PN->getIncomingValueForBlock(LoopB);

1465 if (Users.count(U))

1466 continue;

1467 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));

1469

1470

1471

1472

1473 cast(S)->replaceUsesOfWith(S, PN);

1474 }

1475

1476 return true;

1477}

1478

1479void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {

1480 for (auto &I : *LoopB)

1482 I.replaceAllUsesWith(SV);

1483

1486}

1487

1488unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {

1489

1490

1491 std::array<char,32> Q, C;

1492

1493 for (unsigned i = 0; i < 32; ++i) {

1494 Q[i] = QP & 1;

1495 QP >>= 1;

1496 }

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509 C[0] = 1;

1510 for (unsigned i = 1; i < 32; ++i) {

1511

1512

1513

1514

1515

1516

1517 unsigned T = 0;

1518 for (unsigned j = 0; j < i; ++j)

1519 T = T ^ (C[j] & Q[i-j]);

1520 C[i] = T;

1521 }

1522

1523 unsigned QV = 0;

1524 for (unsigned i = 0; i < 32; ++i)

1525 if (C[i])

1526 QV |= (1 << i);

1527

1528 return QV;

1529}

1530

1532 ParsedValues &PV) {

1534 Module *M = At->getParent()->getParent()->getParent();

1537

1538 Value *P = PV.P, *Q = PV.Q, *P0 = P;

1539 unsigned IC = PV.IterCount;

1540

1541 if (PV.M != nullptr)

1542 P0 = P = B.CreateXor(P, PV.M);

1543

1544

1546

1547 if (PV.IterCount != 32)

1548 P = B.CreateAnd(P, BMI);

1549

1550 if (PV.Inv) {

1551 auto *QI = dyn_cast(PV.Q);

1552 assert(QI && QI->getBitWidth() <= 32);

1553

1554

1555 unsigned M = (1 << PV.IterCount) - 1;

1556 unsigned Tmp = (QI->getZExtValue() | 1) & M;

1557 unsigned QV = getInverseMxN(Tmp) & M;

1558 auto *QVI = ConstantInt::get(QI->getType(), QV);

1559 P = B.CreateCall(PMF, {P, QVI});

1560 P = B.CreateTrunc(P, QI->getType());

1561 if (IC != 32)

1562 P = B.CreateAnd(P, BMI);

1563 }

1564

1565 Value *R = B.CreateCall(PMF, {P, Q});

1566

1567 if (PV.M != nullptr)

1568 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));

1569

1570 return R;

1571}

1572

1574 if (const auto *CI = dyn_cast(V))

1575 return CI->getValue().isNonNegative();

1576 const Instruction *I = dyn_cast(V);

1577 if (I)

1578 return false;

1579 switch (I->getOpcode()) {

1580 case Instruction::LShr:

1581 if (const auto SI = dyn_cast(I->getOperand(1)))

1582 return SI->getZExtValue() > 0;

1583 return false;

1584 case Instruction::Or:

1585 case Instruction::Xor:

1588 case Instruction::And:

1591 }

1592 return false;

1593}

1594

1595void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {

1596 S.addRule("sink-zext",

1597

1599 if (I->getOpcode() != Instruction::ZExt)

1600 return nullptr;

1601 Instruction *T = dyn_cast(I->getOperand(0));

1603 return nullptr;

1604 switch (T->getOpcode()) {

1605 case Instruction::And:

1606 case Instruction::Or:

1607 case Instruction::Xor:

1608 break;

1609 default:

1610 return nullptr;

1611 }

1613 return B.CreateBinOp(cast(T)->getOpcode(),

1614 B.CreateZExt(T->getOperand(0), I->getType()),

1615 B.CreateZExt(T->getOperand(1), I->getType()));

1616 });

1617 S.addRule("xor/and -> and/xor",

1618

1620 if (I->getOpcode() != Instruction::Xor)

1621 return nullptr;

1622 Instruction *And0 = dyn_cast(I->getOperand(0));

1623 Instruction *And1 = dyn_cast(I->getOperand(1));

1624 if (!And0 || !And1)

1625 return nullptr;

1626 if (And0->getOpcode() != Instruction::And ||

1627 And1->getOpcode() != Instruction::And)

1628 return nullptr;

1629 if (And0->getOperand(1) != And1->getOperand(1))

1630 return nullptr;

1632 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),

1633 And0->getOperand(1));

1634 });

1635 S.addRule("sink binop into select",

1636

1637

1640 if (!BO)

1641 return nullptr;

1645 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();

1647 return B.CreateSelect(Sel->getCondition(),

1648 B.CreateBinOp(Op, X, Z),

1649 B.CreateBinOp(Op, Y, Z));

1650 }

1654 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();

1655 return B.CreateSelect(Sel->getCondition(),

1656 B.CreateBinOp(Op, X, Y),

1657 B.CreateBinOp(Op, X, Z));

1658 }

1659 return nullptr;

1660 });

1661 S.addRule("fold select-select",

1662

1663

1665 SelectInst *Sel = dyn_cast(I);

1666 if (!Sel)

1667 return nullptr;

1671 if (Sel0->getCondition() == C)

1672 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());

1673 }

1675 if (Sel1->getCondition() == C)

1676 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());

1677 }

1678 return nullptr;

1679 });

1680 S.addRule("or-signbit -> xor-signbit",

1681

1683 if (I->getOpcode() != Instruction::Or)

1684 return nullptr;

1685 ConstantInt *Msb = dyn_cast(I->getOperand(1));

1687 return nullptr;

1689 return nullptr;

1691 });

1692 S.addRule("sink lshr into binop",

1693

1695 if (I->getOpcode() != Instruction::LShr)

1696 return nullptr;

1697 BinaryOperator *BitOp = dyn_cast(I->getOperand(0));

1698 if (!BitOp)

1699 return nullptr;

1701 case Instruction::And:

1702 case Instruction::Or:

1703 case Instruction::Xor:

1704 break;

1705 default:

1706 return nullptr;

1707 }

1709 Value *S = I->getOperand(1);

1710 return B.CreateBinOp(BitOp->getOpcode(),

1713 });

1714 S.addRule("expose bitop-const",

1715

1717 auto IsBitOp = [](unsigned Op) -> bool {

1718 switch (Op) {

1719 case Instruction::And:

1720 case Instruction::Or:

1721 case Instruction::Xor:

1722 return true;

1723 }

1724 return false;

1725 };

1727 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))

1728 return nullptr;

1730 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))

1731 return nullptr;

1734 if (!CA || !CB)

1735 return nullptr;

1738 return B.CreateBinOp(BitOp2->getOpcode(), X,

1739 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));

1740 });

1741}

1742

1743void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {

1744 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",

1746 if (I->getOpcode() != Instruction::And)

1747 return nullptr;

1748 Instruction *Xor = dyn_cast(I->getOperand(0));

1749 ConstantInt *C0 = dyn_cast(I->getOperand(1));

1750 if (Xor || !C0)

1751 return nullptr;

1752 if (Xor->getOpcode() != Instruction::Xor)

1753 return nullptr;

1754 Instruction *And0 = dyn_cast(Xor->getOperand(0));

1755 Instruction *And1 = dyn_cast(Xor->getOperand(1));

1756

1757 if (!And0 || And0->getOpcode() != Instruction::And)

1759 ConstantInt *C1 = dyn_cast(And0->getOperand(1));

1760 if (!C1)

1761 return nullptr;

1762 uint32_t V0 = C0->getZExtValue();

1763 uint32_t V1 = C1->getZExtValue();

1764 if (V0 != (V0 & V1))

1765 return nullptr;

1767 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);

1768 });

1769}

1770

1771bool PolynomialMultiplyRecognize::recognize() {

1772 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"

1773 << *CurLoop << '\n');

1774

1775

1776

1777

1778

1779 BasicBlock *LoopB = CurLoop->getHeader();

1781

1782 if (LoopB != CurLoop->getLoopLatch())

1783 return false;

1784 BasicBlock *ExitB = CurLoop->getExitBlock();

1785 if (ExitB == nullptr)

1786 return false;

1787 BasicBlock *EntryB = CurLoop->getLoopPreheader();

1788 if (EntryB == nullptr)

1789 return false;

1790

1791 unsigned IterCount = 0;

1792 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);

1793 if (isa(CT))

1794 return false;

1795 if (auto *CV = dyn_cast(CT))

1796 IterCount = CV->getValue()->getZExtValue() + 1;

1797

1798 Value *CIV = getCountIV(LoopB);

1799 if (CIV == nullptr)

1800 return false;

1801 ParsedValues PV;

1802 Simplifier PreSimp;

1803 PV.IterCount = IterCount;

1804 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount

1805 << '\n');

1806

1807 setupPreSimplifier(PreSimp);

1808

1809

1810

1811

1812

1813

1814

1815 bool FoundPreScan = false;

1816 auto FeedsPHI = [LoopB](const Value *V) -> bool {

1817 for (const Value *U : V->users()) {

1818 if (const auto *P = dyn_cast(U))

1819 if (P->getParent() == LoopB)

1820 return true;

1821 }

1822 return false;

1823 };

1826 if (!SI || !FeedsPHI(SI))

1827 continue;

1828

1829 Simplifier::Context C(SI);

1830 Value *T = PreSimp.simplify(C);

1831 SelectInst *SelI = (T && isa(T)) ? cast(T) : SI;

1832 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');

1833 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {

1834 FoundPreScan = true;

1835 if (SelI != SI) {

1836 Value *NewSel = C.materialize(LoopB, SI->getIterator());

1837 SI->replaceAllUsesWith(NewSel);

1839 }

1840 break;

1841 }

1842 }

1843

1844 if (!FoundPreScan) {

1845 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n");

1846 return false;

1847 }

1848

1849 if (!PV.Left) {

1850

1851

1852

1853

1854 if (!promoteTypes(LoopB, ExitB))

1855 return false;

1856

1857 Simplifier PostSimp;

1858 setupPostSimplifier(PostSimp);

1861 if (!SI || !FeedsPHI(SI))

1862 continue;

1863 Simplifier::Context C(SI);

1864 Value *T = PostSimp.simplify(C);

1865 SelectInst *SelI = dyn_cast_or_null(T);

1866 if (SelI != SI) {

1867 Value *NewSel = C.materialize(LoopB, SI->getIterator());

1868 SI->replaceAllUsesWith(NewSel);

1870 }

1871 break;

1872 }

1873

1874 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))

1875 return false;

1876 cleanupLoopBody(LoopB);

1877 }

1878

1879

1880 bool FoundScan = false;

1882 SelectInst *SelI = dyn_cast(&In);

1883 if (!SelI)

1884 continue;

1885 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');

1886 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);

1887 if (FoundScan)

1888 break;

1889 }

1891

1893 StringRef PP = (PV.M ? "(P+M)" : "P");

1894 if (!PV.Inv)

1895 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";

1896 else

1897 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "

1898 << PP << "\n";

1899 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";

1900 if (PV.M)

1901 dbgs() << " M:" << *PV.M << "\n";

1902 dbgs() << " Q:" << *PV.Q << "\n";

1903 dbgs() << " Iteration count:" << PV.IterCount << "\n";

1904 });

1905

1907 Value *PM = generate(At, PV);

1908 if (PM == nullptr)

1909 return false;

1910

1911 if (PM->getType() != PV.Res->getType())

1913

1915 PV.Res->eraseFromParent();

1916 return true;

1917}

1918

1919int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {

1921 return SC->getAPInt().getSExtValue();

1922 return 0;

1923}

1924

1925bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {

1926

1928 return false;

1929

1930 Value *StoredVal = SI->getValueOperand();

1931 Value *StorePtr = SI->getPointerOperand();

1932

1933

1934 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());

1935 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)

1936 return false;

1937

1938

1939

1940

1941 auto *StoreEv = dyn_cast(SE->getSCEV(StorePtr));

1942 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())

1943 return false;

1944

1945

1946

1947 int Stride = getSCEVStride(StoreEv);

1948 if (Stride == 0)

1949 return false;

1950 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

1951 if (StoreSize != unsigned(std::abs(Stride)))

1952 return false;

1953

1954

1955 LoadInst *LI = dyn_cast(SI->getValueOperand());

1957 return false;

1958

1959

1960

1961

1963 auto *LoadEv = dyn_cast(SE->getSCEV(LoadPtr));

1964 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())

1965 return false;

1966

1967

1968 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))

1969 return false;

1970

1971

1972 return true;

1973}

1974

1975

1976

1977

1978static bool

1980 const SCEV *BECount, unsigned StoreSize,

1983

1984

1985

1987

1988

1989

1990 if (const SCEVConstant *BECst = dyn_cast(BECount))

1992 StoreSize);

1993

1994

1995

1996

1997

1999

2000 for (auto *B : L->blocks())

2001 for (auto &I : *B)

2002 if (Ignored.count(&I) == 0 &&

2004 return true;

2005

2006 return false;

2007}

2008

2009void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,

2013 if (StoreInst *SI = dyn_cast(&I))

2014 if (isLegalStore(CurLoop, SI))

2016}

2017

2018bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,

2021 "Expected only non-volatile stores, or Hexagon-specific memcpy"

2022 "to volatile destination.");

2023

2024 Value *StorePtr = SI->getPointerOperand();

2025 auto *StoreEv = cast(SE->getSCEV(StorePtr));

2026 unsigned Stride = getSCEVStride(StoreEv);

2027 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

2028 if (Stride != StoreSize)

2029 return false;

2030

2031

2032

2033

2034 auto *LI = cast(SI->getValueOperand());

2035 auto *LoadEv = cast(SE->getSCEV(LI->getPointerOperand()));

2036

2037

2038

2039

2043 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");

2044

2045 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());

2046

2047

2048

2049

2050

2051

2052

2053 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),

2054 Builder.getPtrTy(SI->getPointerAddressSpace()), ExpPt);

2055 Value *LoadBasePtr = nullptr;

2056

2057 bool Overlap = false;

2058 bool DestVolatile = SI->isVolatile();

2060

2061 if (DestVolatile) {

2062

2063

2064 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {

2065CleanupAndExit:

2066

2067 Expander.clear();

2068 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {

2070 StoreBasePtr = nullptr;

2071 }

2072 if (LoadBasePtr) {

2074 LoadBasePtr = nullptr;

2075 }

2076 return false;

2077 }

2078 }

2079

2083 StoreSize, *AA, Ignore1)) {

2084

2087 BECount, StoreSize, *AA, Ignore1)) {

2088

2089 goto CleanupAndExit;

2090 }

2091

2092 Overlap = true;

2093 }

2094

2095 if (!Overlap) {

2097 goto CleanupAndExit;

2098 } else {

2099

2100

2102 if (Func->hasFnAttribute(Attribute::AlwaysInline))

2103 goto CleanupAndExit;

2104

2105

2106

2107

2108

2112 if (!coverLoop(CurLoop, Insts))

2113 goto CleanupAndExit;

2114

2116 goto CleanupAndExit;

2117 bool IsNested = CurLoop->getParentLoop() != nullptr;

2119 goto CleanupAndExit;

2120 }

2121

2122

2123

2124 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),

2126

2130 StoreSize, *AA, Ignore2))

2131 goto CleanupAndExit;

2132

2133

2134 bool StridePos = getSCEVStride(LoadEv) >= 0;

2135

2136

2137 if (!StridePos && DestVolatile)

2138 goto CleanupAndExit;

2139

2140 bool RuntimeCheck = (Overlap || DestVolatile);

2141

2143 if (RuntimeCheck) {

2144

2147 if (ExitBlocks.size() != 1)

2148 goto CleanupAndExit;

2149 ExitB = ExitBlocks[0];

2150 }

2151

2152

2153

2155 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);

2157

2158 const SCEV *NumBytesS =

2159 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);

2160 if (StoreSize != 1)

2161 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),

2163 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);

2164 if (Instruction *In = dyn_cast(NumBytes))

2166 NumBytes = Simp;

2167

2169

2170 if (RuntimeCheck) {

2172 if (ConstantInt *CI = dyn_cast(NumBytes)) {

2173 uint64_t C = CI->getZExtValue();

2174 if (Threshold != 0 && C < Threshold)

2175 goto CleanupAndExit;

2177 goto CleanupAndExit;

2178 }

2179

2182 Loop *ParentL = LF->getLoopFor(Preheader);

2183 StringRef HeaderName = Header->getName();

2184

2185

2186

2188 Func, Header);

2189 if (ParentL)

2192 for (auto &In : *Header) {

2193 PHINode *PN = dyn_cast(&In);

2194 if (!PN)

2195 break;

2197 if (bx >= 0)

2199 }

2200 DT->addNewBlock(NewPreheader, Preheader);

2202

2203

2204

2205

2206

2207

2208

2209 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);

2210 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);

2211 Value *LowA = StridePos ? SA : LA;

2212 Value *HighA = StridePos ? LA : SA;

2213 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);

2215

2216

2217

2218 Value *Dist = Builder.CreateSub(LowA, HighA);

2219 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);

2220 Value *CmpEither = Builder.CreateOr(Cond, CmpD);

2221 Cond = CmpEither;

2222

2223 if (Threshold != 0) {

2225 Value *Thr = ConstantInt::get(Ty, Threshold);

2226 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);

2227 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);

2228 Cond = CmpBoth;

2229 }

2231 Func, NewPreheader);

2232 if (ParentL)

2235 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);

2239

2243 if (!ExitD)

2244 break;

2245 }

2246

2247

2248

2249

2250 if (ExitD && DT->dominates(Preheader, ExitD)) {

2254 }

2255

2256

2258 CondBuilder.CreateBr(ExitB);

2259 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());

2260

2261 if (DestVolatile) {

2263 Type *PtrTy = PointerType::get(Ctx, 0);

2268

2269 const SCEV *OneS = SE->getConstant(Int32Ty, 1);

2270 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);

2271 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);

2272 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,

2274 if (Instruction *In = dyn_cast(NumWords))

2276 NumWords = Simp;

2277

2278 NewCall = CondBuilder.CreateCall(Fn,

2279 {StoreBasePtr, LoadBasePtr, NumWords});

2280 } else {

2281 NewCall = CondBuilder.CreateMemMove(

2282 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);

2283 }

2284 } else {

2285 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,

2287

2288

2290 }

2291

2293

2294 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")

2295 << *NewCall << "\n"

2296 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"

2297 << " from store ptr=" << *StoreEv << " at: " << *SI

2298 << "\n");

2299

2300 return true;

2301}

2302

2303

2304

2305

2306bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,

2309 for (auto *B : L->blocks())

2311

2313

2314

2315

2316

2317

2318 for (unsigned i = 0; i < Worklist.size(); ++i) {

2320 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {

2321 Instruction *OpI = dyn_cast(I);

2322 if (!OpI)

2323 continue;

2325 if (!LoopBlocks.count(PB))

2326 continue;

2327 Worklist.insert(OpI);

2328 }

2329 }

2330

2331

2332

2333

2334

2335 for (auto *B : L->blocks()) {

2336 for (auto &In : *B) {

2337 if (isa(In) || isa(In))

2338 continue;

2339 if (!Worklist.count(&In) && In.mayHaveSideEffects())

2340 return false;

2341 for (auto *K : In.users()) {

2342 Instruction *UseI = dyn_cast(K);

2343 if (!UseI)

2344 continue;

2346 if (LF->getLoopFor(UseB) != L)

2347 return false;

2348 }

2349 }

2350 }

2351

2352 return true;

2353}

2354

2355

2356

2357

2358bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,

2360

2361

2362

2363 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {

2365 };

2366 if (all\_of(ExitBlocks, DominatedByBB))

2367 return false;

2368

2369 bool MadeChange = false;

2370

2372 collectStores(CurLoop, BB, Stores);

2373

2374

2375 for (auto &SI : Stores)

2376 MadeChange |= processCopyingStore(CurLoop, SI, BECount);

2377

2378 return MadeChange;

2379}

2380

2381bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {

2382 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);

2383 if (PMR.recognize())

2384 return true;

2385

2386 if (!HasMemcpy && !HasMemmove)

2387 return false;

2388

2389 const SCEV *BECount = SE->getBackedgeTakenCount(L);

2390 assert(!isa(BECount) &&

2391 "runOnCountableLoop() called on a loop without a predictable"

2392 "backedge-taken count");

2393

2395 L->getUniqueExitBlocks(ExitBlocks);

2396

2397 bool Changed = false;

2398

2399

2400 for (auto *BB : L->getBlocks()) {

2401

2402 if (LF->getLoopFor(BB) != L)

2403 continue;

2404 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);

2405 }

2406

2407 return Changed;

2408}

2409

2410bool HexagonLoopIdiomRecognize::run(Loop *L) {

2411 const Module &M = *L->getHeader()->getParent()->getParent();

2413 return false;

2414

2415

2416

2417 if (L->getLoopPreheader())

2418 return false;

2419

2420

2421 StringRef Name = L->getHeader()->getParent()->getName();

2422 if (Name == "memset" || Name == "memcpy" || Name == "memmove")

2423 return false;

2424

2425 DL = &L->getHeader()->getDataLayout();

2426

2427 HasMemcpy = TLI->has(LibFunc_memcpy);

2428 HasMemmove = TLI->has(LibFunc_memmove);

2429

2430 if (SE->hasLoopInvariantBackedgeTakenCount(L))

2431 return runOnCountableLoop(L);

2432 return false;

2433}

2434

2435bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,

2437 if (skipLoop(L))

2438 return false;

2439

2440 auto *AA = &getAnalysis().getAAResults();

2441 auto *DT = &getAnalysis().getDomTree();

2442 auto *LF = &getAnalysis().getLoopInfo();

2443 auto *TLI = &getAnalysis().getTLI(

2444 *L->getHeader()->getParent());

2445 auto *SE = &getAnalysis().getSE();

2446 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);

2447}

2448

2450 return new HexagonLoopIdiomRecognizeLegacyPass();

2451}

2452

2457 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)

2458 .run(&L)

2461}

This file implements a class to represent arbitrary precision integral constant values and operations...

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

This file contains the simple types necessary to represent the attributes associated with functions a...

static const Function * getParent(const Value *V)

static void cleanup(BlockFrequencyInfoImplBase &BFI)

Clear all memory not needed downstream.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

#define LLVM_ATTRIBUTE_USED

This file contains the declarations for the subclasses of Constant, which represent the different fla...

This file defines the DenseMap class.

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

static cl::opt< unsigned > SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"))

hexagon loop Recognize Hexagon specific loop idioms

static cl::opt< bool > DisableMemcpyIdiom("disable-memcpy-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memcpy in loop idiom recognition"))

static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB)

static cl::opt< unsigned > RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " "check guarding the memmove."))

static cl::opt< bool > HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination."))

static cl::opt< bool > DisableMemmoveIdiom("disable-memmove-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memmove in loop idiom recognition"))

static cl::opt< unsigned > CompileTimeMemSizeThreshold("compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), cl::desc("Threshold (in bytes) to perform the transformation, if the " "runtime loop count (mem transfer size) is known at compile-time."))

static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)

mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...

static const char * HexagonVolatileMemcpyName

static bool hasZeroSignBit(const Value *V)

static cl::opt< bool > OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops"))

Module.h This file contains the declarations for the Module class.

This header defines various interfaces for pass management in LLVM.

iv Induction Variable Users

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)

Move duplicate certain instructions close to their use

This header provides classes for managing per-loop analyses.

This file provides utility analysis objects describing memory locations.

uint64_t IntrinsicInst * II

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)

Initialize the set of available library functions based on the specified target triple.

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)

Check whether or not an instruction may read or write the optionally specified memory location.

bool isSignMask() const

Check if the APInt's value is returned by getSignMask.

static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)

Constructs an APInt value that has the bottom loBitsSet bits set.

A container for analyses that lazily runs them and caches their results.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequiredID(const void *ID)

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...

const Instruction * getFirstNonPHI() const

Returns a pointer to the first instruction in this block that is not a PHINode instruction.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

const BasicBlock * getSinglePredecessor() const

Return the predecessor of this block if it has a single predecessor block.

const Function * getParent() const

Return the enclosing method, or null if none.

InstListType::iterator iterator

Instruction iterators...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

BinaryOps getOpcode() const

This class represents a function call, abstracting a target machine's calling convention.

This class is the base class for the comparison instructions.

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

This is the shared class of boolean and integer constants.

const APInt & getValue() const

Return the constant as an APInt value reference.

This class represents an Operation in the Expression.

A parsed version of the target data layout string in and methods for querying it.

void setIDom(DomTreeNodeBase *NewIDom)

void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)

changeImmediateDominator - This method is used to update the dominator tree information when a node's...

DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)

Add a new node to the dominator tree information.

DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const

getNode - return the (Post)DominatorTree node for the specified basic block.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const

Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...

bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...

A possibly irreducible generalization of a Loop.

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)

BranchInst * CreateBr(BasicBlock *Dest)

Create an unconditional 'br label X' instruction.

Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")

Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

void setDebugLoc(DebugLoc Loc)

Set the debug location information for this instruction.

Class to represent integer types.

static IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

unsigned getBitWidth() const

Get the number of bits in this IntegerType.

This is an important class for using LLVM in a threaded context.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

An instruction for reading from memory.

unsigned getPointerAddressSpace() const

Returns the address space of the pointer operand.

Value * getPointerOperand()

Align getAlign() const

Return the alignment of the access that is being performed.

static LocationSize precise(uint64_t Value)

static constexpr LocationSize afterPointer()

Any location after the base pointer (but still within the underlying object).

BlockT * getHeader() const

void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)

This method is used by other analyses to update loop information.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const

Return all unique successor blocks of this loop.

LoopT * getParentLoop() const

Return the parent loop if it exists or nullptr for top level loops.

The legacy pass manager's analysis pass to compute loop information.

virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0

Represents a single loop in the control flow graph.

Representation for a specific memory location.

A Module instance is used to store all the information related to an LLVM module.

void setIncomingBlock(unsigned i, BasicBlock *BB)

int getBasicBlockIndex(const BasicBlock *BB) const

Return the first index of the specified basic block in the value list for this PHI.

PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This node represents a polynomial recurrence on the trip count of the specified loop.

This class represents a constant integer value.

This class uses information about analyze scalars to rewrite expressions in canonical form.

const SCEV * getOperand(unsigned i) const

This class represents an analyzed expression in the program.

Type * getType() const

Return the LLVM type of this SCEV expression.

The main scalar evolution driver.

This class represents the LLVM 'select' instruction.

const Value * getFalseValue() const

const Value * getCondition() const

const Value * getTrueValue() const

A vector that has set insertion semantics.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e.

Provides information about what library functions are available for the current target.

Triple - Helper class for working with autoconf configuration names.

ArchType getArch() const

Get the parsed architecture type of this triple.

This class represents a truncation of integer types.

The instances of the Type class are immutable: once they are created, they are never changed.

static Type * getVoidTy(LLVMContext &C)

static IntegerType * getInt32Ty(LLVMContext &C)

bool isVoidTy() const

Return true if this is 'void'.

A Use represents the edge between a Value definition and its users.

User * getUser() const

Returns the User that contains this Use.

Value * getOperand(unsigned i) const

unsigned getNumOperands() const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

void setName(const Twine &Name)

Change the name of the value.

bool hasOneUse() const

Return true if there is exactly one use of this value.

void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

StringRef getName() const

Return a constant reference to the value's name.

This class represents zero extension of integer types.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

@ SC

CHAIN = SC CHAIN, Imm128 - System call.

BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

Matches an ICmp with a predicate over LHS and RHS in either order.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)

Matches an Xor with LHS and RHS in either order.

CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)

Matches ZExt.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

void link(std::unique_ptr< LinkGraph > G, std::unique_ptr< JITLinkContext > Ctx)

Link the given graph.

NodeAddr< FuncNode * > Func

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

auto pred_end(const MachineBasicBlock *BB)

Pass * createHexagonLoopIdiomPass()

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)

See if we can compute a simplified version of this instruction.

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool isModOrRefSet(const ModRefInfo MRI)

ModRefInfo

Flags indicating whether a memory access modifies or references memory.

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

void replace(R &&Range, const T &OldValue, const T &NewValue)

Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.

@ Xor

Bitwise or logical XOR of integers.

@ And

Bitwise or logical AND of integers.

void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

auto pred_begin(const MachineBasicBlock *BB)

void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &)

PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

auto predecessors(const MachineBasicBlock *BB)

bool equal(L &&LRange, R &&RRange)

Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...