LLVM: lib/Transforms/Vectorize/VectorCombine.cpp Source File

1//===------- VectorCombine.cpp - Optimize partial vector operations ------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This pass optimizes scalar/vector interactions using target cost models. The

10// transforms implemented here may not fit in traditional loop-based or SLP

11// vectorization passes.

12//

13//===----------------------------------------------------------------------===//

14

38#include <cassert>

39#include <limits>

40#include <numeric>

41#include <queue>

42

43#define DEBUG_TYPE "vector-combine"

44#include "llvm/Transforms/Utils/InstructionWorklist.h"

45

46using namespace llvm;

47using namespace llvm::PatternMatch;

48

49STATISTIC(NumVecLoad, "Number of vector loads formed");

50STATISTIC(NumVecCmp, "Number of vector compares formed");

51STATISTIC(NumVecBO, "Number of vector binops formed");

52STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");

53STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");

54STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");

55STATISTIC(NumScalarCmp, "Number of scalar compares formed");

56STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");

57

58static cl::opt<bool> DisableVectorCombine(

59 "disable-vector-combine", cl::init(false), cl::Hidden,

60 cl::desc("Disable all vector combine transforms"));

61

62static cl::opt<bool> DisableBinopExtractShuffle(

63 "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,

64 cl::desc("Disable binop extract to shuffle transforms"));

65

66static cl::opt<unsigned> MaxInstrsToScan(

67 "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,

68 cl::desc("Max number of instructions to scan for vector combining."));

69

70static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();

71

72namespace {

73class VectorCombine {

74public:

78 bool TryEarlyFoldsOnly)

81 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

82

83 bool run();

84

85private:

86 Function &F;

87 IRBuilder<InstSimplifyFolder> Builder;

88 const TargetTransformInfo &TTI;

89 const DominatorTree &DT;

90 AAResults &AA;

91 AssumptionCache &AC;

92 const DataLayout *DL;

93 TTI::TargetCostKind CostKind;

94 const SimplifyQuery SQ;

95

96

97

98 bool TryEarlyFoldsOnly;

99

100 InstructionWorklist Worklist;

101

102

103

105

106

107

108

109 bool vectorizeLoadInsert(Instruction &I);

110 bool widenSubvectorLoad(Instruction &I);

111 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,

112 ExtractElementInst *Ext1,

113 unsigned PreferredExtractIndex) const;

114 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,

115 const Instruction &I,

116 ExtractElementInst *&ConvertToShuffle,

117 unsigned PreferredExtractIndex);

120 bool foldExtractExtract(Instruction &I);

121 bool foldInsExtFNeg(Instruction &I);

122 bool foldInsExtBinop(Instruction &I);

123 bool foldInsExtVectorToShuffle(Instruction &I);

124 bool foldBitOpOfCastops(Instruction &I);

125 bool foldBitOpOfCastConstant(Instruction &I);

126 bool foldBitcastShuffle(Instruction &I);

127 bool scalarizeOpOrCmp(Instruction &I);

128 bool scalarizeVPIntrinsic(Instruction &I);

129 bool foldExtractedCmps(Instruction &I);

130 bool foldBinopOfReductions(Instruction &I);

131 bool foldSingleElementStore(Instruction &I);

132 bool scalarizeLoad(Instruction &I);

133 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy, Value *Ptr);

134 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy, Value *Ptr);

135 bool scalarizeExtExtract(Instruction &I);

136 bool foldConcatOfBoolMasks(Instruction &I);

137 bool foldPermuteOfBinops(Instruction &I);

138 bool foldShuffleOfBinops(Instruction &I);

139 bool foldShuffleOfSelects(Instruction &I);

140 bool foldShuffleOfCastops(Instruction &I);

141 bool foldShuffleOfShuffles(Instruction &I);

142 bool foldPermuteOfIntrinsic(Instruction &I);

143 bool foldShuffleOfIntrinsics(Instruction &I);

144 bool foldShuffleToIdentity(Instruction &I);

145 bool foldShuffleFromReductions(Instruction &I);

146 bool foldShuffleChainsToReduce(Instruction &I);

147 bool foldCastFromReductions(Instruction &I);

148 bool foldSelectShuffle(Instruction &I, bool FromReduction = false);

149 bool foldInterleaveIntrinsics(Instruction &I);

150 bool shrinkType(Instruction &I);

151 bool shrinkLoadForShuffles(Instruction &I);

152 bool shrinkPhiOfShuffles(Instruction &I);

153

154 void replaceValue(Instruction &Old, Value &New, bool Erase = true) {

155 LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');

156 LLVM_DEBUG(dbgs() << "         With: " << New << '\n');

157 Old.replaceAllUsesWith(&New);

158 if (auto *NewI = dyn_cast<Instruction>(&New)) {

159 New.takeName(&Old);

160 Worklist.pushUsersToWorkList(*NewI);

161 Worklist.pushValue(NewI);

162 }

163 if (Erase) {

164 eraseInstruction(Old);

165 } else {

166 Worklist.push(&Old);

167 }

168 }

169

173 Worklist.remove(&I);

174 I.eraseFromParent();

175

176

177

178 SmallPtrSet<Value *, 4> Visited;

183 OpI, nullptr, nullptr, [&](Value *V) {

186 Worklist.remove(I);

187 if (I == NextInst)

188 NextInst = NextInst->getNextNode();

190 }

191 }))

192 continue;

193 Worklist.pushUsersToWorkList(*OpI);

194 Worklist.pushValue(OpI);

195 }

196 }

197 }

198 }

199};

200}

201

202

203

209

210static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {

211 // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan.

212 // The widened load may load data from dirty regions or create data races

213 // non-existent in the source.

214 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||

215 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

216 mustSuppressSpeculation(*Load))

217 return false;

218

219

220

221 Type *ScalarTy = Load->getType()->getScalarType();

222 uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();

223 unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();

224 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||

225 ScalarSize % 8 != 0)

226 return false;

227

228 return true;

229}
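
// Note: canWidenLoad only admits simple, single-use loads whose scalar size
// is byte-sized and evenly divides the target's minimum vector register
// width; e.g. a 32-bit scalar qualifies when
// TTI.getMinVectorRegisterBitWidth() returns 128, since 128 % 32 == 0.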

230

231bool VectorCombine::vectorizeLoadInsert(Instruction &I) {

232

233

237 return false;

238

239

242 if (!HasExtract)

244

247 return false;

248

252

253

254

255

256

257 Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();

259

260 unsigned MinVecNumElts = MinVectorSize / ScalarSize;

261 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);

262 unsigned OffsetEltIndex = 0;

263 Align Alignment = Load->getAlign();

265 &DT)) {

266

267

268

269

270 unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());

271 APInt Offset(OffsetBitWidth, 0);

273

274

275

276 if (Offset.isNegative())

277 return false;

278

279

280

281 uint64_t ScalarSizeInBytes = ScalarSize / 8;

282 if (Offset.urem(ScalarSizeInBytes) != 0)

283 return false;

284

285

286 OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();

287 if (OffsetEltIndex >= MinVecNumElts)

288 return false;

289

291 &DT))

292 return false;

293

294

295

296

298 }

299

300

301

303 Type *LoadTy = Load->getType();

304 unsigned AS = Load->getPointerAddressSpace();

308 OldCost +=

310 true, HasExtract, CostKind);

311

312

315

316

317

318

319

320

321

323 unsigned OutputNumElts = Ty->getNumElements();

325 assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");

326 Mask[0] = OffsetEltIndex;

327 if (OffsetEltIndex)

330

331

332

333 if (OldCost < NewCost || !NewCost.isValid())

334 return false;

335

336

337

339 Value *CastedPtr =

343

344 replaceValue(I, *VecLd);

345 ++NumVecLoad;

346 return true;

347}
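
// Illustrative IR sketch of the fold above (hypothetical values):
//   %s = load float, ptr %p, align 4
//   %r = insertelement <4 x float> poison, float %s, i64 0
// becomes, when the wider access is dereferenceable and the cost model agrees:
//   %w = load <4 x float>, ptr %p, align 4
//   %r = shufflevector <4 x float> %w, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
// With a constant pointer offset, Mask[0] selects OffsetEltIndex instead of 0.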

348

349

350

351

352bool VectorCombine::widenSubvectorLoad(Instruction &I) {

353

355 if (!Shuf->isIdentityWithPadding())

356 return false;

357

358

359 unsigned NumOpElts =

361 unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {

362 return M >= (int)(NumOpElts);

363 });

364

367 return false;

368

369

370

371

373 Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();

375 Align Alignment = Load->getAlign();

377 return false;

378

380 Type *LoadTy = Load->getType();

381 unsigned AS = Load->getPointerAddressSpace();

382

383

384

385

386

389

390

393

394

395

396 if (OldCost < NewCost || !NewCost.isValid())

397 return false;

398

400 Value *CastedPtr =

403 replaceValue(I, *VecLd);

404 ++NumVecLoad;

405 return true;

406}
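
// Sketch of the identity-with-padding pattern handled above (example only):
//   %v2 = load <2 x float>, ptr %p
//   %v4 = shufflevector <2 x float> %v2, <2 x float> poison,
//                       <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// is rewritten to one wider load when %p is dereferenceable for 16 bytes:
//   %v4 = load <4 x float>, ptr %p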

407

408

409

410ExtractElementInst *VectorCombine::getShuffleExtract(

411 ExtractElementInst *Ext0, ExtractElementInst *Ext1,

412 unsigned PreferredExtractIndex = InvalidIndex) const {

415 assert(Index0C && Index1C && "Expected constant extract indexes");

416

417 unsigned Index0 = Index0C->getZExtValue();

418 unsigned Index1 = Index1C->getZExtValue();

419

420

421 if (Index0 == Index1)

422 return nullptr;

423

430

431

433 return nullptr;

434

435

436

437

438 if (Cost0 > Cost1)

439 return Ext0;

440 if (Cost1 > Cost0)

441 return Ext1;

442

443

444

445 if (PreferredExtractIndex == Index0)

446 return Ext1;

447 if (PreferredExtractIndex == Index1)

448 return Ext0;

449

450

451 return Index0 > Index1 ? Ext0 : Ext1;

452}

453

454

455

456

457

458

459bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,

460 ExtractElementInst *Ext1,

461 const Instruction &I,

462 ExtractElementInst *&ConvertToShuffle,

463 unsigned PreferredExtractIndex) {

466 assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

467

468 unsigned Opcode = I.getOpcode();

474

475

477 if (IsBinOp) {

480 } else {

481 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&

482 "Expected a compare");

488 }

489

490

491

492 unsigned Ext0Index = Ext0IndexC->getZExtValue();

493 unsigned Ext1Index = Ext1IndexC->getZExtValue();

494

499

500

501

502

503

504

505

506

507 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;

508 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;

509 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

510

511

512

514 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {

515

516

517

518

519 bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)

521 OldCost = CheapExtractCost + ScalarOpCost;

522 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;

523 } else {

524

525

526 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;

527 NewCost = VectorOpCost + CheapExtractCost +

528 !Ext0->hasOneUse() * Extract0Cost +

529 !Ext1->hasOneUse() * Extract1Cost;

530 }

531

532 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);

533 if (ConvertToShuffle) {

535 return true;

536

537

538

539

540

541

542

543

545 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),

547 ShuffleMask[BestInsIndex] = BestExtIndex;

549 VecTy, VecTy, ShuffleMask, CostKind, 0,

550 nullptr, {ConvertToShuffle});

551 } else {

553 VecTy, VecTy, {}, CostKind, 0, nullptr,

554 {ConvertToShuffle});

555 }

556 }

557

558

559

560

561 return OldCost < NewCost;

562}

563

564

565

568

569

570

573 ShufMask[NewIndex] = OldIndex;

574 return Builder.CreateShuffleVector(Vec, ShufMask, "shift");

575}

576

577

578

579

580

583

586 return nullptr;

587

588

589

593 return nullptr;

594

596 NewIndex, Builder);

597 return Shuf;

598}

599

600

601

602

604 Instruction &I) {

606

607

608

609 ++NumVecCmp;

613}

614

615

616

617

619 Instruction &I) {

621

622

623

624 ++NumVecBO;

626 V1, "foldExtExtBinop");

627

628

629

631 VecBOInst->copyIRFlags(&I);

632

634}

635

636

637bool VectorCombine::foldExtractExtract(Instruction &I) {

638

639

641 return false;

642

647 return false;

648

650 uint64_t C0, C1;

654 return false;

655

656

657

658

659

660

664 if (I.hasOneUse())

667

668 ExtractElementInst *ExtractToChange;

669 if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))

670 return false;

671

674

675 if (ExtractToChange) {

676 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;

677 Value *NewExtOp =

679 if (!NewExtOp)

680 return false;

681 if (ExtractToChange == Ext0)

682 ExtOp0 = NewExtOp;

683 else

684 ExtOp1 = NewExtOp;

685 }

686

690 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)

691 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);

692 Worklist.push(Ext0);

693 Worklist.push(Ext1);

694 replaceValue(I, *NewExt);

695 return true;

696}
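
// Example of the extract-extract fold (illustrative, not taken verbatim from
// the source):
//   %e0 = extractelement <4 x i32> %x, i32 1
//   %e1 = extractelement <4 x i32> %y, i32 1
//   %b  = add i32 %e0, %e1
// becomes, when isExtractExtractCheap() approves:
//   %v = add <4 x i32> %x, %y
//   %b = extractelement <4 x i32> %v, i32 1
// Mismatched constant indices are first aligned with a "shift" shuffle.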

697

698

699

700bool VectorCombine::foldInsExtFNeg(Instruction &I) {

701

703 uint64_t ExtIdx, InsIdx;

707 return false;

708

709

715 return false;

716

718 auto *DstVecScalarTy = DstVecTy->getScalarType();

720 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())

721 return false;

722

723

724

725 unsigned NumDstElts = DstVecTy->getNumElements();

726 unsigned NumSrcElts = SrcVecTy->getNumElements();

727 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)

728 return false;

729

730

731

732

733 SmallVector<int> Mask(NumDstElts);

734 std::iota(Mask.begin(), Mask.end(), 0);

735 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;

739

740

741

742

745

750

751 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;

752

753

754 SmallVector<int> SrcMask;

755 if (NeedLenChg) {

757 SrcMask[ExtIdx % NumDstElts] = ExtIdx;

759 DstVecTy, SrcVecTy, SrcMask, CostKind);

760 }

761

762 LLVM_DEBUG(dbgs() << "Found an insertion of (extract)fneg : " << I

763 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

764 << "\n");

765 if (NewCost > OldCost)

766 return false;

767

768 Value *NewShuf, *LenChgShuf = nullptr;

769

771 if (NeedLenChg) {

772

776 } else {

777

779 }

780

782 replaceValue(I, *NewShuf);

783 return true;

784}
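
// Illustrative IR for the fneg fold above, with ExtIdx == InsIdx == 2:
//   %e = extractelement <4 x float> %x, i64 2
//   %n = fneg float %e
//   %r = insertelement <4 x float> %y, float %n, i64 2
// -->
//   %nx = fneg <4 x float> %x
//   %r  = shufflevector <4 x float> %y, <4 x float> %nx,
//                       <4 x i32> <i32 0, i32 1, i32 6, i32 3>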

785

786

787

788bool VectorCombine::foldInsExtBinop(Instruction &I) {

789 BinaryOperator *VecBinOp, *SclBinOp;

794 return false;

795

796

798 if (BinOpcode != SclBinOp->getOpcode())

799 return false;

800

802 if (!ResultTy)

803 return false;

804

805

806

807

819

820 LLVM_DEBUG(dbgs() << "Found an insertion of two binops: " << I

821 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

822 << "\n");

823 if (NewCost > OldCost)

824 return false;

825

830 Value *NewBO = Builder.CreateBinOp(BinOpcode, NewIns0, NewIns1);

831

832

834 NewInst->copyIRFlags(VecBinOp);

835 NewInst->andIRFlags(SclBinOp);

836 }

837

840 replaceValue(I, *NewBO);

841 return true;

842}
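
// Sketch of the insert-of-two-binops fold (hypothetical operands):
//   %vb = add <4 x i32> %a, %b
//   %sb = add i32 %c, %d
//   %r  = insertelement <4 x i32> %vb, i32 %sb, i64 0
// is rewritten, when profitable, to scalar inserts feeding one vector binop:
//   %i0 = insertelement <4 x i32> %a, i32 %c, i64 0
//   %i1 = insertelement <4 x i32> %b, i32 %d, i64 0
//   %r  = add <4 x i32> %i0, %i1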

843

844

845

846bool VectorCombine::foldBitOpOfCastops(Instruction &I) {

847

849 if (!BinOp || !BinOp->isBitwiseLogicOp())

850 return false;

851

852

855 if (!LHSCast || !RHSCast) {

856 LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");

857 return false;

858 }

859

860

862 if (CastOpcode != RHSCast->getOpcode())

863 return false;

864

865

866 switch (CastOpcode) {

867 case Instruction::BitCast:

868 case Instruction::Trunc:

869 case Instruction::SExt:

870 case Instruction::ZExt:

871 break;

872 default:

873 return false;

874 }

875

876 Value *LHSSrc = LHSCast->getOperand(0);

877 Value *RHSSrc = RHSCast->getOperand(0);

878

879 // Source types must match.

880 if (LHSSrc->getType() != RHSSrc->getType())

881 return false;

882

883 auto *SrcTy = LHSSrc->getType();

884 auto *DstTy = I.getType();

885

886

887 if (CastOpcode != Instruction::BitCast &&

889 return false;

890

891

892 if (!SrcTy->getScalarType()->isIntegerTy() ||

893 !DstTy->getScalarType()->isIntegerTy())

894 return false;

895

896

897

898

899

900

905

908 LHSCastCost + RHSCastCost;

909

910

913

916 GenericCastCost;

917

918

919 if (!LHSCast->hasOneUse())

920 NewCost += LHSCastCost;

921 if (!RHSCast->hasOneUse())

922 NewCost += RHSCastCost;

923

924 LLVM_DEBUG(dbgs() << "foldBitOpOfCastops: OldCost=" << OldCost

925 << " NewCost=" << NewCost << "\n");

926

927 if (NewCost > OldCost)

928 return false;

929

930

931 Value *NewOp = Builder.CreateBinOp(BinOp->getOpcode(), LHSSrc, RHSSrc,

932 BinOp->getName() + ".inner");

934 NewBinOp->copyIRFlags(BinOp);

935

937

938

940

941

944

945

947

948 replaceValue(I, *Result);

949 return true;

950}
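
// Example of the bitop-of-casts fold (illustrative types):
//   %cx = zext <4 x i16> %a to <4 x i32>
//   %cy = zext <4 x i16> %b to <4 x i32>
//   %r  = and <4 x i32> %cx, %cy
// becomes the bitwise op on the narrower source type plus a single cast:
//   %inner = and <4 x i16> %a, %b
//   %r     = zext <4 x i16> %inner to <4 x i32>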

951

952

953

954

955

956

957bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {

960

961

963 return false;

964

965

967 if (!LHSCast)

968 return false;

969

971

972

973 switch (CastOpcode) {

974 case Instruction::BitCast:

975 case Instruction::ZExt:

976 case Instruction::SExt:

977 case Instruction::Trunc:

978 break;

979 default:

980 return false;

981 }

982

983 Value *LHSSrc = LHSCast->getOperand(0);

984

985 auto *SrcTy = LHSSrc->getType();

986 auto *DstTy = I.getType();

987

988

989 if (CastOpcode != Instruction::BitCast &&

991 return false;

992

993

994 if (!SrcTy->getScalarType()->isIntegerTy() ||

995 !DstTy->getScalarType()->isIntegerTy())

996 return false;

997

998

999 PreservedCastFlags RHSFlags;

1001 if (!InvC)

1002 return false;

1003

1004

1005

1006

1007

1008

1011

1014

1015

1018

1021 GenericCastCost;

1022

1023

1024 if (!LHSCast->hasOneUse())

1025 NewCost += LHSCastCost;

1026

1027 LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost

1028 << " NewCost=" << NewCost << "\n");

1029

1030 if (NewCost > OldCost)

1031 return false;

1032

1033

1035 LHSSrc, InvC, I.getName() + ".inner");

1037 NewBinOp->copyIRFlags(&I);

1038

1040

1041

1043

1044

1045 if (RHSFlags.NNeg)

1047 if (RHSFlags.NUW)

1049 if (RHSFlags.NSW)

1051

1053

1054

1056

1057 replaceValue(I, *Result);

1058 return true;

1059}

1060

1061

1062

1063

1064bool VectorCombine::foldBitcastShuffle(Instruction &I) {

1066 ArrayRef<int> Mask;

1069 return false;

1070

1071

1072

1073

1074

1075

1078 if (!DestTy || !SrcTy)

1079 return false;

1080

1081 unsigned DestEltSize = DestTy->getScalarSizeInBits();

1082 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();

1083 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)

1084 return false;

1085

1087

1088

1089

1090 if (!IsUnary) {

1093 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&

1094 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))

1095 return false;

1096 }

1097

1098 SmallVector<int, 16> NewMask;

1099 if (DestEltSize <= SrcEltSize) {

1100

1101

1102 assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");

1103 unsigned ScaleFactor = SrcEltSize / DestEltSize;

1105 } else {

1106

1107

1108 assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");

1109 unsigned ScaleFactor = DestEltSize / SrcEltSize;

1111 return false;

1112 }

1113

1114

1115

1116 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;

1117 auto *NewShuffleTy =

1119 auto *OldShuffleTy =

1121 unsigned NumOps = IsUnary ? 1 : 2;

1122

1123

1127

1131 TargetTransformInfo::CastContextHint::None,

1136 TargetTransformInfo::CastContextHint::None,

1138

1139 LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "

1140 << OldCost << " vs NewCost: " << NewCost << "\n");

1141

1142 if (NewCost > OldCost || !NewCost.isValid())

1143 return false;

1144

1145

1146 ++NumShufOfBitcast;

1150 replaceValue(I, *Shuf);

1151 return true;

1152}
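
// Illustrative example of moving the shuffle after the bitcast; the mask is
// rescaled by SrcEltSize / DestEltSize (here 32/16 == 2):
//   %s = shufflevector <4 x i32> %x, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %b = bitcast <4 x i32> %s to <8 x i16>
// -->
//   %bx = bitcast <4 x i32> %x to <8 x i16>
//   %b  = shufflevector <8 x i16> %bx, <8 x i16> poison,
//            <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>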

1153

1154

1155

1156

1157bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {

1159 return false;

1163

1165 return false;

1166

1167

1168

1171 if (!ScalarOp0 || !ScalarOp1)

1172 return false;

1173

1174

1175

1176

1177

1178

1179 auto IsAllTrueMask = [](Value *MaskVal) {

1180 if (Value *SplattedVal = getSplatValue(MaskVal))

1181 if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))

1182 return ConstValue->isAllOnesValue();

1183 return false;

1184 };

1186 return false;

1187

1188

1191 return false;

1192

1193

1194

1196 SmallVector<int> Mask;

1198 Mask.resize(FVTy->getNumElements(), 0);

1203

1204

1207 Args.push_back(V->getType());

1208 IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);

1211

1212

1213 std::optional<unsigned> FunctionalOpcode =

1214 VPI.getFunctionalOpcode();

1215 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;

1216 if (!FunctionalOpcode) {

1218 if (!ScalarIntrID)

1219 return false;

1220 }

1221

1222

1224 if (ScalarIntrID) {

1225 IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);

1227 } else {

1229 VecTy->getScalarType(), CostKind);

1230 }

1231

1232

1235 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;

1236

1237 LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI

1238 << "\n");

1239 LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost

1240 << ", Cost of scalarizing:" << NewCost << "\n");

1241

1242

1243 if (OldCost < NewCost || !NewCost.isValid())

1244 return false;

1245

1246

1249

1250

1251

1252

1253

1254 bool SafeToSpeculate;

1255 if (ScalarIntrID)

1257 .hasAttribute(Attribute::AttrKind::Speculatable);

1258 else

1260 *FunctionalOpcode, &VPI, nullptr, &AC, &DT);

1261 if (!SafeToSpeculate &&

1263 return false;

1264

1265 Value *ScalarVal =

1266 ScalarIntrID

1267 ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,

1268 {ScalarOp0, ScalarOp1})

1270 ScalarOp0, ScalarOp1);

1271

1273 return true;

1274}
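
// Hedged sketch: with splatted operands and an all-true mask, e.g.
//   %r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %splat.a,
//                                          <4 x i32> %splat.b,
//                                          <4 x i1> %alltrue, i32 %evl)
// the operation is performed once on the scalars and the result re-splatted:
//   %s = add i32 %a, %b
//   %r = <splat of %s via insertelement + shufflevector>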

1275

1276

1277

1278

1279bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {

1284 if (!UO && !BO && !CI && !II)

1285 return false;

1286

1287

1288 if (II) {

1290 return false;

1291 for (auto [Idx, Arg] : enumerate(II->args()))

1292 if (Arg->getType() != II->getType() &&

1293 !isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, &TTI))

1294 return false;

1295 }

1296

1297

1298

1299

1300

1301 if (CI)

1302 for (User *U : I.users())

1303 if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))

1304 return false;

1305

1306

1307

1309 std::optional<uint64_t> Index;

1310

1311 auto Ops = II ? II->args() : I.operands();

1315 uint64_t InsIdx = 0;

1318

1320 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)

1321 return false;

1322

1323

1324 if (!Index)

1326 else if (InsIdx != *Index)

1327 return false;

1331 OpNum, &TTI)) {

1337 } else {

1338 return false;

1339 }

1340 }

1341

1342

1343 if (!Index.has_value())

1344 return false;

1345

1347 Type *ScalarTy = VecTy->getScalarType();

1348 assert(VecTy->isVectorTy() &&

1351 "Unexpected types for insert element into binop or cmp");

1352

1353 unsigned Opcode = I.getOpcode();

1355 if (CI) {

1361 } else if (UO || BO) {

1364 } else {

1365 IntrinsicCostAttributes ScalarICA(

1366 II->getIntrinsicID(), ScalarTy,

1369 IntrinsicCostAttributes VectorICA(

1370 II->getIntrinsicID(), VecTy,

1373 }

1374

1375

1376

1377 Value *NewVecC = nullptr;

1378 if (CI)

1379 NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);

1380 else if (UO)

1381 NewVecC =

1382 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);

1383 else if (BO)

1384 NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);

1385 else if (II)

1386 NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);

1387

1388 if (!NewVecC)

1389 return false;

1390

1391

1392

1397

1398 for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {

1400 II->getIntrinsicID(), Idx, &TTI)))

1401 continue;

1403 Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);

1404 OldCost += InsertCost;

1405 NewCost += Op->hasOneUse() * InsertCost;

1406 }

1407

1408

1409 if (OldCost < NewCost || !NewCost.isValid())

1410 return false;

1411

1412

1413

1414 if (CI)

1415 ++NumScalarCmp;

1416 else if (UO || BO)

1417 ++NumScalarOps;

1418 else

1419 ++NumScalarIntrinsic;

1420

1421

1422 for (auto [OpIdx, Scalar, VecC] : enumerate(ScalarOps, VecCs))

1423 if (!Scalar)

1426

1428 if (CI)

1429 Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);

1430 else if (UO || BO)

1432 else

1434

1435 Scalar->setName(I.getName() + ".scalar");

1436

1437

1438

1440 ScalarInst->copyIRFlags(&I);

1441

1443 replaceValue(I, *Insert);

1444 return true;

1445}
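
// Illustrative example of the scalarization above: operands inserted at the
// same lane let the vector op run once on scalars, e.g.
//   %i0 = insertelement <4 x i32> %c0, i32 %x, i64 1
//   %i1 = insertelement <4 x i32> %c1, i32 %y, i64 1
//   %r  = add <4 x i32> %i0, %i1
// -->
//   %s = add i32 %x, %y
//   %r = insertelement <4 x i32> %simplified.base.op, i32 %s, i64 1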

1446

1447

1448

1449

1450bool VectorCombine::foldExtractedCmps(Instruction &I) {

1452

1453

1454

1455 if (!BI || I.getType()->isIntegerTy(1))

1456 return false;

1457

1458

1459

1460 Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);

1463 CmpPredicate P0, P1;

1466 return false;

1467

1469 if (!MatchingPred)

1470 return false;

1471

1472

1473

1475 uint64_t Index0, Index1;

1478 return false;

1479

1482 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);

1483 if (!ConvertToShuf)

1484 return false;

1485 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&

1486 "Unknown ExtractElementInst");

1487

1488

1489

1491 unsigned CmpOpcode =

1494 if (!VecTy)

1495 return false;

1496

1504

1506 Ext0Cost + Ext1Cost + CmpCost * 2 +

1508

1509

1510

1511

1512 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;

1513 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;

1518 ShufMask[CheapIndex] = ExpensiveIndex;

1523 NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;

1524 NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;

1525

1526

1527

1528

1529 if (OldCost < NewCost || !NewCost.isValid())

1530 return false;

1531

1532

1535 CmpC[Index0] = C0;

1536 CmpC[Index1] = C1;

1539 Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;

1540 Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;

1543 replaceValue(I, *NewExt);

1544 ++NumVecCmpBO;

1545 return true;

1546}
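
// Sketch of the compare + binop fold (hypothetical constants 42 and 99):
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %c0 = icmp sgt i32 %e0, 42
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c1 = icmp sgt i32 %e1, 99
//   %r  = or i1 %c0, %c1
// becomes one vector compare against a built constant vector, a shuffle to
// line up the odd lane, a vector binop, and a single extract.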

1547

1556 unsigned ReductionOpc =

1560 auto *ExtType = cast<VectorType>(RedOp->getOperand(0)->getType());

1561

1562 CostBeforeReduction =

1563 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,

1565 CostAfterReduction =

1566 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),

1568 return;

1569 }

1570 if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&

1576 (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {

1577

1581

1583 TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,

1586 TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);

1588 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,

1590

1591 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;

1592 CostAfterReduction = TTI.getMulAccReductionCost(

1593 IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);

1594 return;

1595 }

1596 CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,

1598}

1599

1600bool VectorCombine::foldBinopOfReductions(Instruction &I) {

1603 if (BinOpOpc == Instruction::Sub)

1604 ReductionIID = Intrinsic::vector_reduce_add;

1606 return false;

1607

1608 auto checkIntrinsicAndGetItsArgument = [](Value *V,

1609 Intrinsic::ID IID) -> Value * {

1610 auto *II = dyn_cast<IntrinsicInst>(V);

1611 if (!II)

1612 return nullptr;

1613 if (II->getIntrinsicID() == IID && II->hasOneUse())

1614 return II->getArgOperand(0);

1615 return nullptr;

1616 };

1617

1618 Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);

1619 if (!V0)

1620 return false;

1621 Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);

1622 if (!V1)

1623 return false;

1624

1626 if (V1->getType() != VTy)

1627 return false;

1630 unsigned ReductionOpc =

1632

1642 NewCost =

1643 CostOfRedOperand0 + CostOfRedOperand1 +

1646 if (NewCost >= OldCost || !NewCost.isValid())

1647 return false;

1648

1649 LLVM_DEBUG(dbgs() << "Found two mergeable reductions: " << I

1650 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

1651 << "\n");

1652 Value *VectorBO;

1653 if (BinOpOpc == Instruction::Or)

1654 VectorBO = Builder.CreateOr(V0, V1, "",

1656 else

1657 VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);

1658

1660 replaceValue(I, *Rdx);

1661 return true;

1662}

1663

1664

1668 unsigned NumScanned = 0;

1669 return std::any_of(Begin, End, [&](const Instruction &Instr) {

1670 return isModSet(AA.getModRefInfo(&Instr, Loc)) ||

1672 });

1673}

1674

1675namespace {

1676

1677

1678class ScalarizationResult {

1679 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

1680

1681 StatusTy Status;

1682 Value *ToFreeze;

1683

1684 ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)

1685 : Status(Status), ToFreeze(ToFreeze) {}

1686

1687public:

1688 ScalarizationResult(const ScalarizationResult &Other) = default;

1689 ~ScalarizationResult() {

1690 assert(!ToFreeze && "freeze() not called with ToFreeze being set");

1691 }

1692

1693 static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }

1694 static ScalarizationResult safe() { return {StatusTy::Safe}; }

1695 static ScalarizationResult safeWithFreeze(Value *ToFreeze) {

1696 return {StatusTy::SafeWithFreeze, ToFreeze};

1697 }

1698

1699

1700 bool isSafe() const { return Status == StatusTy::Safe; }

1701

1702 bool isUnsafe() const { return Status == StatusTy::Unsafe; }

1703

1704

1705 bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

1706

1707

1708 void discard() {

1709 ToFreeze = nullptr;

1710 Status = StatusTy::Unsafe;

1711 }

1712

1713

1714 void freeze(IRBuilderBase &Builder, Instruction &UserI) {

1715 assert(isSafeWithFreeze() &&

1716 "should only be used when freezing is required");

1718 "UserI must be a user of ToFreeze");

1719 IRBuilder<>::InsertPointGuard Guard(Builder);

1724 if (U.get() == ToFreeze)

1725 U.set(Frozen);

1726

1727 ToFreeze = nullptr;

1728 }

1729};

1730}

1731

1732

1733

1738

1739

1740

1741 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();

1743

1745 if (C->getValue().ult(NumElements))

1746 return ScalarizationResult::safe();

1747 return ScalarizationResult::unsafe();

1748 }

1749

1750

1752 return ScalarizationResult::unsafe();

1753

1754 APInt Zero(IntWidth, 0);

1755 APInt MaxElts(IntWidth, NumElements);

1758

1761 true, &AC, CtxI, &DT)))

1762 return ScalarizationResult::safe();

1763 return ScalarizationResult::unsafe();

1764 }

1765

1766

1767

1774 }

1775

1776 if (ValidIndices.contains(IdxRange))

1777 return ScalarizationResult::safeWithFreeze(IdxBase);

1778 return ScalarizationResult::unsafe();

1779}

1780

1781

1782

1783

1784

1790 C->getZExtValue() * DL.getTypeStoreSize(ScalarType));

1791 return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));

1792}

1793

1794

1795

1796

1797

1798

1799

1800

1801

1802bool VectorCombine::foldSingleElementStore(Instruction &I) {

1803 if (!isa<StoreInst>(I))

1804 return false;

1805 auto *SI = cast<StoreInst>(&I);

1806 if (!SI->isSimple() || !isa<VectorType>(SI->getValueOperand()->getType()))

1807 return false;

1808

1809

1810

1812 Value *NewElement;

1814 if (match(SI->getValueOperand(),

1817 return false;

1818

1821 Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();

1822

1823

1824 if (!Load->isSimple() || Load->getParent() != SI->getParent() ||

1825 !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||

1826 SrcAddr != SI->getPointerOperand()->stripPointerCasts())

1827 return false;

1828

1829 auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);

1830 if (ScalarizableIdx.isUnsafe() ||

1833 return false;

1834

1835

1836

1837 Worklist.push(Load);

1838

1839 if (ScalarizableIdx.isSafeWithFreeze())

1842 SI->getValueOperand()->getType(), SI->getPointerOperand(),

1843 {ConstantInt::get(Idx->getType(), 0), Idx});

1844 StoreInst *NSI = Builder.CreateStore(NewElement, GEP);

1847 std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,

1848 *DL);

1850 replaceValue(I, *NSI);

1852 return true;

1853 }

1854

1855 return false;

1856}
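
// Illustrative pattern for the scalarized store above:
//   %v   = load <4 x i32>, ptr %p
//   %ins = insertelement <4 x i32> %v, i32 %s, i64 %idx
//   store <4 x i32> %ins, ptr %p
// becomes, when %idx is provably in bounds and nothing clobbers %p in between:
//   %gep = getelementptr <4 x i32>, ptr %p, i64 0, i64 %idx
//   store i32 %s, ptr %gep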

1857

1858

1859

1860bool VectorCombine::scalarizeLoad(Instruction &I) {

1863 return false;

1864

1867 if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))

1868 return false;

1869

1870 bool AllExtracts = true;

1871 bool AllBitcasts = true;

1873 unsigned NumInstChecked = 0;

1874

1875

1876

1877

1878 for (User *U : LI->users()) {

1880 if (!UI || UI->getParent() != LI->getParent())

1881 return false;

1882

1883

1884

1885 if (UI->use_empty())

1886 return false;

1887

1889 AllExtracts = false;

1891 AllBitcasts = false;

1892

1893

1895 for (Instruction &I :

1896 make_range(std::next(LI->getIterator()), UI->getIterator())) {

1897

1898

1899 if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())

1900 return false;

1901 NumInstChecked++;

1902 }

1903 LastCheckedInst = UI;

1904 }

1905 }

1906

1907 if (AllExtracts)

1908 return scalarizeLoadExtract(LI, VecTy, Ptr);

1909 if (AllBitcasts)

1910 return scalarizeLoadBitcast(LI, VecTy, Ptr);

1911 return false;

1912}

1913

1914

1915bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,

1918 return false;

1919

1920 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;

1922

1923 for (auto &Pair : NeedFreeze)

1924 Pair.second.discard();

1925 });

1926

1931

1932 for (User *U : LI->users()) {

1934

1935 auto ScalarIdx =

1937 if (ScalarIdx.isUnsafe())

1938 return false;

1939 if (ScalarIdx.isSafeWithFreeze()) {

1940 NeedFreeze.try_emplace(UI, ScalarIdx);

1941 ScalarIdx.discard();

1942 }

1943

1945 OriginalCost +=

1947 Index ? Index->getZExtValue() : -1);

1948 ScalarizedCost +=

1952 nullptr, nullptr, CostKind);

1953 }

1954

1955 LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << *LI

1956 << "\n LoadExtractCost: " << OriginalCost

1957 << " vs ScalarizedCost: " << ScalarizedCost << "\n");

1958

1959 if (ScalarizedCost >= OriginalCost)

1960 return false;

1961

1962

1963

1964 Worklist.push(LI);

1965

1966 Type *ElemType = VecTy->getElementType();

1967

1968

1969 for (User *U : LI->users()) {

1971 Value *Idx = EI->getIndexOperand();

1972

1973

1974 auto It = NeedFreeze.find(EI);

1975 if (It != NeedFreeze.end())

1977

1982 Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));

1983

1984 Align ScalarOpAlignment =

1986 NewLoad->setAlignment(ScalarOpAlignment);

1987

1989 size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);

1992 }

1993

1994 replaceValue(*EI, *NewLoad, false);

1995 }

1996

1997 FailureGuard.release();

1998 return true;

1999}

2000

2001

2002bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,

2007

2008 Type *TargetScalarType = nullptr;

2009 unsigned VecBitWidth = DL->getTypeSizeInBits(VecTy);

2010

2011 for (User *U : LI->users()) {

2013

2014 Type *DestTy = BC->getDestTy();

2016 return false;

2017

2018 unsigned DestBitWidth = DL->getTypeSizeInBits(DestTy);

2019 if (DestBitWidth != VecBitWidth)

2020 return false;

2021

2022

2023 if (!TargetScalarType)

2024 TargetScalarType = DestTy;

2025 else if (TargetScalarType != DestTy)

2026 return false;

2027

2028 OriginalCost +=

2031 }

2032

2033 if (!TargetScalarType)

2034 return false;

2035

2036 assert(!LI->user_empty() && "Unexpected load without bitcast users");

2040

2041 LLVM_DEBUG(dbgs() << "Found vector load feeding only bitcasts: " << *LI

2042 << "\n OriginalCost: " << OriginalCost

2043 << " vs ScalarizedCost: " << ScalarizedCost << "\n");

2044

2045 if (ScalarizedCost >= OriginalCost)

2046 return false;

2047

2048

2049

2050 Worklist.push(LI);

2051

2053 auto *ScalarLoad =

2054 Builder.CreateLoad(TargetScalarType, Ptr, LI->getName() + ".scalar");

2056 ScalarLoad->copyMetadata(*LI);

2057

2058

2059 for (User *U : LI->users()) {

2061 replaceValue(*BC, *ScalarLoad, false);

2062 }

2063

2064 return true;

2065}

2066

2067bool VectorCombine::scalarizeExtExtract(Instruction &I) {

2069 return false;

2071 if (!Ext)

2072 return false;

2073

2074

2075

2076

2078 if (!SrcTy)

2079 return false;

2081

2082 Type *ScalarDstTy = DstTy->getElementType();

2083 if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))

2084 return false;

2085

2089 unsigned ExtCnt = 0;

2090 bool ExtLane0 = false;

2091 for (User *U : Ext->users()) {

2092 uint64_t Idx;

2094 return false;

2096 continue;

2097 ExtCnt += 1;

2098 ExtLane0 |= !Idx;

2101 }

2102

2105 Instruction::And, ScalarDstTy, CostKind,

2108 (ExtCnt - ExtLane0) *

2110 Instruction::LShr, ScalarDstTy, CostKind,

2113 if (ScalarCost > VectorCost)

2114 return false;

2115

2116 Value *ScalarV = Ext->getOperand(0);

2118 &DT)) {

2119

2120

2121

2122

2123 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;

2124 bool AllExtractsTriggerUB = true;

2125 ExtractElementInst *LastExtract = nullptr;

2126 BasicBlock *ExtBB = Ext->getParent();

2127 for (User *U : Ext->users()) {

2130 AllExtractsTriggerUB = false;

2131 break;

2132 }

2134 if (!LastExtract || LastExtract->comesBefore(Extract))

2135 LastExtract = Extract;

2136 }

2137 if (ExtractedLanes.size() != DstTy->getNumElements() ||

2138 !AllExtractsTriggerUB ||

2142 }

2144 ScalarV,

2145 IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));

2146 uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());

2147 uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;

2148 uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);

2150 Value *Mask = ConstantInt::get(PackedTy, EltBitMask);

2151 for (User *U : Ext->users()) {

2153 uint64_t Idx =

2155 uint64_t ShiftAmt =

2156 DL->isBigEndian()

2157 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)

2158 : (Idx * SrcEltSizeInBits);

2161 U->replaceAllUsesWith(And);

2162 }

2163 return true;

2164}

2165

2166

2167

2168

2169bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {

2170 Type *Ty = I.getType();

2172 return false;

2173

2174

2175 if (DL->isBigEndian())

2176 return false;

2177

2178

2181 return false;

2182

2183

2184

2186 uint64_t ShAmtX = 0;

2191 return false;

2192

2194 uint64_t ShAmtY = 0;

2199 return false;

2200

2201

2202 if (ShAmtX > ShAmtY) {

2206 }

2207

2208

2209

2210 uint64_t ShAmtDiff = ShAmtY - ShAmtX;

2211 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);

2214 if (!MaskTy || SrcX->getType() != SrcY->getType() ||

2215 !MaskTy->getElementType()->isIntegerTy(1) ||

2216 MaskTy->getNumElements() != ShAmtDiff ||

2217 MaskTy->getNumElements() > (BitWidth / 2))

2218 return false;

2219

2221 auto *ConcatIntTy =

2222 Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());

2223 auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);

2224

2226 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

2227

2228

2231 OldCost +=

2235 OldCost += 2 * TTI.getCastInstrCost(Instruction::BitCast, MaskIntTy, MaskTy,

2237

2240 MaskTy, ConcatMask, CostKind);

2241 NewCost += TTI.getCastInstrCost(Instruction::BitCast, ConcatIntTy, ConcatTy,

2243 if (Ty != ConcatIntTy)

2246 if (ShAmtX > 0)

2248

2249 LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I

2250 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2251 << "\n");

2252

2253 if (NewCost > OldCost)

2254 return false;

2255

2256

2257

2260

2262

2263 if (Ty != ConcatIntTy) {

2266 }

2267

2268 if (ShAmtX > 0) {

2271 }

2272

2273 replaceValue(I, *Result);

2274 return true;

2275}

2276

2277

2278

2279bool VectorCombine::foldPermuteOfBinops(Instruction &I) {

2280 BinaryOperator *BinOp;

2281 ArrayRef<int> OuterMask;

2284 return false;

2285

2286

2288 return false;

2289

2290 Value *Op00, *Op01, *Op10, *Op11;

2291 ArrayRef<int> Mask0, Mask1;

2292 bool Match0 =

2295 bool Match1 =

2298 if (!Match0 && !Match1)

2299 return false;

2300

2301 Op00 = Match0 ? Op00 : BinOp->getOperand(0);

2302 Op01 = Match0 ? Op01 : BinOp->getOperand(0);

2303 Op10 = Match1 ? Op10 : BinOp->getOperand(1);

2304 Op11 = Match1 ? Op11 : BinOp->getOperand(1);

2305

2311 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)

2312 return false;

2313

2314 unsigned NumSrcElts = BinOpTy->getNumElements();

2315

2316

2317

2319 any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))

2320 return false;

2321

2322

2323 SmallVector<int> NewMask0, NewMask1;

2324 for (int M : OuterMask) {

2325 if (M < 0 || M >= (int)NumSrcElts) {

2328 } else {

2329 NewMask0.push_back(Match0 ? Mask0[M] : M);

2330 NewMask1.push_back(Match1 ? Mask1[M] : M);

2331 }

2332 }

2333

2334 unsigned NumOpElts = Op0Ty->getNumElements();

2335 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&

2336 all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&

2338 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&

2339 all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&

2341

2342

2346 BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);

2347 if (Match0)

2351 if (Match1)

2355

2358

2359 if (!IsIdentity0)

2360 NewCost +=

2362 Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});

2363 if (!IsIdentity1)

2364 NewCost +=

2366 Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});

2367

2368 LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I

2369 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2370 << "\n");

2371

2372

2373 if (NewCost > OldCost)

2374 return false;

2375

2381

2382

2384 NewInst->copyIRFlags(BinOp);

2385

2388 replaceValue(I, *NewBO);

2389 return true;

2390}

2391

2392

2393

2394bool VectorCombine::foldShuffleOfBinops(Instruction &I) {

2395 ArrayRef<int> OldMask;

2399 return false;

2400

2401

2402 if (LHS->getOpcode() != RHS->getOpcode())

2403 return false;

2404

2406 bool IsCommutative = false;

2412

2414 return false;

2415 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());

2420 } else

2421 return false;

2422

2426 if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())

2427 return false;

2428

2429 unsigned NumSrcElts = BinOpTy->getNumElements();

2430

2431

2432 if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))

2434

2435 auto ConvertToUnary = [NumSrcElts](int &M) {

2436 if (M >= (int)NumSrcElts)

2437 M -= NumSrcElts;

2438 };

2439

2440 SmallVector<int> NewMask0(OldMask);

2442 if (X == Z) {

2446 }

2447

2448 SmallVector<int> NewMask1(OldMask);

2450 if (Y == W) {

2454 }

2455

2456

2461 BinResTy, OldMask, CostKind, 0, nullptr, {LHS, RHS},

2462 &I);

2463

2464

2465

2466

2467

2471 ArrayRef<int> InnerMask;

2473 m_Mask(InnerMask)))) &&

2474 InnerOp->getType() == Op->getType() &&

2476 [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {

2477 for (int &M : Mask)

2478 if (Offset <= M && M < (int)(Offset + NumSrcElts)) {

2481 }

2483 Op = InnerOp;

2484 return true;

2485 }

2486 return false;

2487 };

2488 bool ReducedInstCount = false;

2489 ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);

2490 ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);

2491 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);

2492 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);

2493

2494 auto *ShuffleCmpTy =

2498 nullptr, {X, Z}) +

2500 nullptr, {Y, W});

2501

2503 NewCost +=

2505 } else {

2507 ShuffleDstTy, PredLHS, CostKind);

2508 }

2509

2510 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I

2511 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2512 << "\n");

2513

2514

2515

2518 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))

2519 return false;

2520

2526 : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);

2527

2528

2530 NewInst->copyIRFlags(LHS);

2531 NewInst->andIRFlags(RHS);

2532 }

2533

2536 replaceValue(I, *NewBO);

2537 return true;

2538}
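
// Example of the shuffle-of-binops fold (illustrative mask):
//   %b0 = fadd <4 x float> %x, %y
//   %b1 = fadd <4 x float> %z, %w
//   %r  = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> %m
// is turned into operand shuffles feeding a single binop:
//   %s0 = shufflevector <4 x float> %x, <4 x float> %z, <4 x i32> %m
//   %s1 = shufflevector <4 x float> %y, <4 x float> %w, <4 x i32> %m
//   %r  = fadd <4 x float> %s0, %s1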

2539

2540

2541

2542

2543bool VectorCombine::foldShuffleOfSelects(Instruction &I) {

2544 ArrayRef<int> Mask;

2545 Value *C1, *T1, *F1, *C2, *T2, *F2;

2549 m_Mask(Mask))))

2550 return false;

2551

2554 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)

2555 return false;

2556

2559

2560 if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||

2561 ((SI0FOp != nullptr) &&

2562 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))

2563 return false;

2564

2568 auto SelOp = Instruction::Select;

2573 OldCost +=

2575 {I.getOperand(0), I.getOperand(1)}, &I);

2576

2579 Mask, CostKind, 0, nullptr, {C1, C2});

2581 nullptr, {T1, T2});

2583 nullptr, {F1, F2});

2585 toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));

2588

2589 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I

2590 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2591 << "\n");

2592 if (NewCost > OldCost)

2593 return false;

2594

2599

2600 if (SI0FOp)

2601 NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,

2602 SI0FOp->getFastMathFlags());

2603 else

2604 NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);

2605

2607 Worklist.pushValue(ShuffleTrue);

2608 Worklist.pushValue(ShuffleFalse);

2609 replaceValue(I, *NewSel);

2610 return true;

2611}

2612

2613

2614

2615bool VectorCombine::foldShuffleOfCastops(Instruction &I) {

2617 ArrayRef<int> OldMask;

2619 return false;

2620

2621

2623

2626 if (!C0 || (IsBinaryShuffle && !C1))

2627 return false;

2628

2630

2631

2632

2633 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)

2634 return false;

2635

2636 if (IsBinaryShuffle) {

2637 if (C0->getSrcTy() != C1->getSrcTy())

2638 return false;

2639

2640 if (Opcode != C1->getOpcode()) {

2642 Opcode = Instruction::SExt;

2643 else

2644 return false;

2645 }

2646 }

2647

2651 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)

2652 return false;

2653

2654 unsigned NumSrcElts = CastSrcTy->getNumElements();

2655 unsigned NumDstElts = CastDstTy->getNumElements();

2656 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&

2657 "Only bitcasts expected to alter src/dst element counts");

2658

2659

2660

2661 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&

2662 (NumDstElts % NumSrcElts) != 0)

2663 return false;

2664

2665 SmallVector<int, 16> NewMask;

2666 if (NumSrcElts >= NumDstElts) {

2667

2668

2669 assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");

2670 unsigned ScaleFactor = NumSrcElts / NumDstElts;

2672 } else {

2673

2674

2675 assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");

2676 unsigned ScaleFactor = NumDstElts / NumSrcElts;

2678 return false;

2679 }

2680

2681 auto *NewShuffleDstTy =

2683

2684

2688

2690 if (IsBinaryShuffle)

2692 else

2694

2696 OldCost += TTI.getShuffleCost(ShuffleKind, ShuffleDstTy, CastDstTy, OldMask,

2698

2700 CastSrcTy, NewMask, CostKind);

2704 NewCost += CostC0;

2705 if (IsBinaryShuffle) {

2709 OldCost += CostC1;

2711 NewCost += CostC1;

2712 }

2713

2714 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I

2715 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2716 << "\n");

2717 if (NewCost > OldCost)

2718 return false;

2719

2721 if (IsBinaryShuffle)

2723 NewMask);

2724 else

2726

2727 Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy);

2728

2729

2731 NewInst->copyIRFlags(C0);

2732 if (IsBinaryShuffle)

2733 NewInst->andIRFlags(C1);

2734 }

2735

2737 replaceValue(I, *Cast);

2738 return true;

2739}
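
// Illustrative example of sinking the shuffle below the casts:
//   %c0 = sext <4 x i16> %a to <4 x i32>
//   %c1 = sext <4 x i16> %b to <4 x i32>
//   %r  = shufflevector <4 x i32> %c0, <4 x i32> %c1, <8 x i32> %m
// -->
//   %s = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> %m
//   %r = sext <8 x i16> %s to <8 x i32>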

2740

2741

2742

2743

2744

2745

2746

2747bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {

2748 ArrayRef<int> OuterMask;

2749 Value *OuterV0, *OuterV1;

2752 return false;

2753

2754 ArrayRef<int> InnerMask0, InnerMask1;

2755 Value *X0, *X1, *Y0, *Y1;

2756 bool Match0 =

2758 bool Match1 =

2760 if (!Match0 && !Match1)

2761 return false;

2762

2763

2764

2765 SmallVector<int, 16> PoisonMask1;

2767 X1 = X0;

2768 Y1 = Y0;

2770 InnerMask1 = PoisonMask1;

2771 Match1 = true;

2772 }

2773

2774 X0 = Match0 ? X0 : OuterV0;

2775 Y0 = Match0 ? Y0 : OuterV0;

2776 X1 = Match1 ? X1 : OuterV1;

2777 Y1 = Match1 ? Y1 : OuterV1;

2781 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||

2783 return false;

2784

2785 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();

2786 unsigned NumImmElts = ShuffleImmTy->getNumElements();

2787

2788

2789

2790

2791 SmallVector<int, 16> NewMask(OuterMask);

2792 Value *NewX = nullptr, *NewY = nullptr;

2793 for (int &M : NewMask) {

2794 Value *Src = nullptr;

2795 if (0 <= M && M < (int)NumImmElts) {

2796 Src = OuterV0;

2797 if (Match0) {

2798 M = InnerMask0[M];

2799 Src = M >= (int)NumSrcElts ? Y0 : X0;

2800 M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;

2801 }

2802 } else if (M >= (int)NumImmElts) {

2803 Src = OuterV1;

2804 M -= NumImmElts;

2805 if (Match1) {

2806 M = InnerMask1[M];

2807 Src = M >= (int)NumSrcElts ? Y1 : X1;

2808 M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;

2809 }

2810 }

2812 assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");

2814

2815

2817 return false;

2819 continue;

2820 }

2821 if (!NewX || NewX == Src) {

2822 NewX = Src;

2823 continue;

2824 }

2825 if (!NewY || NewY == Src) {

2826 M += NumSrcElts;

2827 NewY = Src;

2828 continue;

2829 }

2830 return false;

2831 }

2832 }

2833

2834 if (!NewX)

2836 if (!NewY)

2838

2839

2841 replaceValue(I, *NewX);

2842 return true;

2843 }

2844

2845

2847 if (Match0)

2849

2851 if (Match1)

2853

2855

2856 InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;

2857

2858 bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });

2864 nullptr, {NewX, NewY});

2866 NewCost += InnerCost0;

2868 NewCost += InnerCost1;

2869

2870 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two shuffles: " << I

2871 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2872 << "\n");

2873 if (NewCost > OldCost)

2874 return false;

2875

2877 replaceValue(I, *Shuf);

2878 return true;

2879}

2880

2881

2882

2883bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {

2885 ArrayRef<int> OldMask;

2887 m_Mask(OldMask))))

2888 return false;

2889

2892 if (!II0 || !II1)

2893 return false;

2894

2896 if (IID != II1->getIntrinsicID())

2897 return false;

2898

2901 if (!ShuffleDstTy || !II0Ty)

2902 return false;

2903

2905 return false;

2906

2907 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)

2909 II0->getArgOperand(I) != II1->getArgOperand(I))

2910 return false;

2911

2916 II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);

2917

2920 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {

2922 NewArgsTy.push_back(II0->getArgOperand(I)->getType());

2923 } else {

2926 ShuffleDstTy->getNumElements());

2930 CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});

2931 }

2932 }

2933 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);

2935

2936 LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I

2937 << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

2938 << "\n");

2939

2940 if (NewCost > OldCost)

2941 return false;

2942

2944 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)

2946 NewArgs.push_back(II0->getArgOperand(I));

2947 } else {

2949 II1->getArgOperand(I), OldMask);

2952 }

2954

2955

2957 NewInst->copyIRFlags(II0);

2958 NewInst->andIRFlags(II1);

2959 }

2960

2961 replaceValue(I, *NewIntrinsic);

2962 return true;

2963}

2964

2965

2966

2967bool VectorCombine::foldPermuteOfIntrinsic(Instruction &I) {

2969 ArrayRef<int> Mask;

2971 return false;

2972

2974 if (!II0)

2975 return false;

2976

2979 if (!ShuffleDstTy || !IntrinsicSrcTy)

2980 return false;

2981

2982

2983 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();

2984 if (any_of(Mask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))

2985 return false;

2986

2989 return false;

2990

2991

2995 IntrinsicSrcTy, Mask, CostKind, 0, nullptr, {V0}, &I);

2996

2999 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {

3001 NewArgsTy.push_back(II0->getArgOperand(I)->getType());

3002 } else {

3005 ShuffleDstTy->getNumElements());

3008 ArgTy, VecTy, Mask, CostKind, 0, nullptr,

3009 {II0->getArgOperand(I)});

3010 }

3011 }

3012 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);

3014

3015 LLVM_DEBUG(dbgs() << "Found a permute of intrinsic: " << I << "\n OldCost: "

3016 << OldCost << " vs NewCost: " << NewCost << "\n");

3017

3018 if (NewCost > OldCost)

3019 return false;

3020

3021

3023 for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {

3025 NewArgs.push_back(II0->getArgOperand(I));

3026 } else {

3030 }

3031 }

3032

3034

3037

3038 replaceValue(I, *NewIntrinsic);

3039 return true;

3040}

3041

3043

3046 unsigned NumElts =

3048 int M = SV->getMaskValue(Lane);

3049 if (M < 0)

3051 if (static_cast<unsigned>(M) < NumElts) {

3052 U = &SV->getOperandUse(0);

3053 Lane = M;

3054 } else {

3055 U = &SV->getOperandUse(1);

3056 Lane = M - NumElts;

3057 }

3058 }

3060}

3061

3066 auto [U, Lane] = IL;

3069 Lane)

3072 }

3073 return NItem;

3074}

3075

3076

3080 unsigned NumElts = Ty->getNumElements();

3081 if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)

3082 return false;

3083

3084

3085

3087 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

3090 Ty, ConcatMask, CostKind) != 0)

3091 return false;

3092

3093 unsigned NumSlices = Item.size() / NumElts;

3094

3095

3097 return false;

3098 for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {

3099 Use *SliceV = Item[Slice * NumElts].first;

3100 if (!SliceV || SliceV->get()->getType() != Ty)

3101 return false;

3102 for (unsigned Elt = 0; Elt < NumElts; ++Elt) {

3103 auto [V, Lane] = Item[Slice * NumElts + Elt];

3104 if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

3105 return false;

3106 }

3107 }

3108 return true;

3109}

3110

3117 auto [FrontU, FrontLane] = Item.front();

3118

3119 if (IdentityLeafs.contains(FrontU)) {

3120 return FrontU->get();

3121 }

3122 if (SplatLeafs.contains(FrontU)) {

3124 return Builder.CreateShuffleVector(FrontU->get(), Mask);

3125 }

3126 if (ConcatLeafs.contains(FrontU)) {

3127 unsigned NumElts =

3130 for (unsigned S = 0; S < Values.size(); ++S)

3131 Values[S] = Item[S * NumElts].first->get();

3132

3133 while (Values.size() > 1) {

3134 NumElts *= 2;

3136 std::iota(Mask.begin(), Mask.end(), 0);

3138 for (unsigned S = 0; S < NewValues.size(); ++S)

3139 NewValues[S] =

3140 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);

3141 Values = NewValues;

3142 }

3143 return Values[0];

3144 }

3145

3148 unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);

3150 for (unsigned Idx = 0; Idx < NumOps; Idx++) {

3151 if (II &&

3153 Ops[Idx] = II->getOperand(Idx);

3154 continue;

3155 }

3157 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,

3158 Builder, TTI);

3159 }

3160

3162 for (const auto &Lane : Item)

3163 if (Lane.first)

3164 ValueList.push_back(Lane.first->get());

3165

3166 Type *DstTy =

3173 }

3175 auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);

3178 }

3180 auto *Value = Builder.CreateSelect(Ops[0], Ops[1], Ops[2], "", SI);

3183 }

3185 auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);

3188 }

3189 if (II) {

3190 auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);

3193 }

3199}

3200

3201

3202

3203

3204bool VectorCombine::foldShuffleToIdentity(Instruction &I) {

3206 if (!Ty || I.use_empty())

3207 return false;

3208

3210 for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)

3212

3215 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;

3216 unsigned NumVisited = 0;

3217

3218 while (!Worklist.empty()) {

3220 return false;

3221

3223 auto [FrontU, FrontLane] = Item.front();

3224

3225

3226 if (!FrontU)

3227 return false;

3228

3229

3231 return X->getType() == Y->getType() &&

3233 };

3234

3235

3236 if (FrontLane == 0 &&

3238 Ty->getNumElements() &&

3240 Value *FrontV = Item.front().first->get();

3241 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&

3242 E.value().second == (int)E.index());

3243 })) {

3244 IdentityLeafs.insert(FrontU);

3245 continue;

3246 }

3247

3249 C && C->getSplatValue() &&

3251 Value *FrontV = Item.front().first->get();

3252 Use *U = IL.first;

3256 })) {

3257 SplatLeafs.insert(FrontU);

3258 continue;

3259 }

3260

3262 auto [FrontU, FrontLane] = Item.front();

3263 auto [U, Lane] = IL;

3264 return !U || (U->get() == FrontU->get() && Lane == FrontLane);

3265 })) {

3266 SplatLeafs.insert(FrontU);

3267 continue;

3268 }

3269

3270

3271

3272 auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {

3273 Value *FrontV = Item.front().first->get();

3274 if (!IL.first)

3275 return true;

3276 Value *V = IL.first->get();

3278 return false;

3279 if (V->getValueID() != FrontV->getValueID())

3280 return false;

3282 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())

3283 return false;

3285 if (CI->getSrcTy()->getScalarType() !=

3286 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())

3287 return false;

3290 SI->getOperand(0)->getType() !=

3292 return false;

3294 return false;

3297 II->getIntrinsicID() ==

3299 II->hasOperandBundles());

3300 };

3301 if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {

3302

3304

3306 BO && BO->isIntDivRem())

3307 return false;

3310 continue;

3311 } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,

3312 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {

3314 continue;

3316

3319 if (DstTy && SrcTy &&

3320 SrcTy->getNumElements() == DstTy->getNumElements()) {

3322 continue;

3323 }

3328 continue;

3331 II->hasOperandBundles()) {

3332 for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {

3334 &TTI)) {

3336 Value *FrontV = Item.front().first->get();

3337 Use *U = IL.first;

3340 }))

3341 return false;

3342 continue;

3343 }

3345 }

3346 continue;

3347 }

3348 }

3349

3351 ConcatLeafs.insert(FrontU);

3352 continue;

3353 }

3354

3355 return false;

3356 }

3357

3358 if (NumVisited <= 1)

3359 return false;

3360

3361 LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");

3362

3363

3364

3367 ConcatLeafs, Builder, &TTI);

3368 replaceValue(I, *V);

3369 return true;

3370}

3371

3372

3373

3374

3375bool VectorCombine::foldShuffleFromReductions(Instruction &I) {

3377 if (!II)

3378 return false;

3379 switch (II->getIntrinsicID()) {

3380 case Intrinsic::vector_reduce_add:

3381 case Intrinsic::vector_reduce_mul:

3382 case Intrinsic::vector_reduce_and:

3383 case Intrinsic::vector_reduce_or:

3384 case Intrinsic::vector_reduce_xor:

3385 case Intrinsic::vector_reduce_smin:

3386 case Intrinsic::vector_reduce_smax:

3387 case Intrinsic::vector_reduce_umin:

3388 case Intrinsic::vector_reduce_umax:

3389 break;

3390 default:

3391 return false;

3392 }

3393

3394

3395

3396

3397 std::queue<Value *> Worklist;

3398 SmallPtrSet<Value *, 4> Visited;

3399 ShuffleVectorInst *Shuffle = nullptr;

3401 Worklist.push(Op);

3402

3403 while (!Worklist.empty()) {

3404 Value *CV = Worklist.front();

3405 Worklist.pop();

3407 continue;

3408

3409

3411 continue;

3412

3414

3416 if (CI->isBinaryOp()) {

3417 for (auto *Op : CI->operand_values())

3418 Worklist.push(Op);

3419 continue;

3421 if (Shuffle && Shuffle != SV)

3422 return false;

3423 Shuffle = SV;

3424 continue;

3425 }

3426 }

3427

3428

3429 return false;

3430 }

3431

3432 if (!Shuffle)

3433 return false;

3434

3435

3436

3437

3438 for (auto *V : Visited)

3439 for (auto *U : V->users())

3440 if (!Visited.contains(U) && U != &I)

3441 return false;

3442

3443 FixedVectorType *VecType =

3445 if (!VecType)

3446 return false;

3447 FixedVectorType *ShuffleInputType =

3449 if (!ShuffleInputType)

3450 return false;

3451 unsigned NumInputElts = ShuffleInputType->getNumElements();

3452

3453

3454

3455 SmallVector ConcatMask;

3457 sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });

3458 bool UsesSecondVec =

3459 any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });

3460

3466 ShuffleInputType, ConcatMask, CostKind);

3467

3468 LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle

3469 << "\n");

3470 LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost

3471 << "\n");

3472 bool MadeChanges = false;

3473 if (NewCost < OldCost) {

3477 LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");

3478 replaceValue(*Shuffle, *NewShuffle);

3479 return true;

3480 }

3481

3482

3483

3484 MadeChanges |= foldSelectShuffle(*Shuffle, true);

3485 return MadeChanges;

3486}
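
// Illustrative example (editor's sketch, not from the original source):
// because the reduction below is lane-order invariant, the interleaving mask
// <2, 3, 0, 1> sorts into the identity mask <0, 1, 2, 3>, so the shuffle can
// be replaced by a free identity shuffle and cleaned up:
//
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
// -->
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)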

/// Match a chain that halves the vector with "shift down" shuffles and
/// combines the halves with a common min/max intrinsic or associative binop,
/// ending in an extract of lane 0, e.g.:
///
///   %sv0 = shufflevector <8 x i32> %v, <8 x i32> poison,
///          <8 x i32> <i32 4, i32 5, i32 6, i32 7,
///                     i32 poison, i32 poison, i32 poison, i32 poison>
///   %m0  = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %v, <8 x i32> %sv0)
///   ... repeated with halves of 2 and then 1 ...
///   %res = extractelement <8 x i32> %m2, i64 0
///
/// and try to replace the whole chain with a single reduction intrinsic:
///
///   %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v)
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  InstructionCost OrigCost = 0;

  // The common (identical) operation combining each pair of halves: either a
  // min/max intrinsic or a binary operator.
  std::optional<Intrinsic::ID> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;

  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;

  // The two operands of the last visited call/binop: one is the halving
  // shuffle, the other the value being reduced.
  SmallVector<Value *, 2> PrevVecV(2, nullptr);

  Value *VecOpEE;
  if (!match(&I, m_ExtractElt(m_Value(VecOpEE), m_Zero())))
    return false;

  auto *FVT = dyn_cast<FixedVectorType>(VecOpEE->getType());
  if (!FVT)
    return false;

  int64_t VecSize = FVT->getNumElements();
  if (VecSize < 2)
    return false;

  // A reduction of an N-wide vector needs ceil(log2(N)) halving levels.
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;

  // For non-power-of-two vector sizes some levels split an odd width. Record
  // which levels do (walking from the widest level down), so the expected
  // shuffle masks can be adjusted by one element at those levels while the
  // chain is walked bottom-up.
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    if (Cur & 1)
      ExpectedParityMask |= (1ll << Mask);
  }

  InstWorklist.push(VecOpEE);

  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
    InstWorklist.pop();

    if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;

      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;

      // The chain must alternate strictly: the call popped here has to be
      // the value expected from the previous level.
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;

      if (!CommonCallOp)
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
        return false;

      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
        PrevVecV[0] = Op0;
        PrevVecV[1] = Op1;
        break;
      }
      default:
        return false;
      }
      ShouldBeCallOrBinInst ^= 1;

      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      OrigCost += TTI.getIntrinsicInstrCost(ICA, CostKind);

      // Keep the halving shuffle in PrevVecV[1] so it is visited next.
      if (!isa<ShuffleVectorInst>(PrevVecV[1]))
        std::swap(PrevVecV[0], PrevVecV[1]);
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
      if (!ShouldBeCallOrBinInst)
        return false;

      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;

      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
        return false;
      IsFirstCallOrBinInst = false;

      if (!CommonBinOp)
        CommonBinOp = BinOp->getOpcode();

      if (BinOp->getOpcode() != *CommonBinOp)
        return false;

      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
        auto *Op0 = BinOp->getOperand(0);
        auto *Op1 = BinOp->getOperand(1);
        PrevVecV[0] = Op0;
        PrevVecV[1] = Op1;
        break;
      }
      default:
        return false;
      }
      ShouldBeCallOrBinInst ^= 1;

      OrigCost +=
          TTI.getArithmeticInstrCost(*CommonBinOp, BinOp->getType(), CostKind);

      if (!isa<ShuffleVectorInst>(PrevVecV[1]))
        std::swap(PrevVecV[0], PrevVecV[1]);
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
      // Shuffles must alternate with the calls/binops.
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
        return false;

      if (SVInst != PrevVecV[1])
        return false;

      ArrayRef<int> CurMask;
      if (!match(SVInst, m_Shuffle(m_Specific(PrevVecV[0]), m_Poison(),
                                   m_Mask(CurMask))))
        return false;

      // The mask must shift the upper half down; all other lanes are poison.
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
          return false;
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
          return false;
      }

      // Move to the next level: double the halving point, absorbing one
      // element when this level split an odd width.
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;

      OrigCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                     SVInst->getType(), SVInst->getType(),
                                     CurMask, CostKind);

      VisitedCnt += 1;
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
        break;

      ShouldBeCallOrBinInst ^= 1;
    } else {
      return false;
    }
  }

  // The chain must end on a call/binop, not on a shuffle.
  if (ShouldBeCallOrBinInst)
    return false;

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  if (!FinalVecV)
    return false;

  auto *FinalVecVTy = cast<FixedVectorType>(FinalVecV->getType());

  // Map the chain's common operation onto the corresponding reduction
  // intrinsic.
  Intrinsic::ID ReducedOp = Intrinsic::not_intrinsic;
  if (CommonCallOp) {
    switch (*CommonCallOp) {
    case Intrinsic::umin:
      ReducedOp = Intrinsic::vector_reduce_umin;
      break;
    case Intrinsic::umax:
      ReducedOp = Intrinsic::vector_reduce_umax;
      break;
    case Intrinsic::smin:
      ReducedOp = Intrinsic::vector_reduce_smin;
      break;
    case Intrinsic::smax:
      ReducedOp = Intrinsic::vector_reduce_smax;
      break;
    default:
      return false;
    }
  } else if (CommonBinOp) {
    switch (*CommonBinOp) {
    case BinaryOperator::Add:
      ReducedOp = Intrinsic::vector_reduce_add;
      break;
    case BinaryOperator::Mul:
      ReducedOp = Intrinsic::vector_reduce_mul;
      break;
    case BinaryOperator::Or:
      ReducedOp = Intrinsic::vector_reduce_or;
      break;
    case BinaryOperator::And:
      ReducedOp = Intrinsic::vector_reduce_and;
      break;
    case BinaryOperator::Xor:
      ReducedOp = Intrinsic::vector_reduce_xor;
      break;
    default:
      return false;
    }
  }
  if (!ReducedOp)
    return false;

  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  InstructionCost NewCost = TTI.getIntrinsicInstrCost(ICA, CostKind);

  if (NewCost >= OrigCost)
    return false;

  auto *ReducedResult =
      Builder.CreateIntrinsic(ReducedOp, {FinalVecV->getType()}, {FinalVecV});
  replaceValue(I, *ReducedResult);

  return true;
}
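
// Worked example of the parity mask above (editor's sketch): for VecSize = 6,
// NumLevels = ceil(log2(6)) = 3 and the halving sequence is 6 -> 3 -> 2 -> 1.
// The odd width occurs at Cur = 3 (Mask = 1), so ExpectedParityMask = 0b010.
// Walking the chain bottom-up from the extract, ShuffleMaskHalf then grows
// 1 -> 2 -> 3: at the second step the parity bit is clear (2*1 = 2), and at
// the third it is set, so 2*2 - 1 = 3 absorbs the odd split back to the full
// width.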

/// Determine if it is more efficient to fold:
///   reduce(trunc(x)) -> trunc(reduce(x)).
///   reduce(sext(x))  -> sext(reduce(x)).
///   reduce(zext(x))  -> zext(reduce(x)).
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  auto *II = dyn_cast<IntrinsicInst>(&I);
  if (!II)
    return false;

  bool TruncOnly = false;
  Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
    TruncOnly = true;
    break;
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
    break;
  default:
    return false;
  }

  unsigned ReductionOpc = getArithmeticReductionInstruction(IID);
  Value *ReductionSrc = I.getOperand(0);

  Value *Src;
  if (!match(ReductionSrc, m_OneUse(m_Trunc(m_Value(Src)))) &&
      (TruncOnly || !match(ReductionSrc, m_OneUse(m_ZExtOrSExt(m_Value(Src))))))
    return false;

  auto CastOpc =
      (Instruction::CastOps)cast<Instruction>(ReductionSrc)->getOpcode();

  auto *SrcTy = cast<VectorType>(Src->getType());
  auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
  Type *ResultTy = I.getType();

  InstructionCost OldCost = TTI.getArithmeticReductionCost(
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  OldCost += TTI.getCastInstrCost(CastOpc, ReductionSrcTy, SrcTy,
                                  TTI::CastContextHint::None, CostKind,
                                  cast<CastInst>(ReductionSrc));
  InstructionCost NewCost =
      TTI.getArithmeticReductionCost(ReductionOpc, SrcTy, std::nullopt,
                                     CostKind) +
      TTI.getCastInstrCost(CastOpc, ResultTy, ReductionSrcTy->getScalarType(),
                           TTI::CastContextHint::None, CostKind);

  if (OldCost <= NewCost || !NewCost.isValid())
    return false;

  Value *NewReduction = Builder.CreateIntrinsic(SrcTy->getScalarType(),
                                                II->getIntrinsicID(), {Src});
  Value *NewCast = Builder.CreateCast(CastOpc, NewReduction, ResultTy);
  replaceValue(I, *NewCast);
  return true;
}
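
// Illustrative example (editor's sketch, not from the original source):
// computing the reduction in the narrow source type and extending the scalar
// result afterwards. Bitwise AND commutes with zext, so:
//
//   %z = zext <8 x i8> %x to <8 x i16>
//   %r = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %z)
// -->
//   %n = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
//   %r = zext i8 %n to i16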

/// Returns true if this ShuffleVectorInst eventually feeds into a vector
/// reduction intrinsic (e.g., llvm.vector.reduce.add) by only following
/// chains of shuffles and binary operators.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI) {
  constexpr unsigned MaxVisited = 32;
  SmallPtrSet<Instruction *, 8> Visited;
  SmallVector<Instruction *, 4> WorkList;
  bool FoundReduction = false;

  WorkList.push_back(SVI);
  while (!WorkList.empty()) {
    Instruction *I = WorkList.pop_back_val();
    for (User *U : I->users()) {
      auto *UI = dyn_cast<Instruction>(U);
      if (!UI || !Visited.insert(UI).second)
        continue;
      if (Visited.size() > MaxVisited)
        return false;
      if (auto *II = dyn_cast<IntrinsicInst>(UI)) {
        // More than one reduction reached.
        if (FoundReduction)
          return false;
        switch (II->getIntrinsicID()) {
        case Intrinsic::vector_reduce_add:
        case Intrinsic::vector_reduce_mul:
        case Intrinsic::vector_reduce_and:
        case Intrinsic::vector_reduce_or:
        case Intrinsic::vector_reduce_xor:
        case Intrinsic::vector_reduce_smin:
        case Intrinsic::vector_reduce_smax:
        case Intrinsic::vector_reduce_umin:
        case Intrinsic::vector_reduce_umax:
          FoundReduction = true;
          continue;
        default:
          return false;
        }
      }

      if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
        return false;

      WorkList.emplace_back(UI);
    }
  }
  return FoundReduction;
}
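
// Illustrative example (editor's sketch, not from the original source): a
// chain this helper accepts. Starting from %s, every transitive user is a
// shuffle or a binop until exactly one reduction is reached:
//
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %m = mul <4 x i32> %s, %c
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %m)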

/// This method looks for groups of shuffles acting on binops, of the form:
///  %x = shuffle ...
///  %y = shuffle ...
///  %a = binop %x, %y
///  %b = binop %x, %y
///  shuffle %a, %b, selectmask
/// We may, especially if the shuffle is wider than legal, be able to convert
/// the shuffles to lane-less operations on packed inputs that can be repeated
/// across multiple registers.
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  auto *SVI = cast<ShuffleVectorInst>(&I);
  auto *VT = cast<FixedVectorType>(I.getType());
  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
      VT != Op0->getType())
    return false;

  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
  auto checkSVNonOpUses = [&](Instruction *I) {
    if (!I || I->getOperand(0)->getType() != VT)
      return true;
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
    });
  };
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    return false;

  // Collect all the uses that are shuffles that we can transform together. We
  // may not have a single shuffle, but a group of them.
  SmallVector<ShuffleVectorInst *> Shuffles;
  auto collectShuffles = [&](Instruction *I) {
    for (auto *U : I->users()) {
      auto *SV = dyn_cast<ShuffleVectorInst>(U);
      if (!SV || SV->getType() != VT)
        return false;
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
        return false;
      if (!llvm::is_contained(Shuffles, SV))
        Shuffles.push_back(SV);
    }
    return true;
  };
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
    return false;
  // From a reduction, we need to be processing a single shuffle; otherwise
  // the other uses may not be lane-invariant.
  if (FromReduction && Shuffles.size() > 1)
    return false;

  // Add any shuffle uses for the shuffles we have found, to include them in
  // our cost calculations.
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
        if (auto *SSV = dyn_cast<ShuffleVectorInst>(U))
          if (isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
            Shuffles.push_back(SSV);
      }
    }
  }

  // For each of the output shuffles, we try to sort all the first vector
  // elements to the beginning, followed by the second array elements at the
  // end. If the binops are legalized to smaller vectors, this may reduce the
  // total number of binops. We compute the ReconstructMask mask needed to
  // convert back to the original lane order.
  SmallVector<std::pair<int, int>> V1, V2;
  SmallVector<SmallVector<int>> OrigReconstructMasks;
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);

    // Check the operands are the same as the original, or reversed (in which
    // case we need to commute the mask).
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
    if (isa<UndefValue>(SVOp1)) {
      auto *SSV = cast<ShuffleVectorInst>(SVOp0);
      SVOp0 = SSV->getOperand(0);
      SVOp1 = SSV->getOperand(1);
      for (int &Elem : Mask) {
        if (Elem >= static_cast<int>(SSV->getShuffleMask().size()))
          return false;
        Elem = Elem < 0 ? Elem : SSV->getMaskValue(Elem);
      }
    }
    if (SVOp0 == Op1 && SVOp1 == Op0) {
      std::swap(SVOp0, SVOp1);
      ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
    }
    if (SVOp0 != Op0 || SVOp1 != Op1)
      return false;

    // Calculate the reconstruction mask for this shuffle, as the mask needed
    // to take the elements of the original vectors and reconstruct them into
    // this shuffle.
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      if (Mask[I] < 0) {
        ReconstructMask.push_back(-1);
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        });
        if (It != V1.end())
          ReconstructMask.push_back(It - V1.begin());
        else {
          ReconstructMask.push_back(V1.size());
          V1.emplace_back(Mask[I], V1.size());
        }
      } else {
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
        });
        if (It != V2.end())
          ReconstructMask.push_back(NumElts + It - V2.begin());
        else {
          ReconstructMask.push_back(NumElts + V2.size());
          V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
        }
      }
    }

    // For reductions, we know that the lane ordering out doesn't alter the
    // result. In-order can help simplify the shuffle away.
    if (FromReduction)
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // If the maximum element used from V1 and V2 is not larger than the new
  // vectors, the vectors are already packed and performing the optimization
  // again will likely not help any further. This also prevents us from
  // getting stuck in a cycle in case the costs do not also rule it out.
  if (V1.empty() || V2.empty() ||
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    return false;

  // Given a shuffle mask element M, return the base mask value, looking
  // through a single-source shuffle feeding the input if present.
  auto GetBaseMaskValue = [&](Instruction *I, int M) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return M;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getMaskValue(SV->getMaskValue(M));
    return SV->getMaskValue(M);
  };

  // Attempt to sort the inputs by their base mask values, to keep the new
  // input shuffles as close to identity masks as possible (and so cheaper).
  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
    return MXA < MYA;
  };
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  });
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  });

  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
  // modified order of the input shuffles.
  SmallVector<SmallVector<int>> ReconstructMasks;
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
      auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      };
      if (M < 0)
        ReconstructMask.push_back(-1);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
      } else {
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
      }
    }
    ReconstructMasks.push_back(std::move(ReconstructMask));
  }

  // Calculate the masks needed for the new input shuffles, which get padded
  // with poison elements.
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  }
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  }
  while (V1A.size() < NumElts) {
    V1A.push_back(PoisonMaskElem);
    V1B.push_back(PoisonMaskElem);
  }
  while (V2A.size() < NumElts) {
    V2A.push_back(PoisonMaskElem);
    V2B.push_back(PoisonMaskElem);
  }

  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
                                      ? TTI::SK_PermuteSingleSrc
                                      : TTI::SK_PermuteTwoSrc,
                                  VT, VT, SV->getShuffleMask(), CostKind);
  };
  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    return C +
           TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, VT, Mask, CostKind);
  };

  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
    return false;

  // When there are multiple shufflevector operations on the same input,
  // especially when the vector length is larger than the register size,
  // identical shuffle patterns may occur across each group of elements with
  // the size of the register. To avoid overcounting, identical patterns are
  // only costed once per unique group.
  std::set<SmallVector<int, 4>> UniqueShuffles;
  auto AddShuffleMaskAdjustedCost = [&](InstructionCost C, ArrayRef<int> Mask) {
    // Compute the cost for performing the shuffle over the full vector.
    auto ShuffleCost =
        TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, VT, Mask, CostKind);
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    // For each group of MaxElementsInVector contiguous elements, collect the
    // shuffle pattern and insert it into the set of unique patterns.
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    }
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
  };
  auto AddShuffleAdjustedCost = [&](InstructionCost C, Instruction *I) {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return C;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  };
  // Check that the inputs all consist of shufflevectors applied to the same
  // two operands.
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
          return false;
        ShuffleVectorInst *FirstSV =
            dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
        if (!FirstSV)
          return false;

        Value *In0 = FirstSV->getOperand(0), *In1 = FirstSV->getOperand(1);
        return std::all_of(
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
            });
      };

  // Get the costs of the shuffles + binops before and after with the new
  // shuffle masks.
  InstructionCost CostBefore =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), VT, CostKind) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), VT, CostKind);
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
                                InstructionCost(0), AddShuffleCost);
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleAdjustedCost);
  } else {
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
                                  InstructionCost(0), AddShuffleCost);
  }

  // The new binops will use only the lanes that remain after packing, so cost
  // them at the smaller widths.
  FixedVectorType *Op0SmallVT =
      FixedVectorType::get(VT->getScalarType(), V1.size());
  FixedVectorType *Op1SmallVT =
      FixedVectorType::get(VT->getScalarType(), V2.size());
  InstructionCost CostAfter =
      TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT, CostKind) +
      TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT, CostKind);
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
                               InstructionCost(0), AddShuffleMaskAdjustedCost);
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
  CostAfter +=
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
                      InstructionCost(0), AddShuffleMaskCost);

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
  LLVM_DEBUG(dbgs() << "  CostBefore: " << CostBefore
                    << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
      (CostBefore == CostAfter && !feedsIntoVectorReduction(SVI)))
    return false;

  // The cost model has passed; create the new instructions.
  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
    auto *SV = dyn_cast<ShuffleVectorInst>(I);
    if (!SV)
      return I;
    if (isa<UndefValue>(SV->getOperand(1)))
      if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
        if (InputShuffles.contains(SSV))
          return SSV->getOperand(Op);
    return SV->getOperand(Op);
  };
  Builder.SetInsertPoint(*SVI0A->getInsertionPointAfterDef());
  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
                                             GetShuffleOperand(SVI0A, 1), V1A);
  Builder.SetInsertPoint(*SVI0B->getInsertionPointAfterDef());
  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
                                             GetShuffleOperand(SVI0B, 1), V1B);
  Builder.SetInsertPoint(*SVI1A->getInsertionPointAfterDef());
  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
                                             GetShuffleOperand(SVI1A, 1), V2A);
  Builder.SetInsertPoint(*SVI1B->getInsertionPointAfterDef());
  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
                                             GetShuffleOperand(SVI1B, 1), V2B);
  Builder.SetInsertPoint(Op0);
  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
                                    NSV0A, NSV0B);
  if (auto *I = dyn_cast<Instruction>(NOp0))
    I->copyIRFlags(Op0, true);
  Builder.SetInsertPoint(Op1);
  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
                                    NSV1A, NSV1B);
  if (auto *I = dyn_cast<Instruction>(NOp1))
    I->copyIRFlags(Op1, true);

  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    Builder.SetInsertPoint(Shuffles[S]);
    Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
    replaceValue(*Shuffles[S], *NSV, false);
  }

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
  return true;
}
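
// Illustrative input (editor's sketch, not from the original source), with a
// blend mask taking even lanes from %a and odd lanes from %b:
//
//   %x = shufflevector <8 x i16> %s0, <8 x i16> %s1,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
//   %y = shufflevector <8 x i16> %s0, <8 x i16> %s1,
//        <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
//   %a = add <8 x i16> %x, %y
//   %b = sub <8 x i16> %x, %y
//   %r = shufflevector <8 x i16> %a, <8 x i16> %b,
//        <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
//
// If the target legalizes <8 x i16> into narrower registers, packing the
// used lanes of %s0/%s1 first can reduce the number of binops required.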

/// Check if the instruction depends on a ZExt and this ZExt can be moved
/// after the instruction. Move the ZExt if it is profitable. For example:
///     lshr(zext(a), c) -> zext(lshr(a, c))
/// The cost model calculations take into account whether zext(x) has other
/// users and whether the ZExt can be propagated through them too.
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
                                  m_Value(OtherOperand))) &&
      !match(&I, m_LShr(m_ZExt(m_Value(ZExted)), m_Value(OtherOperand))))
    return false;

  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);

  auto *BigTy = cast<FixedVectorType>(I.getType());
  auto *SmallTy = cast<FixedVectorType>(ZExted->getType());
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();

  if (I.getOpcode() == Instruction::LShr) {
    // Check that the shift amount is less than the number of bits in the
    // smaller type. Otherwise, the smaller lshr may return a poison value.
    KnownBits ShAmtKB = computeKnownBits(I.getOperand(1), *DL);
    if (ShAmtKB.getMaxValue().uge(BW))
      return false;
  } else {
    // Check that the expression overall uses at most the same number of bits
    // as ZExted.
    KnownBits KB = computeKnownBits(&I, *DL);
    if (KB.countMaxActiveBits() > BW)
      return false;
  }

  // Calculate the costs of leaving the current IR as it is versus moving the
  // ZExt operation later, along with adding truncates if needed.
  InstructionCost ZExtCost = TTI.getCastInstrCost(
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  InstructionCost CurrentCost = ZExtCost;
  InstructionCost ShrinkCost = 0;

  // Calculate the total cost and check that we can propagate through all the
  // ZExt users.
  for (User *U : ZExtOperand->users()) {
    auto *UI = cast<Instruction>(U);
    if (UI == &I) {
      CurrentCost +=
          TTI.getArithmeticInstrCost(UI->getOpcode(), BigTy, CostKind);
      ShrinkCost +=
          TTI.getArithmeticInstrCost(UI->getOpcode(), SmallTy, CostKind);
      ShrinkCost += ZExtCost;
      continue;
    }

    if (!Instruction::isBinaryOp(UI->getOpcode()))
      return false;

    // Check if we can propagate the ZExt through its other users.
    KnownBits KB = computeKnownBits(UI, *DL);
    if (KB.countMaxActiveBits() > BW)
      return false;

    CurrentCost += TTI.getArithmeticInstrCost(UI->getOpcode(), BigTy, CostKind);
    ShrinkCost +=
        TTI.getArithmeticInstrCost(UI->getOpcode(), SmallTy, CostKind);
    ShrinkCost += ZExtCost;
  }

  // If the other instruction operand is not a constant, we'll need to
  // generate a truncate instruction, so we have to adjust the cost.
  if (!isa<Constant>(OtherOperand))
    ShrinkCost += TTI.getCastInstrCost(
        Instruction::Trunc, SmallTy, BigTy,
        TargetTransformInfo::CastContextHint::None, CostKind);

  // If the costs of shrinking types and leaving the IR are the same, we'll
  // lean towards modifying the IR because shrinking opens opportunities for
  // other shrinking optimisations.
  if (ShrinkCost > CurrentCost)
    return false;

  Builder.SetInsertPoint(&I);
  Value *Op0 = ZExted;
  Value *Op1 = Builder.CreateTrunc(OtherOperand, SmallTy);
  // Keep the order of operands the same.
  if (I.getOperand(0) == OtherOperand)
    std::swap(Op0, Op1);
  Value *NewBinOp =
      Builder.CreateBinOp((Instruction::BinaryOps)I.getOpcode(), Op0, Op1);
  cast<Instruction>(NewBinOp)->copyIRFlags(&I);
  cast<Instruction>(NewBinOp)->copyMetadata(I);
  Value *NewZExtr = Builder.CreateZExt(NewBinOp, BigTy);
  replaceValue(I, *NewZExtr);
  return true;
}
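
// Illustrative example (editor's sketch, not from the original source): when
// the shift amount is known to stay below the narrow bit width, the zext can
// be delayed past the shift:
//
//   %z = zext <4 x i8> %x to <4 x i32>
//   %s = lshr <4 x i32> %z, <i32 4, i32 4, i32 4, i32 4>
// -->
//   %t = lshr <4 x i8> %x, <i8 4, i8 4, i8 4, i8 4>
//   %s = zext <4 x i8> %t to <4 x i32>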

/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
/// shuffle (DstVec, SrcVec, Mask)
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!match(&I,
             m_InsertElt(m_Value(DstVec),
                         m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx)),
                         m_ConstantInt(InsIdx))))
    return false;

  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
  // We can try combining vectors with different element counts, but not with
  // different element types.
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
    return false;

  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
    return false;

  // Insertion into poison is a cheaper single-operand shuffle.
  TargetTransformInfo::ShuffleKind SK;
  SmallVector<int> Mask(NumDstElts, PoisonMaskElem);

  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
  if (NeedDstSrcSwap) {
    SK = TargetTransformInfo::SK_PermuteSingleSrc;
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = 0;
    else
      Mask[InsIdx] = ExtIdx;
    std::swap(DstVec, SrcVec);
  } else {
    SK = TargetTransformInfo::SK_PermuteTwoSrc;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
    else
      Mask[InsIdx] = ExtIdx + NumDstElts;
  }

  // Cost.
  auto *Ins = cast<InsertElementInst>(&I);
  auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
  InstructionCost InsCost =
      TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx);
  InstructionCost ExtCost =
      TTI.getVectorInstrCost(*Ext, DstVecTy, CostKind, ExtIdx);
  InstructionCost OldCost = ExtCost + InsCost;
  InstructionCost NewCost = 0;

  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
    // Ignore a 'free' identity insertion shuffle.
    // TODO: getShuffleCost should return TCC_Free for identity shuffles.
    NewCost = TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
                                 nullptr, {DstVec, SrcVec});
  } else {
    // When creating a length-changing vector, always create it with a mask
    // whose first element carries ExtIdx, so that the first element of the
    // vector being created is always the target to be extracted.
    ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
    else
      ExtToVecMask[0] = ExtIdx;
    // Add the cost of expanding or narrowing the source.
    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                 DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
    NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind);
  }

  if (!Ext->hasOneUse())
    NewCost += ExtCost;

  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (OldCost < NewCost)
    return false;

  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
      SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
    else
      DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
  }

  // Canonicalize an undef param to the RHS to help further folds.
  if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
    ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts);
    std::swap(DstVec, SrcVec);
  }

  Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
  replaceValue(I, *Shuf);

  return true;
}
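
// Illustrative example (editor's sketch, not from the original source): the
// same-width case, where the extract/insert pair becomes a two-source blend.
// Here InsIdx = 0 and ExtIdx = 2, so Mask[0] = 2 + 4 = 6:
//
//   %e = extractelement <4 x float> %src, i64 2
//   %i = insertelement <4 x float> %dst, float %e, i64 0
// -->
//   %i = shufflevector <4 x float> %dst, <4 x float> %src,
//                      <4 x i32> <i32 6, i32 1, i32 2, i32 3>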

/// If we're interleaving 2 constant splats, for instance `<vscale x 8 x i32>
/// <splat of 666>` and `<vscale x 8 x i32> <splat of 777>`, we can create a
/// larger splat `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` first
/// before casting it back into `<vscale x 16 x i32>`.
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
                     m_APInt(SplatVal0), m_APInt(SplatVal1))))
    return false;

  LLVM_DEBUG(dbgs() << "VC: Folding interleave2 with two splats: " << I
                    << "\n");

  auto *VTy =
      cast<VectorType>(cast<IntrinsicInst>(I).getArgOperand(0)->getType());
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();

  // Just in case the costs of the interleave2 intrinsic and the bitcast are
  // both invalid, in which case we want to bail out, we use <= rather than <
  // here. Even if they both have valid and equal costs, it's probably not a
  // good idea to emit a high-cost constant splat.
  if (TTI.getInstructionCost(&I, CostKind) <=
      TTI.getCastInstrCost(Instruction::BitCast, I.getType(), ExtVTy,
                           TTI::CastContextHint::None, CostKind)) {
    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");
    return false;
  }

  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
  auto *NewSplat = ConstantVector::getSplat(
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));

  replaceValue(I, *Builder.CreateBitCast(NewSplat, I.getType()));
  return true;
}
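
// Worked example of the splat arithmetic above (editor's sketch): for i32
// splats SplatVal0 = 666 (0x29A) and SplatVal1 = 777 (0x309), Width = 32 and
// the combined i64 splat value is (777 << 32) | 666 = 0x000003090000029A, so
// each i64 lane carries the 666/777 pair that interleave2 would have produced
// as adjacent i32 lanes.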

/// Attempt to shrink loads that are only used by shufflevector instructions.
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  auto *OldLoad = dyn_cast<LoadInst>(&I);
  if (!OldLoad || !OldLoad->isSimple())
    return false;

  auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
  if (!OldLoadTy)
    return false;

  unsigned const OldNumElements = OldLoadTy->getNumElements();

  // Search all uses of the load. If all uses are shufflevector instructions
  // with poison second operands, find the minimum and maximum indices of the
  // vector elements referenced by all shuffle masks. Otherwise return
  // std::nullopt.
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      // Ensure all uses match the required pattern.
      User *Shuffle = Use.getUser();
      ArrayRef<int> Mask;

      if (!match(Shuffle,
                 m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask))))
        return std::nullopt;

      // Ignore shufflevector instructions that have no uses.
      if (Shuffle->use_empty())
        continue;

      // Find the min and max indices used by the shufflevector instruction.
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
        }
      }
    }

    if (OutputRange.second < OutputRange.first)
      return std::nullopt;

    return OutputRange;
  };

  // Get the range of vector elements used by shufflevector instructions.
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;

    // If the range of vector elements is smaller than the full load, attempt
    // to create a smaller load.
    if (NewNumElements < OldNumElements) {
      // Calculate the costs of the old and new operations.
      Type *ElemTy = OldLoadTy->getElementType();
      auto *NewLoadTy = FixedVectorType::get(ElemTy, NewNumElements);
      Value *PtrOp = OldLoad->getPointerOperand();

      InstructionCost OldCost = TTI.getMemoryOpCost(
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
      InstructionCost NewCost =
          TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(),
                              OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      SmallVector<UseEntry, 4u> NewUses;
      unsigned const MaxIndex = NewNumElements * 2u;

      for (llvm::Use &Use : I.uses()) {
        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();

        // Create an entry for the new use.
        NewUses.push_back({Shuffle, {OldMask.begin(), OldMask.end()}});

        // Validate the indices against the new, smaller two-operand shuffle.
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
            return false;
        }

        // Update the costs.
        OldCost +=
            TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
                               OldLoadTy, OldMask, CostKind);
        NewCost +=
            TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(),
                               NewLoadTy, OldMask, CostKind);
      }

      LLVM_DEBUG(
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n  OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");

      if (OldCost < NewCost || !NewCost.isValid())
        return false;

      // Create the new load of the smaller vector.
      Builder.SetInsertPoint(&I);
      auto *NewLoad = cast<LoadInst>(
          Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign()));
      NewLoad->copyMetadata(I);

      // Replace all uses.
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;

        Builder.SetInsertPoint(Shuffle);
        Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
        Value *NewShuffle = Builder.CreateShuffleVector(
            NewLoad, PoisonValue::get(NewLoadTy), NewMask);

        replaceValue(*Shuffle, *NewShuffle, false);
      }

      return true;
    }
  }
  return false;
}
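
// Illustrative example (editor's sketch, not from the original source): only
// the first two lanes of the load are ever referenced, so a <2 x i32> load
// suffices:
//
//   %v = load <8 x i32>, ptr %p, align 32
//   %s = shufflevector <8 x i32> %v, <8 x i32> poison,
//                      <4 x i32> <i32 0, i32 1, i32 1, i32 0>
// -->
//   %v2 = load <2 x i32>, ptr %p, align 32
//   %s  = shufflevector <2 x i32> %v2, <2 x i32> poison,
//                       <4 x i32> <i32 0, i32 1, i32 1, i32 0>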

/// Attempt to narrow a phi of shufflevector instructions where the two
/// incoming values have the same operands but different masks. If the masks
/// are offsets (rotations) of one another, we can use one branch to rotate
/// the incoming vector and perform one larger shuffle after the phi.
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi || Phi->getNumIncomingValues() != 2u)
    return false;

  Value *Op = nullptr;
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;

  if (!match(Phi->getOperand(0u),
             m_OneUse(m_Shuffle(m_Value(Op), m_Poison(), m_Mask(Mask0)))) ||
      !match(Phi->getOperand(1u),
             m_OneUse(m_Shuffle(m_Specific(Op), m_Poison(), m_Mask(Mask1)))))
    return false;

  // Ensure the result vectors are wider than the argument vector.
  auto *InputVT = cast<FixedVectorType>(Op->getType());
  auto *ResultVT = cast<FixedVectorType>(Phi->getType());
  auto const InputNumElements = InputVT->getNumElements();

  if (InputNumElements >= ResultVT->getNumElements())
    return false;

  // Take the difference of the two shuffle masks at each index. Ignore poison
  // values at the same index in both masks.
  SmallVector<int, 16> NewMask;
  NewMask.reserve(Mask0.size());

  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
      NewMask.push_back(M0 - M1);
    else if (M0 == -1 && M1 == -1)
      continue;
    else
      return false;
  }

  // Ensure all elements of the difference are equal: i.e. one incoming mask
  // is a rotation of the other.
  if (NewMask.empty() ||
      !all_of(NewMask, [&](int M) { return M == NewMask[0u]; }))
    return false;

  // Create the rotation mask from the difference of the two incoming masks.
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
  NewMask.clear();

  for (unsigned I = 0u; I < InputNumElements; ++I) {
    NewMask.push_back(Index);
    Index = (Index + 1u) % InputNumElements;
  }

  // Calculate the cost of the two incoming shuffles and of the new pair.
  auto OldCost =
      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ResultVT,
                         InputVT, Mask0, CostKind);
  OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                ResultVT, InputVT, Mask1, CostKind);
  auto NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                    InputVT, InputVT, NewMask, CostKind);
  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                ResultVT, InputVT, Mask1, CostKind);

  LLVM_DEBUG(dbgs() << "Found a phi of mergeable shuffles: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
                    << "\n");

  if (NewCost > OldCost)
    return false;

  // Create the rotated shuffle, the narrowed phi, and the widening shuffle
  // after the phi.
  auto *Shuf0 = cast<Instruction>(Phi->getOperand(0u));
  Builder.SetInsertPoint(Shuf0);
  auto *NewShuf0 =
      Builder.CreateShuffleVector(Op, PoisonValue::get(InputVT), NewMask);

  Builder.SetInsertPoint(Phi);
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));

  Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
  auto *NewShuf1 =
      Builder.CreateShuffleVector(NewPhi, PoisonValue::get(InputVT), Mask1);

  replaceValue(*Phi, *NewShuf1);
  return true;
}
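
// Illustrative example (editor's sketch, not from the original source): both
// incoming shuffles widen <4 x i32> %v to 8 lanes and their masks differ by a
// constant offset of 1, so the phi can be narrowed to <4 x i32>:
//
//   bb0: %s0 = shufflevector <4 x i32> %v, <4 x i32> poison,
//        <8 x i32> <i32 1, i32 2, i32 3, i32 poison, ...>
//   bb1: %s1 = shufflevector <4 x i32> %v, <4 x i32> poison,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 poison, ...>
//   %p = phi <8 x i32> [ %s0, %bb0 ], [ %s1, %bb1 ]
// -->
//   bb0: %rot = shufflevector <4 x i32> %v, <4 x i32> poison,
//        <4 x i32> <i32 1, i32 2, i32 3, i32 0>
//   %p4 = phi <4 x i32> [ %rot, %bb0 ], [ %v, %bb1 ]
//   %p  = shufflevector <4 x i32> %p4, <4 x i32> poison,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 poison, ...>
//
// Applying the second mask to the rotated vector reproduces the first mask,
// so both paths compute the same wide value.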

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
  if (DisableVectorCombine)
    return false;

  // Don't attempt vectorization if the target does not support vectors.
  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
    return false;

  LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");

  auto FoldInst = [this](Instruction &I) {
    Builder.SetInsertPoint(&I);
    bool IsVectorType = isa<VectorType>(I.getType());
    bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
    auto Opcode = I.getOpcode();

    LLVM_DEBUG(dbgs() << "VC: Visiting: " << I << '\n');

    // These folds should be beneficial regardless of when this pass is run
    // in the optimization pipeline.
    // The type checking is for run-time efficiency. We can avoid wasting time
    // dispatching to folding functions if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
          return true;
        break;
      default:
        break;
      }
    }

    // These transforms work with scalable and fixed vectors.
    // TODO: Identify and allow other scalable transforms.
    if (IsVectorType) {
      if (scalarizeOpOrCmp(I))
        return true;
      if (scalarizeLoad(I))
        return true;
      if (scalarizeExtExtract(I))
        return true;
      if (scalarizeVPIntrinsic(I))
        return true;
      if (foldInterleaveIntrinsics(I))
        return true;
    }

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
        return true;

    // If this is an early pipeline invocation of this pass, we are done.
    if (TryEarlyFoldsOnly)
      return false;

    // Otherwise, try folds that improve codegen but may interfere with
    // early IR canonicalizations.
    // The type checking is for run-time efficiency. We can avoid wasting time
    // dispatching to folding functions if there's no chance of matching.
    if (IsFixedVectorType) {
      switch (Opcode) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
          return true;
        if (foldInsExtBinop(I))
          return true;
        if (foldInsExtVectorToShuffle(I))
          return true;
        break;
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
          return true;
        if (foldShuffleOfBinops(I))
          return true;
        if (foldShuffleOfSelects(I))
          return true;
        if (foldShuffleOfCastops(I))
          return true;
        if (foldShuffleOfShuffles(I))
          return true;
        if (foldPermuteOfIntrinsic(I))
          return true;
        if (foldShuffleOfIntrinsics(I))
          return true;
        if (foldSelectShuffle(I))
          return true;
        if (foldShuffleToIdentity(I))
          return true;
        break;
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
          return true;
        break;
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
          return true;
        break;
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
          return true;
        if (foldBitOpOfCastConstant(I))
          return true;
        break;
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
          return true;
        break;
      default:
        if (shrinkType(I))
          return true;
        break;
      }
    } else {
      switch (Opcode) {
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
          return true;
        if (foldCastFromReductions(I))
          return true;
        break;
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
          return true;
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
          return true;
        break;
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
          return true;
        [[fallthrough]];
      default:
        if (Instruction::isBinaryOp(Opcode)) {
          if (foldExtractExtract(I))
            return true;
          if (foldExtractedCmps(I))
            return true;
          if (foldBinopOfReductions(I))
            return true;
        }
        break;
      }
    }
    return false;
  };

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;

    // Walk the instructions while tracking the next instruction, so that
    // folds which erase instructions (including the next one) can update the
    // iteration point.
    Instruction *I = &*BB.begin();
    while (I) {
      NextInst = I->getNextNode();
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
      I = NextInst;
    }
  }

  NextInst = nullptr;

  while (!Worklist.isEmpty()) {
    Instruction *I = Worklist.removeOne();
    if (!I)
      continue;

    if (isInstructionTriviallyDead(I)) {
      eraseInstruction(*I);
      continue;
    }

    MadeChange |= FoldInst(*I);
  }

  return MadeChange;
}

PreservedAnalyses VectorCombinePass::run(Function &F,
                                         FunctionAnalysisManager &FAM) {
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  AAResults &AA = FAM.getResult<AAManager>(F);
  const DataLayout *DL = &F.getDataLayout();
  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TTI::TCK_RecipThroughput,
                         TryEarlyFoldsOnly);
  if (!Combiner.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))

This is the interface for LLVM's primary stateless and local alias analysis.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

This file defines the DenseMap class.

This is the interface for a simple mod/ref and alias analysis over globals.

const size_t AbstractManglingParser< Derived, Alloc >::NumOps

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)

MachineInstr unsigned OpIdx

uint64_t IntrinsicInst * II

FunctionAnalysisManager FAM

This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static SymbolRef::Type getType(const Symbol *Sym)

This pass exposes codegen information to IR-level passes.

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)

Definition VectorCombine.cpp:3111

static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)

Detect concat of multiple values into a vector.

Definition VectorCombine.cpp:3077

static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)

Definition VectorCombine.cpp:1548

static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)

Definition VectorCombine.cpp:3063

static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)

Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.

Definition VectorCombine.cpp:566

static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)

The memory operation on a vector of ScalarType had alignment of VectorAlignment.

Definition VectorCombine.cpp:1785

static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)

Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....

Definition VectorCombine.cpp:3813

static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)

Check if it is legal to scalarize a memory access to VecTy at index Idx.

Definition VectorCombine.cpp:1734

static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))

static InstLane lookThroughShuffles(Use *U, int Lane)

Definition VectorCombine.cpp:3044

static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)

Definition VectorCombine.cpp:210

static const unsigned InvalidIndex

Definition VectorCombine.cpp:70

std::pair< Use *, int > InstLane

Definition VectorCombine.cpp:3042

static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)

Given an extract element instruction with constant index operand, shuffle the source vector (shift th...

Definition VectorCombine.cpp:581

static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))

static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))

static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)

Definition VectorCombine.cpp:1665

static constexpr int Concat[]

A manager for alias analyses.

Class for arbitrary precision integers.

LLVM_ABI APInt zext(unsigned width) const

Zero extend to a new width.

static APInt getOneBitSet(unsigned numBits, unsigned BitNo)

Return an APInt with exactly one bit set in the result.

bool uge(const APInt &RHS) const

Unsigned greater or equal comparison.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

const T & front() const

front - Get the first element.

size_t size() const

size - Get the array size.

A function analysis which provides an AssumptionCache.

A cache of @llvm.assume calls within a function.

LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const

Return true if the attribute exists in this set.

InstListType::iterator iterator

Instruction iterators...

BinaryOps getOpcode() const

Represents analyses that only rely on functions' control flow.

Value * getArgOperand(unsigned i) const

iterator_range< User::op_iterator > args()

Iteration adapter for range-for loops.

static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...

static Type * makeCmpResultType(Type *opnd_type)

Create a result type for fcmp/icmp.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

bool isFPPredicate() const

static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)

Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...

static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)

This is the shared class of boolean and integer constants.

const APInt & getValue() const

Return the constant as an APInt value reference.

This class represents a range of values.

LLVM_ABI ConstantRange urem(const ConstantRange &Other) const

Return a new range representing the possible values resulting from an unsigned remainder operation of...

LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const

Return a new range representing the possible values resulting from a binary-and of a value in this ra...

LLVM_ABI bool contains(const APInt &Val) const

Return true if the specified value is in the set.

static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)

Return a ConstantVector with the specified constant in each element.

static LLVM_ABI Constant * get(ArrayRef< Constant * > V)

A parsed version of the target data layout string in and methods for querying it.

Analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI bool isReachableFromEntry(const Use &U) const

Provide an overload for a Use.

Convenience struct for specifying and reasoning about fast-math flags.

Class to represent fixed width SIMD vectors.

unsigned getNumElements() const

static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

Common base class shared among various IRBuilders.

Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")

Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")

LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)

LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)

LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")

Return a vector value that contains.

LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

Value * CreateFreeze(Value *V, const Twine &Name="")

Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)

Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})

void SetCurrentDebugLocation(DebugLoc L)

Set location information used by debugging information.

Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")

Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")

ConstantInt * getInt64(uint64_t C)

Get a constant 64-bit value.

LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

ConstantInt * getInt32(uint32_t C)

Get a constant 32-bit value.

Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

InstTy * Insert(InstTy *I, const Twine &Name="") const

Insert and return the specified instruction.

Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)

Create either a UnaryOperator or BinaryOperator depending on Opc.

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)

Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)

PointerType * getPtrTy(unsigned AddrSpace=0)

Fetch the type representing a pointer.

Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)

InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.

void push(Instruction *I)

Push the instruction onto the worklist stack.

LLVM_ABI void setHasNoUnsignedWrap(bool b=true)

Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.

LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)

Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...

LLVM_ABI void setHasNoSignedWrap(bool b=true)

Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI void andIRFlags(const Value *V)

Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.

LLVM_ABI void setNonNeg(bool b=true)

Set or clear the nneg flag on this instruction, which must be a zext instruction.

LLVM_ABI bool comesBefore(const Instruction *Other) const

Given an instruction Other in the same basic block as this instruction, return true if this instructi...

LLVM_ABI AAMDNodes getAAMetadata() const

Returns the AA metadata for this instruction.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())

Copy metadata from SrcInst to this instruction.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

A wrapper class for inspecting calls to intrinsic functions.

Intrinsic::ID getIntrinsicID() const

Return the intrinsic ID of this intrinsic.

An instruction for reading from memory.

unsigned getPointerAddressSpace() const

Returns the address space of the pointer operand.

void setAlignment(Align Align)

Type * getPointerOperandType() const

Align getAlign() const

Return the alignment of the access that is being performed.

Representation for a specific memory location.

static LLVM_ABI MemoryLocation get(const LoadInst *LI)

Return a location with information about the memory reference by the given instruction.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

const SDValue & getOperand(unsigned Num) const

This instruction constructs a fixed permutation of two input vectors.

int getMaskValue(unsigned Elt) const

Return the shuffle mask value of this instruction for the given element index.

VectorType * getType() const

Overload to return most specific vector type.

static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)

Convert the input shuffle mask operand to a vector of integers.

static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)

Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...

static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)

Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

void assign(size_type NumElts, ValueParamT Elt)

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

void setAlignment(Align Align)

Analysis pass providing the TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)

Calculates a CastContextHint from I.

LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const

LLVM_ABI InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const

Estimate the overhead of scalarizing an instruction.

LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const

LLVM_ABI TypeSize getRegisterBitWidth(RegisterKind K) const

LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const

LLVM_ABI bool allowVectorElementIndexingUsingGEP() const

Returns true if GEP should not be used to index into vectors for this target.

LLVM_ABI InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const

LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const

LLVM_ABI InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const

Calculate the cost of vector reduction intrinsics.

LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const

LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const

This is an approximation of reciprocal throughput of a math/logic op.

LLVM_ABI unsigned getMinVectorRegisterBitWidth() const

LLVM_ABI InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const

LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const

LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

ShuffleKind

The various kinds of shuffle patterns for vector queries.

@ SK_PermuteSingleSrc

Shuffle elements of single source vector with any shuffle mask.

@ SK_Broadcast

Broadcast element 0 to all other elements.

@ SK_PermuteTwoSrc

Merge elements from two source vectors into one with any shuffle mask.

@ None

The cast is not used with a load/store of any kind.

@ OK_NonUniformConstantValue

The instances of the Type class are immutable: once they are created, they are never changed.

bool isPointerTy() const

True if this is an instance of PointerType.

LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

A Use represents the edge between a Value definition and its users.

Value * getOperand(unsigned i) const

static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)

std::optional< unsigned > getFunctionalIntrinsicID() const

std::optional< unsigned > getFunctionalOpcode() const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const

This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...

bool hasOneUse() const

Return true if there is exactly one use of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const

Returns an alignment of the pointer value.

unsigned getValueID() const

Return an ID for the concrete type of this object.

LLVM_ABI bool hasNUses(unsigned N) const

Return true if this Value has exactly N uses.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &)

Definition VectorCombine.cpp:4907

static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct an VectorType.

std::pair< iterator, bool > insert(const ValueT &V)

const ParentTy * getParent() const

self_iterator getIterator()

NodeTy * getNextNode()

Get the next node, or nullptr for the list tail.

Abstract Attribute helper functions.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

constexpr char Attrs[]

Key for Kernel::Metadata::mAttrs.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)

Return the function attributes for an intrinsic.

SpecificConstantMatch m_ZeroInt()

Convenience matchers for specific integer values.

OneUse_match< SubPat > m_OneUse(const SubPat &SP)

class_match< PoisonValue > m_Poison()

Match an arbitrary poison constant.

BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)

class_match< BinaryOperator > m_BinOp()

Match an arbitrary binary operation and ignore it.

BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)

class_match< Constant > m_Constant()

Match an arbitrary Constant and ignore it.

ap_match< APInt > m_APInt(const APInt *&Res)

Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.

CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)

Matches Trunc.

bool match(Val *V, const Pattern &P)

bind_ty< Instruction > m_Instruction(Instruction *&I)

Match an instruction, capturing it if we match.

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)

TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)

Matches ExtractElementInst.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)

Combine two pattern matchers matching L && R.

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)

Matches ShuffleVectorInst independently of mask value.

OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)

Matches LoadInst.

CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)

Matches ZExt.

BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)

Matches bitwise logic operations in either order.

class_match< CmpInst > m_Cmp()

Matches any compare instruction and ignore it.

CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)

Matches BitCast.

match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)

Match either "sext" or "zext nneg".

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)

FNeg_match< OpTy > m_FNeg(const OpTy &X)

Match 'fneg X' as 'fsub -0.0, X'.

BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)

auto m_Undef()

Match an arbitrary undef constant.

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)

Matches InsertElementInst.

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

@ User

could "use" a pointer

NodeAddr< PhiNode * > Phi

NodeAddr< UseNode * > Use

friend class Instruction

Iterator for Instructions in a `BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

FunctionAddr VTableAddr Value

void stable_sort(R &&Range)

UnaryFunction for_each(R &&Range, UnaryFunction F)

Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)

LLVM_ABI SDValue peekThroughBitcasts(SDValue V)

Return the non-bitcasted source operand of V if it exists.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.
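
A hedged sketch of these range helpers applied to a hypothetical shuffle mask:

  SmallVector<int, 8> Mask = {0, 1, 2, 3};
  bool NoPoisonLanes = all_of(Mask, [](int M) { return M >= 0; });
  for (auto [Idx, M] : enumerate(Mask))
    dbgs() << "lane " << Idx << " reads element " << M << "\n";
  for (int M : drop_begin(Mask)) // same mask with the first lane skipped
    (void)M;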

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

unsigned Log2_64_Ceil(uint64_t Value)

Return the ceil log base 2 of the specified value, 64 if the value is zero.

LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)

Given operand for a UnaryOperator, fold the result or return null.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)

Returns the arithmetic instruction opcode used when expanding a reduction.

constexpr bool isUIntN(unsigned N, uint64_t x)

Checks if an unsigned integer fits into the given (dynamic) bit width.

LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)

Given a callsite, callee, and arguments, fold the result or return null.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
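
Sketch of the usual application, assuming BB is a BasicBlock being scanned (isInstructionTriviallyDead is documented further below):

  // Early-increment iteration lets the current instruction be erased
  // without invalidating the loop.
  for (Instruction &Inst : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&Inst))
      Inst.eraseFromParent();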

LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)

Return true if speculation of the given load must be suppressed to avoid ordering or interfering with an active sanitizer.

LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.

LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)

Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.

LLVM_ABI Value * getSplatValue(const Value *V)

Get splat value if the input is a splat vector or return nullptr.
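
Hedged sketch, with Vec standing in for some vector operand:

  if (Value *Scalar = getSplatValue(Vec)) {
    // Every lane of Vec is Scalar, so the operation can be performed once
    // on Scalar and the result re-broadcast.
  }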

LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Determine the possible constant range of an integer or vector of integer value.

unsigned M1(unsigned Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)

Return true if the result produced by the instruction is not used, and the instruction will return.

LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)

Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.
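
These bit-math helpers often pair up; a sketch of rounding a hypothetical element count up to the next power of two:

  unsigned NumElts = 12;
  if (!isPowerOf2_32(NumElts))
    NumElts = 1u << Log2_64_Ceil(NumElts); // 12 -> 16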

bool isModSet(const ModRefInfo MRI)

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.

LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)

LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)

Return true if we know that executing a load from this value cannot trap.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)

Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) that will be vectorized and fold (OpValue) into (I).

LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)

Return true if the given value is known to be non-zero when defined.

MutableArrayRef(T &OneElt) -> MutableArrayRef< T >

constexpr int PoisonMaskElem

LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)

This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)

Given operands for a BinaryOperator, fold the result or return null.
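
Sketch of speculative simplification before materializing an instruction; LHS, RHS, and the SimplifyQuery SQ are assumed to be in scope:

  if (Value *Folded = simplifyBinOp(Instruction::Add, LHS, RHS, SQ)) {
    // The add folds away; use Folded instead of emitting a new add.
  }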

LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)

Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.

LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)

Returns the reduction intrinsic id corresponding to the binary operation.

@ And

Bitwise or logical AND of integers.

LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)

Identifies if the vector form of the intrinsic has a scalar operand.

DWARFExpression::Operation Op

unsigned M0(unsigned Val)

constexpr unsigned BitWidth

LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)

Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).

LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)

Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined value of C.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.
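
For example, an access 4 bytes past a 16-byte-aligned base is only 4-byte aligned:

  Align EltAlign = commonAlignment(Align(16), /*Offset=*/4); // == Align(4)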

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer lists are equal or the list is empty.
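
Hedged sketch over a hypothetical shuffle mask, also using is_contained and PoisonMaskElem from above:

  SmallVector<int> Mask = {3, 3, 3, 3};
  bool HasPoisonLane = is_contained(Mask, PoisonMaskElem); // false here
  bool IsSplatMask = all_equal(Mask);                      // true here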

LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)

Given operands for a CmpInst, fold the result or return null.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Returns true if V cannot be poison, but may be undef.

Type * toVectorTy(Type *Scalar, ElementCount EC)

A helper function for converting Scalar types to vector types.

LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)

Identify if the intrinsic is trivially vectorizable.

LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)

Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)

Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.

This struct is a compact representation of a valid (non-zero power of two) alignment.

unsigned countMaxActiveBits() const

Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.

APInt getMaxValue() const

Return the maximal unsigned value possible given these KnownBits.
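
These KnownBits queries combine with computeKnownBits (listed above); a sketch assuming V is an i32 Value and DL is the module's DataLayout:

  KnownBits Known(32);
  computeKnownBits(V, Known, DL);
  if (Known.countMaxActiveBits() <= 16) {
    // Every possible value of V fits in 16 bits; getMaxValue() would give
    // the largest value V can take.
    APInt UpperBound = Known.getMaxValue();
  }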