LLVM: lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp Source File

//===- AArch64PostLegalizerLowering.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include <optional>

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace {

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES


/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-specific shuffles (e.g. G_ZIP1) produced by this
/// pass.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() = default;
};


/// Replace a G_FCONSTANT with a G_CONSTANT when every user is a store, so the
/// value can live on a GPR rather than an FPR.
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating point constants can be materialized using a fmov,
  // it makes more sense to just use a GPR.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) { return Use.mayStore(); });
}

/// Change a G_FCONSTANT into a G_CONSTANT with the same bit pattern.
void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}


/// Given a shuffle mask \p M over \p NumElts elements, determine whether the
/// shuffle can be implemented with an EXT instruction. On success, returns a
/// pair of (whether the inputs must be swapped, the EXT element offset).
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);

  // The remaining shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The EXT index is the expected value of the first element (leading undefs
  // are treated as if they continued the sequence). If that index selects from
  // the first input vector, the inputs must be swapped.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}
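
// Example for getExtMask: with NumElts = 4, the mask <1, 2, 3, 4> selects four
// consecutive elements of the concatenation V1:V2 starting at index 1, so it
// returns {ReverseExt = false, Imm = 1}. matchEXT later scales Imm by the
// element size in bytes to form the EXT byte offset.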


/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
///
/// First element of the returned pair is true when the produced
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) {
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch)
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}
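
// Example for isINSMask: with NumInputElements = 4, the mask <0, 1, 6, 3>
// matches the LHS vector in every lane except lane 2, so it returns
// {true, 2}: the shuffle is an insert of a single extracted element into
// lane 2 of the LHS vector.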


/// \returns true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_REV instruction. Stores the replacement in \p MatchInfo.
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try each REV lane size, from widest to narrowest.
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
      unsigned Opcode;
      if (LaneSize == 64U)
        Opcode = AArch64::G_REV64;
      else if (LaneSize == 32U)
        Opcode = AArch64::G_REV32;
      else
        Opcode = AArch64::G_REV16;

      MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
      return true;
    }
  }

  return false;
}


/// \returns true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}


/// \returns true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}


/// \returns true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.
bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  unsigned OperandOrder;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
  Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}
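
// Example masks recognized by matchTRN, matchUZP and matchZip above, for two
// 4-element inputs V1 and V2:
//   TRN1: <0, 4, 2, 6>   TRN2: <1, 5, 3, 7>
//   UZP1: <0, 2, 4, 6>   UZP2: <1, 3, 5, 7>
//   ZIP1: <0, 4, 1, 5>   ZIP2: <2, 6, 3, 7>
// The is*Mask helpers classify the shuffle mask, and the match functions
// replace the G_SHUFFLE_VECTOR with the corresponding target pseudo.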


/// Helper function for matchDup.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  //    %scalar:gpr(s64) = COPY $x0
  //    %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  //    %cst0:gpr(s32) = G_CONSTANT i32 0
  //    %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  //    %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0
  //    %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins, %undef, %zerovec
  //
  // ...which is equivalent to a splat of the inserted scalar:
  //
  //    %splat = G_DUP %scalar
  //
  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;

  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}


/// Helper function for matchDup.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();

  // Test if the splatted source is a G_BUILD_VECTOR; if so we can reference
  // the lane's definition directly.
  auto *BuildVecMI =
      getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                   MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
  // If the lane refers to the second input vector, rebase it into that
  // vector's index space.
  if (NumElements <= Lane)
    Lane -= NumElements;

  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}


bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via "just" a vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}
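
// Example for matchDup: a splat such as
//   %ins = G_INSERT_VECTOR_ELT %undef, %scalar, 0
//   %splat = G_SHUFFLE_VECTOR %ins, %undef, shufflemask(0, 0, 0, 0)
// is recognized above and rewritten as %splat = G_DUP %scalar.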


// Check if an EXT instruction can handle the shuffle mask when both vector
// sources of the shuffle are the same.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();

  // Assume that the first shuffle index is not UNDEF. Fail if it is.
  if (M[0] < 0)
    return false;

  // The immediate is the index of the first element; the other shuffle
  // indices must be the successive elements after the first one, wrapping
  // around the vector length.
  unsigned ExpectedElt = M[0];
  for (unsigned I = 1; I < NumElts; ++I) {
    // Increment the expected index. If it wraps around, follow it back to
    // index zero and keep going.
    ++ExpectedElt;
    if (ExpectedElt == NumElts)
      ExpectedElt = 0;

    if (M[I] < 0)
      continue; // Ignore UNDEF indices.
    if (ExpectedElt != static_cast<unsigned>(M[I]))
      return false;
  }

  return true;
}


bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    // The two-operand form failed; try the single-source form, which requires
    // the second operand to be undef.
    if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}


/// Replace a G_SHUFFLE_VECTOR instruction with the pseudo described by
/// \p MatchInfo.
void applyShuffleVectorPseudo(MachineInstr &MI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}


/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the immediate operand must be emitted as a
/// G_CONSTANT for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.SrcOps[2].getImm() == 0)
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  else {
    // The selector expects an i32 G_CONSTANT as the final operand.
    auto Cst =
        MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  MI.eraseFromParent();
}


void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.getSizeInBits() == 128 &&
         "Expected 128bit vector in applyFullRev");
  MachineIRBuilder MIRBuilder(MI);
  auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8);
  auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
  MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
  MI.eraseFromParent();
}


/// Match a G_INSERT_VECTOR_ELT whose index is not a compile-time constant.
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}


/// Lower a G_INSERT_VECTOR_ELT with a variable index by spilling the vector to
/// the stack, storing the element, and reloading the vector.
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  if (VecTy.isScalableVector())
    return;

  // Create a stack slot and store the vector into it.
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element.
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}


/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// For example:
///
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register Lhs = MI.getOperand(1).getReg();
  Register Rhs = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? Lhs : Rhs;
  Register SrcVec = Lhs;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = Rhs;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}


void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
}


/// isVShiftRImm - Check if this is a valid splat shift count for the immediate
/// operand of a vector shift right operation. The value must be in the range
/// 1 <= Value <= ElementBits.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}


/// Match a vector G_ASHR or G_LSHR with a constant splat shift amount.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}


void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
  MI.eraseFromParent();
}
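
// Example for matchVAshrLshrImm / applyVAshrLshrImm: a legalized vector shift
// by a constant splat such as
//   %amt = G_BUILD_VECTOR 3, 3, 3, 3
//   %res = G_ASHR %x, %amt
// is matched with Imm = 3 and rewritten to the immediate form
//   %res = G_VASHR %x, 3
// which maps onto the immediate-form vector shift instructions.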


/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic
/// immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    assert(C != 0 && "C should not be zero here!");
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is. Otherwise, only keep the change if the new constant
  // is cheaper to materialize than the original one.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, Size, Insn);
    return Insn.size();
  };

  if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
    return {{C, P}};

  return std::nullopt;
}
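
// Example for tryAdjustICmpImmAndPred: "x slt 0xFFF001" has an RHS that is not
// a legal arithmetic immediate, but the equivalent "x sle 0xFFF000" is, since
// 0xFFF000 is a 12-bit immediate shifted left by 12. The returned pair is
// {0xFFF000, ICMP_SLE}.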


/// Determine whether a G_ICMP \p MI with a constant RHS can have that constant
/// and its predicate adjusted into a legal arithmetic immediate.
///
/// \p MatchInfo receives the new constant and predicate on success.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}


void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}


bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane index must refer to the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    else if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}


void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);

  Register DupSrc = MI.getOperand(1).getReg();
  // The G_DUPLANE pseudos expect a 128-bit source, so widen 64-bit sources by
  // concatenating with an undef vector.
  if (SrcTy.getSizeInBits() == 64) {
    auto Undef = B.buildUndef(SrcTy);
    DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                  {Src1Reg, Undef.getReg(0)})
                 .getReg(0);
  }
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  MI.eraseFromParent();
}


bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
    return false;
  return SrcTy.isVector() && !SrcTy.isScalable() &&
         Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
}


void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
         "Expected a fixed length vector");

  for (int I = 0; I < SrcTy.getNumElements(); ++I)
    B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
  MI.eraseFromParent();
}


bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
    return false;

  return getAArch64VectorSplat(MI, MRI).has_value();
}


void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg()});
  MI.eraseFromParent();
}


/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if the operand has more than one use.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // An extend is foldable if it is a G_SEXT_INREG or an AND with a mask that
  // corresponds to UXTB/UXTH/UXTW.
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // If the shift feeds off a supported extend, both the extend and a small
  // shift amount can be folded into the compare.
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}
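
// Example for getCmpOperandFoldingProfit: in
//   %shl = G_SHL %a, 1
//   %cmp = G_ICMP eq, %shl, %b
// the shift operand scores 1 (it can become a shifted-register compare
// operand) while %b scores 0, so the operand-swap heuristic below moves the
// shift to the side of the compare where it can be folded.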


/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it is likely that the operands can be folded into the
  // compare or the RHS needs to be materialized as a constant anyway. This
  // mirrors the operand-swapping heuristic used by SelectionDAG when forming
  // compares.
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();

  // If there's already a legal arithmetic immediate on the RHS, keep it there.
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getZExtValue()))
    return false;

  // The foldable operand may be hidden behind a CMN (compare with negation);
  // look through it when computing the folding profit.
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // If we swap the LHS and RHS of the compare, would we be able to fold more
  // instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}


void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}


/// \returns a function which builds a vector floating point compare
/// instruction for a condition code \p CC.
/// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
              MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(LHS);
  assert(DstTy.isVector() && "Expected vector types only?");
  assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
    };
  case AArch64CC::LS:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
    };
  case AArch64CC::MI:
    return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
      return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
    };
  }
}


/// Try to lower a vector G_FCMP into the AArch64-specific compare pseudos.
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}


/// Lower a vector G_FCMP to the AArch64-specific floating point compare
/// pseudos, OR-combining and inverting the results as required by the
/// condition code mapping.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}


// Match a G_BUILD_VECTOR with at least one non-constant source, so it can be
// lowered to a chain of G_INSERT_VECTOR_ELT.
bool matchLowerBuildToInsertVecElt(GBuildVector *GBuildVec,
                                   MachineRegisterInfo &MRI) {
  // If every source value is constant, leave the G_BUILD_VECTOR alone so it
  // can be materialized as an immediate or constant-pool load.
  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    auto ConstVal =
        getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);

    if (!ConstVal.has_value())
      return true;
  }

  return false;
}


void applyLowerBuildToInsertVecElt(GBuildVector *GBuildVec,
                                   MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B) {
  LLT DstTy = MRI.getType(GBuildVec->getReg(0));
  Register DstReg = B.buildUndef(DstTy).getReg(0);

  for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
    Register SrcReg = GBuildVec->getSourceReg(I);
    if (mi_match(SrcReg, MRI, m_GImplicitDef()))
      continue;
    auto IdxReg = B.buildConstant(LLT::scalar(64), I);
    DstReg =
        B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
  }
  B.buildCopy(GBuildVec->getReg(0), DstReg);
  GBuildVec->eraseFromParent();
}


/// Match a scalar G_STORE of a G_TRUNC, so the truncate can be folded into a
/// truncating store of the wider value.
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncate.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores for value types of max 64 bits.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}


void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this point need to be lowered back.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}


/// Combine  <N x t>, unused = G_UNMERGE_VALUES (G_EXT v, undef, N)
///    =>    unused, <N x t> = G_UNMERGE_VALUES v
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  if (Unmerge.getNumDefs() != 2)
    return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  LLT DstTy = MRI.getType(Unmerge.getReg(0));
  if (!DstTy.isVector())
    return false;

  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!Ext)
    return false;

  Register ExtSrc1 = Ext->getOperand(1).getReg();
  Register ExtSrc2 = Ext->getOperand(2).getReg();
  auto LowestVal =
      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
    return false;

  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, ExtSrc2, MRI))
    return false;

  MatchInfo = ExtSrc1;
  return true;
}


void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Swap the two destination registers and use SrcReg as the source.
  Register Dst1 = MI.getOperand(0).getReg();
  MI.getOperand(0).setReg(MI.getOperand(1).getReg());
  MI.getOperand(1).setReg(Dst1);
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}


// Match a v2s64 G_MUL, which has no single NEON instruction and is instead
// scalarized by the apply function below.
bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  return DstTy == LLT::fixed_vector(2, 64);
}

void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
                   MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Split the multiply into narrower multiplies.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.fewerElementsVector(
      MI, 0,
      DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)));
}


class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
  const CombinerHelper Helper;
  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerLoweringImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI);

  static const char *getName() { return "AArch64PostLegalizerLowering"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_IMPL

AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, nullptr, CSEInfo),
      Helper(Observer, B, true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}


class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
} // end anonymous namespace


void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}


bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;
  assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled*/ true,
                     F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // This is a lowering pass; a full DCE pass is unnecessary here.
  CInfo.EnableFullDCE = false;
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}


char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm
