LLVM: lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp Source File (original) (raw)

30 return Ty.isPointer() && Ty.getSizeInBits() == Width;

31}

34 std::initializer_list DstOpMappingList,

35 std::initializer_list SrcOpMappingList,

41 std::initializer_list OpList,

48 switch (UniID) {

49 case S1:

59 case P0:

61 case P1:

63 case P3:

65 case P4:

67 case P5:

69 case P8:

82 return MRI.getType(Reg).getSizeInBits() == 32;

84 return MRI.getType(Reg).getSizeInBits() == 64;

86 return MRI.getType(Reg).getSizeInBits() == 96;

88 return MRI.getType(Reg).getSizeInBits() == 128;

90 return MRI.getType(Reg).getSizeInBits() == 256;

92 return MRI.getType(Reg).getSizeInBits() == 512;

130 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);

132 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);

134 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);

175 case _:

176 return true;

177 default:

179 }

180}

185

188 if (MI.getOperand(i).isReg())

189 return false;

190 continue;

191 }

192

193

194 if (MI.getOperand(i).isReg())

195 return false;

196

199 return false;

200 }

201

202

205

206 return true;

207}

212 : FastTypes(FastTypes) {}

216 return S16;

218 return S32;

220 return S64;

229 return _;

230}

235 return B32;

238 return B64;

240 return B96;

243 return _;

244}

250

251

252

253

255 Register Reg = MI.getOperand(0).getReg();

256 int Slot;

258 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));

259 else

260 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

261

262 if (Slot != -1)

263 return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];

264 }

265

266

268 if (Rule.Predicate.match(MI, MUI, MRI))

269 return &Rule.OperandMapping;

270 }

271

272 return nullptr;

273}

276 Rules.push_back(Rule);

277}

281 int Slot = getFastPredicateSlot(Ty);

282 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");

283 Div[Slot] = RuleApplyIDs;

284}

288 int Slot = getFastPredicateSlot(Ty);

289 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");

290 Uni[Slot] = RuleApplyIDs;

291}

344RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list OpcList,

350RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list OpcList,

357 unsigned Opc = MI.getOpcode();

358 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||

359 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||

360 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {

362 auto IRAIt = IRulesAlias.find(IntrID);

363 if (IRAIt == IRulesAlias.end())

364 return nullptr;

365 return &IRules.at(IRAIt->second);

366 }

367

368 auto GRAIt = GRulesAlias.find(Opc);

369 if (GRAIt == GRulesAlias.end())

370 return nullptr;

371 return &GRules.at(GRAIt->second);

372}

// Predicate: a boolean expression over a MachineInstr, stored as a flat list
// of elements (`Expression`) that is evaluated with short-circuit jumps.
//
// Each element holds a callable `Pred`, a `Neg` flag, and two relative jump
// offsets. After an element is evaluated, the cursor advances by TJumpOffset
// when the (possibly negated) result is true and by FJumpOffset when it is
// false. Evaluation ends when the cursor equals Expression.size().
//
// NOTE(review): this listing is missing several original lines (e.g. the
// declaration of `Pred`, the `Expression` member, and some method headers).
// Comments below that rely on those missing lines are marked as assumptions.
375class Predicate {

376private:

  // One node of the flattened expression.
  // NOTE(review): the bare numbered lines below presumably held the original
  // explanatory comment, lost in extraction - confirm against upstream.
377 struct Elt {

378

379

380

381

382

383

384

385

386

387

388

  // `Pred` is used below as Expression[Idx].Pred(MI); its declaration is not
  // visible here (presumably on the missing line 389, ahead of Neg) - TODO
  // confirm.
  // Neg: when true, the result of Pred is inverted before it is used.
390 bool Neg;

  // Relative distance to advance the cursor when this element yields true.
391 unsigned TJumpOffset;

  // Relative distance to advance the cursor when this element yields false.
392 unsigned FJumpOffset;

393 };

394

396

398

399public:

  // Body of a constructor whose header is not visible in this listing.
  // It appends a single non-negated element whose two jump offsets are both 1
  // (assuming the field order Pred, Neg, TJumpOffset, FJumpOffset), so for a
  // one-element expression either outcome moves the cursor to
  // Expression.size() and ends evaluation.
401 Expression.push_back({Pred, false, 1, 1});

402 };

403

  // Body of the evaluation routine (header not visible; `MI` is the
  // instruction passed to each Pred). Element 0 is always evaluated first, so
  // Expression must be non-empty when this runs.
405 unsigned Idx = 0;

  // Cursor value that signals "evaluation finished".
406 unsigned ResultIdx = Expression.size();

407 bool Result;

408 do {

409 Result = Expression[Idx].Pred(MI);

  // Apply the per-element negation flag.
410 Result = Expression[Idx].Neg ? !Result : Result;

  // Follow the jump that matches the outcome; this is what skips elements
  // whose value can no longer affect the final answer.
411 if (Result) {

412 Idx += Expression[Idx].TJumpOffset;

413 } else {

414 Idx += Expression[Idx].FJumpOffset;

415 }

416 } while ((Idx != ResultIdx));

417

  // The value of the last element evaluated is the value of the expression.
418 return Result;

419 };

420

  // Body of a negation routine (header and the declaration of NegExpression
  // are not visible; presumably operator! - TODO confirm). Every element is
  // copied with its Neg flag flipped and with its FJumpOffset and TJumpOffset
  // passed in swapped positions.
423 for (const Elt &ExprElt : Expression) {

424 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,

425 ExprElt.TJumpOffset});

426 }

427 return Predicate(std::move(NegExpression));

428 };

429

  // Logical AND: returns a new Predicate made of this expression's elements
  // followed by RHS's elements.
  // NOTE(review): the declaration of AndExpression is not visible (missing
  // line 431); presumably it starts as a copy of Expression - TODO confirm.
430 Predicate operator&&(const Predicate &RHS) const {

432

433 unsigned RHSSize = RHS.Expression.size();

  // Index one past the last left-hand element, i.e. where the left-hand
  // expression used to finish.
434 unsigned ResultIdx = Expression.size();

435 for (unsigned i = 0; i < ResultIdx; ++i) {

436

  // A false-jump that used to finish the left-hand expression is extended by
  // RHSSize so that it now lands past the appended RHS elements. True-jumps
  // are left untouched, so a true exit now lands on the first RHS element.
437 if (i + AndExpression[i].FJumpOffset == ResultIdx)

438 AndExpression[i].FJumpOffset += RHSSize;

439 }

440

441 AndExpression.append(RHS.Expression);

442

443 return Predicate(std::move(AndExpression));

444 }

445

  // Logical OR: mirror image of operator&&.
  // NOTE(review): the declaration of OrExpression is not visible (missing
  // line 447); presumably it starts as a copy of Expression - TODO confirm.
446 Predicate operator||(const Predicate &RHS) const {

448

449 unsigned RHSSize = RHS.Expression.size();

  // Index one past the last left-hand element.
450 unsigned ResultIdx = Expression.size();

451 for (unsigned i = 0; i < ResultIdx; ++i) {

452

  // A true-jump that used to finish the left-hand expression is extended by
  // RHSSize so that it now lands past the appended RHS elements. False-jumps
  // are left untouched, so a false exit now lands on the first RHS element.
453 if (i + OrExpression[i].TJumpOffset == ResultIdx)

454 OrExpression[i].TJumpOffset += RHSSize;

455 }

456

457 OrExpression.append(RHS.Expression);

458

459 return Predicate(std::move(OrExpression));

460 }

461};

466 : ST(&_ST), MRI(&_MRI) {

467

468 addRulesForGOpcs({G_ADD, G_SUB}, Standard)

477

478 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)

481

482 addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)

485

487

488 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)

497

498 addRulesForGOpcs({G_SHL}, Standard)

507

508 addRulesForGOpcs({G_LSHR}, Standard)

517

518 addRulesForGOpcs({G_ASHR}, Standard)

527

528 addRulesForGOpcs({G_FSHR}, Standard)

531

533

534 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)

539

540 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)

547

548 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)

555

556

557

559 addRulesForGOpcs({G_CONSTANT})

561 addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

562

563 addRulesForGOpcs({G_ICMP})

567

568 addRulesForGOpcs({G_FCMP})

571

572 addRulesForGOpcs({G_BRCOND})

575

576 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

577

578 addRulesForGOpcs({G_SELECT}, StandardB)

585

586 addRulesForGOpcs({G_ANYEXT})

597

598

599

600 addRulesForGOpcs({G_TRUNC})

610

614

615 addRulesForGOpcs({G_ZEXT})

624

629

630 addRulesForGOpcs({G_SEXT})

639

644

645 addRulesForGOpcs({G_SEXT_INREG})

650

651 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)

656

657 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();

658 bool hasSMRDSmall = ST->hasScalarSubwordLoads();

659 bool usesTrue16 = ST->useRealTrue16Insts();

660

661 Predicate isAlign16([](const MachineInstr &MI) -> bool {

662 return (*MI.memoperands_begin())->getAlign() >= Align(16);

663 });

664

665 Predicate isAlign4([](const MachineInstr &MI) -> bool {

666 return (*MI.memoperands_begin())->getAlign() >= Align(4);

667 });

668

669 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {

670 return (*MI.memoperands_begin())->isAtomic();

671 });

672

673 Predicate isUniMMO([](const MachineInstr &MI) -> bool {

675 });

676

677 Predicate isConst([](const MachineInstr &MI) -> bool {

678

683 });

684

685 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {

686 return (*MI.memoperands_begin())->isVolatile();

687 });

688

689 Predicate isInvMMO([](const MachineInstr &MI) -> bool {

690 return (*MI.memoperands_begin())->isInvariant();

691 });

692

693 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {

694 return (*MI.memoperands_begin())->getFlags() & MONoClobber;

695 });

696

697 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {

700 });

701

702 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {

705 return MemSize == 16 || MemSize == 8;

706 });

707

708 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {

711 });

712

713 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&

714 (isConst || isInvMMO || isNoClobberMMO);

715

716

717

718 addRulesForGOpcs({G_LOAD})

719

721 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}})

725

726

727

729 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}})

735

736

737 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall)

739 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall)

740

741 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)

750

751

752

753

754

755

756

757 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall)

759 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall)

760 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall)

766

767

769 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}})

773

779

780

781

783 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}})

789

790

791 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall)

793 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall)

794 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)

803

804

805 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall)

807 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall)

808 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall)

814

815

817 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}})

821

823

824

825 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})

827

833

836

842

844

845 addRulesForGOpcs({G_STORE})

846

848 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}})

852

853

854

855

856

858 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}})

862

863

864

865

867 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}})

871

872

878

879

880 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,

881 G_AMDGPU_TBUFFER_LOAD_FORMAT},

891

892 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})

894

895 addRulesForGOpcs({G_PTR_ADD})

900

901 addRulesForGOpcs({G_INTTOPTR})

908

909 addRulesForGOpcs({G_PTRTOINT})

916

918

919 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

920

921 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)

923

924 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

925

926 addRulesForGOpcs({G_GLOBAL_VALUE})

932

933 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

934

935 bool hasSALUFloat = ST->hasSALUFloatInsts();

936

937 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)

948 hasSALUFloat)

950

951

952

953

954

955 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)

969

970 addRulesForGOpcs({G_FPTOUI})

973

974 addRulesForGOpcs({G_UITOFP})

978

979 addRulesForGOpcs({G_IS_FPCLASS})

986

988

990

991

992 addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

993

994 addRulesForIOpcs({amdgcn_if_break}, Standard)

996

997 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)

999

1000 addRulesForIOpcs({amdgcn_readfirstlane})

1002

1003

1005

1006}