LLVM: lib/Target/X86/X86FloatingPoint.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

#include "llvm/Config/llvm-config.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <bitset>

52using namespace llvm;

53

54#define DEBUG_TYPE "x86-fp-stackifier"

55

56STATISTIC(NumFXCH, "Number of fxch instructions inserted");

57STATISTIC(NumFP, "Number of floating point instructions");

58

59namespace {

60const unsigned ScratchFPReg = 7;

61

62class FPS {

63public:

66

67private:

69

70

71

72

73

74

75

76

77

78

79

80

/// Liveness record shared by all CFG edges in one edge bundle: which FP
/// registers are live across the bundle and, once it has been decided, the
/// order in which they sit on the x87 stack.
struct LiveBundle {
  /// Bit mask of live FP registers; bit N stands for FP register N.
  unsigned Mask{0};

  /// Number of valid entries in FixStack. Zero means no stack order has been
  /// chosen for this bundle yet.
  unsigned FixCount{0};

  /// The chosen stack order, as FP register numbers. FixStack[0] is the
  /// register on top of the stack.
  unsigned char FixStack[8];

  LiveBundle() = default;

  /// A bundle is fixed when nothing is live in it, or when a stack order has
  /// already been recorded.
  bool isFixed() const { return Mask == 0 || FixCount != 0; }
};

98

99

100

102

103

104 EdgeBundles *Bundles = nullptr;

105

106

107 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {

108 unsigned Mask = 0;

110 I != MBB->livein_end();) {

112 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");

113 if (Reg >= X86::FP0 && Reg <= X86::FP6) {

114 Mask |= 1 << (Reg - X86::FP0);

115 if (RemoveFPs) {

116 I = MBB->removeLiveIn(I);

117 continue;

118 }

119 }

120 ++I;

121 }

123 }

124

125

126 void bundleCFGRecomputeKillFlags(MachineFunction &MF);

127

128 MachineBasicBlock *MBB = nullptr;

129

130

131

132

133

134 unsigned Stack[8] = {};

135 unsigned StackTop = 0;

136

137 enum {

138 NumFPRegs = 8

139 };

140

141

142

143

144

145 unsigned RegMap[NumFPRegs] = {};

146

147

148 void setupBlockStack();

149

150

151 void finishBlockStack();

152

153#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

154 void dumpStack() const {

155 dbgs() << "Stack contents:";

156 for (unsigned i = 0; i != StackTop; ++i) {

157 dbgs() << " FP" << Stack[i];

158 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");

159 }

160 }

161#endif

162

163

164

165 unsigned getSlot(unsigned RegNo) const {

166 assert(RegNo < NumFPRegs && "Regno out of range!");

167 return RegMap[RegNo];

168 }

169

170

171 bool isLive(unsigned RegNo) const {

172 unsigned Slot = getSlot(RegNo);

173 return Slot < StackTop && Stack[Slot] == RegNo;

174 }

175

176

177 unsigned getStackEntry(unsigned STi) const {

178 if (STi >= StackTop)

180 return Stack[StackTop - 1 - STi];

181 }

182

183

184

185 unsigned getSTReg(unsigned RegNo) const {

186 return StackTop - 1 - getSlot(RegNo) + X86::ST0;

187 }

188

189

190 void pushReg(unsigned Reg) {

191 assert(Reg < NumFPRegs && "Register number out of range!");

192 if (StackTop >= 8)

194 Stack[StackTop] = Reg;

195 RegMap[Reg] = StackTop++;

196 }

197

198

199 void popReg() {

200 if (StackTop == 0)

202 RegMap[Stack[--StackTop]] = ~0;

203 }

204

205 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop - 1; }

208 if (isAtTop(RegNo))

209 return;

210

211 unsigned STReg = getSTReg(RegNo);

212 unsigned RegOnTop = getStackEntry(0);

213

214

215 std::swap(RegMap[RegNo], RegMap[RegOnTop]);

216

217

218 if (RegMap[RegOnTop] >= StackTop)

220 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop - 1]);

221

222

223 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);

224 ++NumFXCH;

225 }

226

227 void duplicateToTop(unsigned RegNo, unsigned AsReg,

230 unsigned STReg = getSTReg(RegNo);

231 pushReg(AsReg);

232

233 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);

234 }

235

236

237

239

240

241

242

243

244

246

247

248

250 unsigned FPRegNo);

251

252

254

255

256

257 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,

259

260 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);

261

271

272

273 static bool isFPCopy(MachineInstr &MI) {

274 Register DstReg = MI.getOperand(0).getReg();

275 Register SrcReg = MI.getOperand(1).getReg();

276

277 return X86::RFP80RegClass.contains(DstReg) ||

278 X86::RFP80RegClass.contains(SrcReg);

279 }

280

281 void setKillFlags(MachineBasicBlock &MBB) const;

282};

283

285public:

// Default-construct the pass, handing the static ID to the
// MachineFunctionPass base.
286 X86FPStackifierLegacy() : MachineFunctionPass(ID) {}

287

288 static char ID;

289

290private:

// Declare the analyses this pass depends on.
// NOTE(review): the template argument of addRequired and the other
// statements of this method (listing lines 292, 294-296) are absent from
// this listing - restore them from upstream before compiling.
291 void getAnalysisUsage(AnalysisUsage &AU) const override {

293 AU.addRequired();

297 }

298

299 bool runOnMachineFunction(MachineFunction &MF) override;

300

301 MachineFunctionProperties getRequiredProperties() const override {

302 return MachineFunctionProperties().setNoVRegs();

303 }

304

305 StringRef getPassName() const override { return "X86 FP Stackifier"; }

306};

307}

308

309char X86FPStackifierLegacy::ID = 0;

310

312 false, false)

316

318 return new X86FPStackifierLegacy();

319}

320

321

322

324 assert(MO.isReg() && "Expected an FP register!");

326 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");

327 return Reg - X86::FP0;

328}

329

331

332

// Scan FP0..FP6 and report whether at least one of them is referenced by a
// non-debug operand. The test must be negated: reg_nodbg_empty() is true for
// an *unused* register, so without the '!' this returned true as soon as any
// FP register was unused.
// NOTE(review): the signature line of this function is absent from this
// listing; the listing line numbers are kept verbatim below.
333 static_assert(X86::FP6 == X86::FP0 + 6,

334 "Register enums aren't sorted right!");

335 const MachineRegisterInfo &MRI = MF.getRegInfo();

336 for (unsigned I = 0; I <= 6; ++I)

337 if (!MRI.reg_nodbg_empty(X86::FP0 + I)) {

338 return true;

339 }

340

341 return false;

342}

343

344

345

346

347bool FPS::run(MachineFunction &MF, EdgeBundles *FunctionBundles) {

348 Bundles = FunctionBundles;

350

351

352 bundleCFGRecomputeKillFlags(MF);

353

354 StackTop = 0;

355

356

357

358 df_iterator_default_set<MachineBasicBlock *> Processed;

359 MachineBasicBlock *Entry = &MF.front();

360

361 LiveBundle &Bundle =

362 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];

363

364

365

366 if ((Entry->getParent()->getFunction().getCallingConv() ==

367 CallingConv::X86_RegCall) &&

368 (Bundle.Mask && !Bundle.FixCount)) {

369

370

371

372

373

374

375 assert((Bundle.Mask & 0xFE) == 0 &&

376 "Only FP0 could be passed as an argument");

377 Bundle.FixCount = 1;

378 Bundle.FixStack[0] = 0;

379 }

380

382 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))

383 Changed |= processBasicBlock(MF, *BB);

384

385

386 if (MF.size() != Processed.size())

387 for (MachineBasicBlock &BB : MF)

388 if (Processed.insert(&BB).second)

389 Changed |= processBasicBlock(MF, BB);

390

391 LiveBundles.clear();

392

394}

395

396

397

398

399

400

401void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {

402 assert(LiveBundles.empty() && "Stale data in LiveBundles");

404

405

406 for (MachineBasicBlock &MBB : MF) {

407 setKillFlags(MBB);

408

409 const unsigned Mask = calcLiveInMask(&MBB, false);

410 if (!Mask)

411 continue;

412

414 }

415}

416

417

418

419

// Stackify one basic block: set up the virtual stack for block entry, walk
// the instructions dispatching each FP pseudo instruction to the handle*
// helper for its class, free registers that die at the instruction, and
// finally fix up the stack for block exit.
// NOTE(review): this listing has lost several statements of this function
// (the instruction loop header, the FPInstClass assignments, the case
// labels of the switch, the dead-register collection and the return).
// Restore them from upstream before building; the comments below describe
// only what is still visible.
420bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {

422 MBB = &BB;

423

424 setupBlockStack();

425

427 MachineInstr &MI = *I;

428 uint64_t Flags = MI.getDesc().TSFlags;

429

// Inline asm, FP copies, implicit defs of RFP80 registers and calls are
// each tested here; the statement guarded by each test is missing.
431 if (MI.isInlineAsm())

433

434 if (MI.isCopy() && isFPCopy(MI))

436

437 if (MI.isImplicitDef() &&

438 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))

440

441 if (MI.isCall())

443

444

445

446

447

448

449

450

451

// A fake use of an RFP80 register: one branch of the inner test is missing,
// the other drops the operand.
452 if (MI.isFakeUse()) {

453 const MachineOperand &MO = MI.getOperand(0);

454 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {

457 else

458 MI.removeOperand(0);

459 }

460 }

461

463 continue;

464

// PrevMI is only read by the debug output further down; see (void)PrevMI.
465 MachineInstr *PrevMI = nullptr;

467 PrevMI = &*std::prev(I);

468

469 ++NumFP;

471

472

473

475 for (const MachineOperand &MO : MI.operands())

478

// Dispatch on the FP instruction class; each handler receives the iterator.
479 switch (FPInstClass) {

481 handleZeroArgFP(I);

482 break;

484 handleOneArgFP(I);

485 break;

487 handleOneArgFPRW(I);

488 break;

490 handleTwoArgFP(I);

491 break;

493 handleCompareFP(I);

494 break;

496 handleCondMovFP(I);

497 break;

499 handleSpecialFP(I);

500 break;

501 default:

503 }

504

505

506

508

509

// A register in FP0..FP6 that is still live on the virtual stack is freed
// after the instruction.
510 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");

511 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg - X86::FP0)) {

512 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");

513 freeStackSlotAfter(I, Reg - X86::FP0);

514 }

515 }

516

517

// Debug output: report whether the pseudo instruction was deleted or which
// instructions were inserted, then dump the stack.
520 if (I == PrevI) {

521 dbgs() << "Just deleted pseudo instruction\n";

522 } else {

524

525 while (Start != BB.begin() && std::prev(Start) != PrevI)

527 dbgs() << "Inserted instructions:\n\t";

529 while (++Start != std::next(I)) {

530 }

531 }

532 dumpStack();

533 });

534 (void)PrevMI;

535

537 }

538

539 finishBlockStack();

540

542}

543

544

545

// Initialise the virtual FP stack for the current block (MBB) from the live
// bundle of its incoming edges.
// NOTE(review): the start of the first debug statement, the initializer of
// Bundle and two further statements are absent from this listing - restore
// them from upstream before building.
546void FPS::setupBlockStack() {

548 << " derived from " << MBB->getName() << ".\n");

// Every block starts from an empty virtual stack.
549 StackTop = 0;

550

551 const LiveBundle &Bundle =

553

// Nothing is live into this block: leave the stack empty.
554 if (!Bundle.Mask) {

556 return;

557 }

558

559

560 assert(Bundle.isFixed() && "Reached block before any predecessors");

561

562

// Push in reverse so that FixStack[0] is pushed last and ends up on top.
563 for (unsigned i = Bundle.FixCount; i > 0; --i) {

564 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"

565 << unsigned(Bundle.FixStack[i - 1]) << '\n');

566 pushReg(Bundle.FixStack[i - 1]);

567 }

568

569

570

571

// Collect the block's own FP live-ins, removing them from the live-in list
// (RemoveFPs == true), and adjust the stack to that mask at block entry.
572 unsigned Mask = calcLiveInMask(MBB, true);

573 adjustLiveRegs(Mask, MBB->begin());

575}

576

577

578

579

580

// Bring the virtual FP stack into the shape required by the live bundle of
// the current block's outgoing edges, or record the current shape if that
// bundle has no fixed order yet.
// NOTE(review): the condition guarding the early return, the start of the
// debug statement, and the definitions of BundleIdx and Term are absent
// from this listing - restore them from upstream before building.
581void FPS::finishBlockStack() {

582

584 return;

585

587 << " derived from " << MBB->getName() << ".\n");

588

589

591 LiveBundle &Bundle = LiveBundles[BundleIdx];

592

593

594

// Adjust the live registers to the bundle's mask, inserting at Term.
596 adjustLiveRegs(Bundle.Mask, Term);

597

// Nothing is live out of this block: no ordering is needed.
598 if (!Bundle.Mask) {

600 return;

601 }

602

603

// A fixed bundle dictates the order; otherwise this block's current order
// becomes the bundle's fixed order (FixStack[0] is the top of stack).
605 if (Bundle.isFixed()) {

607 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);

608 } else {

609

611 Bundle.FixCount = StackTop;

612 for (unsigned i = 0; i < StackTop; ++i)

613 Bundle.FixStack[i] = getStackEntry(i);

614 }

615}

616

617

618

619

620

namespace {
/// One row of an opcode translation table: `from` is the key opcode and `to`
/// the opcode it maps to. Ordering looks at `from` only, so a table sorted
/// by key can be searched with either a TableEntry or a bare opcode.
struct TableEntry {
  uint16_t from;
  uint16_t to;

  /// Row-to-row ordering by key.
  bool operator<(const TableEntry &TE) const { return TE.from > from; }

  /// Row-to-opcode ordering.
  friend bool operator<(const TableEntry &TE, unsigned V) {
    return V > TE.from;
  }

  /// Opcode-to-row ordering.
  [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
    return TE.from > V;
  }
};
} // namespace

634

637 if (I != Table.end() && I->from == Opcode)

638 return I->to;

639 return -1;

640}

641

// ASSERT_SORTED(TABLE): in builds with assertions, check that TABLE is
// sorted. A per-table static atomic flag, accessed with relaxed ordering,
// skips the check once it has passed. Expands to nothing under NDEBUG.
// The flag's type is spelled std::atomic<bool> explicitly.
#ifdef NDEBUG
#define ASSERT_SORTED(TABLE)
#else
#define ASSERT_SORTED(TABLE)                                                   \
  {                                                                            \
    static std::atomic<bool> TABLE##Checked(false);                            \
    if (!TABLE##Checked.load(std::memory_order_relaxed)) {                     \
      assert(is_sorted(TABLE) &&                                               \
             "All lookup tables must be sorted for efficient access!");        \
      TABLE##Checked.store(true, std::memory_order_relaxed);                   \
    }                                                                          \
  }
#endif

655

656

657

658

659

660

661

662

663

665 {X86::ABS_Fp32, X86::ABS_F},

666 {X86::ABS_Fp64, X86::ABS_F},

667 {X86::ABS_Fp80, X86::ABS_F},

668 {X86::ADD_Fp32m, X86::ADD_F32m},

669 {X86::ADD_Fp64m, X86::ADD_F64m},

670 {X86::ADD_Fp64m32, X86::ADD_F32m},

671 {X86::ADD_Fp80m32, X86::ADD_F32m},

672 {X86::ADD_Fp80m64, X86::ADD_F64m},

673 {X86::ADD_FpI16m32, X86::ADD_FI16m},

674 {X86::ADD_FpI16m64, X86::ADD_FI16m},

675 {X86::ADD_FpI16m80, X86::ADD_FI16m},

676 {X86::ADD_FpI32m32, X86::ADD_FI32m},

677 {X86::ADD_FpI32m64, X86::ADD_FI32m},

678 {X86::ADD_FpI32m80, X86::ADD_FI32m},

679 {X86::CHS_Fp32, X86::CHS_F},

680 {X86::CHS_Fp64, X86::CHS_F},

681 {X86::CHS_Fp80, X86::CHS_F},

682 {X86::CMOVBE_Fp32, X86::CMOVBE_F},

683 {X86::CMOVBE_Fp64, X86::CMOVBE_F},

684 {X86::CMOVBE_Fp80, X86::CMOVBE_F},

685 {X86::CMOVB_Fp32, X86::CMOVB_F},

686 {X86::CMOVB_Fp64, X86::CMOVB_F},

687 {X86::CMOVB_Fp80, X86::CMOVB_F},

688 {X86::CMOVE_Fp32, X86::CMOVE_F},

689 {X86::CMOVE_Fp64, X86::CMOVE_F},

690 {X86::CMOVE_Fp80, X86::CMOVE_F},

691 {X86::CMOVNBE_Fp32, X86::CMOVNBE_F},

692 {X86::CMOVNBE_Fp64, X86::CMOVNBE_F},

693 {X86::CMOVNBE_Fp80, X86::CMOVNBE_F},

694 {X86::CMOVNB_Fp32, X86::CMOVNB_F},

695 {X86::CMOVNB_Fp64, X86::CMOVNB_F},

696 {X86::CMOVNB_Fp80, X86::CMOVNB_F},

697 {X86::CMOVNE_Fp32, X86::CMOVNE_F},

698 {X86::CMOVNE_Fp64, X86::CMOVNE_F},

699 {X86::CMOVNE_Fp80, X86::CMOVNE_F},

700 {X86::CMOVNP_Fp32, X86::CMOVNP_F},

701 {X86::CMOVNP_Fp64, X86::CMOVNP_F},

702 {X86::CMOVNP_Fp80, X86::CMOVNP_F},

703 {X86::CMOVP_Fp32, X86::CMOVP_F},

704 {X86::CMOVP_Fp64, X86::CMOVP_F},

705 {X86::CMOVP_Fp80, X86::CMOVP_F},

706 {X86::COM_FpIr32, X86::COM_FIr},

707 {X86::COM_FpIr64, X86::COM_FIr},

708 {X86::COM_FpIr80, X86::COM_FIr},

709 {X86::COM_Fpr32, X86::COM_FST0r},

710 {X86::COM_Fpr64, X86::COM_FST0r},

711 {X86::COM_Fpr80, X86::COM_FST0r},

712 {X86::DIVR_Fp32m, X86::DIVR_F32m},

713 {X86::DIVR_Fp64m, X86::DIVR_F64m},

714 {X86::DIVR_Fp64m32, X86::DIVR_F32m},

715 {X86::DIVR_Fp80m32, X86::DIVR_F32m},

716 {X86::DIVR_Fp80m64, X86::DIVR_F64m},

717 {X86::DIVR_FpI16m32, X86::DIVR_FI16m},

718 {X86::DIVR_FpI16m64, X86::DIVR_FI16m},

719 {X86::DIVR_FpI16m80, X86::DIVR_FI16m},

720 {X86::DIVR_FpI32m32, X86::DIVR_FI32m},

721 {X86::DIVR_FpI32m64, X86::DIVR_FI32m},

722 {X86::DIVR_FpI32m80, X86::DIVR_FI32m},

723 {X86::DIV_Fp32m, X86::DIV_F32m},

724 {X86::DIV_Fp64m, X86::DIV_F64m},

725 {X86::DIV_Fp64m32, X86::DIV_F32m},

726 {X86::DIV_Fp80m32, X86::DIV_F32m},

727 {X86::DIV_Fp80m64, X86::DIV_F64m},

728 {X86::DIV_FpI16m32, X86::DIV_FI16m},

729 {X86::DIV_FpI16m64, X86::DIV_FI16m},

730 {X86::DIV_FpI16m80, X86::DIV_FI16m},

731 {X86::DIV_FpI32m32, X86::DIV_FI32m},

732 {X86::DIV_FpI32m64, X86::DIV_FI32m},

733 {X86::DIV_FpI32m80, X86::DIV_FI32m},

734 {X86::ILD_Fp16m32, X86::ILD_F16m},

735 {X86::ILD_Fp16m64, X86::ILD_F16m},

736 {X86::ILD_Fp16m80, X86::ILD_F16m},

737 {X86::ILD_Fp32m32, X86::ILD_F32m},

738 {X86::ILD_Fp32m64, X86::ILD_F32m},

739 {X86::ILD_Fp32m80, X86::ILD_F32m},

740 {X86::ILD_Fp64m32, X86::ILD_F64m},

741 {X86::ILD_Fp64m64, X86::ILD_F64m},

742 {X86::ILD_Fp64m80, X86::ILD_F64m},

743 {X86::ISTT_Fp16m32, X86::ISTT_FP16m},

744 {X86::ISTT_Fp16m64, X86::ISTT_FP16m},

745 {X86::ISTT_Fp16m80, X86::ISTT_FP16m},

746 {X86::ISTT_Fp32m32, X86::ISTT_FP32m},

747 {X86::ISTT_Fp32m64, X86::ISTT_FP32m},

748 {X86::ISTT_Fp32m80, X86::ISTT_FP32m},

749 {X86::ISTT_Fp64m32, X86::ISTT_FP64m},

750 {X86::ISTT_Fp64m64, X86::ISTT_FP64m},

751 {X86::ISTT_Fp64m80, X86::ISTT_FP64m},

752 {X86::IST_Fp16m32, X86::IST_F16m},

753 {X86::IST_Fp16m64, X86::IST_F16m},

754 {X86::IST_Fp16m80, X86::IST_F16m},

755 {X86::IST_Fp32m32, X86::IST_F32m},

756 {X86::IST_Fp32m64, X86::IST_F32m},

757 {X86::IST_Fp32m80, X86::IST_F32m},

758 {X86::IST_Fp64m32, X86::IST_FP64m},

759 {X86::IST_Fp64m64, X86::IST_FP64m},

760 {X86::IST_Fp64m80, X86::IST_FP64m},

761 {X86::LD_Fp032, X86::LD_F0},

762 {X86::LD_Fp064, X86::LD_F0},

763 {X86::LD_Fp080, X86::LD_F0},

764 {X86::LD_Fp132, X86::LD_F1},

765 {X86::LD_Fp164, X86::LD_F1},

766 {X86::LD_Fp180, X86::LD_F1},

767 {X86::LD_Fp32m, X86::LD_F32m},

768 {X86::LD_Fp32m64, X86::LD_F32m},

769 {X86::LD_Fp32m80, X86::LD_F32m},

770 {X86::LD_Fp64m, X86::LD_F64m},

771 {X86::LD_Fp64m80, X86::LD_F64m},

772 {X86::LD_Fp80m, X86::LD_F80m},

773 {X86::MUL_Fp32m, X86::MUL_F32m},

774 {X86::MUL_Fp64m, X86::MUL_F64m},

775 {X86::MUL_Fp64m32, X86::MUL_F32m},

776 {X86::MUL_Fp80m32, X86::MUL_F32m},

777 {X86::MUL_Fp80m64, X86::MUL_F64m},

778 {X86::MUL_FpI16m32, X86::MUL_FI16m},

779 {X86::MUL_FpI16m64, X86::MUL_FI16m},

780 {X86::MUL_FpI16m80, X86::MUL_FI16m},

781 {X86::MUL_FpI32m32, X86::MUL_FI32m},

782 {X86::MUL_FpI32m64, X86::MUL_FI32m},

783 {X86::MUL_FpI32m80, X86::MUL_FI32m},

784 {X86::SQRT_Fp32, X86::SQRT_F},

785 {X86::SQRT_Fp64, X86::SQRT_F},

786 {X86::SQRT_Fp80, X86::SQRT_F},

787 {X86::ST_Fp32m, X86::ST_F32m},

788 {X86::ST_Fp64m, X86::ST_F64m},

789 {X86::ST_Fp64m32, X86::ST_F32m},

790 {X86::ST_Fp80m32, X86::ST_F32m},

791 {X86::ST_Fp80m64, X86::ST_F64m},

792 {X86::ST_FpP80m, X86::ST_FP80m},

793 {X86::SUBR_Fp32m, X86::SUBR_F32m},

794 {X86::SUBR_Fp64m, X86::SUBR_F64m},

795 {X86::SUBR_Fp64m32, X86::SUBR_F32m},

796 {X86::SUBR_Fp80m32, X86::SUBR_F32m},

797 {X86::SUBR_Fp80m64, X86::SUBR_F64m},

798 {X86::SUBR_FpI16m32, X86::SUBR_FI16m},

799 {X86::SUBR_FpI16m64, X86::SUBR_FI16m},

800 {X86::SUBR_FpI16m80, X86::SUBR_FI16m},

801 {X86::SUBR_FpI32m32, X86::SUBR_FI32m},

802 {X86::SUBR_FpI32m64, X86::SUBR_FI32m},

803 {X86::SUBR_FpI32m80, X86::SUBR_FI32m},

804 {X86::SUB_Fp32m, X86::SUB_F32m},

805 {X86::SUB_Fp64m, X86::SUB_F64m},

806 {X86::SUB_Fp64m32, X86::SUB_F32m},

807 {X86::SUB_Fp80m32, X86::SUB_F32m},

808 {X86::SUB_Fp80m64, X86::SUB_F64m},

809 {X86::SUB_FpI16m32, X86::SUB_FI16m},

810 {X86::SUB_FpI16m64, X86::SUB_FI16m},

811 {X86::SUB_FpI16m80, X86::SUB_FI16m},

812 {X86::SUB_FpI32m32, X86::SUB_FI32m},

813 {X86::SUB_FpI32m64, X86::SUB_FI32m},

814 {X86::SUB_FpI32m80, X86::SUB_FI32m},

815 {X86::TST_Fp32, X86::TST_F},

816 {X86::TST_Fp64, X86::TST_F},

817 {X86::TST_Fp80, X86::TST_F},

818 {X86::UCOM_FpIr32, X86::UCOM_FIr},

819 {X86::UCOM_FpIr64, X86::UCOM_FIr},

820 {X86::UCOM_FpIr80, X86::UCOM_FIr},

821 {X86::UCOM_Fpr32, X86::UCOM_Fr},

822 {X86::UCOM_Fpr64, X86::UCOM_Fr},

823 {X86::UCOM_Fpr80, X86::UCOM_Fr},

824 {X86::XAM_Fp32, X86::XAM_F},

825 {X86::XAM_Fp64, X86::XAM_F},

826 {X86::XAM_Fp80, X86::XAM_F},

827};

828

832 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");

833 return Opc;

834}

835

836

837

838

839

840

841

842

844 {X86::ADD_FrST0, X86::ADD_FPrST0},

845

846 {X86::COMP_FST0r, X86::FCOMPP}, {X86::COM_FIr, X86::COM_FIPr},

847 {X86::COM_FST0r, X86::COMP_FST0r},

848

849 {X86::DIVR_FrST0, X86::DIVR_FPrST0}, {X86::DIV_FrST0, X86::DIV_FPrST0},

850

851 {X86::IST_F16m, X86::IST_FP16m}, {X86::IST_F32m, X86::IST_FP32m},

852

853 {X86::MUL_FrST0, X86::MUL_FPrST0},

854

855 {X86::ST_F32m, X86::ST_FP32m}, {X86::ST_F64m, X86::ST_FP64m},

856 {X86::ST_Frr, X86::ST_FPrr},

857

858 {X86::SUBR_FrST0, X86::SUBR_FPrST0}, {X86::SUB_FrST0, X86::SUB_FPrST0},

859

860 {X86::UCOM_FIr, X86::UCOM_FIPr},

861

862 {X86::UCOM_FPr, X86::UCOM_FPPr}, {X86::UCOM_Fr, X86::UCOM_FPr},

863};

864

867 MI.findRegisterDefOperand(X86::FPSW, nullptr))

869 return true;

870 return false;

871}

872

876 while (++I != MBB.end()) {

879 return I;

880 }

881 return MBB.end();

882}

883

884

885

886

887

888

889

891 MachineInstr &MI = *I;

892 const DebugLoc &dl = MI.getDebugLoc();

894

895 popReg();

896

897

899 if (Opcode != -1) {

900 I->setDesc(TII->get(Opcode));

901 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)

902 I->removeOperand(0);

903 MI.dropDebugNumber();

904 } else {

905

906

908 MachineBasicBlock &MBB = *MI.getParent();

910 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, nullptr))

912 }

914 }

915}

916

917

918

919

920

922 if (getStackEntry(0) == FPRegNo) {

923 popStackAfter(I);

924 return;

925 }

926

927

928

929

930 I = freeStackSlotBefore(++I, FPRegNo);

931}

932

933

934

937 unsigned STReg = getSTReg(FPRegNo);

938 unsigned OldSlot = getSlot(FPRegNo);

939 unsigned TopReg = Stack[StackTop - 1];

940 Stack[OldSlot] = TopReg;

941 RegMap[TopReg] = OldSlot;

942 RegMap[FPRegNo] = ~0;

943 Stack[--StackTop] = ~0;

947}

948

949

950

952 unsigned Defs = Mask;

953 unsigned Kills = 0;

954 for (unsigned i = 0; i < StackTop; ++i) {

955 unsigned RegNo = Stack[i];

956 if (!(Defs & (1 << RegNo)))

957

958 Kills |= (1 << RegNo);

959 else

960

961 Defs &= ~(1 << RegNo);

962 }

963 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");

964

965

966 while (Kills && Defs) {

969 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg

970 << "\n");

971 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);

972 std::swap(RegMap[KReg], RegMap[DReg]);

973 Kills &= ~(1 << KReg);

974 Defs &= ~(1 << DReg);

975 }

976

977

978 if (Kills && I != MBB->begin()) {

980 while (StackTop) {

981 unsigned KReg = getStackEntry(0);

982 if (!(Kills & (1 << KReg)))

983 break;

985 popStackAfter(I2);

986 Kills &= ~(1 << KReg);

987 }

988 }

989

990

991 while (Kills) {

994 freeStackSlotBefore(I, KReg);

995 Kills &= ~(1 << KReg);

996 }

997

998

999 while (Defs) {

1001 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");

1003 pushReg(DReg);

1004 Defs &= ~(1 << DReg);

1005 }

1006

1007

1009 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");

1010}

1011

1012

1013

1014

// Reorder the top FixCount entries of the virtual stack so that they match
// FixStack, where FixStack[0] is the desired top of stack.
// NOTE(review): the rest of the parameter list (the parameter named I that
// is passed on to moveToTop) and one trailing statement are absent from
// this listing - restore them from upstream before building.
1015void FPS::shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,

1017

// Work from the deepest requested position up to the top.
1018 while (FixCount--) {

1019

// Register currently at depth FixCount.
1020 unsigned OldReg = getStackEntry(FixCount);

1021

// Register wanted at depth FixCount.
1022 unsigned Reg = FixStack[FixCount];

1023 if (Reg == OldReg)

1024 continue;

1025

// First moveToTop brings Reg to the top; the second, on OldReg, runs for
// every depth except 0.
1026 moveToTop(Reg, I);

1027 if (FixCount > 0)

1028 moveToTop(OldReg, I);

1029 }

1031}

1032

1033

1034

1035

1036

1038 MachineInstr &MI = *I;

1039 unsigned STReturns = 0;

1040

1041 bool ClobbersFPStack = false;

// Scan the call's operands: note whether a register mask clobbers the FP
// registers, then strip the FP0..FP6 register operands.
// NOTE(review): this is the operand loop of a function whose signature is
// absent from this listing; listing line numbers are kept verbatim. The
// statement guarded by `if (Op.isDef())` (listing line 1065) is missing.
1042 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

1043 MachineOperand &Op = MI.getOperand(i);

1044

1045

1046 if (Op.isRegMask()) {

1047 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);

1048#ifndef NDEBUG

// A mask must treat all eight FP registers the same way.
1049 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");

1050 for (unsigned i = 1; i != 8; ++i)

1051 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&

1052 "Inconsistent FP register clobber");

1053#endif

1054

1055 if (ClobbersFP0)

1056 ClobbersFPStack = true;

1057 }

1058

// Skip every operand that is not a register in FP0..FP6. The isReg() test
// has to be negated: without the '!' every register operand was skipped and
// getReg() was called on non-register operands, so the code below never saw
// an FP register.
1059 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)

1060 continue;

1061

1062 assert(Op.isImplicit() && "Expected implicit def/use");

1063

1064 if (Op.isDef())

1066

1067

// Remove the FP operand, then step the index and bound back so the operand
// that moved into slot i is not skipped.
1068 MI.removeOperand(i);

1069 --i;

1070 --e;

1071 }

1072

1073

1074

1075

1076 assert((ClobbersFPStack || STReturns == 0) &&

1077 "ST returns without FP stack clobber");

1078 if (!ClobbersFPStack)

1079 return;

1080

1082

1083

1084

1086

1087

1088

1089

1090 while (StackTop > 0)

1091 popReg();

1092

1093 for (unsigned I = 0; I < N; ++I)

1094 pushReg(N - I - 1);

1095

1096

1097

1098 if (STReturns)

1099 I->dropDebugNumber();

1100}

1101

1102

1103

1105 MachineInstr &MI = *I;

1106

1107

1108 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;

1109 unsigned LiveMask = 0;

1110

// Scan the return's operands, collecting up to two FP0..FP6 register
// operands and stripping them from the instruction.
// NOTE(review): this is the operand loop of a function whose signature is
// absent from this listing; listing line numbers are kept verbatim. The
// opening of the assert and the statements that record FirstFPRegOp,
// SecondFPRegOp and LiveMask (listing lines 1117, 1125, 1128, 1130) are
// missing.
1111 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

1112 MachineOperand &Op = MI.getOperand(i);

// Skip every operand that is not a register in FP0..FP6. The isReg() test
// has to be negated: without the '!' every register operand was skipped and
// getReg() was called on non-register operands.
1113 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)

1114 continue;

1115

1116

1118 (Op.isKill() ||

1119 getFPReg(Op) == FirstFPRegOp ||

1120 MI.killsRegister(Op.getReg(),

1121 nullptr)) &&

1122 "Ret only defs operands, and values aren't live beyond it");

1123

1124 if (FirstFPRegOp == ~0U)

1126 else {

1127 assert(SecondFPRegOp == ~0U && "More than two fp operands!");

1129 }

1131

1132

// Remove the FP operand, then step the index and bound back so the operand
// that moved into slot i is not skipped.
1133 MI.removeOperand(i);

1134 --i;

1135 --e;

1136 }

1137

1138

1139

1140 adjustLiveRegs(LiveMask, MI);

1141 if (!LiveMask)

1142 return;

1143

1144

1145

1146

1147

1148 if (SecondFPRegOp == ~0U) {

1149

1150 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&

1151 "Top of stack not the right register for RET!");

1152

1153

1154

1155

1156 StackTop = 0;

1157 return;

1158 }

1159

1160

1161

1162

1163 if (StackTop == 1) {

1164 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0) &&

1165 "Stack misconfiguration for RET!");

1166

1167

1168

1169 unsigned NewReg = ScratchFPReg;

1170 duplicateToTop(FirstFPRegOp, NewReg, MI);

1171 FirstFPRegOp = NewReg;

1172 }

1173

1174

1175 assert(StackTop == 2 && "Must have two values live!");

1176

1177

1178

1179 if (getStackEntry(0) == SecondFPRegOp) {

1180 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");

1181 moveToTop(FirstFPRegOp, MI);

1182 }

1183

1184

1185

1186 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");

1187 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");

1188 StackTop = 0;

1189}

1190

1191

1192

1194 MachineInstr &MI = *I;

1195 unsigned DestReg = getFPReg(MI.getOperand(0));

1196

1197

1198 MI.removeOperand(0);

1200 MI.addOperand(

1202

1203

1204 pushReg(DestReg);

1205

1206 MI.dropDebugNumber();

1207}

1208

1209

1210

1212 MachineInstr &MI = *I;

1213 unsigned NumOps = MI.getDesc().getNumOperands();

1215 "Can only handle fst* & ftst instructions!");

1216

1217

1219 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, nullptr);

1220

1221

1222

1223

1224

1225

1226

1227 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||

1228 MI.getOpcode() == X86::ISTT_Fp16m32 ||

1229 MI.getOpcode() == X86::ISTT_Fp32m32 ||

1230 MI.getOpcode() == X86::ISTT_Fp64m32 ||

1231 MI.getOpcode() == X86::IST_Fp64m64 ||

1232 MI.getOpcode() == X86::ISTT_Fp16m64 ||

1233 MI.getOpcode() == X86::ISTT_Fp32m64 ||

1234 MI.getOpcode() == X86::ISTT_Fp64m64 ||

1235 MI.getOpcode() == X86::IST_Fp64m80 ||

1236 MI.getOpcode() == X86::ISTT_Fp16m80 ||

1237 MI.getOpcode() == X86::ISTT_Fp32m80 ||

1238 MI.getOpcode() == X86::ISTT_Fp64m80 ||

1239 MI.getOpcode() == X86::ST_FpP80m)) {

1240 duplicateToTop(Reg, ScratchFPReg, I);

1241 } else {

1242 moveToTop(Reg, I);

1243 }

1244

1245

1246 MI.removeOperand(NumOps - 1);

1248 MI.addOperand(

1250

1251 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||

1252 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||

1253 MI.getOpcode() == X86::ST_FP80m) {

1254 if (StackTop == 0)

1256 --StackTop;

1257 } else if (KillsSrc) {

1258 popStackAfter(I);

1259 }

1260

1261 MI.dropDebugNumber();

1262}

1263

1264

1265

1266

1267

1268

1269

1270

1271

1273 MachineInstr &MI = *I;

1274#ifndef NDEBUG

1275 unsigned NumOps = MI.getDesc().getNumOperands();

1276 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");

1277#endif

1278

1279

1281 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, nullptr);

1282

1283 if (KillsSrc) {

1284

1285

1286 moveToTop(Reg, I);

1287 if (StackTop == 0)

1289 --StackTop;

1290 pushReg(getFPReg(MI.getOperand(0)));

1291 } else {

1292

1293

1295 }

1296

1297

1298 MI.removeOperand(1);

1299 MI.removeOperand(0);

1301 MI.dropDebugNumber();

1302}

1303

1304

1305

1306

1307

1308

1310 {X86::ADD_Fp32, X86::ADD_FST0r}, {X86::ADD_Fp64, X86::ADD_FST0r},

1311 {X86::ADD_Fp80, X86::ADD_FST0r}, {X86::DIV_Fp32, X86::DIV_FST0r},

1312 {X86::DIV_Fp64, X86::DIV_FST0r}, {X86::DIV_Fp80, X86::DIV_FST0r},

1313 {X86::MUL_Fp32, X86::MUL_FST0r}, {X86::MUL_Fp64, X86::MUL_FST0r},

1314 {X86::MUL_Fp80, X86::MUL_FST0r}, {X86::SUB_Fp32, X86::SUB_FST0r},

1315 {X86::SUB_Fp64, X86::SUB_FST0r}, {X86::SUB_Fp80, X86::SUB_FST0r},

1316};

1317

1318

1320 {X86::ADD_Fp32, X86::ADD_FST0r},

1321 {X86::ADD_Fp64, X86::ADD_FST0r},

1322 {X86::ADD_Fp80, X86::ADD_FST0r},

1323 {X86::DIV_Fp32, X86::DIVR_FST0r},

1324 {X86::DIV_Fp64, X86::DIVR_FST0r},

1325 {X86::DIV_Fp80, X86::DIVR_FST0r},

1326 {X86::MUL_Fp32, X86::MUL_FST0r},

1327 {X86::MUL_Fp64, X86::MUL_FST0r},

1328 {X86::MUL_Fp80, X86::MUL_FST0r},

1329 {X86::SUB_Fp32, X86::SUBR_FST0r},

1330 {X86::SUB_Fp64, X86::SUBR_FST0r},

1331 {X86::SUB_Fp80, X86::SUBR_FST0r},

1332};

1333

1334

1336 {X86::ADD_Fp32, X86::ADD_FrST0},

1337 {X86::ADD_Fp64, X86::ADD_FrST0},

1338 {X86::ADD_Fp80, X86::ADD_FrST0},

1339 {X86::DIV_Fp32, X86::DIVR_FrST0},

1340 {X86::DIV_Fp64, X86::DIVR_FrST0},

1341 {X86::DIV_Fp80, X86::DIVR_FrST0},

1342 {X86::MUL_Fp32, X86::MUL_FrST0},

1343 {X86::MUL_Fp64, X86::MUL_FrST0},

1344 {X86::MUL_Fp80, X86::MUL_FrST0},

1345 {X86::SUB_Fp32, X86::SUBR_FrST0},

1346 {X86::SUB_Fp64, X86::SUBR_FrST0},

1347 {X86::SUB_Fp80, X86::SUBR_FrST0},

1348};

1349

1350

1352 {X86::ADD_Fp32, X86::ADD_FrST0}, {X86::ADD_Fp64, X86::ADD_FrST0},

1353 {X86::ADD_Fp80, X86::ADD_FrST0}, {X86::DIV_Fp32, X86::DIV_FrST0},

1354 {X86::DIV_Fp64, X86::DIV_FrST0}, {X86::DIV_Fp80, X86::DIV_FrST0},

1355 {X86::MUL_Fp32, X86::MUL_FrST0}, {X86::MUL_Fp64, X86::MUL_FrST0},

1356 {X86::MUL_Fp80, X86::MUL_FrST0}, {X86::SUB_Fp32, X86::SUB_FrST0},

1357 {X86::SUB_Fp64, X86::SUB_FrST0}, {X86::SUB_Fp80, X86::SUB_FrST0},

1358};

1359

1360

1361

1362

1363

1364

1365

1366

1367

1373 MachineInstr &MI = *I;

1374

1375 unsigned NumOperands = MI.getDesc().getNumOperands();

1376 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");

1377 unsigned Dest = getFPReg(MI.getOperand(0));

1378 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));

1379 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));

1380 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, nullptr);

1381 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);

1382 const DebugLoc &dl = MI.getDebugLoc();

1383

1384 unsigned TOS = getStackEntry(0);

1385

1386

1387

1388 if (Op0 != TOS && Op1 != TOS) {

1389

1390

1391

1392 if (KillsOp0) {

1393 moveToTop(Op0, I);

1394 TOS = Op0;

1395 } else if (KillsOp1) {

1396 moveToTop(Op1, I);

1397 TOS = Op1;

1398 } else {

1399

1400

1401

1402

1403

1404 duplicateToTop(Op0, Dest, I);

1405 Op0 = TOS = Dest;

1406 KillsOp0 = true;

1407 }

1408 } else if (!KillsOp0 && !KillsOp1) {

1409

1410

1411

1412 duplicateToTop(Op0, Dest, I);

1413 Op0 = TOS = Dest;

1414 KillsOp0 = true;

1415 }

1416

1417

1418

1419 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&

1420 "Stack conditions not set up right!");

1421

1422

1423

1424 ArrayRef InstTable;

1425 bool isForward = TOS == Op0;

1426 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);

1427 if (updateST0) {

1428 if (isForward)

1430 else

1432 } else {

1433 if (isForward)

1435 else

1437 }

1438

1439 int Opcode = Lookup(InstTable, MI.getOpcode());

1440 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");

1441

1442

1443 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;

1444

1445

1448

1449 if (MI.mayRaiseFPException())

1450 I->setFlag(MachineInstr::MIFlag::NoFPExcept);

1451

1452

1453

1454 if (KillsOp0 && KillsOp1 && Op0 != Op1) {

1455 assert(!updateST0 && "Should have updated other operand!");

1456 popStackAfter(I);

1457 }

1458

1459

1460

1461 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);

1462 assert(UpdatedSlot < StackTop && Dest < 7);

1463 Stack[UpdatedSlot] = Dest;

1464 RegMap[Dest] = UpdatedSlot;

1465 MBB->getParent()->deleteMachineInstr(&MI);

1466}

1467

1468

1469

1470

1472 MachineInstr &MI = *I;

1473

1474 unsigned NumOperands = MI.getDesc().getNumOperands();

1475 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");

1476 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));

1477 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));

1478 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, nullptr);

1479 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);

1480

1481

1482

1483 moveToTop(Op0, I);

1484

1485

1486 MI.getOperand(0).setReg(getSTReg(Op1));

1487 MI.removeOperand(1);

1489 MI.dropDebugNumber();

1490

1491

1492 if (KillsOp0)

1493 freeStackSlotAfter(I, Op0);

1494 if (KillsOp1 && Op0 != Op1)

1495 freeStackSlotAfter(I, Op1);

1496}

1497

1498

1499

1500

1501

1503 MachineInstr &MI = *I;

1504

1505 unsigned Op0 = getFPReg(MI.getOperand(0));

1506 unsigned Op1 = getFPReg(MI.getOperand(2));

1507 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);

1508

1509

1510 moveToTop(Op0, I);

1511

1512

1513

1514 MI.removeOperand(0);

1515 MI.removeOperand(1);

1516 MI.getOperand(0).setReg(getSTReg(Op1));

1518 MI.dropDebugNumber();

1519

1520

1521 if (Op0 != Op1 && KillsOp1) {

1522

1523 freeStackSlotAfter(I, Op1);

1524 }

1525}

1526

1527

1528

1529

1530

1532 MachineInstr &MI = *Inst;

1533

1534 if (MI.isCall()) {

1535 handleCall(Inst);

1536 return;

1537 }

1538

1539 if (MI.isReturn()) {

1540 handleReturn(Inst);

1541 return;

1542 }

1543

1544 switch (MI.getOpcode()) {

1545 default:

1547 case TargetOpcode::COPY: {

1548

1549 const MachineOperand &MO1 = MI.getOperand(1);

1550 const MachineOperand &MO0 = MI.getOperand(0);

1551 bool KillsSrc = MI.killsRegister(MO1.getReg(), nullptr);

1552

1553

1554 unsigned DstFP = getFPReg(MO0);

1555 unsigned SrcFP = getFPReg(MO1);

1556 assert(isLive(SrcFP) && "Cannot copy dead register");

1557 if (KillsSrc) {

1558

1559

1560 unsigned Slot = getSlot(SrcFP);

1562 RegMap[DstFP] = Slot;

1563 } else {

1564

1565

1566 duplicateToTop(SrcFP, DstFP, Inst);

1567 }

1568 break;

1569 }

1570

1571 case TargetOpcode::IMPLICIT_DEF: {

1572

1573 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;

1574 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');

1576 pushReg(Reg);

1577 break;

1578 }

1579

1580 case TargetOpcode::INLINEASM:

1581 case TargetOpcode::INLINEASM_BR: {

1582

1583

1584

1585

1586

1587

1588

1589

1590

1591

1592

1593

1594

1595

1596

1597

1598

1599

1600

1601

1602

1603

1604

1605

1606

1607

1608

1609

1610

1611

1612

1613

1614

1615 unsigned STUses = 0, STDefs = 0, STClobbers = 0;

1616 unsigned NumOps = 0;

1617 SmallSet<unsigned, 1> FRegIdx;

1618 unsigned RCID;

1619

1621 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {

1622 unsigned Flags = MI.getOperand(i).getImm();

1623 const InlineAsm::Flag F(Flags);

1624

1625 NumOps = F.getNumOperandRegisters();

1627 continue;

1628 const MachineOperand &MO = MI.getOperand(i + 1);

1629 if (!MO.isReg())

1630 continue;

1631 unsigned STReg = MO.getReg() - X86::FP0;

1632 if (STReg >= 8)

1633 continue;

1634

1635

1636

1637 if (F.hasRegClassConstraint(RCID)) {

1638 FRegIdx.insert(i + 1);

1639 continue;

1640 }

1641

1642 switch (F.getKind()) {

1643 case InlineAsm::Kind::RegUse:

1644 STUses |= (1u << STReg);

1645 break;

1646 case InlineAsm::Kind::RegDef:

1647 case InlineAsm::Kind::RegDefEarlyClobber:

1648 STDefs |= (1u << STReg);

1649 break;

1650 case InlineAsm::Kind::Clobber:

1651 STClobbers |= (1u << STReg);

1652 break;

1653 default:

1654 break;

1655 }

1656 }

1657

1658 if (STUses && isMask\_32(STUses))

1659 MI.emitGenericError("fixed input regs must be last on the x87 stack");

1661

1662

1663 if (STDefs && isMask\_32(STDefs)) {

1664 MI.emitGenericError("output regs must be last on the x87 stack");

1666 }

1668

1669

1670 if (STClobbers && isMask\_32(STDefs | STClobbers))

1671 MI.emitGenericError("clobbers must be last on the x87 stack");

1672

1673

1674 unsigned STPopped = STUses & (STDefs | STClobbers);

1675 if (STPopped && isMask\_32(STPopped))

1676 MI.emitGenericError(

1677 "implicitly popped regs must be last on the x87 stack");

1679

1680 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "

1681 << NumSTPopped << ", and defines " << NumSTDefs

1682 << " regs.\n");

1683

1684#ifndef NDEBUG

1685

1686

1687 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)

1688 if (FRegIdx.count(I)) {

1690 "Operands with constraint \"f\" cannot overlap with defs");

1691 }

1692#endif

1693

1694

1695

1696 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;

1697 for (const MachineOperand &Op : MI.operands()) {

1698 if (Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)

1699 continue;

1701

1702

1703

1704

1705 if (Op.isUse() && Op.isKill())

1706 FPKills |= 1U << FPReg;

1707 }

1708

1709

1710 FPKills &= ~(STDefs | STClobbers);

1711

1712

1713 unsigned char STUsesArray[8];

1714

1715 for (unsigned I = 0; I < NumSTUses; ++I)

1716 STUsesArray[I] = I;

1717

1718 shuffleStackTop(STUsesArray, NumSTUses, Inst);

1720 dbgs() << "Before asm: ";

1721 dumpStack();

1722 });

1723

1724

1725 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

1726 MachineOperand &Op = MI.getOperand(i);

1727 if (Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)

1728 continue;

1729

1731

1732 if (FRegIdx.count(i))

1733

1734 Op.setReg(getSTReg(FPReg));

1735 else

1736

1737 Op.setReg(X86::ST0 + FPReg);

1738 }

1739

1740

1741 StackTop -= NumSTPopped;

1742

1743 for (unsigned i = 0; i < NumSTDefs; ++i)

1744 pushReg(NumSTDefs - i - 1);

1745

1746

1747

1748

1749

1750

1751

1752

1753 while (FPKills) {

1755 if (isLive(FPReg))

1756 freeStackSlotAfter(Inst, FPReg);

1757 FPKills &= ~(1U << FPReg);

1758 }

1759

1760

1761 return;

1762 }

1763

1764

1765

1766

1767

1768 case TargetOpcode::FAKE_USE: {

1769 assert(MI.getNumExplicitOperands() == 1 &&

1770 "FAKE_USE must have exactly one operand");

1771 if (MI.getOperand(0).isKill()) {

1772 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));

1773 }

1774 MI.removeOperand(0);

1775 return;

1776 }

1777 }

1778

1779 Inst = MBB->erase(Inst);

1780

1781

1782

1786 } else

1787 --Inst;

1788}

1789

1790void FPS::setKillFlags(MachineBasicBlock &MBB) const {

1791 const TargetRegisterInfo &TRI =

1793 LiveRegUnits LPR(TRI);

1794

1795 LPR.addLiveOuts(MBB);

1796

1798 if (MI.isDebugInstr())

1799 continue;

1800

1801 std::bitset<8> Defs;

1803

1804 for (auto &MO : MI.operands()) {

1805 if (!MO.isReg())

1806 continue;

1807

1808 unsigned Reg = MO.getReg() - X86::FP0;

1809

1810 if (Reg >= 8)

1811 continue;

1812

1813 if (MO.isDef()) {

1814 Defs.set(Reg);

1815 if (LPR.available(MO.getReg()))

1817 } else

1818 Uses.push_back(&MO);

1819 }

1820

1821 for (auto *MO : Uses)

1822 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))

1824

1825 LPR.stepBackward(MI);

1826 }

1827}

1828

1829bool X86FPStackifierLegacy::runOnMachineFunction(MachineFunction &MF) {

1830 FPS Impl;

1831 if (!Impl.shouldRun(MF))

1832 return false;

1833

1834 EdgeBundles *Bundles =

1835 &getAnalysis().getEdgeBundles();

1836 return FPS().run(MF, Bundles);

1837}

1838

1839PreservedAnalyses

1842 FPS Impl;

1843 if (!Impl.shouldRun(MF))

1845

1847 bool Changed = Impl.run(MF, Bundles);

1852 return PA;

1853}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static Register getFPReg(const CSKYSubtarget &STI)

This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.

const size_t AbstractManglingParser< Derived, Alloc >::NumOps

static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)

Return the first DebugLoc that has line number information, given a range of instructions.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static constexpr MCPhysReg FPReg

Remove Loads Into Fake Uses

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static const TableEntry ReverseST0Table[]

Definition X86FloatingPoint.cpp:1319

#define ASSERT_SORTED(TABLE)

Definition X86FloatingPoint.cpp:645

static const TableEntry ForwardST0Table[]

Definition X86FloatingPoint.cpp:1309

static bool doesInstructionSetFPSW(MachineInstr &MI)

Definition X86FloatingPoint.cpp:865

static unsigned getFPReg(const MachineOperand &MO)

getFPReg - Return the X86::FPx register number for the specified operand.

Definition X86FloatingPoint.cpp:323

static const TableEntry ForwardSTiTable[]

Definition X86FloatingPoint.cpp:1335

static const TableEntry OpcodeTable[]

Definition X86FloatingPoint.cpp:664

static const TableEntry ReverseSTiTable[]

Definition X86FloatingPoint.cpp:1351

static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)

Definition X86FloatingPoint.cpp:635

static const TableEntry PopTable[]

Definition X86FloatingPoint.cpp:843

static unsigned getConcreteOpcode(unsigned Opcode)

Definition X86FloatingPoint.cpp:829

static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)

Definition X86FloatingPoint.cpp:874

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

AnalysisUsage & addPreservedID(const void *ID)

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Represents analyses that only rely on functions' control flow.

unsigned getBundle(unsigned N, bool Out) const

getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N

unsigned getNumBundles() const

getNumBundles - Return the total number of bundles in the CFG.

FunctionPass class - This class is used to implement most global optimizations.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

MachineInstr * remove(MachineInstr *I)

Remove the unbundled instruction from the instruction list without deleting it.

LiveInVector::const_iterator livein_iterator

LLVM_ABI iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

LLVM_ABI void dump() const

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

LLVM_ABI instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

MachineInstrBundleIterator< MachineInstr > iterator

LLVM_ABI StringRef getName() const

Return the name of the corresponding LLVM basic block, or an empty string.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineBasicBlock & front() const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

Representation of each machine instruction.

void setFlag(MIFlag Flag)

Set a MI flag.

MachineOperand class - Representation of each machine instruction operand.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

void setIsDead(bool Val=true)

void setIsKill(bool Val=true)

Register getReg() const

getReg - Returns the register number.

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

Wrapper class representing virtual and physical registers.

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

void push_back(const T &Elt)

TargetInstrInfo - Interface to description of machine instruction set.

virtual const TargetInstrInfo * getInstrInfo() const

virtual const TargetRegisterInfo * getRegisterInfo() const =0

Return the target's register information.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition X86FloatingPoint.cpp:1840

Pass manager infrastructure for declaring and invalidating analyses.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

@ SpecialFP

SpecialFP - Special instruction forms. Dispatch by opcode explicitly.

@ NotFP

NotFP - The default, set for instructions that do not use FP registers.

@ OneArgFPRW

OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).

@ ZeroArgFP

ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0.

@ OneArgFP

OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.

@ CompareFP

CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...

@ CondMovFP

CondMovFP - "2 operand" floating point conditional move instructions.

@ TwoArgFP

TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...

bool isX87Instruction(MachineInstr &MI)

Check if the instruction is X87 instruction.

This is an optimization pass for GlobalISel generic memory operations.

iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)

bool operator<(int64_t V1, const APSInt &V2)

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

int countr_one(T Value)

Count the number of ones from the least significant bit to the first zero bit.

constexpr bool isMask_32(uint32_t Value)

Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...

LLVM_ABI char & MachineDominatorsID

MachineDominators - This pass is a machine dominators analysis pass.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

constexpr int popcount(T Value) noexcept

Count the number of set bits in a value.

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

LLVM_ABI char & MachineLoopInfoID

MachineLoopInfo - This pass is a loop analysis pass.

auto reverse(ContainerTy &&C)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

auto lower_bound(R &&Range, T &&Value)

Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...

FunctionPass * createX86FPStackifierLegacyPass()

Definition X86FloatingPoint.cpp:317

FunctionAddr VTableAddr Next

DWARFExpression::Operation Op

LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

constexpr uint64_t NextPowerOf2(uint64_t A)

Returns the next power of two (in 64-bits) that is strictly greater than A.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

std::pair< iterator, bool > insert(NodeRef N)