LLVM: lib/Target/X86/X86FloatingPoint.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
42#include "llvm/Config/llvm-config.h"
50#include
51#include
52using namespace llvm;
53
54#define DEBUG_TYPE "x86-fp-stackifier"
55
56STATISTIC(NumFXCH, "Number of fxch instructions inserted");
57STATISTIC(NumFP, "Number of floating point instructions");
58
59namespace {
60const unsigned ScratchFPReg = 7;
61
62class FPS {
63public:
66
67private:
69
70
71
72
73
74
75
76
77
78
79
80
// LiveBundle - Liveness and stack-layout record kept for one edge bundle.
struct LiveBundle {
  // Bit mask of live FP registers: bit N set means FP<N> is live.
  unsigned Mask = 0;

  // Number of valid entries in FixStack. Zero means no stack order has been
  // recorded for this bundle yet.
  unsigned FixCount = 0;

  // Recorded stack order: FixStack[i] is the FP register held in ST(i).
  // Zero-initialized so that a default-initialized bundle never exposes
  // indeterminate bytes, matching the other members.
  unsigned char FixStack[8] = {};

  LiveBundle() = default;

  // A bundle counts as fixed when nothing is live in it or when a stack order
  // has already been recorded.
  bool isFixed() const { return !Mask || FixCount; }
};
98
99
100
102
103
104 EdgeBundles *Bundles = nullptr;
105
106
// calcLiveInMask - Build a bit mask from MBB's live-in list in which bit N is
// set when FP<N> (FP0..FP6) appears as a live-in.
// NOTE(review): listing lines 109, 111, 116 and 122 are missing here (the
// start of the loop header, the definition of Reg, the action taken when
// RemoveFPs is set, and the return). Presumably RemoveFPs drops the FP entry
// from the live-in list and the function returns Mask; confirm against the
// upstream source.
107 static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) {
108 unsigned Mask = 0;
110 I != MBB->livein_end();) {
// The bit position below relies on FP0..FP6 having consecutive enum values.
112 static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums");
113 if (Reg >= X86::FP0 && Reg <= X86::FP6) {
114 Mask |= 1 << (Reg - X86::FP0);
115 if (RemoveFPs) {
// Skips the ++I below for this iteration.
117 continue;
118 }
119 }
120 ++I;
121 }
123 }
124
125
126 void bundleCFGRecomputeKillFlags(MachineFunction &MF);
127
128 MachineBasicBlock *MBB = nullptr;
129
130
131
132
133
134 unsigned Stack[8] = {};
135 unsigned StackTop = 0;
136
137 enum {
138 NumFPRegs = 8
139 };
140
141
142
143
144
145 unsigned RegMap[NumFPRegs] = {};
146
147
148 void setupBlockStack();
149
150
151 void finishBlockStack();
152
153#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
154 void dumpStack() const {
155 dbgs() << "Stack contents:";
156 for (unsigned i = 0; i != StackTop; ++i) {
157 dbgs() << " FP" << Stack[i];
158 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
159 }
160 }
161#endif
162
163
164
165 unsigned getSlot(unsigned RegNo) const {
166 assert(RegNo < NumFPRegs && "Regno out of range!");
167 return RegMap[RegNo];
168 }
169
170
171 bool isLive(unsigned RegNo) const {
172 unsigned Slot = getSlot(RegNo);
173 return Slot < StackTop && Stack[Slot] == RegNo;
174 }
175
176
// getStackEntry - Return the FP register held STi entries below the top of
// the stack, i.e. Stack[StackTop - 1 - STi].
// NOTE(review): listing line 179, the statement controlled by the range check,
// is missing; as shown the `if` would guard the return itself. Restore the
// missing statement before relying on this block.
177 unsigned getStackEntry(unsigned STi) const {
178 if (STi >= StackTop)
180 return Stack[StackTop - 1 - STi];
181 }
182
183
184
185 unsigned getSTReg(unsigned RegNo) const {
186 return StackTop - 1 - getSlot(RegNo) + X86::ST0;
187 }
188
189
// pushReg - Put Reg on top of the virtual stack: store it in Stack[StackTop],
// record that slot in RegMap, then increment StackTop.
// NOTE(review): listing line 193, the statement controlled by the
// `StackTop >= 8` overflow check, is missing; as shown the `if` would guard
// the store. Restore it before relying on this block.
190 void pushReg(unsigned Reg) {
191 assert(Reg < NumFPRegs && "Register number out of range!");
192 if (StackTop >= 8)
194 Stack[StackTop] = Reg;
195 RegMap[Reg] = StackTop++;
196 }
197
198
// popReg - Drop the top entry of the virtual stack: decrement StackTop and
// overwrite the RegMap entry of the removed register with ~0.
// NOTE(review): listing line 201, the statement controlled by the
// `StackTop == 0` underflow check, is missing; as shown the `if` would guard
// the pop itself. Restore it before relying on this block.
199 void popReg() {
200 if (StackTop == 0)
202 RegMap[Stack[--StackTop]] = ~0;
203 }
204
205 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop - 1; }
208 if (isAtTop(RegNo))
209 return;
210
211 unsigned STReg = getSTReg(RegNo);
212 unsigned RegOnTop = getStackEntry(0);
213
214
215 std::swap(RegMap[RegNo], RegMap[RegOnTop]);
216
217
218 if (RegMap[RegOnTop] >= StackTop)
220 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop - 1]);
221
222
223 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
224 ++NumFXCH;
225 }
226
227 void duplicateToTop(unsigned RegNo, unsigned AsReg,
230 unsigned STReg = getSTReg(RegNo);
231 pushReg(AsReg);
232
233 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
234 }
235
236
237
239
240
241
242
243
244
246
247
248
250 unsigned FPRegNo);
251
252
254
255
256
257 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
259
260 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
261
271
272
273 static bool isFPCopy(MachineInstr &MI) {
274 Register DstReg = MI.getOperand(0).getReg();
275 Register SrcReg = MI.getOperand(1).getReg();
276
277 return X86::RFP80RegClass.contains(DstReg) ||
278 X86::RFP80RegClass.contains(SrcReg);
279 }
280
281 void setKillFlags(MachineBasicBlock &MBB) const;
282};
283
285public:
286 X86FPStackifierLegacy() : MachineFunctionPass(ID) {}
287
288 static char ID;
289
290private:
291 void getAnalysisUsage(AnalysisUsage &AU) const override {
293 AU.addRequired();
297 }
298
299 bool runOnMachineFunction(MachineFunction &MF) override;
300
301 MachineFunctionProperties getRequiredProperties() const override {
302 return MachineFunctionProperties().setNoVRegs();
303 }
304
305 StringRef getPassName() const override { return "X86 FP Stackifier"; }
306};
307}
308
309char X86FPStackifierLegacy::ID = 0;
310
312 false, false)
316
318 return new X86FPStackifierLegacy();
319}
320
321
322
324 assert(MO.isReg() && "Expected an FP register!");
326 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
327 return Reg - X86::FP0;
328}
329
331
332
333 static_assert(X86::FP6 == X86::FP0 + 6,
334 "Register enums aren't sorted right!");
335 const MachineRegisterInfo &MRI = MF.getRegInfo();
336 for (unsigned I = 0; I <= 6; ++I)
337 if (.reg_nodbg_empty(X86::FP0 + I)) {
338 return true;
339 }
340
341 return false;
342}
343
344
345
346
// run - Process MF with the supplied edge bundles: recompute kill flags and
// live bundles, process the blocks reachable from the entry block in
// depth-first order, then any blocks that walk did not visit, and finally
// clear LiveBundles.
// NOTE(review): listing lines 349, 381 and 393 are missing. `Changed` is used
// below without a visible declaration and no return statement is shown;
// presumably those lines declare `Changed` and return it. Confirm against the
// upstream source.
347bool FPS::run(MachineFunction &MF, EdgeBundles *FunctionBundles) {
348 Bundles = FunctionBundles;
350
351
352 bundleCFGRecomputeKillFlags(MF);
353
354 StackTop = 0;
355
356
357
// Set of visited blocks, shared between the depth-first walk and the sweep
// over unvisited blocks below.
358 df_iterator_default_set<MachineBasicBlock *> Processed;
359 MachineBasicBlock *Entry = &MF.front();
360
361 LiveBundle &Bundle =
362 LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
363
364
365
// For the X86_RegCall calling convention, when the entry bundle has live
// registers but no recorded order yet, fix its layout to a single entry
// holding register number 0. The assert checks that only bit 0 of the mask is
// set.
366 if ((Entry->getParent()->getFunction().getCallingConv() ==
367 CallingConv::X86_RegCall) &&
368 (Bundle.Mask && !Bundle.FixCount)) {
369
370
371
372
373
374
375 assert((Bundle.Mask & 0xFE) == 0 &&
376 "Only FP0 could be passed as an argument");
377 Bundle.FixCount = 1;
378 Bundle.FixStack[0] = 0;
379 }
380
382 for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
383 Changed |= processBasicBlock(MF, *BB);
384
385
// Blocks the depth-first walk did not visit are processed here; insert()
// reports whether the block was new to the set.
386 if (MF.size() != Processed.size())
387 for (MachineBasicBlock &BB : MF)
388 if (Processed.insert(&BB).second)
389 Changed |= processBasicBlock(MF, BB);
390
391 LiveBundles.clear();
392
394}
395
396
397
398
399
400
// bundleCFGRecomputeKillFlags - For every block in MF, recompute kill flags
// via setKillFlags and compute the block's FP live-in mask. LiveBundles must
// be empty on entry.
// NOTE(review): listing lines 403 and 413 are missing. Presumably they size
// LiveBundles and merge each non-zero Mask into the block's bundle; confirm
// against the upstream source.
401void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) {
402 assert(LiveBundles.empty() && "Stale data in LiveBundles");
404
405
406 for (MachineBasicBlock &MBB : MF) {
407 setKillFlags(MBB);
408
// RemoveFPs is false here, so the live-in list is only inspected.
409 const unsigned Mask = calcLiveInMask(&MBB, false);
410 if (!Mask)
411 continue;
412
414 }
415}
416
417
418
419
420bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
422 MBB = &BB;
423
424 setupBlockStack();
425
428 uint64_t Flags = MI.getDesc().TSFlags;
429
431 if (MI.isInlineAsm())
433
434 if (MI.isCopy() && isFPCopy(MI))
436
437 if (MI.isImplicitDef() &&
438 X86::RFP80RegClass.contains(MI.getOperand(0).getReg()))
440
441 if (MI.isCall())
443
444
445
446
447
448
449
450
451
452 if (MI.isFakeUse()) {
453 const MachineOperand &MO = MI.getOperand(0);
454 if (MO.isReg() && X86::RFP80RegClass.contains(MO.getReg())) {
457 else
458 MI.removeOperand(0);
459 }
460 }
461
463 continue;
464
465 MachineInstr *PrevMI = nullptr;
467 PrevMI = &*std::prev(I);
468
469 ++NumFP;
471
472
473
475 for (const MachineOperand &MO : MI.operands())
478
479 switch (FPInstClass) {
481 handleZeroArgFP(I);
482 break;
484 handleOneArgFP(I);
485 break;
487 handleOneArgFPRW(I);
488 break;
490 handleTwoArgFP(I);
491 break;
493 handleCompareFP(I);
494 break;
496 handleCondMovFP(I);
497 break;
499 handleSpecialFP(I);
500 break;
501 default:
503 }
504
505
506
508
509
510 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
511 if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg - X86::FP0)) {
512 LLVM_DEBUG(dbgs() << "Register FP#" << Reg - X86::FP0 << " is dead!\n");
513 freeStackSlotAfter(I, Reg - X86::FP0);
514 }
515 }
516
517
520 if (I == PrevI) {
521 dbgs() << "Just deleted pseudo instruction\n";
522 } else {
524
525 while (Start != BB.begin() && std::prev(Start) != PrevI)
527 dbgs() << "Inserted instructions:\n\t";
529 while (++Start != std::next(I)) {
530 }
531 }
532 dumpStack();
533 });
534 (void)PrevMI;
535
537 }
538
539 finishBlockStack();
540
542}
543
544
545
// setupBlockStack - Reset the virtual stack for the current block (MBB) and
// rebuild it from the block's incoming LiveBundle.
// NOTE(review): listing lines 547, 552, 555 and 574 are missing (start of the
// first debug statement, the initializer of Bundle, the statement before the
// early return, and the line before the closing brace).
546void FPS::setupBlockStack() {
548 << " derived from " << MBB->getName() << ".\n");
549 StackTop = 0;
550
551 const LiveBundle &Bundle =
553
// Nothing is live in the bundle: leave the stack empty.
554 if (!Bundle.Mask) {
556 return;
557 }
558
559
560 assert(Bundle.isFixed() && "Reached block before any predecessors");
561
562
// Push from the highest FixStack index down, so FixStack[0] is pushed last
// and ends up on top of the stack.
563 for (unsigned i = Bundle.FixCount; i > 0; --i) {
564 LLVM_DEBUG(dbgs() << "Live-in st(" << (i - 1) << "): %fp"
565 << unsigned(Bundle.FixStack[i - 1]) << '\n');
566 pushReg(Bundle.FixStack[i - 1]);
567 }
568
569
570
571
// RemoveFPs is true here; the resulting mask is handed to adjustLiveRegs at
// the start of the block.
572 unsigned Mask = calcLiveInMask(MBB, true);
573 adjustLiveRegs(Mask, MBB->begin());
575}
576
577
578
579
580
// finishBlockStack - At the end of the current block, bring the virtual stack
// in line with the outgoing LiveBundle: adjust the live registers to the
// bundle's mask, then either shuffle into the bundle's recorded order or, if
// none is recorded yet, record the current order.
// NOTE(review): listing lines 583, 586, 590, 595, 599, 604, 606 and 610 are
// missing, including the condition of the first early return and the
// definitions of BundleIdx and Term.
581void FPS::finishBlockStack() {
582
584 return;
585
587 << " derived from " << MBB->getName() << ".\n");
588
589
591 LiveBundle &Bundle = LiveBundles[BundleIdx];
592
593
594
596 adjustLiveRegs(Bundle.Mask, Term);
597
// Nothing is live in the outgoing bundle.
598 if (!Bundle.Mask) {
600 return;
601 }
602
603
605 if (Bundle.isFixed()) {
607 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
608 } else {
609
// No order recorded yet: store the current one, with FixStack[i] holding the
// register returned by getStackEntry(i).
611 Bundle.FixCount = StackTop;
612 for (unsigned i = 0; i < StackTop; ++i)
613 Bundle.FixStack[i] = getStackEntry(i);
614 }
615}
616
617
618
619
620
621namespace {
// TableEntry - One row of an opcode mapping table, ordered by its `from`
// field.
// NOTE(review): the bodies of the two friend operators (listing lines 627 and
// 630) are missing. Presumably they compare TE.from with V; confirm against
// the upstream source.
622struct TableEntry {
623 uint16_t from;
624 uint16_t to;
// Orders entries by `from` only; `to` does not take part in the comparison.
625 bool operator<(const TableEntry &TE) const { return from < TE.from; }
626 friend bool operator<(const TableEntry &TE, unsigned V) {
628 }
629 [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
631 }
632};
633}
634
637 if (I != Table.end() && I->from == Opcode)
638 return I->to;
639 return -1;
640}
641
// ASSERT_SORTED - In builds with assertions enabled, check that TABLE is
// sorted, skipping the check once it has passed. Expands to nothing when
// NDEBUG is defined.
#ifdef NDEBUG
#define ASSERT_SORTED(TABLE)
#else
// The flag type is spelled std::atomic<bool> explicitly instead of relying on
// class template argument deduction. The flag is only read and written with
// relaxed ordering and is set after the assertion has been evaluated.
#define ASSERT_SORTED(TABLE)                                                   \
  {                                                                            \
    static std::atomic<bool> TABLE##Checked(false);                            \
    if (!TABLE##Checked.load(std::memory_order_relaxed)) {                     \
      assert(is_sorted(TABLE) &&                                               \
             "All lookup tables must be sorted for efficient access!");        \
      TABLE##Checked.store(true, std::memory_order_relaxed);                   \
    }                                                                          \
  }
#endif
655
656
657
658
659
660
661
662
663
665 {X86::ABS_Fp32, X86::ABS_F},
666 {X86::ABS_Fp64, X86::ABS_F},
667 {X86::ABS_Fp80, X86::ABS_F},
668 {X86::ADD_Fp32m, X86::ADD_F32m},
669 {X86::ADD_Fp64m, X86::ADD_F64m},
670 {X86::ADD_Fp64m32, X86::ADD_F32m},
671 {X86::ADD_Fp80m32, X86::ADD_F32m},
672 {X86::ADD_Fp80m64, X86::ADD_F64m},
673 {X86::ADD_FpI16m32, X86::ADD_FI16m},
674 {X86::ADD_FpI16m64, X86::ADD_FI16m},
675 {X86::ADD_FpI16m80, X86::ADD_FI16m},
676 {X86::ADD_FpI32m32, X86::ADD_FI32m},
677 {X86::ADD_FpI32m64, X86::ADD_FI32m},
678 {X86::ADD_FpI32m80, X86::ADD_FI32m},
679 {X86::CHS_Fp32, X86::CHS_F},
680 {X86::CHS_Fp64, X86::CHS_F},
681 {X86::CHS_Fp80, X86::CHS_F},
682 {X86::CMOVBE_Fp32, X86::CMOVBE_F},
683 {X86::CMOVBE_Fp64, X86::CMOVBE_F},
684 {X86::CMOVBE_Fp80, X86::CMOVBE_F},
685 {X86::CMOVB_Fp32, X86::CMOVB_F},
686 {X86::CMOVB_Fp64, X86::CMOVB_F},
687 {X86::CMOVB_Fp80, X86::CMOVB_F},
688 {X86::CMOVE_Fp32, X86::CMOVE_F},
689 {X86::CMOVE_Fp64, X86::CMOVE_F},
690 {X86::CMOVE_Fp80, X86::CMOVE_F},
691 {X86::CMOVNBE_Fp32, X86::CMOVNBE_F},
692 {X86::CMOVNBE_Fp64, X86::CMOVNBE_F},
693 {X86::CMOVNBE_Fp80, X86::CMOVNBE_F},
694 {X86::CMOVNB_Fp32, X86::CMOVNB_F},
695 {X86::CMOVNB_Fp64, X86::CMOVNB_F},
696 {X86::CMOVNB_Fp80, X86::CMOVNB_F},
697 {X86::CMOVNE_Fp32, X86::CMOVNE_F},
698 {X86::CMOVNE_Fp64, X86::CMOVNE_F},
699 {X86::CMOVNE_Fp80, X86::CMOVNE_F},
700 {X86::CMOVNP_Fp32, X86::CMOVNP_F},
701 {X86::CMOVNP_Fp64, X86::CMOVNP_F},
702 {X86::CMOVNP_Fp80, X86::CMOVNP_F},
703 {X86::CMOVP_Fp32, X86::CMOVP_F},
704 {X86::CMOVP_Fp64, X86::CMOVP_F},
705 {X86::CMOVP_Fp80, X86::CMOVP_F},
706 {X86::COM_FpIr32, X86::COM_FIr},
707 {X86::COM_FpIr64, X86::COM_FIr},
708 {X86::COM_FpIr80, X86::COM_FIr},
709 {X86::COM_Fpr32, X86::COM_FST0r},
710 {X86::COM_Fpr64, X86::COM_FST0r},
711 {X86::COM_Fpr80, X86::COM_FST0r},
712 {X86::DIVR_Fp32m, X86::DIVR_F32m},
713 {X86::DIVR_Fp64m, X86::DIVR_F64m},
714 {X86::DIVR_Fp64m32, X86::DIVR_F32m},
715 {X86::DIVR_Fp80m32, X86::DIVR_F32m},
716 {X86::DIVR_Fp80m64, X86::DIVR_F64m},
717 {X86::DIVR_FpI16m32, X86::DIVR_FI16m},
718 {X86::DIVR_FpI16m64, X86::DIVR_FI16m},
719 {X86::DIVR_FpI16m80, X86::DIVR_FI16m},
720 {X86::DIVR_FpI32m32, X86::DIVR_FI32m},
721 {X86::DIVR_FpI32m64, X86::DIVR_FI32m},
722 {X86::DIVR_FpI32m80, X86::DIVR_FI32m},
723 {X86::DIV_Fp32m, X86::DIV_F32m},
724 {X86::DIV_Fp64m, X86::DIV_F64m},
725 {X86::DIV_Fp64m32, X86::DIV_F32m},
726 {X86::DIV_Fp80m32, X86::DIV_F32m},
727 {X86::DIV_Fp80m64, X86::DIV_F64m},
728 {X86::DIV_FpI16m32, X86::DIV_FI16m},
729 {X86::DIV_FpI16m64, X86::DIV_FI16m},
730 {X86::DIV_FpI16m80, X86::DIV_FI16m},
731 {X86::DIV_FpI32m32, X86::DIV_FI32m},
732 {X86::DIV_FpI32m64, X86::DIV_FI32m},
733 {X86::DIV_FpI32m80, X86::DIV_FI32m},
734 {X86::ILD_Fp16m32, X86::ILD_F16m},
735 {X86::ILD_Fp16m64, X86::ILD_F16m},
736 {X86::ILD_Fp16m80, X86::ILD_F16m},
737 {X86::ILD_Fp32m32, X86::ILD_F32m},
738 {X86::ILD_Fp32m64, X86::ILD_F32m},
739 {X86::ILD_Fp32m80, X86::ILD_F32m},
740 {X86::ILD_Fp64m32, X86::ILD_F64m},
741 {X86::ILD_Fp64m64, X86::ILD_F64m},
742 {X86::ILD_Fp64m80, X86::ILD_F64m},
743 {X86::ISTT_Fp16m32, X86::ISTT_FP16m},
744 {X86::ISTT_Fp16m64, X86::ISTT_FP16m},
745 {X86::ISTT_Fp16m80, X86::ISTT_FP16m},
746 {X86::ISTT_Fp32m32, X86::ISTT_FP32m},
747 {X86::ISTT_Fp32m64, X86::ISTT_FP32m},
748 {X86::ISTT_Fp32m80, X86::ISTT_FP32m},
749 {X86::ISTT_Fp64m32, X86::ISTT_FP64m},
750 {X86::ISTT_Fp64m64, X86::ISTT_FP64m},
751 {X86::ISTT_Fp64m80, X86::ISTT_FP64m},
752 {X86::IST_Fp16m32, X86::IST_F16m},
753 {X86::IST_Fp16m64, X86::IST_F16m},
754 {X86::IST_Fp16m80, X86::IST_F16m},
755 {X86::IST_Fp32m32, X86::IST_F32m},
756 {X86::IST_Fp32m64, X86::IST_F32m},
757 {X86::IST_Fp32m80, X86::IST_F32m},
758 {X86::IST_Fp64m32, X86::IST_FP64m},
759 {X86::IST_Fp64m64, X86::IST_FP64m},
760 {X86::IST_Fp64m80, X86::IST_FP64m},
761 {X86::LD_Fp032, X86::LD_F0},
762 {X86::LD_Fp064, X86::LD_F0},
763 {X86::LD_Fp080, X86::LD_F0},
764 {X86::LD_Fp132, X86::LD_F1},
765 {X86::LD_Fp164, X86::LD_F1},
766 {X86::LD_Fp180, X86::LD_F1},
767 {X86::LD_Fp32m, X86::LD_F32m},
768 {X86::LD_Fp32m64, X86::LD_F32m},
769 {X86::LD_Fp32m80, X86::LD_F32m},
770 {X86::LD_Fp64m, X86::LD_F64m},
771 {X86::LD_Fp64m80, X86::LD_F64m},
772 {X86::LD_Fp80m, X86::LD_F80m},
773 {X86::MUL_Fp32m, X86::MUL_F32m},
774 {X86::MUL_Fp64m, X86::MUL_F64m},
775 {X86::MUL_Fp64m32, X86::MUL_F32m},
776 {X86::MUL_Fp80m32, X86::MUL_F32m},
777 {X86::MUL_Fp80m64, X86::MUL_F64m},
778 {X86::MUL_FpI16m32, X86::MUL_FI16m},
779 {X86::MUL_FpI16m64, X86::MUL_FI16m},
780 {X86::MUL_FpI16m80, X86::MUL_FI16m},
781 {X86::MUL_FpI32m32, X86::MUL_FI32m},
782 {X86::MUL_FpI32m64, X86::MUL_FI32m},
783 {X86::MUL_FpI32m80, X86::MUL_FI32m},
784 {X86::SQRT_Fp32, X86::SQRT_F},
785 {X86::SQRT_Fp64, X86::SQRT_F},
786 {X86::SQRT_Fp80, X86::SQRT_F},
787 {X86::ST_Fp32m, X86::ST_F32m},
788 {X86::ST_Fp64m, X86::ST_F64m},
789 {X86::ST_Fp64m32, X86::ST_F32m},
790 {X86::ST_Fp80m32, X86::ST_F32m},
791 {X86::ST_Fp80m64, X86::ST_F64m},
792 {X86::ST_FpP80m, X86::ST_FP80m},
793 {X86::SUBR_Fp32m, X86::SUBR_F32m},
794 {X86::SUBR_Fp64m, X86::SUBR_F64m},
795 {X86::SUBR_Fp64m32, X86::SUBR_F32m},
796 {X86::SUBR_Fp80m32, X86::SUBR_F32m},
797 {X86::SUBR_Fp80m64, X86::SUBR_F64m},
798 {X86::SUBR_FpI16m32, X86::SUBR_FI16m},
799 {X86::SUBR_FpI16m64, X86::SUBR_FI16m},
800 {X86::SUBR_FpI16m80, X86::SUBR_FI16m},
801 {X86::SUBR_FpI32m32, X86::SUBR_FI32m},
802 {X86::SUBR_FpI32m64, X86::SUBR_FI32m},
803 {X86::SUBR_FpI32m80, X86::SUBR_FI32m},
804 {X86::SUB_Fp32m, X86::SUB_F32m},
805 {X86::SUB_Fp64m, X86::SUB_F64m},
806 {X86::SUB_Fp64m32, X86::SUB_F32m},
807 {X86::SUB_Fp80m32, X86::SUB_F32m},
808 {X86::SUB_Fp80m64, X86::SUB_F64m},
809 {X86::SUB_FpI16m32, X86::SUB_FI16m},
810 {X86::SUB_FpI16m64, X86::SUB_FI16m},
811 {X86::SUB_FpI16m80, X86::SUB_FI16m},
812 {X86::SUB_FpI32m32, X86::SUB_FI32m},
813 {X86::SUB_FpI32m64, X86::SUB_FI32m},
814 {X86::SUB_FpI32m80, X86::SUB_FI32m},
815 {X86::TST_Fp32, X86::TST_F},
816 {X86::TST_Fp64, X86::TST_F},
817 {X86::TST_Fp80, X86::TST_F},
818 {X86::UCOM_FpIr32, X86::UCOM_FIr},
819 {X86::UCOM_FpIr64, X86::UCOM_FIr},
820 {X86::UCOM_FpIr80, X86::UCOM_FIr},
821 {X86::UCOM_Fpr32, X86::UCOM_Fr},
822 {X86::UCOM_Fpr64, X86::UCOM_Fr},
823 {X86::UCOM_Fpr80, X86::UCOM_Fr},
824 {X86::XAM_Fp32, X86::XAM_F},
825 {X86::XAM_Fp64, X86::XAM_F},
826 {X86::XAM_Fp80, X86::XAM_F},
827};
828
832 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
833 return Opc;
834}
835
836
837
838
839
840
841
842
844 {X86::ADD_FrST0, X86::ADD_FPrST0},
845
846 {X86::COMP_FST0r, X86::FCOMPP}, {X86::COM_FIr, X86::COM_FIPr},
847 {X86::COM_FST0r, X86::COMP_FST0r},
848
849 {X86::DIVR_FrST0, X86::DIVR_FPrST0}, {X86::DIV_FrST0, X86::DIV_FPrST0},
850
851 {X86::IST_F16m, X86::IST_FP16m}, {X86::IST_F32m, X86::IST_FP32m},
852
853 {X86::MUL_FrST0, X86::MUL_FPrST0},
854
855 {X86::ST_F32m, X86::ST_FP32m}, {X86::ST_F64m, X86::ST_FP64m},
856 {X86::ST_Frr, X86::ST_FPrr},
857
858 {X86::SUBR_FrST0, X86::SUBR_FPrST0}, {X86::SUB_FrST0, X86::SUB_FPrST0},
859
860 {X86::UCOM_FIr, X86::UCOM_FIPr},
861
862 {X86::UCOM_FPr, X86::UCOM_FPPr}, {X86::UCOM_Fr, X86::UCOM_FPr},
863};
864
867 MI.findRegisterDefOperand(X86::FPSW, nullptr))
869 return true;
870 return false;
871}
872
876 while (++I != MBB.end()) {
879 return I;
880 }
881 return MBB.end();
882}
883
884
885
886
887
888
889
892 const DebugLoc &dl = MI.getDebugLoc();
894
895 popReg();
896
897
899 if (Opcode != -1) {
900 I->setDesc(TII->get(Opcode));
901 if (Opcode == X86::FCOMPP || Opcode == X86::UCOM_FPPr)
902 I->removeOperand(0);
903 MI.dropDebugNumber();
904 } else {
905
906
908 MachineBasicBlock &MBB = *MI.getParent();
910 if (Next != MBB.end() && Next->readsRegister(X86::FPSW, nullptr))
912 }
914 }
915}
916
917
918
919
920
922 if (getStackEntry(0) == FPRegNo) {
923 popStackAfter(I);
924 return;
925 }
926
927
928
929
930 I = freeStackSlotBefore(++I, FPRegNo);
931}
932
933
934
937 unsigned STReg = getSTReg(FPRegNo);
938 unsigned OldSlot = getSlot(FPRegNo);
939 unsigned TopReg = Stack[StackTop - 1];
940 Stack[OldSlot] = TopReg;
941 RegMap[TopReg] = OldSlot;
942 RegMap[FPRegNo] = ~0;
943 Stack[--StackTop] = ~0;
947}
948
949
950
952 unsigned Defs = Mask;
953 unsigned Kills = 0;
954 for (unsigned i = 0; i < StackTop; ++i) {
955 unsigned RegNo = Stack[i];
956 if (!(Defs & (1 << RegNo)))
957
958 Kills |= (1 << RegNo);
959 else
960
961 Defs &= ~(1 << RegNo);
962 }
963 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
964
965
966 while (Kills && Defs) {
969 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
970 << "\n");
971 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
972 std::swap(RegMap[KReg], RegMap[DReg]);
973 Kills &= ~(1 << KReg);
974 Defs &= ~(1 << DReg);
975 }
976
977
978 if (Kills && I != MBB->begin()) {
980 while (StackTop) {
981 unsigned KReg = getStackEntry(0);
982 if (!(Kills & (1 << KReg)))
983 break;
985 popStackAfter(I2);
986 Kills &= ~(1 << KReg);
987 }
988 }
989
990
991 while (Kills) {
994 freeStackSlotBefore(I, KReg);
995 Kills &= ~(1 << KReg);
996 }
997
998
999 while (Defs) {
1001 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
1003 pushReg(DReg);
1004 Defs &= ~(1 << DReg);
1005 }
1006
1007
1009 assert(StackTop == (unsigned)llvm::popcount(Mask) && "Live count mismatch");
1010}
1011
1012
1013
1014
1015void FPS::shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
1017
1018 while (FixCount--) {
1019
1020 unsigned OldReg = getStackEntry(FixCount);
1021
1022 unsigned Reg = FixStack[FixCount];
1023 if (Reg == OldReg)
1024 continue;
1025
1027 if (FixCount > 0)
1028 moveToTop(OldReg, I);
1029 }
1031}
1032
1033
1034
1035
1036
1039 unsigned STReturns = 0;
1040
1041 bool ClobbersFPStack = false;
1042 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1043 MachineOperand &Op = MI.getOperand(i);
1044
1045
1046 if (Op.isRegMask()) {
1047 bool ClobbersFP0 = Op.clobbersPhysReg(X86::FP0);
1048#ifndef NDEBUG
1049 static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers");
1050 for (unsigned i = 1; i != 8; ++i)
1051 assert(Op.clobbersPhysReg(X86::FP0 + i) == ClobbersFP0 &&
1052 "Inconsistent FP register clobber");
1053#endif
1054
1055 if (ClobbersFP0)
1056 ClobbersFPStack = true;
1057 }
1058
1059 if (.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1060 continue;
1061
1062 assert(Op.isImplicit() && "Expected implicit def/use");
1063
1064 if (Op.isDef())
1066
1067
1068 MI.removeOperand(i);
1069 --i;
1070 --e;
1071 }
1072
1073
1074
1075
1076 assert((ClobbersFPStack || STReturns == 0) &&
1077 "ST returns without FP stack clobber");
1078 if (!ClobbersFPStack)
1079 return;
1080
1082
1083
1084
1086
1087
1088
1089
1090 while (StackTop > 0)
1091 popReg();
1092
1093 for (unsigned I = 0; I < N; ++I)
1095
1096
1097
1098 if (STReturns)
1099 I->dropDebugNumber();
1100}
1101
1102
1103
1106
1107
1108 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
1109 unsigned LiveMask = 0;
1110
1111 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1112 MachineOperand &Op = MI.getOperand(i);
1113 if (.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1114 continue;
1115
1116
1118 (Op.isKill() ||
1119 getFPReg(Op) == FirstFPRegOp ||
1120 MI.killsRegister(Op.getReg(),
1121 nullptr)) &&
1122 "Ret only defs operands, and values aren't live beyond it");
1123
1124 if (FirstFPRegOp == ~0U)
1126 else {
1127 assert(SecondFPRegOp == ~0U && "More than two fp operands!");
1129 }
1131
1132
1133 MI.removeOperand(i);
1134 --i;
1135 --e;
1136 }
1137
1138
1139
1140 adjustLiveRegs(LiveMask, MI);
1141 if (!LiveMask)
1142 return;
1143
1144
1145
1146
1147
1148 if (SecondFPRegOp == ~0U) {
1149
1150 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
1151 "Top of stack not the right register for RET!");
1152
1153
1154
1155
1156 StackTop = 0;
1157 return;
1158 }
1159
1160
1161
1162
1163 if (StackTop == 1) {
1164 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0) &&
1165 "Stack misconfiguration for RET!");
1166
1167
1168
1169 unsigned NewReg = ScratchFPReg;
1170 duplicateToTop(FirstFPRegOp, NewReg, MI);
1171 FirstFPRegOp = NewReg;
1172 }
1173
1174
1175 assert(StackTop == 2 && "Must have two values live!");
1176
1177
1178
1179 if (getStackEntry(0) == SecondFPRegOp) {
1180 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
1181 moveToTop(FirstFPRegOp, MI);
1182 }
1183
1184
1185
1186 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
1187 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
1188 StackTop = 0;
1189}
1190
1191
1192
1195 unsigned DestReg = getFPReg(MI.getOperand(0));
1196
1197
1198 MI.removeOperand(0);
1200 MI.addOperand(
1202
1203
1204 pushReg(DestReg);
1205
1206 MI.dropDebugNumber();
1207}
1208
1209
1210
1213 unsigned NumOps = MI.getDesc().getNumOperands();
1215 "Can only handle fst* & ftst instructions!");
1216
1217
1219 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, nullptr);
1220
1221
1222
1223
1224
1225
1226
1227 if (!KillsSrc && (MI.getOpcode() == X86::IST_Fp64m32 ||
1228 MI.getOpcode() == X86::ISTT_Fp16m32 ||
1229 MI.getOpcode() == X86::ISTT_Fp32m32 ||
1230 MI.getOpcode() == X86::ISTT_Fp64m32 ||
1231 MI.getOpcode() == X86::IST_Fp64m64 ||
1232 MI.getOpcode() == X86::ISTT_Fp16m64 ||
1233 MI.getOpcode() == X86::ISTT_Fp32m64 ||
1234 MI.getOpcode() == X86::ISTT_Fp64m64 ||
1235 MI.getOpcode() == X86::IST_Fp64m80 ||
1236 MI.getOpcode() == X86::ISTT_Fp16m80 ||
1237 MI.getOpcode() == X86::ISTT_Fp32m80 ||
1238 MI.getOpcode() == X86::ISTT_Fp64m80 ||
1239 MI.getOpcode() == X86::ST_FpP80m)) {
1240 duplicateToTop(Reg, ScratchFPReg, I);
1241 } else {
1243 }
1244
1245
1246 MI.removeOperand(NumOps - 1);
1248 MI.addOperand(
1250
1251 if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
1252 MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
1253 MI.getOpcode() == X86::ST_FP80m) {
1254 if (StackTop == 0)
1256 --StackTop;
1257 } else if (KillsSrc) {
1258 popStackAfter(I);
1259 }
1260
1261 MI.dropDebugNumber();
1262}
1263
1264
1265
1266
1267
1268
1269
1270
1271
1274#ifndef NDEBUG
1275 unsigned NumOps = MI.getDesc().getNumOperands();
1276 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
1277#endif
1278
1279
1281 bool KillsSrc = MI.killsRegister(X86::FP0 + Reg, nullptr);
1282
1283 if (KillsSrc) {
1284
1285
1287 if (StackTop == 0)
1289 --StackTop;
1290 pushReg(getFPReg(MI.getOperand(0)));
1291 } else {
1292
1293
1295 }
1296
1297
1298 MI.removeOperand(1);
1299 MI.removeOperand(0);
1301 MI.dropDebugNumber();
1302}
1303
1304
1305
1306
1307
1308
1310 {X86::ADD_Fp32, X86::ADD_FST0r}, {X86::ADD_Fp64, X86::ADD_FST0r},
1311 {X86::ADD_Fp80, X86::ADD_FST0r}, {X86::DIV_Fp32, X86::DIV_FST0r},
1312 {X86::DIV_Fp64, X86::DIV_FST0r}, {X86::DIV_Fp80, X86::DIV_FST0r},
1313 {X86::MUL_Fp32, X86::MUL_FST0r}, {X86::MUL_Fp64, X86::MUL_FST0r},
1314 {X86::MUL_Fp80, X86::MUL_FST0r}, {X86::SUB_Fp32, X86::SUB_FST0r},
1315 {X86::SUB_Fp64, X86::SUB_FST0r}, {X86::SUB_Fp80, X86::SUB_FST0r},
1316};
1317
1318
1320 {X86::ADD_Fp32, X86::ADD_FST0r},
1321 {X86::ADD_Fp64, X86::ADD_FST0r},
1322 {X86::ADD_Fp80, X86::ADD_FST0r},
1323 {X86::DIV_Fp32, X86::DIVR_FST0r},
1324 {X86::DIV_Fp64, X86::DIVR_FST0r},
1325 {X86::DIV_Fp80, X86::DIVR_FST0r},
1326 {X86::MUL_Fp32, X86::MUL_FST0r},
1327 {X86::MUL_Fp64, X86::MUL_FST0r},
1328 {X86::MUL_Fp80, X86::MUL_FST0r},
1329 {X86::SUB_Fp32, X86::SUBR_FST0r},
1330 {X86::SUB_Fp64, X86::SUBR_FST0r},
1331 {X86::SUB_Fp80, X86::SUBR_FST0r},
1332};
1333
1334
1336 {X86::ADD_Fp32, X86::ADD_FrST0},
1337 {X86::ADD_Fp64, X86::ADD_FrST0},
1338 {X86::ADD_Fp80, X86::ADD_FrST0},
1339 {X86::DIV_Fp32, X86::DIVR_FrST0},
1340 {X86::DIV_Fp64, X86::DIVR_FrST0},
1341 {X86::DIV_Fp80, X86::DIVR_FrST0},
1342 {X86::MUL_Fp32, X86::MUL_FrST0},
1343 {X86::MUL_Fp64, X86::MUL_FrST0},
1344 {X86::MUL_Fp80, X86::MUL_FrST0},
1345 {X86::SUB_Fp32, X86::SUBR_FrST0},
1346 {X86::SUB_Fp64, X86::SUBR_FrST0},
1347 {X86::SUB_Fp80, X86::SUBR_FrST0},
1348};
1349
1350
1352 {X86::ADD_Fp32, X86::ADD_FrST0}, {X86::ADD_Fp64, X86::ADD_FrST0},
1353 {X86::ADD_Fp80, X86::ADD_FrST0}, {X86::DIV_Fp32, X86::DIV_FrST0},
1354 {X86::DIV_Fp64, X86::DIV_FrST0}, {X86::DIV_Fp80, X86::DIV_FrST0},
1355 {X86::MUL_Fp32, X86::MUL_FrST0}, {X86::MUL_Fp64, X86::MUL_FrST0},
1356 {X86::MUL_Fp80, X86::MUL_FrST0}, {X86::SUB_Fp32, X86::SUB_FrST0},
1357 {X86::SUB_Fp64, X86::SUB_FrST0}, {X86::SUB_Fp80, X86::SUB_FrST0},
1358};
1359
1360
1361
1362
1363
1364
1365
1366
1367
1374
1375 unsigned NumOperands = MI.getDesc().getNumOperands();
1376 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
1377 unsigned Dest = getFPReg(MI.getOperand(0));
1378 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1379 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1380 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, nullptr);
1381 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);
1382 const DebugLoc &dl = MI.getDebugLoc();
1383
1384 unsigned TOS = getStackEntry(0);
1385
1386
1387
1388 if (Op0 != TOS && Op1 != TOS) {
1389
1390
1391
1392 if (KillsOp0) {
1393 moveToTop(Op0, I);
1394 TOS = Op0;
1395 } else if (KillsOp1) {
1396 moveToTop(Op1, I);
1397 TOS = Op1;
1398 } else {
1399
1400
1401
1402
1403
1404 duplicateToTop(Op0, Dest, I);
1405 Op0 = TOS = Dest;
1406 KillsOp0 = true;
1407 }
1408 } else if (!KillsOp0 && !KillsOp1) {
1409
1410
1411
1412 duplicateToTop(Op0, Dest, I);
1413 Op0 = TOS = Dest;
1414 KillsOp0 = true;
1415 }
1416
1417
1418
1419 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
1420 "Stack conditions not set up right!");
1421
1422
1423
1424 ArrayRef InstTable;
1425 bool isForward = TOS == Op0;
1426 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
1427 if (updateST0) {
1428 if (isForward)
1430 else
1432 } else {
1433 if (isForward)
1435 else
1437 }
1438
1439 int Opcode = Lookup(InstTable, MI.getOpcode());
1440 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
1441
1442
1443 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
1444
1445
1448
1449 if (.mayRaiseFPException())
1450 I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1451
1452
1453
1454 if (KillsOp0 && KillsOp1 && Op0 != Op1) {
1455 assert(!updateST0 && "Should have updated other operand!");
1456 popStackAfter(I);
1457 }
1458
1459
1460
1461 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
1462 assert(UpdatedSlot < StackTop && Dest < 7);
1463 Stack[UpdatedSlot] = Dest;
1464 RegMap[Dest] = UpdatedSlot;
1465 MBB->getParent()->deleteMachineInstr(&MI);
1466}
1467
1468
1469
1470
1473
1474 unsigned NumOperands = MI.getDesc().getNumOperands();
1475 assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
1476 unsigned Op0 = getFPReg(MI.getOperand(NumOperands - 2));
1477 unsigned Op1 = getFPReg(MI.getOperand(NumOperands - 1));
1478 bool KillsOp0 = MI.killsRegister(X86::FP0 + Op0, nullptr);
1479 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);
1480
1481
1482
1483 moveToTop(Op0, I);
1484
1485
1486 MI.getOperand(0).setReg(getSTReg(Op1));
1487 MI.removeOperand(1);
1489 MI.dropDebugNumber();
1490
1491
1492 if (KillsOp0)
1493 freeStackSlotAfter(I, Op0);
1494 if (KillsOp1 && Op0 != Op1)
1495 freeStackSlotAfter(I, Op1);
1496}
1497
1498
1499
1500
1501
1504
1505 unsigned Op0 = getFPReg(MI.getOperand(0));
1506 unsigned Op1 = getFPReg(MI.getOperand(2));
1507 bool KillsOp1 = MI.killsRegister(X86::FP0 + Op1, nullptr);
1508
1509
1510 moveToTop(Op0, I);
1511
1512
1513
1514 MI.removeOperand(0);
1515 MI.removeOperand(1);
1516 MI.getOperand(0).setReg(getSTReg(Op1));
1518 MI.dropDebugNumber();
1519
1520
1521 if (Op0 != Op1 && KillsOp1) {
1522
1523 freeStackSlotAfter(I, Op1);
1524 }
1525}
1526
1527
1528
1529
1530
1532 MachineInstr &MI = *Inst;
1533
1534 if (MI.isCall()) {
1535 handleCall(Inst);
1536 return;
1537 }
1538
1539 if (MI.isReturn()) {
1540 handleReturn(Inst);
1541 return;
1542 }
1543
1544 switch (MI.getOpcode()) {
1545 default:
1547 case TargetOpcode::COPY: {
1548
1549 const MachineOperand &MO1 = MI.getOperand(1);
1550 const MachineOperand &MO0 = MI.getOperand(0);
1551 bool KillsSrc = MI.killsRegister(MO1.getReg(), nullptr);
1552
1553
1554 unsigned DstFP = getFPReg(MO0);
1555 unsigned SrcFP = getFPReg(MO1);
1556 assert(isLive(SrcFP) && "Cannot copy dead register");
1557 if (KillsSrc) {
1558
1559
1560 unsigned Slot = getSlot(SrcFP);
1562 RegMap[DstFP] = Slot;
1563 } else {
1564
1565
1566 duplicateToTop(SrcFP, DstFP, Inst);
1567 }
1568 break;
1569 }
1570
1571 case TargetOpcode::IMPLICIT_DEF: {
1572
1573 unsigned Reg = MI.getOperand(0).getReg() - X86::FP0;
1574 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg << '\n');
1576 pushReg(Reg);
1577 break;
1578 }
1579
1580 case TargetOpcode::INLINEASM:
1581 case TargetOpcode::INLINEASM_BR: {
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615 unsigned STUses = 0, STDefs = 0, STClobbers = 0;
1616 unsigned NumOps = 0;
1617 SmallSet<unsigned, 1> FRegIdx;
1618 unsigned RCID;
1619
1621 i != e && MI.getOperand(i).isImm(); i += 1 + NumOps) {
1622 unsigned Flags = MI.getOperand(i).getImm();
1623 const InlineAsm::Flag F(Flags);
1624
1625 NumOps = F.getNumOperandRegisters();
1627 continue;
1628 const MachineOperand &MO = MI.getOperand(i + 1);
1629 if (!MO.isReg())
1630 continue;
1631 unsigned STReg = MO.getReg() - X86::FP0;
1632 if (STReg >= 8)
1633 continue;
1634
1635
1636
1637 if (F.hasRegClassConstraint(RCID)) {
1638 FRegIdx.insert(i + 1);
1639 continue;
1640 }
1641
1642 switch (F.getKind()) {
1643 case InlineAsm::Kind::RegUse:
1644 STUses |= (1u << STReg);
1645 break;
1646 case InlineAsm::Kind::RegDef:
1647 case InlineAsm::Kind::RegDefEarlyClobber:
1648 STDefs |= (1u << STReg);
1649 break;
1650 case InlineAsm::Kind::Clobber:
1651 STClobbers |= (1u << STReg);
1652 break;
1653 default:
1654 break;
1655 }
1656 }
1657
1658 if (STUses && (STUses))
1659 MI.emitGenericError("fixed input regs must be last on the x87 stack");
1661
1662
1663 if (STDefs && (STDefs)) {
1664 MI.emitGenericError("output regs must be last on the x87 stack");
1666 }
1668
1669
1670 if (STClobbers && (STDefs | STClobbers))
1671 MI.emitGenericError("clobbers must be last on the x87 stack");
1672
1673
1674 unsigned STPopped = STUses & (STDefs | STClobbers);
1675 if (STPopped && (STPopped))
1676 MI.emitGenericError(
1677 "implicitly popped regs must be last on the x87 stack");
1679
1680 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
1681 << NumSTPopped << ", and defines " << NumSTDefs
1682 << " regs.\n");
1683
1684#ifndef NDEBUG
1685
1686
1687 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I)
1690 "Operands with constraint \"f\" cannot overlap with defs");
1691 }
1692#endif
1693
1694
1695
1696 unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
1697 for (const MachineOperand &Op : MI.operands()) {
1698 if (.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1699 continue;
1701
1702
1703
1704
1705 if (Op.isUse() && Op.isKill())
1706 FPKills |= 1U << FPReg;
1707 }
1708
1709
1710 FPKills &= ~(STDefs | STClobbers);
1711
1712
1713 unsigned char STUsesArray[8];
1714
1715 for (unsigned I = 0; I < NumSTUses; ++I)
1717
1718 shuffleStackTop(STUsesArray, NumSTUses, Inst);
1720 dbgs() << "Before asm: ";
1721 dumpStack();
1722 });
1723
1724
1725 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
1726 MachineOperand &Op = MI.getOperand(i);
1727 if (.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
1728 continue;
1729
1731
1732 if (FRegIdx.count(i))
1733
1734 Op.setReg(getSTReg(FPReg));
1735 else
1736
1737 Op.setReg(X86::ST0 + FPReg);
1738 }
1739
1740
1741 StackTop -= NumSTPopped;
1742
1743 for (unsigned i = 0; i < NumSTDefs; ++i)
1744 pushReg(NumSTDefs - i - 1);
1745
1746
1747
1748
1749
1750
1751
1752
1753 while (FPKills) {
1755 if (isLive(FPReg))
1756 freeStackSlotAfter(Inst, FPReg);
1757 FPKills &= ~(1U << FPReg);
1758 }
1759
1760
1761 return;
1762 }
1763
1764
1765
1766
1767
1768 case TargetOpcode::FAKE_USE: {
1769 assert(MI.getNumExplicitOperands() == 1 &&
1770 "FAKE_USE must have exactly one operand");
1771 if (MI.getOperand(0).isKill()) {
1772 freeStackSlotBefore(Inst, getFPReg(MI.getOperand(0)));
1773 }
1774 MI.removeOperand(0);
1775 return;
1776 }
1777 }
1778
1780
1781
1782
1786 } else
1787 --Inst;
1788}
1789
// setKillFlags - Walk MBB with a LiveRegUnits tracker seeded from the block's
// live-outs, stepping backward over each non-debug instruction, and inspect
// the FP register operands of each instruction along the way.
// NOTE(review): listing lines 1792, 1797, 1802, 1816 and 1823 are missing (the
// TRI initializer, the loop header over instructions, the declaration of
// Uses, and the statements run for available defs and for selected uses).
// Presumably the last two set dead/kill flags; confirm against the upstream
// source.
1790void FPS::setKillFlags(MachineBasicBlock &MBB) const {
1791 const TargetRegisterInfo &TRI =
1793 LiveRegUnits LPR(TRI);
1794
1795 LPR.addLiveOuts(MBB);
1796
1798 if (MI.isDebugInstr())
1799 continue;
1800
// Bit N records that this instruction defines register X86::FP0 + N.
1801 std::bitset<8> Defs;
1803
1804 for (auto &MO : MI.operands()) {
1805 if (!MO.isReg())
1806 continue;
1807
// Unsigned subtraction: registers below FP0 wrap to large values and are
// rejected by the `>= 8` test together with those above FP0 + 7.
1808 unsigned Reg = MO.getReg() - X86::FP0;
1809
1810 if (Reg >= 8)
1811 continue;
1812
1813 if (MO.isDef()) {
1814 Defs.set(Reg);
1815 if (LPR.available(MO.getReg()))
1817 } else
1818 Uses.push_back(&MO);
1819 }
1820
// Uses are examined only after all operands were scanned, so Defs is complete
// for this instruction.
1821 for (auto *MO : Uses)
1822 if (Defs.test(getFPReg(*MO)) || LPR.available(MO->getReg()))
1824
1825 LPR.stepBackward(MI);
1826 }
1827}
1828
// Legacy pass-manager entry point: return false without touching MF when
// FPS::shouldRun rejects it, otherwise fetch the edge bundles and run FPS.
// NOTE(review): the template argument of getAnalysis has been dropped from
// this listing and must be restored before this compiles.
// NOTE(review): the final statement runs a freshly constructed FPS instead of
// Impl, whereas the other entry point later in this file calls Impl.run.
// Confirm whether the second instance is intentional.
1829bool X86FPStackifierLegacy::runOnMachineFunction(MachineFunction &MF) {
1830 FPS Impl;
1831 if (!Impl.shouldRun(MF))
1832 return false;
1833
1834 EdgeBundles *Bundles =
1835 &getAnalysis().getEdgeBundles();
1836 return FPS().run(MF, Bundles);
1837}
1838
1839PreservedAnalyses
1842 FPS Impl;
1843 if (!Impl.shouldRun(MF))
1845
1847 bool Changed = Impl.run(MF, Bundles);
1852 return PA;
1853}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static Register getFPReg(const CSKYSubtarget &STI)
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static constexpr MCPhysReg FPReg
Remove Loads Into Fake Uses
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static const TableEntry ReverseST0Table[]
Definition X86FloatingPoint.cpp:1319
#define ASSERT_SORTED(TABLE)
Definition X86FloatingPoint.cpp:645
static const TableEntry ForwardST0Table[]
Definition X86FloatingPoint.cpp:1309
static bool doesInstructionSetFPSW(MachineInstr &MI)
Definition X86FloatingPoint.cpp:865
static unsigned getFPReg(const MachineOperand &MO)
getFPReg - Return the X86::FPx register number for the specified operand.
Definition X86FloatingPoint.cpp:323
static const TableEntry ForwardSTiTable[]
Definition X86FloatingPoint.cpp:1335
static const TableEntry OpcodeTable[]
Definition X86FloatingPoint.cpp:664
static const TableEntry ReverseSTiTable[]
Definition X86FloatingPoint.cpp:1351
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Definition X86FloatingPoint.cpp:635
static const TableEntry PopTable[]
Definition X86FloatingPoint.cpp:843
static unsigned getConcreteOpcode(unsigned Opcode)
Definition X86FloatingPoint.cpp:829
static MachineBasicBlock::iterator getNextFPInstruction(MachineBasicBlock::iterator I)
Definition X86FloatingPoint.cpp:874
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
FunctionPass class - This class is used to implement most global optimizations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LiveInVector::const_iterator livein_iterator
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void setFlag(MIFlag Flag)
Set a MI flag.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition X86FloatingPoint.cpp:1840
Pass manager infrastructure for declaring and invalidating analyses.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ SpecialFP
SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
@ NotFP
NotFP - The default, set for instructions that do not use FP registers.
@ OneArgFPRW
OneArgFPRW - 1 arg FP instruction which implicitly read ST(0) and write a result back to ST(0).
@ ZeroArgFP
ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), e.g. fld0.
@ OneArgFP
OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst.
@ CompareFP
CompareFP - 2 arg FP instructions which implicitly read ST(0) and an explicit argument,...
@ CondMovFP
CondMovFP - "2 operand" floating point conditional move instructions.
@ TwoArgFP
TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an explicit argument,...
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< df_ext_iterator< T, SetTy > > depth_first_ext(const T &G, SetTy &S)
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
LLVM_ABI char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
LLVM_ABI char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
FunctionPass * createX86FPStackifierLegacyPass()
Definition X86FloatingPoint.cpp:317
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::pair< iterator, bool > insert(NodeRef N)