LLVM: lib/CodeGen/ExpandFp.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
39#include
40
41#define DEBUG_TYPE "expand-fp"
42
43using namespace llvm;
44
48 cl::desc("fp convert instructions on integers with "
49 "more than bits are expanded."));
50
51namespace {
52
53
54
55class FRemExpander {
56
58
59
60
62
63
64
65 Type *ComputeFpTy;
66
67
69
70
71
73
74
76
77public:
78 static bool canExpandType(Type *Ty) {
79
80
81 return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
82 }
83
85 assert(canExpandType(Ty) && "Expected supported floating point type");
86
87
88
89 Type *ComputeTy = Ty;
90
91
92
93 unsigned MaxIter = 2;
94
96
98 MaxIter = 1;
99 }
100
101 unsigned Precision =
103 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
104 }
105
106
107
108
109
110 Value *buildFRem(Value *X, Value *Y, std::optional &SQ) const;
111
112
113
114
116
117private:
118 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
119 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
120 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};
121
122 Value *createRcp(Value *V, const Twine &Name) const {
123
124
125 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
126 }
127
128
129
131
132
133
134
135
136
137 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
138 {}, "q");
139 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
142 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
143 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
144 }
145
146
147
148
149 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
150 const Twine &ExName,
151 const Twine &PowName) const {
152
153
154
155 Type *Ty = Src->getType();
156 Type *ExTy = B.getInt32Ty();
157 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
158 Value *Mant = B.CreateExtractValue(Frexp, {0});
159 Value *Exp = B.CreateExtractValue(Frexp, {1});
160
161 Exp = B.CreateSub(Exp, One, ExName);
162 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
163
164 return {Pow, Exp};
165 }
166
167
168
169
170
171 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
172 PHINode *RetPhi, FastMathFlags FMF) const {
173 IRBuilder<>::FastMathFlagGuard Guard(B);
174 B.setFastMathFlags(FMF);
175
176
177
178
179
180
181 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
182 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
183
184
185
186
187 Value *Nb = B.CreateSub(Ex, Ey, "nb");
188 Value *Ayinv = createRcp(Ay, "ayinv");
189
190
191 BasicBlock *PreheaderBB = B.GetInsertBlock();
195
196 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
197
198
199
200
201
202
203
204 B.SetInsertPoint(LoopBB);
205 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
207
208 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
209 AxPhi->addIncoming(Ax, PreheaderBB);
210
211 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
212 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
213 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
214 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
215
216 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
217
218
219
220
221 B.SetInsertPoint(ExitBB);
222
223 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
224 AxPhiExit->addIncoming(Ax, PreheaderBB);
225 AxPhiExit->addIncoming(AxPhi, LoopBB);
226 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
227 NbExitPhi->addIncoming(NbIv, LoopBB);
228 NbExitPhi->addIncoming(Nb, PreheaderBB);
229
230 Value *AxFinal = B.CreateLdexp(
231 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
232 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
233
234
235
236
237 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
238 if (ComputeFpTy != FremTy)
239 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
240 Value *Ret = B.CreateCopySign(AxFinal, X);
241
243 }
244
245
246
247
248
249 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
250
251
253 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
254
255 RetPhi->addIncoming(Ret, B.GetInsertBlock());
256 }
257
258
259
261 std::optional &SQ,
262 bool NoInfs) const {
263
264
265
268 Ret);
271 ? B.getTrue()
272 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),
274 Ret = B.CreateSelect(XFinite, Ret, Nan);
275
276 return Ret;
277 }
278};
279
281 IRBuilder<>::FastMathFlagGuard Guard(B);
282
283
284
285
286 B.clearFastMathFlags();
287
288 Value *Quot = B.CreateFDiv(X, Y);
289 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
290 Value *Neg = B.CreateFNeg(Trunc);
291
292 return B.CreateFMA(Neg, Y, X);
293}
294
296 std::optional &SQ) const {
297 assert(X->getType() == FremTy && Y->getType() == FremTy);
298
299 FastMathFlags FMF = B.getFastMathFlags();
300
301
302
303
304
305
306
307
308 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");
309 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");
310 if (ComputeFpTy != X->getType()) {
311 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
312 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
313 }
314 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
315
316 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
317 Value *Ret = RetPhi;
318
319
320
322 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
323
328
329 auto SavedInsertPt = B.GetInsertPoint();
330
331
332
333
334
335
336
337 FastMathFlags ComputeFMF = FMF;
340
341 B.SetInsertPoint(ThenBB);
342 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);
344
345
346 B.SetInsertPoint(ElseBB);
347 buildElseBranch(Ax, Ay, X, RetPhi);
349
350 B.SetInsertPoint(SavedInsertPt);
351
352 return Ret;
353}
354}
355
357 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
358
360 assert(FRemExpander::canExpandType(Ty) &&
361 "Expected supported floating point type");
362
364
367
369 B.setFastMathFlags(FMF);
370 B.SetCurrentDebugLocation(I.getDebugLoc());
371
372 const FRemExpander Expander = FRemExpander::create(B, Ty);
374 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
375 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
376
377 I.replaceAllUsesWith(Ret);
379 I.eraseFromParent();
380
381 return true;
382}
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
437
439 auto *FloatVal = FPToI->getOperand(0);
441
443 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
444
445
446
447 Value *A1 = nullptr;
448 if (FloatVal->getType()->isHalfTy()) {
449 if (FPToI->getOpcode() == Instruction::FPToUI) {
450 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
451 A1 = Builder.CreateZExt(A0, IntTy);
452 } else {
453 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
454 A1 = Builder.CreateSExt(A0, IntTy);
455 }
459 return;
460 }
461
462
463
464 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
465 unsigned FloatWidth =
466 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
467 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
468 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
469 Value *ImplicitBit = Builder.CreateShl(
470 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
471 Value *SignificandMask =
472 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
473 Value *NegOne = Builder.CreateSExt(
478
479 BasicBlock *Entry = Builder.GetInsertBlock();
480 Function *F = Entry->getParent();
481 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
483 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
494
495 Entry->getTerminator()->eraseFromParent();
496
497
498 Builder.SetInsertPoint(Entry);
499 Value *FloatVal0 = FloatVal;
500
501
502 if (FloatVal->getType()->isX86_FP80Ty())
503 FloatVal0 =
504 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
506 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
507 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
508 Value *PosOrNeg = Builder.CreateICmpSGT(
513 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
514 Value *And2 = Builder.CreateAnd(
515 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
516 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
517 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
519 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
520 Builder.CreateCondBr(Cmp, End, IfEnd);
521
522
523 Builder.SetInsertPoint(IfEnd);
524 Value *Add1 = Builder.CreateAdd(
526 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
527 Value *Cmp3 = Builder.CreateICmpULT(
529 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
530
531
532 Builder.SetInsertPoint(IfThen5);
533 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
534 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
535 Builder.CreateBr(End);
536
537
538 Builder.SetInsertPoint(IfEnd9);
539 Value *Cmp10 = Builder.CreateICmpULT(
540 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
541 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
542
543
544 Builder.SetInsertPoint(IfThen12);
545 Value *Sub13 = Builder.CreateSub(
546 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
547 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
548 Value *Mul = Builder.CreateMul(Shr14, Sign);
549 Builder.CreateBr(End);
550
551
552 Builder.SetInsertPoint(IfElse);
553 Value *Sub15 = Builder.CreateAdd(
555 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
556 Value *Shl = Builder.CreateShl(Or, Sub15);
557 Value *Mul16 = Builder.CreateMul(Shl, Sign);
558 Builder.CreateBr(End);
559
560
561 Builder.SetInsertPoint(End, End->begin());
562 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
563
568
572}
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
659
663
664 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
666
667
668 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
669
670
671 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
672 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
673 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
674 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
675
676 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
677 "assumes integer width is larger than fp.");
678
680 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
681 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
682
683 BasicBlock *Entry = Builder.GetInsertBlock();
684 Function *F = Entry->getParent();
685 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
687 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
704
705 Entry->getTerminator()->eraseFromParent();
706
710
711
712 Builder.SetInsertPoint(Entry);
714 Builder.CreateCondBr(Cmp, End, IfEnd);
715
716
717 Builder.SetInsertPoint(IfEnd);
719 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
720 Value *Xor = Builder.CreateXor(Shr, IntVal);
721 Value *Sub = Builder.CreateSub(Xor, Shr);
722 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
723 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
724 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
725 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
726 FloatWidth == 128 ? Call : Cast);
727 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
728 FloatWidth == 128 ? Call : Cast);
729 Value *Cmp3 = Builder.CreateICmpSGT(
730 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
731 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
732
733
734 Builder.SetInsertPoint(IfThen4);
736 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
737 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
738
739
740 Builder.SetInsertPoint(SwBB);
742 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
743 Builder.CreateBr(SwEpilog);
744
745
746 Builder.SetInsertPoint(SwDefault);
747 Value *Sub5 = Builder.CreateSub(
748 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
749 FloatWidth == 128 ? Call : Cast);
750 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
751 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
752 FloatWidth == 128 ? Sub5 : ShProm);
754 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
755 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
756 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
758 FloatWidth == 128 ? Sub8 : ShProm9);
759 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
760 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
761 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
762 Value *Or = Builder.CreateOr(Shr6, Conv11);
763 Builder.CreateBr(SwEpilog);
764
765
766 Builder.SetInsertPoint(SwEpilog);
767 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
769 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
771 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
772 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
773 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
774 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
775 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
776 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
777 Value *Shr18 = nullptr;
778 if (IsSigned)
779 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
780 else
781 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
782 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
783 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
784 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
785 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
786 Value *ExtractT64 = nullptr;
787 if (FloatWidth > 80)
788 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
789 else
790 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
791 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
792
793
794 Builder.SetInsertPoint(IfThen20);
795 Value *Shr21 = nullptr;
796 if (IsSigned)
797 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
798 else
799 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
800 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
801 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
802 Value *ExtractT62 = nullptr;
803 if (FloatWidth > 80)
804 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
805 else
806 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
807 Builder.CreateBr(IfEnd26);
808
809
810 Builder.SetInsertPoint(IfElse);
811 Value *Sub24 = Builder.CreateAdd(
812 FloatWidth == 128 ? Call : Cast,
814 -(BitWidth - FPMantissaWidth - 1)));
815 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
816 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
817 FloatWidth == 128 ? Sub24 : ShProm25);
818 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
819 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
820 Value *ExtractT66 = nullptr;
821 if (FloatWidth > 80)
822 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
823 else
824 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
825 Builder.CreateBr(IfEnd26);
826
827
828 Builder.SetInsertPoint(IfEnd26);
829 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
830 AAddr1Off0->addIncoming(ExtractT, IfThen20);
831 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
832 AAddr1Off0->addIncoming(ExtractT61, IfElse);
833 PHINode *AAddr1Off32 = nullptr;
834 if (FloatWidth > 32) {
835 AAddr1Off32 =
836 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
837 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
838 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
839 AAddr1Off32->addIncoming(ExtractT66, IfElse);
840 }
842 if (FloatWidth <= 80) {
843 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
847 }
848 Value *And29 = nullptr;
849 if (FloatWidth > 80) {
850 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
851 Builder.getIntN(BitWidth, 63));
852 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
853 } else {
854 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
855 And29 = Builder.CreateAnd(
857 }
858 unsigned TempMod = FPMantissaWidth % 32;
859 Value *And34 = nullptr;
860 Value *Shl30 = nullptr;
861 if (FloatWidth > 80) {
862 TempMod += 32;
863 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
864 Shl30 = Builder.CreateAdd(
865 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
866 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
867 } else {
868 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
869 Shl30 = Builder.CreateAdd(
870 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
871 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
872 Builder.getInt32((1 << TempMod) - 1));
873 }
874 Value *Or35 = nullptr;
875 if (FloatWidth > 80) {
876 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
877 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
878 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
879 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
880 Builder.getIntN(128, FPMantissaWidth));
881 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
882 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
883 Or35 = Builder.CreateOr(Or34, A6);
884 } else {
885 Value *Or31 = Builder.CreateOr(And34, And29);
886 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
887 }
888 Value *A4 = nullptr;
890 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
891 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
893 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
894 Value *Or1 = Builder.CreateOr(Shl1, And1);
895 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
898 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
899 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
901
902
904 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
905 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
906 } else
907 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
908 Builder.CreateBr(End);
909
910
911 Builder.SetInsertPoint(End, End->begin());
912 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
915
919}
920
924
926
927 unsigned NumElements = VTy->getElementCount().getFixedValue();
929 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
930 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
931
932 Value *NewOp = nullptr;
934 NewOp = Builder.CreateBinOp(
935 BinOp->getOpcode(), Ext,
936 Builder.CreateExtractElement(I->getOperand(1), Idx));
938 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
939 I->getType()->getScalarType());
940 else
942
943 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
945 ScalarizedI->copyIRFlags(I, true);
947 }
948 }
949
950 I->replaceAllUsesWith(Result);
951 I->dropAllReferences();
952 I->eraseFromParent();
953}
954
955
956
957
958
960 assert(Ty->isFloatingPointTy());
961 if (Ty->isFloatTy() || Ty->is16bitFPTy())
962 return RTLIB::REM_F32;
963 if (Ty->isDoubleTy())
964 return RTLIB::REM_F64;
965 if (Ty->isFP128Ty())
966 return RTLIB::REM_F128;
967 if (Ty->isX86_FP80Ty())
968 return RTLIB::REM_F80;
969 if (Ty->isPPC_FP128Ty())
970 return RTLIB::REM_PPCF128;
971
973}
974
975
976
977
981 return true;
982
984}
985
988 if (I.getOperand(0)->getType()->isVectorTy())
990 else
992}
993
997
998 unsigned MaxLegalFpConvertBitWidth =
1002
1004 return false;
1005
1006 auto ShouldHandleInst = [&](Instruction &I) {
1008
1009 if (Ty->isScalableTy())
1010 return false;
1011
1012 switch (I.getOpcode()) {
1013 case Instruction::FRem:
1015 FRemExpander::canExpandType(Ty->getScalarType());
1016
1017 case Instruction::FPToUI:
1018 case Instruction::FPToSI: {
1020 return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1021 }
1022
1023 case Instruction::UIToFP:
1024 case Instruction::SIToFP: {
1025 auto *IntTy =
1027 return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1028 }
1029 }
1030
1031 return false;
1032 };
1033
1037 if (!ShouldHandleInst(I))
1038 continue;
1039
1042 }
1043
1044 while (!Worklist.empty()) {
1046
1047 switch (I->getOpcode()) {
1048 case Instruction::FRem: {
1049 auto SQ = [&]() -> std::optional {
1050 if (AC) {
1051 auto Res = std::make_optional(
1052 I->getModule()->getDataLayout(), I);
1053 Res->AC = AC;
1054 return Res;
1055 }
1056 return {};
1057 }();
1058
1060 break;
1061 }
1062
1063 case Instruction::FPToUI:
1064 case Instruction::FPToSI:
1066 break;
1067
1068 case Instruction::UIToFP:
1069 case Instruction::SIToFP:
1071 break;
1072 }
1073 }
1074
1076}
1077
1078namespace {
1079class ExpandFpLegacyPass : public FunctionPass {
1081
1082public:
1083 static char ID;
1084
1086 : FunctionPass(ID), OptLevel(OptLevel) {
1088 }
1089
1091
1093 auto *TM = &getAnalysis().getTM();
1094 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1095 auto *TLI = Subtarget->getTargetLowering();
1096 AssumptionCache *AC = nullptr;
1097
1098 const LibcallLoweringInfo &Libcalls =
1099 getAnalysis().getLibcallLowering(
1100 *Subtarget);
1101
1102 if (OptLevel != CodeGenOptLevel::None && .hasOptNone())
1103 AC = &getAnalysis().getAssumptionCache(F);
1104 return runImpl(F, *TLI, Libcalls, AC);
1105 }
1106
1107 void getAnalysisUsage(AnalysisUsage &AU) const override {
1108 AU.addRequired();
1110 if (OptLevel != CodeGenOptLevel::None)
1111 AU.addRequired();
1114 AU.addRequired();
1115 }
1116};
1117}
1118
1120 : TM(&TM), OptLevel(OptLevel) {}
1121
1125 OS, MapClassName2PassName);
1126 OS << '<';
1127 OS << "O" << (int)OptLevel;
1128 OS << '>';
1129}
1130
1137
1139
1142
1143 if (!LibcallLowering) {
1145 "' analysis required");
1147 }
1148
1150 LibcallLowering->getLibcallLowering(*STI);
1151
1154}
1155
1156char ExpandFpLegacyPass::ID = 0;
1158 "Expand certain fp instructions", false, false)
1161
1163 return new ExpandFpLegacyPass(OptLevel);
1164}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
Definition ExpandFp.cpp:356
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
Definition ExpandFp.cpp:658
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandFp.cpp:994
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
Definition ExpandFp.cpp:436
static RTLIB::Libcall fremToLibcall(Type *Ty)
Return the Libcall for a frem instruction of type Ty.
Definition ExpandFp.cpp:959
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
Definition ExpandFp.cpp:986
static bool targetSupportsFrem(const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, Type *Ty)
Definition ExpandFp.cpp:978
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
Definition ExpandFp.cpp:921
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition ExpandFp.cpp:1131
ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
Definition ExpandFp.cpp:1119
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition ExpandFp.cpp:1122
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
LLVM_ABI const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
Record a mapping from subtarget to LibcallLoweringInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI FunctionPass * createExpandFpPass()
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
A CRTP mix-in to automatically provide informational APIs needed for passes.