LLVM: lib/CodeGen/ExpandFp.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

39#include

40

41#define DEBUG_TYPE "expand-fp"

42

43using namespace llvm;

44

48 cl::desc("fp convert instructions on integers with "

49 "more than bits are expanded."));

50

51namespace {

52

53

54

55class FRemExpander {

56

58

59

60

62

63

64

65 Type *ComputeFpTy;

66

67

69

70

71

73

74

76

77public:

/// Returns true when \p Ty is a floating-point type this expander accepts.
/// \param Ty the (scalar) type to test.
/// \returns true iff Ty is IEEE-like and is neither bfloat nor fp128.
78 static bool canExpandType(Type *Ty) {

79 // The bfloat and fp128 exclusions are applied on top of the IEEE-like check,

80 // so all three conditions must hold for the type to be accepted.

81 return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();

82 }

83

85 assert(canExpandType(Ty) && "Expected supported floating point type");

86

87

88

89 Type *ComputeTy = Ty;

90

91

92

93 unsigned MaxIter = 2;

94

96

98 MaxIter = 1;

99 }

100

101 unsigned Precision =

103 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};

104 }

105

106

107

108

109

110 Value *buildFRem(Value *X, Value *Y, std::optional &SQ) const;

111

112

113

114

116

117private:

/// Private constructor: stores the builder and the two floating-point types
/// and materializes the integer constants used by the expansion.
/// \param B           builder that all generated IR is emitted through.
/// \param FremTy      type stored as FremTy.
/// \param Bits        unsigned value turned into the constant member Bits.
/// \param ComputeFpTy type stored as ComputeFpTy.
118 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)

119 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),

// Bits and One are constants of type ExTy (i32, per the initializer above).
// Inside ConstantInt::get the name `Bits` is the unsigned constructor
// parameter, not the member being initialized.
// NOTE(review): this reads ExTy while initializing Bits and One, so ExTy must
// be declared before them in the class -- the member declarations are not
// visible in this listing; confirm.
120 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};

121

/// Emits the reciprocal of \p V as a plain division.
/// \param V    the divisor.
/// \param Name name given to the created value.
/// \returns the result of B.CreateFDiv(1.0, V).
122 Value *createRcp(Value *V, const Twine &Name) const {

123 // The numerator is the constant 1.0 typed as ComputeFpTy; the reciprocal is

124 // expressed as an ordinary fdiv through the builder B.

125 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);

126 }

127

128

129

131

132

133

134

135

136

137 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),

138 {}, "q");

139 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");

142 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");

143 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");

144 }

145

146

147

148

/// Splits \p Src with the frexp intrinsic and rebuilds a value from its
/// mantissa with a caller-chosen exponent.
/// \param Src     floating-point value to decompose.
/// \param NewExp  exponent passed to ldexp together with the mantissa.
/// \param ExName  name given to the adjusted exponent value.
/// \param PowName name given to the ldexp result.
/// \returns the pair {Pow, Exp}: Pow is ldexp(mantissa, NewExp) and Exp is
///          the frexp exponent minus One. Note that this order is the reverse
///          of the ExName/PowName parameter order.
149 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,

150 const Twine &ExName,

151 const Twine &PowName) const {

152 // frexp is instantiated on Src's own type with an i32 exponent type.

153 // Its result is an aggregate: element 0 is extracted as the mantissa and

154 // element 1 as the exponent.

155 Type *Ty = Src->getType();

156 Type *ExTy = B.getInt32Ty();

157 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);

158 Value *Mant = B.CreateExtractValue(Frexp, {0});

159 Value *Exp = B.CreateExtractValue(Frexp, {1});

160 // Adjust the exponent by subtracting the constant One; only the mantissa feeds ldexp.

161 Exp = B.CreateSub(Exp, One, ExName);

162 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);

163 // Returned as {Pow, Exp}.

164 return {Pow, Exp};

165 }

166

167

168

169

170

171 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,

172 PHINode *RetPhi, FastMathFlags FMF) const {

173 IRBuilder<>::FastMathFlagGuard Guard(B);

174 B.setFastMathFlags(FMF);

175

176

177

178

179

180

181 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");

182 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");

183

184

185

186

187 Value *Nb = B.CreateSub(Ex, Ey, "nb");

188 Value *Ayinv = createRcp(Ay, "ayinv");

189

190

191 BasicBlock *PreheaderBB = B.GetInsertBlock();

195

196 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);

197

198

199

200

201

202

203

204 B.SetInsertPoint(LoopBB);

205 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");

207

208 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");

209 AxPhi->addIncoming(Ax, PreheaderBB);

210

211 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);

212 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");

213 AxPhi->addIncoming(AxPhiUpdate, LoopBB);

214 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);

215

216 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);

217

218

219

220

221 B.SetInsertPoint(ExitBB);

222

223 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");

224 AxPhiExit->addIncoming(Ax, PreheaderBB);

225 AxPhiExit->addIncoming(AxPhi, LoopBB);

226 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");

227 NbExitPhi->addIncoming(NbIv, LoopBB);

228 NbExitPhi->addIncoming(Nb, PreheaderBB);

229

230 Value *AxFinal = B.CreateLdexp(

231 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");

232 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);

233

234

235

236

237 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");

238 if (ComputeFpTy != FremTy)

239 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);

240 Value *Ret = B.CreateCopySign(AxFinal, X);

241

243 }

244

245

246

247

248

249 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {

250

251

253 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);

254

255 RetPhi->addIncoming(Ret, B.GetInsertBlock());

256 }

257

258

259

261 std::optional &SQ,

262 bool NoInfs) const {

263

264

265

268 Ret);

271 ? B.getTrue()

272 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),

274 Ret = B.CreateSelect(XFinite, Ret, Nan);

275

276 return Ret;

277 }

278};

279

281 IRBuilder<>::FastMathFlagGuard Guard(B);

282

283

284

285

286 B.clearFastMathFlags();

287

288 Value *Quot = B.CreateFDiv(X, Y);

289 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});

290 Value *Neg = B.CreateFNeg(Trunc);

291

292 return B.CreateFMA(Neg, Y, X);

293}

294

296 std::optional &SQ) const {

297 assert(X->getType() == FremTy && Y->getType() == FremTy);

298

299 FastMathFlags FMF = B.getFastMathFlags();

300

301

302

303

304

305

306

307

308 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");

309 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");

310 if (ComputeFpTy != X->getType()) {

311 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");

312 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");

313 }

314 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);

315

316 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");

317 Value *Ret = RetPhi;

318

319

320

322 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());

323

328

329 auto SavedInsertPt = B.GetInsertPoint();

330

331

332

333

334

335

336

337 FastMathFlags ComputeFMF = FMF;

340

341 B.SetInsertPoint(ThenBB);

342 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);

344

345

346 B.SetInsertPoint(ElseBB);

347 buildElseBranch(Ax, Ay, X, RetPhi);

349

350 B.SetInsertPoint(SavedInsertPt);

351

352 return Ret;

353}

354}

355

357 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');

358

359 Type *Ty = I.getType();

360 assert(FRemExpander::canExpandType(Ty) &&

361 "Expected supported floating point type");

362

364

367

369 B.setFastMathFlags(FMF);

370 B.SetCurrentDebugLocation(I.getDebugLoc());

371

372 const FRemExpander Expander = FRemExpander::create(B, Ty);

374 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))

375 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);

376

377 I.replaceAllUsesWith(Ret);

379 I.eraseFromParent();

380

381 return true;

382}

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

437

439 auto *FloatVal = FPToI->getOperand(0);

441

443 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;

444

445

446

447 Value *A1 = nullptr;

448 if (FloatVal->getType()->isHalfTy()) {

449 if (FPToI->getOpcode() == Instruction::FPToUI) {

450 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());

451 A1 = Builder.CreateZExt(A0, IntTy);

452 } else {

453 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());

454 A1 = Builder.CreateSExt(A0, IntTy);

455 }

459 return;

460 }

461

462

463

464 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;

465 unsigned FloatWidth =

466 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());

467 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;

468 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;

469 Value *ImplicitBit = Builder.CreateShl(

470 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));

471 Value *SignificandMask =

472 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));

473 Value *NegOne = Builder.CreateSExt(

478

479 BasicBlock *Entry = Builder.GetInsertBlock();

480 Function *F = Entry->getParent();

481 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));

483 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");

494

495 Entry->getTerminator()->eraseFromParent();

496

497

498 Builder.SetInsertPoint(Entry);

499 Value *FloatVal0 = FloatVal;

500

501

502 if (FloatVal->getType()->isX86_FP80Ty())

503 FloatVal0 =

504 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));

506 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));

507 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());

508 Value *PosOrNeg = Builder.CreateICmpSGT(

513 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));

514 Value *And2 = Builder.CreateAnd(

515 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));

516 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);

517 Value *Or = Builder.CreateOr(Abs, ImplicitBit);

519 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));

520 Builder.CreateCondBr(Cmp, End, IfEnd);

521

522

523 Builder.SetInsertPoint(IfEnd);

524 Value *Add1 = Builder.CreateAdd(

526 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));

527 Value *Cmp3 = Builder.CreateICmpULT(

529 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);

530

531

532 Builder.SetInsertPoint(IfThen5);

533 Value *PosInf = Builder.CreateXor(NegOne, NegInf);

534 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);

535 Builder.CreateBr(End);

536

537

538 Builder.SetInsertPoint(IfEnd9);

539 Value *Cmp10 = Builder.CreateICmpULT(

540 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));

541 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);

542

543

544 Builder.SetInsertPoint(IfThen12);

545 Value *Sub13 = Builder.CreateSub(

546 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);

547 Value *Shr14 = Builder.CreateLShr(Or, Sub13);

548 Value *Mul = Builder.CreateMul(Shr14, Sign);

549 Builder.CreateBr(End);

550

551

552 Builder.SetInsertPoint(IfElse);

553 Value *Sub15 = Builder.CreateAdd(

555 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));

556 Value *Shl = Builder.CreateShl(Or, Sub15);

557 Value *Mul16 = Builder.CreateMul(Shl, Sign);

558 Builder.CreateBr(End);

559

560

561 Builder.SetInsertPoint(End, End->begin());

562 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);

563

568

572}

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

659

663

664 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();

666

667

668 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;

669

670

671 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;

672 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;

673 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);

674 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;

675

676 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "

677 "assumes integer width is larger than fp.");

678

680 Builder.CreateShl(Builder.getIntN(BitWidth, 1),

681 Builder.getIntN(BitWidth, FPMantissaWidth + 3));

682

683 BasicBlock *Entry = Builder.GetInsertBlock();

684 Function *F = Entry->getParent();

685 Entry->setName(Twine(Entry->getName(), "itofp-entry"));

687 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");

704

705 Entry->getTerminator()->eraseFromParent();

706

710

711

712 Builder.SetInsertPoint(Entry);

714 Builder.CreateCondBr(Cmp, End, IfEnd);

715

716

717 Builder.SetInsertPoint(IfEnd);

719 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));

720 Value *Xor = Builder.CreateXor(Shr, IntVal);

721 Value *Sub = Builder.CreateSub(Xor, Shr);

722 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});

723 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());

724 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;

725 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),

726 FloatWidth == 128 ? Call : Cast);

727 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),

728 FloatWidth == 128 ? Call : Cast);

729 Value *Cmp3 = Builder.CreateICmpSGT(

730 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));

731 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);

732

733

734 Builder.SetInsertPoint(IfThen4);

736 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);

737 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);

738

739

740 Builder.SetInsertPoint(SwBB);

742 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));

743 Builder.CreateBr(SwEpilog);

744

745

746 Builder.SetInsertPoint(SwDefault);

747 Value *Sub5 = Builder.CreateSub(

748 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),

749 FloatWidth == 128 ? Call : Cast);

750 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);

751 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,

752 FloatWidth == 128 ? Sub5 : ShProm);

754 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,

755 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));

756 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);

758 FloatWidth == 128 ? Sub8 : ShProm9);

759 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);

760 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));

761 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);

762 Value *Or = Builder.CreateOr(Shr6, Conv11);

763 Builder.CreateBr(SwEpilog);

764

765

766 Builder.SetInsertPoint(SwEpilog);

767 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);

769 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);

771 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());

772 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));

773 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));

774 Value *Conv16 = Builder.CreateZExt(A2, IntTy);

775 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);

776 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));

777 Value *Shr18 = nullptr;

778 if (IsSigned)

779 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));

780 else

781 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));

782 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");

783 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));

784 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));

785 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));

786 Value *ExtractT64 = nullptr;

787 if (FloatWidth > 80)

788 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());

789 else

790 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());

791 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);

792

793

794 Builder.SetInsertPoint(IfThen20);

795 Value *Shr21 = nullptr;

796 if (IsSigned)

797 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));

798 else

799 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));

800 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));

801 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));

802 Value *ExtractT62 = nullptr;

803 if (FloatWidth > 80)

804 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());

805 else

806 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());

807 Builder.CreateBr(IfEnd26);

808

809

810 Builder.SetInsertPoint(IfElse);

811 Value *Sub24 = Builder.CreateAdd(

812 FloatWidth == 128 ? Call : Cast,

814 -(BitWidth - FPMantissaWidth - 1)));

815 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);

816 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,

817 FloatWidth == 128 ? Sub24 : ShProm25);

818 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));

819 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));

820 Value *ExtractT66 = nullptr;

821 if (FloatWidth > 80)

822 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());

823 else

824 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());

825 Builder.CreateBr(IfEnd26);

826

827

828 Builder.SetInsertPoint(IfEnd26);

829 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);

830 AAddr1Off0->addIncoming(ExtractT, IfThen20);

831 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);

832 AAddr1Off0->addIncoming(ExtractT61, IfElse);

833 PHINode *AAddr1Off32 = nullptr;

834 if (FloatWidth > 32) {

835 AAddr1Off32 =

836 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);

837 AAddr1Off32->addIncoming(ExtractT62, IfThen20);

838 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);

839 AAddr1Off32->addIncoming(ExtractT66, IfElse);

840 }

842 if (FloatWidth <= 80) {

843 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);

847 }

848 Value *And29 = nullptr;

849 if (FloatWidth > 80) {

850 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),

851 Builder.getIntN(BitWidth, 63));

852 And29 = Builder.CreateAnd(Shr, Temp2, "and29");

853 } else {

854 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());

855 And29 = Builder.CreateAnd(

857 }

858 unsigned TempMod = FPMantissaWidth % 32;

859 Value *And34 = nullptr;

860 Value *Shl30 = nullptr;

861 if (FloatWidth > 80) {

862 TempMod += 32;

863 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));

864 Shl30 = Builder.CreateAdd(

865 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));

866 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());

867 } else {

868 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));

869 Shl30 = Builder.CreateAdd(

870 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));

871 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,

872 Builder.getInt32((1 << TempMod) - 1));

873 }

874 Value *Or35 = nullptr;

875 if (FloatWidth > 80) {

876 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());

877 Value *Or31 = Builder.CreateOr(And29Trunc, And34);

878 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));

879 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),

880 Builder.getIntN(128, FPMantissaWidth));

881 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));

882 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);

883 Or35 = Builder.CreateOr(Or34, A6);

884 } else {

885 Value *Or31 = Builder.CreateOr(And34, And29);

886 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);

887 }

888 Value *A4 = nullptr;

890 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));

891 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));

893 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));

894 Value *Or1 = Builder.CreateOr(Shl1, And1);

895 A4 = Builder.CreateBitCast(Or1, IToFP->getType());

898 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));

899 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());

901

902

904 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));

905 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());

906 } else

907 A4 = Builder.CreateBitCast(Or35, IToFP->getType());

908 Builder.CreateBr(End);

909

910

911 Builder.SetInsertPoint(End, End->begin());

912 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);

915

919}

920

924

926

927 unsigned NumElements = VTy->getElementCount().getFixedValue();

929 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {

930 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);

931

932 Value *NewOp = nullptr;

934 NewOp = Builder.CreateBinOp(

935 BinOp->getOpcode(), Ext,

936 Builder.CreateExtractElement(I->getOperand(1), Idx));

938 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,

939 I->getType()->getScalarType());

940 else

942

943 Result = Builder.CreateInsertElement(Result, NewOp, Idx);

945 ScalarizedI->copyIRFlags(I, true);

947 }

948 }

949

950 I->replaceAllUsesWith(Result);

951 I->dropAllReferences();

952 I->eraseFromParent();

953}

954

955

956

957

958

960 assert(Ty->isFloatingPointTy());

961 if (Ty->isFloatTy() || Ty->is16bitFPTy())

962 return RTLIB::REM_F32;

963 if (Ty->isDoubleTy())

964 return RTLIB::REM_F64;

965 if (Ty->isFP128Ty())

966 return RTLIB::REM_F128;

967 if (Ty->isX86_FP80Ty())

968 return RTLIB::REM_F80;

969 if (Ty->isPPC_FP128Ty())

970 return RTLIB::REM_PPCF128;

971

973}

974

975

976

977

981 return true;

982

984}

985

988 if (I.getOperand(0)->getType()->isVectorTy())

990 else

992}

993

997

998 unsigned MaxLegalFpConvertBitWidth =

1002

1004 return false;

1005

1006 auto ShouldHandleInst = [&](Instruction &I) {

1007 Type *Ty = I.getType();

1008

1009 if (Ty->isScalableTy())

1010 return false;

1011

1012 switch (I.getOpcode()) {

1013 case Instruction::FRem:

1015 FRemExpander::canExpandType(Ty->getScalarType());

1016

1017 case Instruction::FPToUI:

1018 case Instruction::FPToSI: {

1020 return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;

1021 }

1022

1023 case Instruction::UIToFP:

1024 case Instruction::SIToFP: {

1025 auto *IntTy =

1027 return IntTy->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;

1028 }

1029 }

1030

1031 return false;

1032 };

1033

1037 if (!ShouldHandleInst(I))

1038 continue;

1039

1042 }

1043

1044 while (!Worklist.empty()) {

1046

1047 switch (I->getOpcode()) {

1048 case Instruction::FRem: {

1049 auto SQ = [&]() -> std::optional {

1050 if (AC) {

1051 auto Res = std::make_optional(

1052 I->getModule()->getDataLayout(), I);

1053 Res->AC = AC;

1054 return Res;

1055 }

1056 return {};

1057 }();

1058

1060 break;

1061 }

1062

1063 case Instruction::FPToUI:

1064 case Instruction::FPToSI:

1066 break;

1067

1068 case Instruction::UIToFP:

1069 case Instruction::SIToFP:

1071 break;

1072 }

1073 }

1074

1076}

1077

1078namespace {

1079class ExpandFpLegacyPass : public FunctionPass {

1081

1082public:

1083 static char ID;

1084

1086 : FunctionPass(ID), OptLevel(OptLevel) {

1088 }

1089

1091

// Body of the legacy-pass entry point (its signature line is not present in
// this listing). Gathers the per-function target lowering and libcall
// information, optionally an AssumptionCache, and forwards to runImpl, whose
// result is returned.
1093 auto *TM = &getAnalysis().getTM();

1094 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);

1095 auto *TLI = Subtarget->getTargetLowering();

1096 AssumptionCache *AC = nullptr;

1097 // AC stays null unless the guard below decides assumptions may be used.

1098 const LibcallLoweringInfo &Libcalls =

1099 getAnalysis().getLibcallLowering(

1100 *Subtarget);

1101 // Fetch the AssumptionCache only when optimizing AND the function is not
// marked optnone. The previous condition tested F.hasOptNone() without the
// negation, which supplied the cache only for optnone functions and left AC
// null for every function that is actually optimized.
// NOTE(review): getAnalysisUsage adds one required analysis only when
// OptLevel != None -- presumably the one queried here; confirm.

1102 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())

1103 AC = &getAnalysis().getAssumptionCache(F);

1104 return runImpl(F, *TLI, Libcalls, AC);

1105 }

1106

1107 void getAnalysisUsage(AnalysisUsage &AU) const override {

1108 AU.addRequired();

1110 if (OptLevel != CodeGenOptLevel::None)

1111 AU.addRequired();

1114 AU.addRequired();

1115 }

1116};

1117}

1118

1120 : TM(&TM), OptLevel(OptLevel) {}

1121

1125 OS, MapClassName2PassName);

1126 OS << '<';

1127 OS << "O" << (int)OptLevel;

1128 OS << '>';

1129}

1130

1137

1139

1142

1143 if (!LibcallLowering) {

1145 "' analysis required");

1147 }

1148

1150 LibcallLowering->getLibcallLowering(*STI);

1151

1154}

1155

1156char ExpandFpLegacyPass::ID = 0;

1158 "Expand certain fp instructions", false, false)

1161

1163 return new ExpandFpLegacyPass(OptLevel);

1164}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static bool runOnFunction(Function &F, bool PostInlining)

static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)

Definition ExpandFp.cpp:356

static void expandIToFP(Instruction *IToFP)

Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.

Definition ExpandFp.cpp:658

static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)

Definition ExpandFp.cpp:994

static void expandFPToI(Instruction *FPToI)

Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.

Definition ExpandFp.cpp:436

static RTLIB::Libcall fremToLibcall(Type *Ty)

Return the Libcall for a frem instruction of type Ty.

Definition ExpandFp.cpp:959

static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)

Definition ExpandFp.cpp:986

static bool targetSupportsFrem(const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, Type *Ty)

Definition ExpandFp.cpp:978

static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than bits are expanded."))

static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)

Definition ExpandFp.cpp:921

This is the interface for a simple mod/ref and alias analysis over globals.

Module.h This file contains the declarations for the Module class.

This header defines various interfaces for pass management in LLVM.

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the SmallVector class.

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

This file describes how to lower LLVM code to machine code.

Target-Independent Code Generator Pass Configuration Options pass.

static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

A function analysis which provides an AssumptionCache.

A cache of @llvm.assume calls within a function.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

const Function * getParent() const

Return the enclosing method, or null if none.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)

Split the basic block into two basic blocks at the specified instruction.

@ FCMP_OLT

0 1 0 0 True if ordered and less than

@ ICMP_SGT

signed greater than

static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)

static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)

static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)

This is the shared class of boolean and integer constants.

static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition ExpandFp.cpp:1131

ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)

Definition ExpandFp.cpp:1119

void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)

Definition ExpandFp.cpp:1122

Convenience struct for specifying and reasoning about fast-math flags.

void setAllowContract(bool B=true)

void setAllowReciprocal(bool B=true)

void setNoNaNs(bool B=true)

void setNoInfs(bool B=true)

FunctionPass class - This class is used to implement most global optimizations.

Module * getParent()

Get the module that this global value is contained inside of...

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

Class to represent integer types.

@ MAX_INT_BITS

Maximum number of bits that can be specified.

Tracks which library functions to use for a particular subtarget.

LLVM_ABI const char * getLibcallName(RTLIB::Libcall Call) const

Get the libcall routine name for the specified libcall.

Record a mapping from subtarget to LibcallLoweringInfo.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

bool isOperationExpand(unsigned Op, EVT VT) const

Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...

unsigned getMaxLargeFPConvertBitWidthSupported() const

Returns the size in bits of the maximum fp to/from int conversion the backend supports.

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

Primary interface to the complete machine description for the target machine.

TargetSubtargetInfo - Generic base class for all target subtargets.

virtual const TargetLowering * getTargetLowering() const

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getIntegerBitWidth() const

bool isX86_FP80Ty() const

Return true if this is x86 long double.

bool isBFloatTy() const

Return true if this is 'bfloat', a 16-bit bfloat type.

static LLVM_ABI Type * getFP128Ty(LLVMContext &C)

bool isHalfTy() const

Return true if this is 'half', a 16-bit IEEE fp type.

bool isDoubleTy() const

Return true if this is 'double', a 64-bit IEEE fp type.

static LLVM_ABI Type * getFloatTy(LLVMContext &C)

LLVM_ABI int getFPMantissaWidth() const

Return the width of the mantissa of this type.

LLVM_ABI const fltSemantics & getFltSemantics() const

void dropAllReferences()

Drop all references to operands.

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

LLVM_ABI void takeName(Value *V)

Transfer the name from V to this value.

An efficient, type-erasing, non-owning reference to a callable.

const ParentTy * getParent() const

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ BasicBlock

Various leaf nodes.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)

Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy

Provide the ModuleAnalysisManager to Function proxy.

inst_iterator inst_begin(Function *F)

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)

SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

CodeGenOptLevel

Code generation optimization level.

inst_iterator inst_end(Function *F)

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &)

@ Xor

Bitwise or logical XOR of integers.

@ Sub

Subtraction of integers.

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI FunctionPass * createExpandFpPass()

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

A CRTP mix-in to automatically provide informational APIs needed for passes.