LLVM: lib/IR/AutoUpgrade.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

34#include "llvm/IR/IntrinsicsAArch64.h"

35#include "llvm/IR/IntrinsicsARM.h"

36#include "llvm/IR/IntrinsicsNVPTX.h"

37#include "llvm/IR/IntrinsicsRISCV.h"

38#include "llvm/IR/IntrinsicsWebAssembly.h"

39#include "llvm/IR/IntrinsicsX86.h"

53#include

54#include

55#include

56

57using namespace llvm;

58

61 cl::desc("Disable autoupgrade of debug info"));

62

64

65

66

69

70

71 Type *Arg0Type = F->getFunctionType()->getParamType(0);

73 return false;

74

75

78 return true;

79}

80

81

82

85

86 Type *LastArgType = F->getFunctionType()->getParamType(

87 F->getFunctionType()->getNumParams() - 1);

89 return false;

90

91

94 return true;

95}

96

97

98

101

102 if (F->getReturnType()->isVectorTy())

103 return false;

104

107 return true;

108}

109

110

111

114

115 Type *Arg1Type = F->getFunctionType()->getParamType(1);

116 Type *Arg2Type = F->getFunctionType()->getParamType(2);

118 cast(Arg1Type)->getElementType()->isIntegerTy(8) &&

120 cast(Arg2Type)->getElementType()->isIntegerTy(8))

121 return false;

122

125 return true;

126}

127

128

129

132

133 Type *Arg1Type = F->getFunctionType()->getParamType(1);

134 Type *Arg2Type = F->getFunctionType()->getParamType(2);

136 cast(Arg1Type)->getElementType()->isIntegerTy(16) &&

138 cast(Arg2Type)->getElementType()->isIntegerTy(16))

139 return false;

140

143 return true;

144}

145

148 if (F->getReturnType()->getScalarType()->isBFloatTy())

149 return false;

150

153 return true;

154}

155

158 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())

159 return false;

160

163 return true;

164}

165

167

168

169

170

171

172 if (Name.consume_front("avx."))

173 return (Name.starts_with("blend.p") ||

174 Name == "cvt.ps2.pd.256" ||

175 Name == "cvtdq2.pd.256" ||

176 Name == "cvtdq2.ps.256" ||

177 Name.starts_with("movnt.") ||

178 Name.starts_with("sqrt.p") ||

179 Name.starts_with("storeu.") ||

180 Name.starts_with("vbroadcast.s") ||

181 Name.starts_with("vbroadcastf128") ||

182 Name.starts_with("vextractf128.") ||

183 Name.starts_with("vinsertf128.") ||

184 Name.starts_with("vperm2f128.") ||

185 Name.starts_with("vpermil."));

186

187 if (Name.consume_front("avx2."))

188 return (Name == "movntdqa" ||

189 Name.starts_with("pabs.") ||

190 Name.starts_with("padds.") ||

191 Name.starts_with("paddus.") ||

192 Name.starts_with("pblendd.") ||

193 Name == "pblendw" ||

194 Name.starts_with("pbroadcast") ||

195 Name.starts_with("pcmpeq.") ||

196 Name.starts_with("pcmpgt.") ||

197 Name.starts_with("pmax") ||

198 Name.starts_with("pmin") ||

199 Name.starts_with("pmovsx") ||

200 Name.starts_with("pmovzx") ||

201 Name == "pmul.dq" ||

202 Name == "pmulu.dq" ||

203 Name.starts_with("psll.dq") ||

204 Name.starts_with("psrl.dq") ||

205 Name.starts_with("psubs.") ||

206 Name.starts_with("psubus.") ||

207 Name.starts_with("vbroadcast") ||

208 Name == "vbroadcasti128" ||

209 Name == "vextracti128" ||

210 Name == "vinserti128" ||

211 Name == "vperm2i128");

212

213 if (Name.consume_front("avx512.")) {

214 if (Name.consume_front("mask."))

215

216 return (Name.starts_with("add.p") ||

217 Name.starts_with("and.") ||

218 Name.starts_with("andn.") ||

219 Name.starts_with("broadcast.s") ||

220 Name.starts_with("broadcastf32x4.") ||

221 Name.starts_with("broadcastf32x8.") ||

222 Name.starts_with("broadcastf64x2.") ||

223 Name.starts_with("broadcastf64x4.") ||

224 Name.starts_with("broadcasti32x4.") ||

225 Name.starts_with("broadcasti32x8.") ||

226 Name.starts_with("broadcasti64x2.") ||

227 Name.starts_with("broadcasti64x4.") ||

228 Name.starts_with("cmp.b") ||

229 Name.starts_with("cmp.d") ||

230 Name.starts_with("cmp.q") ||

231 Name.starts_with("cmp.w") ||

232 Name.starts_with("compress.b") ||

233 Name.starts_with("compress.d") ||

234 Name.starts_with("compress.p") ||

235 Name.starts_with("compress.q") ||

236 Name.starts_with("compress.store.") ||

237 Name.starts_with("compress.w") ||

238 Name.starts_with("conflict.") ||

239 Name.starts_with("cvtdq2pd.") ||

240 Name.starts_with("cvtdq2ps.") ||

241 Name == "cvtpd2dq.256" ||

242 Name == "cvtpd2ps.256" ||

243 Name == "cvtps2pd.128" ||

244 Name == "cvtps2pd.256" ||

245 Name.starts_with("cvtqq2pd.") ||

246 Name == "cvtqq2ps.256" ||

247 Name == "cvtqq2ps.512" ||

248 Name == "cvttpd2dq.256" ||

249 Name == "cvttps2dq.128" ||

250 Name == "cvttps2dq.256" ||

251 Name.starts_with("cvtudq2pd.") ||

252 Name.starts_with("cvtudq2ps.") ||

253 Name.starts_with("cvtuqq2pd.") ||

254 Name == "cvtuqq2ps.256" ||

255 Name == "cvtuqq2ps.512" ||

256 Name.starts_with("dbpsadbw.") ||

257 Name.starts_with("div.p") ||

258 Name.starts_with("expand.b") ||

259 Name.starts_with("expand.d") ||

260 Name.starts_with("expand.load.") ||

261 Name.starts_with("expand.p") ||

262 Name.starts_with("expand.q") ||

263 Name.starts_with("expand.w") ||

264 Name.starts_with("fpclass.p") ||

265 Name.starts_with("insert") ||

266 Name.starts_with("load.") ||

267 Name.starts_with("loadu.") ||

268 Name.starts_with("lzcnt.") ||

269 Name.starts_with("max.p") ||

270 Name.starts_with("min.p") ||

271 Name.starts_with("movddup") ||

272 Name.starts_with("move.s") ||

273 Name.starts_with("movshdup") ||

274 Name.starts_with("movsldup") ||

275 Name.starts_with("mul.p") ||

276 Name.starts_with("or.") ||

277 Name.starts_with("pabs.") ||

278 Name.starts_with("packssdw.") ||

279 Name.starts_with("packsswb.") ||

280 Name.starts_with("packusdw.") ||

281 Name.starts_with("packuswb.") ||

282 Name.starts_with("padd.") ||

283 Name.starts_with("padds.") ||

284 Name.starts_with("paddus.") ||

285 Name.starts_with("palignr.") ||

286 Name.starts_with("pand.") ||

287 Name.starts_with("pandn.") ||

288 Name.starts_with("pavg") ||

289 Name.starts_with("pbroadcast") ||

290 Name.starts_with("pcmpeq.") ||

291 Name.starts_with("pcmpgt.") ||

292 Name.starts_with("perm.df.") ||

293 Name.starts_with("perm.di.") ||

294 Name.starts_with("permvar.") ||

295 Name.starts_with("pmaddubs.w.") ||

296 Name.starts_with("pmaddw.d.") ||

297 Name.starts_with("pmax") ||

298 Name.starts_with("pmin") ||

299 Name == "pmov.qd.256" ||

300 Name == "pmov.qd.512" ||

301 Name == "pmov.wb.256" ||

302 Name == "pmov.wb.512" ||

303 Name.starts_with("pmovsx") ||

304 Name.starts_with("pmovzx") ||

305 Name.starts_with("pmul.dq.") ||

306 Name.starts_with("pmul.hr.sw.") ||

307 Name.starts_with("pmulh.w.") ||

308 Name.starts_with("pmulhu.w.") ||

309 Name.starts_with("pmull.") ||

310 Name.starts_with("pmultishift.qb.") ||

311 Name.starts_with("pmulu.dq.") ||

312 Name.starts_with("por.") ||

313 Name.starts_with("prol.") ||

314 Name.starts_with("prolv.") ||

315 Name.starts_with("pror.") ||

316 Name.starts_with("prorv.") ||

317 Name.starts_with("pshuf.b.") ||

318 Name.starts_with("pshuf.d.") ||

319 Name.starts_with("pshufh.w.") ||

320 Name.starts_with("pshufl.w.") ||

321 Name.starts_with("psll.d") ||

322 Name.starts_with("psll.q") ||

323 Name.starts_with("psll.w") ||

324 Name.starts_with("pslli") ||

325 Name.starts_with("psllv") ||

326 Name.starts_with("psra.d") ||

327 Name.starts_with("psra.q") ||

328 Name.starts_with("psra.w") ||

329 Name.starts_with("psrai") ||

330 Name.starts_with("psrav") ||

331 Name.starts_with("psrl.d") ||

332 Name.starts_with("psrl.q") ||

333 Name.starts_with("psrl.w") ||

334 Name.starts_with("psrli") ||

335 Name.starts_with("psrlv") ||

336 Name.starts_with("psub.") ||

337 Name.starts_with("psubs.") ||

338 Name.starts_with("psubus.") ||

339 Name.starts_with("pternlog.") ||

340 Name.starts_with("punpckh") ||

341 Name.starts_with("punpckl") ||

342 Name.starts_with("pxor.") ||

343 Name.starts_with("shuf.f") ||

344 Name.starts_with("shuf.i") ||

345 Name.starts_with("shuf.p") ||

346 Name.starts_with("sqrt.p") ||

347 Name.starts_with("store.b.") ||

348 Name.starts_with("store.d.") ||

349 Name.starts_with("store.p") ||

350 Name.starts_with("store.q.") ||

351 Name.starts_with("store.w.") ||

352 Name == "store.ss" ||

353 Name.starts_with("storeu.") ||

354 Name.starts_with("sub.p") ||

355 Name.starts_with("ucmp.") ||

356 Name.starts_with("unpckh.") ||

357 Name.starts_with("unpckl.") ||

358 Name.starts_with("valign.") ||

359 Name == "vcvtph2ps.128" ||

360 Name == "vcvtph2ps.256" ||

361 Name.starts_with("vextract") ||

362 Name.starts_with("vfmadd.") ||

363 Name.starts_with("vfmaddsub.") ||

364 Name.starts_with("vfnmadd.") ||

365 Name.starts_with("vfnmsub.") ||

366 Name.starts_with("vpdpbusd.") ||

367 Name.starts_with("vpdpbusds.") ||

368 Name.starts_with("vpdpwssd.") ||

369 Name.starts_with("vpdpwssds.") ||

370 Name.starts_with("vpermi2var.") ||

371 Name.starts_with("vpermil.p") ||

372 Name.starts_with("vpermilvar.") ||

373 Name.starts_with("vpermt2var.") ||

374 Name.starts_with("vpmadd52") ||

375 Name.starts_with("vpshld.") ||

376 Name.starts_with("vpshldv.") ||

377 Name.starts_with("vpshrd.") ||

378 Name.starts_with("vpshrdv.") ||

379 Name.starts_with("vpshufbitqmb.") ||

380 Name.starts_with("xor."));

381

382 if (Name.consume_front("mask3."))

383

384 return (Name.starts_with("vfmadd.") ||

385 Name.starts_with("vfmaddsub.") ||

386 Name.starts_with("vfmsub.") ||

387 Name.starts_with("vfmsubadd.") ||

388 Name.starts_with("vfnmsub."));

389

390 if (Name.consume_front("maskz."))

391

392 return (Name.starts_with("pternlog.") ||

393 Name.starts_with("vfmadd.") ||

394 Name.starts_with("vfmaddsub.") ||

395 Name.starts_with("vpdpbusd.") ||

396 Name.starts_with("vpdpbusds.") ||

397 Name.starts_with("vpdpwssd.") ||

398 Name.starts_with("vpdpwssds.") ||

399 Name.starts_with("vpermt2var.") ||

400 Name.starts_with("vpmadd52") ||

401 Name.starts_with("vpshldv.") ||

402 Name.starts_with("vpshrdv."));

403

404

405 return (Name == "movntdqa" ||

406 Name == "pmul.dq.512" ||

407 Name == "pmulu.dq.512" ||

408 Name.starts_with("broadcastm") ||

409 Name.starts_with("cmp.p") ||

410 Name.starts_with("cvtb2mask.") ||

411 Name.starts_with("cvtd2mask.") ||

412 Name.starts_with("cvtmask2") ||

413 Name.starts_with("cvtq2mask.") ||

414 Name == "cvtusi2sd" ||

415 Name.starts_with("cvtw2mask.") ||

416 Name == "kand.w" ||

417 Name == "kandn.w" ||

418 Name == "knot.w" ||

419 Name == "kor.w" ||

420 Name == "kortestc.w" ||

421 Name == "kortestz.w" ||

422 Name.starts_with("kunpck") ||

423 Name == "kxnor.w" ||

424 Name == "kxor.w" ||

425 Name.starts_with("padds.") ||

426 Name.starts_with("pbroadcast") ||

427 Name.starts_with("prol") ||

428 Name.starts_with("pror") ||

429 Name.starts_with("psll.dq") ||

430 Name.starts_with("psrl.dq") ||

431 Name.starts_with("psubs.") ||

432 Name.starts_with("ptestm") ||

433 Name.starts_with("ptestnm") ||

434 Name.starts_with("storent.") ||

435 Name.starts_with("vbroadcast.s") ||

436 Name.starts_with("vpshld.") ||

437 Name.starts_with("vpshrd."));

438 }

439

440 if (Name.consume_front("fma."))

441 return (Name.starts_with("vfmadd.") ||

442 Name.starts_with("vfmsub.") ||

443 Name.starts_with("vfmsubadd.") ||

444 Name.starts_with("vfnmadd.") ||

445 Name.starts_with("vfnmsub."));

446

447 if (Name.consume_front("fma4."))

448 return Name.starts_with("vfmadd.s");

449

450 if (Name.consume_front("sse."))

451 return (Name == "add.ss" ||

452 Name == "cvtsi2ss" ||

453 Name == "cvtsi642ss" ||

454 Name == "div.ss" ||

455 Name == "mul.ss" ||

456 Name.starts_with("sqrt.p") ||

457 Name == "sqrt.ss" ||

458 Name.starts_with("storeu.") ||

459 Name == "sub.ss");

460

461 if (Name.consume_front("sse2."))

462 return (Name == "add.sd" ||

463 Name == "cvtdq2pd" ||

464 Name == "cvtdq2ps" ||

465 Name == "cvtps2pd" ||

466 Name == "cvtsi2sd" ||

467 Name == "cvtsi642sd" ||

468 Name == "cvtss2sd" ||

469 Name == "div.sd" ||

470 Name == "mul.sd" ||

471 Name.starts_with("padds.") ||

472 Name.starts_with("paddus.") ||

473 Name.starts_with("pcmpeq.") ||

474 Name.starts_with("pcmpgt.") ||

475 Name == "pmaxs.w" ||

476 Name == "pmaxu.b" ||

477 Name == "pmins.w" ||

478 Name == "pminu.b" ||

479 Name == "pmulu.dq" ||

480 Name.starts_with("pshuf") ||

481 Name.starts_with("psll.dq") ||

482 Name.starts_with("psrl.dq") ||

483 Name.starts_with("psubs.") ||

484 Name.starts_with("psubus.") ||

485 Name.starts_with("sqrt.p") ||

486 Name == "sqrt.sd" ||

487 Name == "storel.dq" ||

488 Name.starts_with("storeu.") ||

489 Name == "sub.sd");

490

491 if (Name.consume_front("sse41."))

492 return (Name.starts_with("blendp") ||

493 Name == "movntdqa" ||

494 Name == "pblendw" ||

495 Name == "pmaxsb" ||

496 Name == "pmaxsd" ||

497 Name == "pmaxud" ||

498 Name == "pmaxuw" ||

499 Name == "pminsb" ||

500 Name == "pminsd" ||

501 Name == "pminud" ||

502 Name == "pminuw" ||

503 Name.starts_with("pmovsx") ||

504 Name.starts_with("pmovzx") ||

505 Name == "pmuldq");

506

507 if (Name.consume_front("sse42."))

508 return Name == "crc32.64.8";

509

510 if (Name.consume_front("sse4a."))

511 return Name.starts_with("movnt.");

512

513 if (Name.consume_front("ssse3."))

514 return (Name == "pabs.b.128" ||

515 Name == "pabs.d.128" ||

516 Name == "pabs.w.128");

517

518 if (Name.consume_front("xop."))

519 return (Name == "vpcmov" ||

520 Name == "vpcmov.256" ||

521 Name.starts_with("vpcom") ||

522 Name.starts_with("vprot"));

523

524 return (Name == "addcarry.u32" ||

525 Name == "addcarry.u64" ||

526 Name == "addcarryx.u32" ||

527 Name == "addcarryx.u64" ||

528 Name == "subborrow.u32" ||

529 Name == "subborrow.u64" ||

530 Name.starts_with("vcvtph2ps."));

531}

532

535

536 if (!Name.consume_front("x86."))

537 return false;

538

540 NewFn = nullptr;

541 return true;

542 }

543

544 if (Name == "rdtscp") {

545

546 if (F->getFunctionType()->getNumParams() == 0)

547 return false;

548

551 Intrinsic::x86_rdtscp);

552 return true;

553 }

554

556

557

558 if (Name.consume_front("sse41.ptest")) {

560 .Case("c", Intrinsic::x86_sse41_ptestc)

561 .Case("z", Intrinsic::x86_sse41_ptestz)

562 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)

566

567 return false;

568 }

569

570

571

572

573

575 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)

576 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)

577 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)

578 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)

579 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)

580 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)

584

585 if (Name.consume_front("avx512.")) {

586 if (Name.consume_front("mask.cmp.")) {

587

589 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)

590 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)

591 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)

592 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)

593 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)

594 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)

598 } else if (Name.starts_with("vpdpbusd.") ||

599 Name.starts_with("vpdpbusds.")) {

600

602 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)

603 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)

604 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)

605 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)

606 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)

607 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)

611 } else if (Name.starts_with("vpdpwssd.") ||

612 Name.starts_with("vpdpwssds.")) {

613

615 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)

616 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)

617 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)

618 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)

619 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)

620 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)

624 }

625 return false;

626 }

627

628 if (Name.consume_front("avx2.")) {

629 if (Name.consume_front("vpdpb")) {

630

632 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)

633 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)

634 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)

635 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)

636 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)

637 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)

638 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)

639 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)

640 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)

641 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)

642 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)

643 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)

647 } else if (Name.consume_front("vpdpw")) {

648

650 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)

651 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)

652 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)

653 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)

654 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)

655 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)

656 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)

657 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)

658 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)

659 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)

660 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)

661 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)

665 }

666 return false;

667 }

668

669 if (Name.consume_front("avx10.")) {

670 if (Name.consume_front("vpdpb")) {

671

673 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)

674 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)

675 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)

676 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)

677 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)

678 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)

682 } else if (Name.consume_front("vpdpw")) {

684 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)

685 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)

686 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)

687 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)

688 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)

689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)

693 }

694 return false;

695 }

696

697 if (Name.consume_front("avx512bf16.")) {

698

700 .Case("cvtne2ps2bf16.128",

701 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)

702 .Case("cvtne2ps2bf16.256",

703 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)

704 .Case("cvtne2ps2bf16.512",

705 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)

706 .Case("mask.cvtneps2bf16.128",

707 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)

708 .Case("cvtneps2bf16.256",

709 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)

710 .Case("cvtneps2bf16.512",

711 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)

715

716

718 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)

719 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)

720 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)

724 return false;

725 }

726

727 if (Name.consume_front("xop.")) {

729 if (Name.starts_with("vpermil2")) {

730

731

732 auto Idx = F->getFunctionType()->getParamType(2);

733 if (Idx->isFPOrFPVectorTy()) {

734 unsigned IdxSize = Idx->getPrimitiveSizeInBits();

735 unsigned EltSize = Idx->getScalarSizeInBits();

736 if (EltSize == 64 && IdxSize == 128)

737 ID = Intrinsic::x86_xop_vpermil2pd;

738 else if (EltSize == 32 && IdxSize == 128)

739 ID = Intrinsic::x86_xop_vpermil2ps;

740 else if (EltSize == 64 && IdxSize == 256)

741 ID = Intrinsic::x86_xop_vpermil2pd_256;

742 else

743 ID = Intrinsic::x86_xop_vpermil2ps_256;

744 }

745 } else if (F->arg_size() == 2)

746

748 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)

749 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)

751

755 return true;

756 }

757 return false;

758 }

759

760 if (Name == "seh.recoverfp") {

762 Intrinsic::eh_recoverfp);

763 return true;

764 }

765

766 return false;

767}

768

769

770

774 if (Name.starts_with("rbit")) {

775

777 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());

778 return true;

779 }

780

781 if (Name == "thread.pointer") {

782

784 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

785 return true;

786 }

787

788 bool Neon = Name.consume_front("neon.");

789 if (Neon) {

790

791

792

793 if (Name.consume_front("bfdot.")) {

794

797 .Cases({"v2f32.v8i8", "v4f32.v16i8"},

802 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();

803 assert((OperandWidth == 64 || OperandWidth == 128) &&

804 "Unexpected operand width");

805 LLVMContext &Ctx = F->getParent()->getContext();

806 std::array<Type *, 2> Tys{

807 {F->getReturnType(),

810 return true;

811 }

812 return false;

813 }

814

815

816

817 if (Name.consume_front("bfm")) {

818

819 if (Name.consume_back(".v4f32.v16i8")) {

820

824 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla

826 .Case("lalb",

827 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb

828 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)

829 .Case("lalt",

830 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt

831 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)

835 return true;

836 }

837 return false;

838 }

839 return false;

840 }

841

842 }

843

844

845 if (IsArm) {

846

847 if (Neon) {

848

850 .StartsWith("vclz.", Intrinsic::ctlz)

851 .StartsWith("vcnt.", Intrinsic::ctpop)

852 .StartsWith("vqadds.", Intrinsic::sadd_sat)

853 .StartsWith("vqaddu.", Intrinsic::uadd_sat)

854 .StartsWith("vqsubs.", Intrinsic::ssub_sat)

855 .StartsWith("vqsubu.", Intrinsic::usub_sat)

856 .StartsWith("vrinta.", Intrinsic::round)

857 .StartsWith("vrintn.", Intrinsic::roundeven)

858 .StartsWith("vrintm.", Intrinsic::floor)

859 .StartsWith("vrintp.", Intrinsic::ceil)

860 .StartsWith("vrintx.", Intrinsic::rint)

861 .StartsWith("vrintz.", Intrinsic::trunc)

865 F->arg_begin()->getType());

866 return true;

867 }

868

869 if (Name.consume_front("vst")) {

870

871 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");

875 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,

876 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

877

879 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,

880 Intrinsic::arm_neon_vst4lane};

881

882 auto fArgs = F->getFunctionType()->params();

883 Type *Tys[] = {fArgs[0], fArgs[1]};

886 F->getParent(), StoreInts[fArgs.size() - 3], Tys);

887 else

889 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);

890 return true;

891 }

892 return false;

893 }

894

895 return false;

896 }

897

898 if (Name.consume_front("mve.")) {

899

900 if (Name == "vctp64") {

902

903

905 return true;

906 }

907 return false;

908 }

909

910 if (Name.starts_with("vrintn.v")) {

912 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());

913 return true;

914 }

915

916

917 if (Name.consume_back(".v4i1")) {

918

919 if (Name.consume_back(".predicated.v2i64.v4i32"))

920

921 return Name == "mull.int" || Name == "vqdmull";

922

923 if (Name.consume_back(".v2i64")) {

924

925 bool IsGather = Name.consume_front("vldr.gather.");

926 if (IsGather || Name.consume_front("vstr.scatter.")) {

927 if (Name.consume_front("base.")) {

928

929 Name.consume_front("wb.");

930

931

932 return Name == "predicated.v2i64";

933 }

934

935 if (Name.consume_front("offset.predicated."))

936 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||

937 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

938

939

940 return false;

941 }

942

943 return false;

944 }

945 return false;

946 }

947 return false;

948 }

949

950 if (Name.consume_front("cde.vcx")) {

951

952 if (Name.consume_back(".predicated.v2i64.v4i1"))

953

954 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||

955 Name == "3q" || Name == "3qa";

956

957 return false;

958 }

959 } else {

960

961 if (Neon) {

962

964 .StartsWith("frintn", Intrinsic::roundeven)

965 .StartsWith("rbit", Intrinsic::bitreverse)

969 F->arg_begin()->getType());

970 return true;

971 }

972

973 if (Name.starts_with("addp")) {

974

975 if (F->arg_size() != 2)

976 return false;

978 if (Ty && Ty->getElementType()->isFloatingPointTy()) {

980 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);

981 return true;

982 }

983 }

984

985

986 if (Name.starts_with("bfcvt")) {

987 NewFn = nullptr;

988 return true;

989 }

990

991 return false;

992 }

993 if (Name.consume_front("sve.")) {

994

995 if (Name.consume_front("bf")) {

996 if (Name.consume_back(".lane")) {

997

1000 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)

1001 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)

1002 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)

1006 return true;

1007 }

1008 return false;

1009 }

1010 return false;

1011 }

1012

1013

1014 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {

1015 NewFn = nullptr;

1016 return true;

1017 }

1018

1019 if (Name.consume_front("addqv")) {

1020 // 'aarch64.sve.addqv': only the floating-point form is remapped to faddqv; any other return type is not upgraded by this branch.

1021 if (!F->getReturnType()->isFPOrFPVectorTy())

1022 return false;

1023 // Overload types for the replacement: the return type and the type of the second parameter.

1024 auto Args = F->getFunctionType()->params();

1025 Type *Tys[] = {F->getReturnType(), Args[1]};

1027 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);

1028 return true;

1029 }

1030

1031 if (Name.consume_front("ld")) {

1032

1033 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");

1034 if (LdRegex.match(Name)) {

1035 Type *ScalarTy =

1041 Intrinsic::aarch64_sve_ld2_sret,

1042 Intrinsic::aarch64_sve_ld3_sret,

1043 Intrinsic::aarch64_sve_ld4_sret,

1044 };

1046 LoadIDs[Name[0] - '2'], Ty);

1047 return true;

1048 }

1049 return false;

1050 }

1051

1052 if (Name.consume_front("tuple.")) {

1053

1054 if (Name.starts_with("get")) {

1055

1056 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};

1058 F->getParent(), Intrinsic::vector_extract, Tys);

1059 return true;

1060 }

1061

1062 if (Name.starts_with("set")) {

1063

1064 auto Args = F->getFunctionType()->params();

1065 Type *Tys[] = {Args[0], Args[2], Args[1]};

1067 F->getParent(), Intrinsic::vector_insert, Tys);

1068 return true;

1069 }

1070

1071 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");

1072 if (CreateTupleRegex.match(Name)) {

1073

1074 auto Args = F->getFunctionType()->params();

1075 Type *Tys[] = {F->getReturnType(), Args[1]};

1077 F->getParent(), Intrinsic::vector_insert, Tys);

1078 return true;

1079 }

1080 return false;

1081 }

1082

1083 if (Name.starts_with("rev.nxv")) {

1084

1086 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());

1087 return true;

1088 }

1089

1090 return false;

1091 }

1092 }

1093 return false;

1094}

1095

1098 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {

1101 .Case("im2col.3d",

1102 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)

1103 .Case("im2col.4d",

1104 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)

1105 .Case("im2col.5d",

1106 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)

1107 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)

1108 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)

1109 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)

1110 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)

1111 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)

1113

1115 return ID;

1116

1117

1118

1119

1120 if (F->getArg(0)->getType()->getPointerAddressSpace() ==

1122 return ID;

1123

1124

1125

1126

1127

1128

1129

1130

1131

1132

1133

1134 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;

1135 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);

1137 return ID;

1138 }

1139

1141}

1142

1145 if (Name.consume_front("mapa.shared.cluster"))

1146 if (F->getReturnType()->getPointerAddressSpace() ==

1148 return Intrinsic::nvvm_mapa_shared_cluster;

1149

1150 if (Name.consume_front("cp.async.bulk.")) {

1153 .Case("global.to.shared.cluster",

1154 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)

1155 .Case("shared.cta.to.cluster",

1156 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)

1158

1160 if (F->getArg(0)->getType()->getPointerAddressSpace() ==

1162 return ID;

1163 }

1164

1166}

1167

1169 if (Name.consume_front("fma.rn."))

1171 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)

1172 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)

1173 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)

1174 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)

1175 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)

1176 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)

1177 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)

1178 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)

1179 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)

1180 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)

1181 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)

1182 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)

1184

1185 if (Name.consume_front("fmax."))

1187 .Case("bf16", Intrinsic::nvvm_fmax_bf16)

1188 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)

1189 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)

1190 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)

1191 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)

1192 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)

1193 .Case("ftz.nan.xorsign.abs.bf16",

1194 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)

1195 .Case("ftz.nan.xorsign.abs.bf16x2",

1196 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)

1197 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)

1198 .Case("ftz.xorsign.abs.bf16x2",

1199 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)

1200 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)

1201 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)

1202 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)

1203 .Case("nan.xorsign.abs.bf16x2",

1204 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)

1205 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)

1206 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)

1208

1209 if (Name.consume_front("fmin."))

1211 .Case("bf16", Intrinsic::nvvm_fmin_bf16)

1212 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)

1213 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)

1214 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)

1215 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)

1216 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)

1217 .Case("ftz.nan.xorsign.abs.bf16",

1218 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)

1219 .Case("ftz.nan.xorsign.abs.bf16x2",

1220 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)

1221 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)

1222 .Case("ftz.xorsign.abs.bf16x2",

1223 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)

1224 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)

1225 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)

1226 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)

1227 .Case("nan.xorsign.abs.bf16x2",

1228 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)

1229 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)

1230 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)

1232

1233 if (Name.consume_front("neg."))

1235 .Case("bf16", Intrinsic::nvvm_neg_bf16)

1236 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)

1238

1240}

1241

1243 return Name.consume_front("local") || Name.consume_front("shared") ||

1244 Name.consume_front("global") || Name.consume_front("constant") ||

1245 Name.consume_front("param");

1246}

1247

1249 bool CanUpgradeDebugIntrinsicsToRecords) {

1250 assert(F && "Illegal to upgrade a non-existent Function.");

1251

1253

1254

1255 if (!Name.consume_front("llvm.") || Name.empty())

1256 return false;

1257

1258 switch (Name[0]) {

1259 default: break;

1260 case 'a': {

1261 bool IsArm = Name.consume_front("arm.");

1262 if (IsArm || Name.consume_front("aarch64.")) {

1264 return true;

1265 break;

1266 }

1267

1268 if (Name.consume_front("amdgcn.")) {

1269 if (Name == "alignbit") {

1270

1272 F->getParent(), Intrinsic::fshr, {F->getReturnType()});

1273 return true;

1274 }

1275

1276 if (Name.consume_front("atomic.")) {

1277 if (Name.starts_with("inc") || Name.starts_with("dec") ||

1278 Name.starts_with("cond.sub") || Name.starts_with("csub")) {

1279

1280

1281 NewFn = nullptr;

1282 return true;

1283 }

1284 break;

1285 }

1286

1287 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||

1288 Name.consume_front("flat.atomic.")) {

1289 if (Name.starts_with("fadd") ||

1290

1291 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||

1292 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {

1293

1294

1295 NewFn = nullptr;

1296 return true;

1297 }

1298 }

1299

1300 if (Name.starts_with("ldexp.")) {

1301

1303 F->getParent(), Intrinsic::ldexp,

1304 {F->getReturnType(), F->getArg(1)->getType()});

1305 return true;

1306 }

1307 break;

1308 }

1309

1310 break;

1311 }

1312 case 'c': {

1313 if (F->arg_size() == 1) {

1315 .StartsWith("ctlz.", Intrinsic::ctlz)

1316 .StartsWith("cttz.", Intrinsic::cttz)

1321 F->arg_begin()->getType());

1322 return true;

1323 }

1324 }

1325

1326 if (F->arg_size() == 2 && Name == "coro.end") {

1329 Intrinsic::coro_end);

1330 return true;

1331 }

1332

1333 break;

1334 }

1335 case 'd':

1336 if (Name.consume_front("dbg.")) {

1337

1338 if (CanUpgradeDebugIntrinsicsToRecords) {

1339 if (Name == "addr" || Name == "value" || Name == "assign" ||

1340 Name == "declare" || Name == "label") {

1341

1342 NewFn = nullptr;

1343

1344 return true;

1345 }

1346 }

1347

1348

1349 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {

1352 Intrinsic::dbg_value);

1353 return true;

1354 }

1355 break;

1356 }

1357 break;

1358 case 'e':

1359 if (Name.consume_front("experimental.vector.")) {

1362

1363

1365 .StartsWith("extract.", Intrinsic::vector_extract)

1366 .StartsWith("insert.", Intrinsic::vector_insert)

1367 .StartsWith("splice.", Intrinsic::vector_splice)

1368 .StartsWith("reverse.", Intrinsic::vector_reverse)

1369 .StartsWith("interleave2.", Intrinsic::vector_interleave2)

1370 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)

1372 Intrinsic::vector_partial_reduce_add)

1375 const auto *FT = F->getFunctionType();

1377 if (ID == Intrinsic::vector_extract ||

1378 ID == Intrinsic::vector_interleave2)

1379

1380 Tys.push_back(FT->getReturnType());

1381 if (ID != Intrinsic::vector_interleave2)

1382 Tys.push_back(FT->getParamType(0));

1383 if (ID == Intrinsic::vector_insert ||

1384 ID == Intrinsic::vector_partial_reduce_add)

1385

1386 Tys.push_back(FT->getParamType(1));

1389 return true;

1390 }

1391

1392 if (Name.consume_front("reduce.")) {

1394 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");

1395 if (R.match(Name, &Groups))

1397 .Case("add", Intrinsic::vector_reduce_add)

1398 .Case("mul", Intrinsic::vector_reduce_mul)

1399 .Case("and", Intrinsic::vector_reduce_and)

1400 .Case("or", Intrinsic::vector_reduce_or)

1401 .Case("xor", Intrinsic::vector_reduce_xor)

1402 .Case("smax", Intrinsic::vector_reduce_smax)

1403 .Case("smin", Intrinsic::vector_reduce_smin)

1404 .Case("umax", Intrinsic::vector_reduce_umax)

1405 .Case("umin", Intrinsic::vector_reduce_umin)

1406 .Case("fmax", Intrinsic::vector_reduce_fmax)

1407 .Case("fmin", Intrinsic::vector_reduce_fmin)

1409

1410 bool V2 = false;

1412 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");

1414 V2 = true;

1415 if (R2.match(Name, &Groups))

1417 .Case("fadd", Intrinsic::vector_reduce_fadd)

1418 .Case("fmul", Intrinsic::vector_reduce_fmul)

1420 }

1423 auto Args = F->getFunctionType()->params();

1425 {Args[V2 ? 1 : 0]});

1426 return true;

1427 }

1428 break;

1429 }

1430 break;

1431 }

1432 if (Name.consume_front("experimental.stepvector.")) {

1436 F->getParent(), ID, F->getFunctionType()->getReturnType());

1437 return true;

1438 }

1439 break;

1440 case 'f':

1441 if (Name.starts_with("flt.rounds")) {

1444 Intrinsic::get_rounding);

1445 return true;

1446 }

1447 break;

1448 case 'i':

1449 if (Name.starts_with("invariant.group.barrier")) {

1450

1451 auto Args = F->getFunctionType()->params();

1452 Type* ObjectPtr[1] = {Args[0]};

1455 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);

1456 return true;

1457 }

1458 break;

1459 case 'l':

1460 if ((Name.starts_with("lifetime.start") ||

1461 Name.starts_with("lifetime.end")) &&

1462 F->arg_size() == 2) {

1463 Intrinsic::ID IID = Name.starts_with("lifetime.start")

1464 ? Intrinsic::lifetime_start

1465 : Intrinsic::lifetime_end;

1468 F->getArg(0)->getType());

1469 return true;

1470 }

1471 break;

1472 case 'm': {

1473

1474

1475

1477 .StartsWith("memcpy.", Intrinsic::memcpy)

1478 .StartsWith("memmove.", Intrinsic::memmove)

1480 if (F->arg_size() == 5) {

1482

1484 F->getFunctionType()->params().slice(0, 3);

1485 NewFn =

1487 return true;

1488 }

1489 }

1490 if (Name.starts_with("memset.") && F->arg_size() == 5) {

1492

1493 const auto *FT = F->getFunctionType();

1494 Type *ParamTypes[2] = {

1495 FT->getParamType(0),

1496 FT->getParamType(2)

1497 };

1499 Intrinsic::memset, ParamTypes);

1500 return true;

1501 }

1502

1503 unsigned MaskedID =

1505 .StartsWith("masked.load", Intrinsic::masked_load)

1506 .StartsWith("masked.gather", Intrinsic::masked_gather)

1507 .StartsWith("masked.store", Intrinsic::masked_store)

1508 .StartsWith("masked.scatter", Intrinsic::masked_scatter)

1510 if (MaskedID && F->arg_size() == 4) {

1512 if (MaskedID == Intrinsic::masked_load ||

1513 MaskedID == Intrinsic::masked_gather) {

1515 F->getParent(), MaskedID,

1516 {F->getReturnType(), F->getArg(0)->getType()});

1517 return true;

1518 }

1520 F->getParent(), MaskedID,

1521 {F->getArg(0)->getType(), F->getArg(1)->getType()});

1522 return true;

1523 }

1524 break;

1525 }

1526 case 'n': {

1527 if (Name.consume_front("nvvm.")) {

1528

1529 if (F->arg_size() == 1) {

1532 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)

1533 .Case("clz.i", Intrinsic::ctlz)

1534 .Case("popc.i", Intrinsic::ctpop)

1538 {F->getReturnType()});

1539 return true;

1540 }

1541 }

1542

1543

1544 if (!F->getReturnType()->getScalarType()->isBFloatTy()) { // Only declarations whose return type is not already bfloat need adjusting. NOTE(review): the statements guarded here are on lines missing from this listing - confirm against the full file.

1547 NewFn = nullptr;

1548 return true;

1549 }

1550 }

1551

1552

1557 return true;

1558 }

1559

1560

1565 return true;

1566 }

1567

1568

1569

1570

1571

1572 bool Expand = false;

1573 if (Name.consume_front("abs."))

1574

1575 Expand =

1576 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";

1577 else if (Name.consume_front("fabs."))

1578

1579 Expand = Name == "f" || Name == "ftz.f" || Name == "d";

1580 else if (Name.consume_front("ex2.approx."))

1581

1582 Expand =

1583 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";

1584 else if (Name.consume_front("max.") || Name.consume_front("min."))

1585

1586 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||

1587 Name == "ui" || Name == "ull";

1588 else if (Name.consume_front("atomic.load."))

1589

1590

1597 else if (Name.consume_front("bitcast."))

1598

1599 Expand =

1600 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";

1601 else if (Name.consume_front("rotate."))

1602

1603 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";

1604 else if (Name.consume_front("ptr.gen.to."))

1605

1607 else if (Name.consume_front("ptr."))

1608

1610 else if (Name.consume_front("ldg.global."))

1611

1612 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||

1613 Name.starts_with("p."));

1614 else

1616 .Case("barrier0", true)

1617 .Case("barrier.n", true)

1618 .Case("barrier.sync.cnt", true)

1619 .Case("barrier.sync", true)

1620 .Case("barrier", true)

1621 .Case("bar.sync", true)

1622 .Case("clz.ll", true)

1623 .Case("popc.ll", true)

1624 .Case("h2f", true)

1625 .Case("swap.lo.hi.b64", true)

1626 .Case("tanh.approx.f32", true)

1628

1629 if (Expand) {

1630 NewFn = nullptr;

1631 return true;

1632 }

1633 break;

1634 }

1635 break;

1636 }

1637 case 'o':

1638 if (Name.starts_with("objectsize.")) {

1639 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };

1640 if (F->arg_size() == 2 || F->arg_size() == 3) {

1643 Intrinsic::objectsize, Tys);

1644 return true;

1645 }

1646 }

1647 break;

1648

1649 case 'p':

1650 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {

1653 F->getParent(), Intrinsic::ptr_annotation,

1654 {F->arg_begin()->getType(), F->getArg(1)->getType()});

1655 return true;

1656 }

1657 break;

1658

1659 case 'r': {

1660 if (Name.consume_front("riscv.")) {

1663 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)

1664 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)

1665 .Case("aes32esi", Intrinsic::riscv_aes32esi)

1666 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)

1669 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) { // Upgrade only when the third parameter is not already i32; an i32 parameter means the declaration is already current and falls through to the break below.

1672 return true;

1673 }

1674 break;

1675 }

1676

1678 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)

1679 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)

1682 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) || // Upgrade when the third parameter is not already i32 ...

1683 F->getFunctionType()->getReturnType()->isIntegerTy(64)) { // ... or when the declaration returns i64.

1686 return true;

1687 }

1688 break;

1689 }

1690

1692 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)

1693 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)

1694 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)

1695 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)

1696 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)

1697 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)

1700 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {

1703 return true;

1704 }

1705 break;

1706 }

1707 break;

1708 }

1709 } break;

1710

1711 case 's':

1712 if (Name == "stackprotectorcheck") {

1713 NewFn = nullptr;

1714 return true;

1715 }

1716 break;

1717

1718 case 't':

1719 if (Name == "thread.pointer") {

1721 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());

1722 return true;

1723 }

1724 break;

1725

1726 case 'v': {

1727 if (Name == "var.annotation" && F->arg_size() == 4) {

1730 F->getParent(), Intrinsic::var_annotation,

1731 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});

1732 return true;

1733 }

1734 break;

1735 }

1736

1737 case 'w':

1738 if (Name.consume_front("wasm.")) {

1741 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)

1742 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)

1743 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)

1748 F->getReturnType());

1749 return true;

1750 }

1751

1752 if (Name.consume_front("dot.i8x16.i7x16.")) {

1754 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)

1755 .Case("add.signed",

1756 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)

1761 return true;

1762 }

1763 break;

1764 }

1765 break;

1766 }

1767 break;

1768

1769 case 'x':

1771 return true;

1772 }

1773

1775 if (ST && (!ST->isLiteral() || ST->isPacked()) && // Rewrite only struct returns that are non-literal or packed; a literal, non-packed struct needs no change. NOTE(review): the rest of this condition is on a line missing from this listing.

1777

1778

1779

1780

1784 auto *FT = F->getFunctionType();

1786 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());

1787 std::string Name = F->getName().str();

1789 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),

1790 Name, F->getParent());

1791

1792

1795 return true;

1796 }

1797 }

1798

1799

1801 if (Result != std::nullopt) {

1803 return true;

1804 }

1805

1806

1807

1808

1809

1810 return false;

1811}

1812

1814 bool CanUpgradeDebugIntrinsicsToRecords) {

1815 NewFn = nullptr;

1816 bool Upgraded =

1818

1819

1820 if (NewFn)

1821 F = NewFn;

1823

1826 F->setAttributes(

1828 }

1829 return Upgraded;

1830}

1831

1833 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||

1834 GV->getName() == "llvm.global_dtors")) ||

1836 return nullptr;

1838 if (!ATy)

1839 return nullptr;

1842 return nullptr;

1843

1849 unsigned N = Init->getNumOperands();

1850 std::vector<Constant *> NewCtors(N);

1851 for (unsigned i = 0; i != N; ++i) {

1854 Ctor->getAggregateElement(1),

1856 }

1858

1860 NewInit, GV->getName());

1861}

1862

1863

1864

1866 unsigned Shift) {

1868 unsigned NumElts = ResultTy->getNumElements() * 8;

1869

1870

1872 Op = Builder.CreateBitCast(Op, VecTy, "cast");

1873

1874

1876

1877

1878

1879 if (Shift < 16) { // Shifts of 16 or more skip the shuffle, so Res keeps the value it was given on a line not shown in this listing.

1880 int Idxs[64];

1881 // Build the shuffle mask one 16-element chunk at a time; l is the first element of the current chunk.

1882 for (unsigned l = 0; l != NumElts; l += 16)

1883 for (unsigned i = 0; i != 16; ++i) {

1884 unsigned Idx = NumElts + i - Shift; // For i >= Shift this is >= NumElts, i.e. element (i - Shift) of the second shuffle operand (Op), assuming both operands have NumElts elements.

1885 if (Idx < NumElts)

1886 Idx -= NumElts - 16; // For i < Shift: becomes 16 + i - Shift, an index into the first shuffle operand (Res).

1887 Idxs[l + i] = Idx + l; // Offset by the chunk base so each 16-element chunk is handled independently.

1888 }

1889

1890 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));

1891 }

1892

1893

1894 return Builder.CreateBitCast(Res, ResultTy, "cast");

1895}

1896

1897

1898

1900 unsigned Shift) {

1902 unsigned NumElts = ResultTy->getNumElements() * 8;

1903

1904

1906 Op = Builder.CreateBitCast(Op, VecTy, "cast");

1907

1908

1910

1911

1912

1913 if (Shift < 16) { // Shifts of 16 or more skip the shuffle, so Res keeps the value it was given on a line not shown in this listing.

1914 int Idxs[64];

1915 // Build the shuffle mask one 16-element chunk at a time; l is the first element of the current chunk.

1916 for (unsigned l = 0; l != NumElts; l += 16)

1917 for (unsigned i = 0; i != 16; ++i) {

1918 unsigned Idx = i + Shift; // While this stays below 16 it selects element (i + Shift) of the first shuffle operand (Op).

1919 if (Idx >= 16)

1920 Idx += NumElts - 16; // Past the end of the chunk: becomes NumElts + (i + Shift - 16), an index into the second shuffle operand (Res), assuming both operands have NumElts elements.

1921 Idxs[l + i] = Idx + l; // Offset by the chunk base so each 16-element chunk is handled independently.

1922 }

1923

1924 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));

1925 }

1926

1927

1928 return Builder.CreateBitCast(Res, ResultTy, "cast");

1929}

1930

1932 unsigned NumElts) {

1935 Builder.getInt1Ty(), cast(Mask->getType())->getBitWidth());

1936 Mask = Builder.CreateBitCast(Mask, MaskTy);

1937

1938

1939

1940 if (NumElts <= 4) { // Narrow case: keep only the first NumElts elements of the bitcast mask vector.

1941 int Indices[4];

1942 for (unsigned i = 0; i != NumElts; ++i)

1943 Indices[i] = i; // Identity indices 0 .. NumElts-1.

1944 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts), // Both operands are Mask; every index is below NumElts, so only the leading elements are selected.

1945 "extract");

1946 }

1947

1948 return Mask;

1949}

1950

1953

1955 if (C->isAllOnesValue())

1956 return Op0;

1957

1960 return Builder.CreateSelect(Mask, Op0, Op1);

1961}

1962

1965

1967 if (C->isAllOnesValue())

1968 return Op0;

1969

1971 Mask->getType()->getIntegerBitWidth());

1972 Mask = Builder.CreateBitCast(Mask, MaskTy);

1973 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);

1974 return Builder.CreateSelect(Mask, Op0, Op1);

1975}

1976

1977

1978

1979

1983 bool IsVALIGN) {

1985

1987 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");

1988 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");

1990

1991

1992 if (IsVALIGN)

1993 ShiftVal &= (NumElts - 1);

1994

1995

1996

1997 if (ShiftVal >= 32)

1999

2000

2001

2002 if (ShiftVal > 16) {

2003 ShiftVal -= 16;

2004 Op1 = Op0;

2006 }

2007

2008 int Indices[64];

2009

2010 for (unsigned l = 0; l < NumElts; l += 16) {

2011 for (unsigned i = 0; i != 16; ++i) {

2012 unsigned Idx = ShiftVal + i;

2013 if (!IsVALIGN && Idx >= 16)

2014 Idx += NumElts - 16;

2015 Indices[l + i] = Idx + l;

2016 }

2017 }

2018

2019 Value *Align = Builder.CreateShuffleVector(

2020 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

2021

2023}

2024

2026 bool ZeroMask, bool IndexForm) {

2029 unsigned EltWidth = Ty->getScalarSizeInBits();

2030 bool IsFloat = Ty->isFPOrFPVectorTy();

2032 if (VecWidth == 128 && EltWidth == 32 && IsFloat)

2033 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;

2034 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)

2035 IID = Intrinsic::x86_avx512_vpermi2var_d_128;

2036 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)

2037 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;

2038 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)

2039 IID = Intrinsic::x86_avx512_vpermi2var_q_128;

2040 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)

2041 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;

2042 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)

2043 IID = Intrinsic::x86_avx512_vpermi2var_d_256;

2044 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)

2045 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;

2046 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)

2047 IID = Intrinsic::x86_avx512_vpermi2var_q_256;

2048 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)

2049 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;

2050 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)

2051 IID = Intrinsic::x86_avx512_vpermi2var_d_512;

2052 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)

2053 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;

2054 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)

2055 IID = Intrinsic::x86_avx512_vpermi2var_q_512;

2056 else if (VecWidth == 128 && EltWidth == 16)

2057 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;

2058 else if (VecWidth == 256 && EltWidth == 16)

2059 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;

2060 else if (VecWidth == 512 && EltWidth == 16)

2061 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;

2062 else if (VecWidth == 128 && EltWidth == 8)

2063 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;

2064 else if (VecWidth == 256 && EltWidth == 8)

2065 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;

2066 else if (VecWidth == 512 && EltWidth == 8)

2067 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;

2068 else

2070

2073

2074

2075 if (!IndexForm)

2077

2078 Value *V = Builder.CreateIntrinsic(IID, Args);

2081 Ty);

2083}

2084

2090 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

2091

2092 if (CI.arg_size() == 4) {

2095 Res = emitX86Select(Builder, Mask, Res, VecSrc);

2096 }

2097 return Res;

2098}

2099

2101 bool IsRotateRight) {

2105

2106

2107

2108

2109 if (Amt->getType() != Ty) {

2111 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);

2112 Amt = Builder.CreateVectorSplat(NumElts, Amt);

2113 }

2114

2115 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;

2116 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

2117

2118 if (CI.arg_size() == 4) {

2121 Res = emitX86Select(Builder, Mask, Res, VecSrc);

2122 }

2123 return Res;

2124}

2125

2127 bool IsSigned) {

2131

2133 switch (Imm) {

2134 case 0x0:

2136 break;

2137 case 0x1:

2139 break;

2140 case 0x2:

2142 break;

2143 case 0x3:

2145 break;

2146 case 0x4:

2148 break;

2149 case 0x5:

2151 break;

2152 case 0x6:

2154 case 0x7:

2156 default:

2158 }

2159

2160 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);

2161 Value *Ext = Builder.CreateSExt(Cmp, Ty);

2162 return Ext;

2163}

2164

2166 bool IsShiftRight, bool ZeroMask) {

2171

2172 if (IsShiftRight)

2174

2175

2176

2177

2178 if (Amt->getType() != Ty) {

2180 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);

2181 Amt = Builder.CreateVectorSplat(NumElts, Amt);

2182 }

2183

2184 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;

2185 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

2186

2187 unsigned NumArgs = CI.arg_size();

2188 if (NumArgs >= 4) {

2193 Res = emitX86Select(Builder, Mask, Res, VecSrc);

2194 }

2195 return Res;

2196}

2197

2200 const Align Alignment =

2202 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)

2204

2205

2207 if (C->isAllOnesValue())

2208 return Builder.CreateAlignedStore(Data, Ptr, Alignment);

2209

2210

2213 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);

2214}

2215

2219 const Align Alignment =

2223 8)

2225

2226

2228 if (C->isAllOnesValue())

2229 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

2230

2231

2234 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);

2235}

2236

2240 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,

2241 {Op0, Builder.getInt1(false)});

2244 return Res;

2245}

2246

2249

2250

2253

2254 if (IsSigned) {

2255 // Shift each operand left by 32 and arithmetic-shift it back by 32, replicating bit 31 into the bits above it (sign extension of the low 32 bits - assumes 64-bit elements, TODO confirm against the lines that define Ty).

2256 Constant *ShiftAmt = ConstantInt::get(Ty, 32);

2257 LHS = Builder.CreateShl(LHS, ShiftAmt);

2258 LHS = Builder.CreateAShr(LHS, ShiftAmt);

2259 RHS = Builder.CreateShl(RHS, ShiftAmt);

2260 RHS = Builder.CreateAShr(RHS, ShiftAmt);

2261 } else {

2262 // Unsigned form: AND with 0xffffffff keeps only the low 32 bits of each operand element.

2263 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);

2264 LHS = Builder.CreateAnd(LHS, Mask);

2265 RHS = Builder.CreateAnd(RHS, Mask);

2266 }

2267

2268 Value *Res = Builder.CreateMul(LHS, RHS);

2269

2272

2273 return Res;

2274}

2275

2276

2280 if (Mask) { // A null Mask means there is nothing to apply.

2282 if (!C || !C->isAllOnesValue()) // C is declared on a line missing from this listing and may be null; test it before dereferencing, and skip the AND only when C is non-null and all-ones.

2283 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));

2284 }

2285

2286 if (NumElts < 8) {

2287 int Indices[8];

2288 for (unsigned i = 0; i != NumElts; ++i)

2289 Indices[i] = i;

2290 for (unsigned i = NumElts; i != 8; ++i)

2291 Indices[i] = NumElts + i % NumElts;

2292 Vec = Builder.CreateShuffleVector(Vec,

2294 Indices);

2295 }

2296 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));

2297}

2298

2300 unsigned CC, bool Signed) {

2303

2305 if (CC == 3) {

2308 } else if (CC == 7) {

2311 } else {

2313 switch (CC) {

2321 }

2322 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));

2323 }

2324

2326

2328}

2329

2330

2337

2343

2344 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));

2345 Value* Cmp = Builder.CreateIsNotNull(AndNode);

2346 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);

2347 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);

2348 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);

2349 return Builder.CreateInsertElement(A, Select, (uint64_t)0);

2350}

2351

2357 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");

2358}

2359

2360

2363 Name = Name.substr(12);

2364

2368 if (Name.starts_with("max.p")) {

2369 if (VecWidth == 128 && EltWidth == 32)

2370 IID = Intrinsic::x86_sse_max_ps;

2371 else if (VecWidth == 128 && EltWidth == 64)

2372 IID = Intrinsic::x86_sse2_max_pd;

2373 else if (VecWidth == 256 && EltWidth == 32)

2374 IID = Intrinsic::x86_avx_max_ps_256;

2375 else if (VecWidth == 256 && EltWidth == 64)

2376 IID = Intrinsic::x86_avx_max_pd_256;

2377 else

2379 } else if (Name.starts_with("min.p")) {

2380 if (VecWidth == 128 && EltWidth == 32)

2381 IID = Intrinsic::x86_sse_min_ps;

2382 else if (VecWidth == 128 && EltWidth == 64)

2383 IID = Intrinsic::x86_sse2_min_pd;

2384 else if (VecWidth == 256 && EltWidth == 32)

2385 IID = Intrinsic::x86_avx_min_ps_256;

2386 else if (VecWidth == 256 && EltWidth == 64)

2387 IID = Intrinsic::x86_avx_min_pd_256;

2388 else

2390 } else if (Name.starts_with("pshuf.b.")) {

2391 if (VecWidth == 128)

2392 IID = Intrinsic::x86_ssse3_pshuf_b_128;

2393 else if (VecWidth == 256)

2394 IID = Intrinsic::x86_avx2_pshuf_b;

2395 else if (VecWidth == 512)

2396 IID = Intrinsic::x86_avx512_pshuf_b_512;

2397 else

2399 } else if (Name.starts_with("pmul.hr.sw.")) {

2400 if (VecWidth == 128)

2401 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;

2402 else if (VecWidth == 256)

2403 IID = Intrinsic::x86_avx2_pmul_hr_sw;

2404 else if (VecWidth == 512)

2405 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;

2406 else

2408 } else if (Name.starts_with("pmulh.w.")) {

2409 if (VecWidth == 128)

2410 IID = Intrinsic::x86_sse2_pmulh_w;

2411 else if (VecWidth == 256)

2412 IID = Intrinsic::x86_avx2_pmulh_w;

2413 else if (VecWidth == 512)

2414 IID = Intrinsic::x86_avx512_pmulh_w_512;

2415 else

2417 } else if (Name.starts_with("pmulhu.w.")) {

2418 if (VecWidth == 128)

2419 IID = Intrinsic::x86_sse2_pmulhu_w;

2420 else if (VecWidth == 256)

2421 IID = Intrinsic::x86_avx2_pmulhu_w;

2422 else if (VecWidth == 512)

2423 IID = Intrinsic::x86_avx512_pmulhu_w_512;

2424 else

2426 } else if (Name.starts_with("pmaddw.d.")) {

2427 if (VecWidth == 128)

2428 IID = Intrinsic::x86_sse2_pmadd_wd;

2429 else if (VecWidth == 256)

2430 IID = Intrinsic::x86_avx2_pmadd_wd;

2431 else if (VecWidth == 512)

2432 IID = Intrinsic::x86_avx512_pmaddw_d_512;

2433 else

2435 } else if (Name.starts_with("pmaddubs.w.")) {

2436 if (VecWidth == 128)

2437 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;

2438 else if (VecWidth == 256)

2439 IID = Intrinsic::x86_avx2_pmadd_ub_sw;

2440 else if (VecWidth == 512)

2441 IID = Intrinsic::x86_avx512_pmaddubs_w_512;

2442 else

2444 } else if (Name.starts_with("packsswb.")) {

2445 if (VecWidth == 128)

2446 IID = Intrinsic::x86_sse2_packsswb_128;

2447 else if (VecWidth == 256)

2448 IID = Intrinsic::x86_avx2_packsswb;

2449 else if (VecWidth == 512)

2450 IID = Intrinsic::x86_avx512_packsswb_512;

2451 else

2453 } else if (Name.starts_with("packssdw.")) {

2454 if (VecWidth == 128)

2455 IID = Intrinsic::x86_sse2_packssdw_128;

2456 else if (VecWidth == 256)

2457 IID = Intrinsic::x86_avx2_packssdw;

2458 else if (VecWidth == 512)

2459 IID = Intrinsic::x86_avx512_packssdw_512;

2460 else

2462 } else if (Name.starts_with("packuswb.")) {

2463 if (VecWidth == 128)

2464 IID = Intrinsic::x86_sse2_packuswb_128;

2465 else if (VecWidth == 256)

2466 IID = Intrinsic::x86_avx2_packuswb;

2467 else if (VecWidth == 512)

2468 IID = Intrinsic::x86_avx512_packuswb_512;

2469 else

2471 } else if (Name.starts_with("packusdw.")) {

2472 if (VecWidth == 128)

2473 IID = Intrinsic::x86_sse41_packusdw;

2474 else if (VecWidth == 256)

2475 IID = Intrinsic::x86_avx2_packusdw;

2476 else if (VecWidth == 512)

2477 IID = Intrinsic::x86_avx512_packusdw_512;

2478 else

2480 } else if (Name.starts_with("vpermilvar.")) {

2481 if (VecWidth == 128 && EltWidth == 32)

2482 IID = Intrinsic::x86_avx_vpermilvar_ps;

2483 else if (VecWidth == 128 && EltWidth == 64)

2484 IID = Intrinsic::x86_avx_vpermilvar_pd;

2485 else if (VecWidth == 256 && EltWidth == 32)

2486 IID = Intrinsic::x86_avx_vpermilvar_ps_256;

2487 else if (VecWidth == 256 && EltWidth == 64)

2488 IID = Intrinsic::x86_avx_vpermilvar_pd_256;

2489 else if (VecWidth == 512 && EltWidth == 32)

2490 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;

2491 else if (VecWidth == 512 && EltWidth == 64)

2492 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;

2493 else

2495 } else if (Name == "cvtpd2dq.256") {

2496 IID = Intrinsic::x86_avx_cvt_pd2dq_256;

2497 } else if (Name == "cvtpd2ps.256") {

2498 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;

2499 } else if (Name == "cvttpd2dq.256") {

2500 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;

2501 } else if (Name == "cvttps2dq.128") {

2502 IID = Intrinsic::x86_sse2_cvttps2dq;

2503 } else if (Name == "cvttps2dq.256") {

2504 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;

2505 } else if (Name.starts_with("permvar.")) {

2507 if (VecWidth == 256 && EltWidth == 32 && IsFloat)

2508 IID = Intrinsic::x86_avx2_permps;

2509 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)

2510 IID = Intrinsic::x86_avx2_permd;

2511 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)

2512 IID = Intrinsic::x86_avx512_permvar_df_256;

2513 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)

2514 IID = Intrinsic::x86_avx512_permvar_di_256;

2515 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)

2516 IID = Intrinsic::x86_avx512_permvar_sf_512;

2517 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)

2518 IID = Intrinsic::x86_avx512_permvar_si_512;

2519 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)

2520 IID = Intrinsic::x86_avx512_permvar_df_512;

2521 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)

2522 IID = Intrinsic::x86_avx512_permvar_di_512;

2523 else if (VecWidth == 128 && EltWidth == 16)

2524 IID = Intrinsic::x86_avx512_permvar_hi_128;

2525 else if (VecWidth == 256 && EltWidth == 16)

2526 IID = Intrinsic::x86_avx512_permvar_hi_256;

2527 else if (VecWidth == 512 && EltWidth == 16)

2528 IID = Intrinsic::x86_avx512_permvar_hi_512;

2529 else if (VecWidth == 128 && EltWidth == 8)

2530 IID = Intrinsic::x86_avx512_permvar_qi_128;

2531 else if (VecWidth == 256 && EltWidth == 8)

2532 IID = Intrinsic::x86_avx512_permvar_qi_256;

2533 else if (VecWidth == 512 && EltWidth == 8)

2534 IID = Intrinsic::x86_avx512_permvar_qi_512;

2535 else

2537 } else if (Name.starts_with("dbpsadbw.")) {

2538 if (VecWidth == 128)

2539 IID = Intrinsic::x86_avx512_dbpsadbw_128;

2540 else if (VecWidth == 256)

2541 IID = Intrinsic::x86_avx512_dbpsadbw_256;

2542 else if (VecWidth == 512)

2543 IID = Intrinsic::x86_avx512_dbpsadbw_512;

2544 else

2546 } else if (Name.starts_with("pmultishift.qb.")) {

2547 if (VecWidth == 128)

2548 IID = Intrinsic::x86_avx512_pmultishift_qb_128;

2549 else if (VecWidth == 256)

2550 IID = Intrinsic::x86_avx512_pmultishift_qb_256;

2551 else if (VecWidth == 512)

2552 IID = Intrinsic::x86_avx512_pmultishift_qb_512;

2553 else

2555 } else if (Name.starts_with("conflict.")) {

2556 if (Name[9] == 'd' && VecWidth == 128)

2557 IID = Intrinsic::x86_avx512_conflict_d_128;

2558 else if (Name[9] == 'd' && VecWidth == 256)

2559 IID = Intrinsic::x86_avx512_conflict_d_256;

2560 else if (Name[9] == 'd' && VecWidth == 512)

2561 IID = Intrinsic::x86_avx512_conflict_d_512;

2562 else if (Name[9] == 'q' && VecWidth == 128)

2563 IID = Intrinsic::x86_avx512_conflict_q_128;

2564 else if (Name[9] == 'q' && VecWidth == 256)

2565 IID = Intrinsic::x86_avx512_conflict_q_256;

2566 else if (Name[9] == 'q' && VecWidth == 512)

2567 IID = Intrinsic::x86_avx512_conflict_q_512;

2568 else

2570 } else if (Name.starts_with("pavg.")) {

2571 if (Name[5] == 'b' && VecWidth == 128)

2572 IID = Intrinsic::x86_sse2_pavg_b;

2573 else if (Name[5] == 'b' && VecWidth == 256)

2574 IID = Intrinsic::x86_avx2_pavg_b;

2575 else if (Name[5] == 'b' && VecWidth == 512)

2576 IID = Intrinsic::x86_avx512_pavg_b_512;

2577 else if (Name[5] == 'w' && VecWidth == 128)

2578 IID = Intrinsic::x86_sse2_pavg_w;

2579 else if (Name[5] == 'w' && VecWidth == 256)

2580 IID = Intrinsic::x86_avx2_pavg_w;

2581 else if (Name[5] == 'w' && VecWidth == 512)

2582 IID = Intrinsic::x86_avx512_pavg_w_512;

2583 else

2585 } else

2586 return false;

2587

2589 Args.pop_back();

2590 Args.pop_back();

2591 Rep = Builder.CreateIntrinsic(IID, Args);

2592 unsigned NumArgs = CI.arg_size();

2595 return true;

2596}

2597

2598

2599

2601 size_t Pos;

2602 if (AsmStr->find("mov\tfp") == 0 && // The string must begin with "mov\tfp" ...

2603 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos && // ... mention objc_retainAutoreleaseReturnValue somewhere ...

2604 (Pos = AsmStr->find("# marker")) != std::string::npos) { // ... and contain "# marker"; Pos records where that text starts.

2605 AsmStr->replace(Pos, 1, ";"); // Replace the single character at Pos (the '#') with ';'.

2606 }

2607}

2608

2611 Value *Rep = nullptr;

2612

2613 if (Name == "abs.i" || Name == "abs.ll") {

2615 Value *Neg = Builder.CreateNeg(Arg, "neg");

2616 Value *Cmp = Builder.CreateICmpSGE(

2618 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");

2619 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {

2620 Type *Ty = (Name == "abs.bf16")

2624 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);

2625 Rep = Builder.CreateBitCast(Abs, CI->getType());

2626 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {

2627 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz

2628 : Intrinsic::nvvm_fabs;

2629 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));

2630 } else if (Name.consume_front("ex2.approx.")) {

2631

2632 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz

2633 : Intrinsic::nvvm_ex2_approx;

2634 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));

2635 } else if (Name.starts_with("atomic.load.add.f32.p") ||

2636 Name.starts_with("atomic.load.add.f64.p")) {

2641 } else if (Name.starts_with("atomic.load.inc.32.p") ||

2642 Name.starts_with("atomic.load.dec.32.p")) {

2647 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),

2649 } else if (Name.consume_front("max.") &&

2650 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||

2651 Name == "ui" || Name == "ull")) {

2654 Value *Cmp = Name.starts_with("u")

2655 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")

2656 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");

2657 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");

2658 } else if (Name.consume_front("min.") &&

2659 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||

2660 Name == "ui" || Name == "ull")) {

2663 Value *Cmp = Name.starts_with("u")

2664 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")

2665 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");

2666 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");

2667 } else if (Name == "clz.ll") {

2668

2670 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},

2671 {Arg, Builder.getFalse()},

2672 nullptr, "ctlz");

2673 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");

2674 } else if (Name == "popc.ll") {

2675

2676

2678 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},

2679 Arg, nullptr, "ctpop");

2680 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");

2681 } else if (Name == "h2f") {

2682 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,

2684 nullptr, "h2f");

2685 } else if (Name.consume_front("bitcast.") &&

2686 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||

2687 Name == "d2ll")) {

2689 } else if (Name == "rotate.b32") {

2692 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,

2693 {Arg, Arg, ShiftAmt});

2694 } else if (Name == "rotate.b64") {

2697 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);

2698 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,

2699 {Arg, Arg, ZExtShiftAmt});

2700 } else if (Name == "rotate.right.b64") {

2703 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);

2704 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,

2705 {Arg, Arg, ZExtShiftAmt});

2706 } else if (Name == "swap.lo.hi.b64") {

2709 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,

2710 {Arg, Arg, Builder.getInt64(32)});

2711 } else if ((Name.consume_front("ptr.gen.to.") &&

2714 Name.starts_with(".to.gen"))) {

2716 } else if (Name.consume_front("ldg.global")) {

2719

2720 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));

2721 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);

2723 LD->setMetadata(LLVMContext::MD_invariant_load, MD);

2724 return LD;

2725 } else if (Name == "tanh.approx.f32") {

2726

2729 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),

2730 FMF);

2731 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {

2733 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);

2734 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,

2735 {}, {Arg});

2736 } else if (Name == "barrier") {

2737 Rep = Builder.CreateIntrinsic(

2738 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},

2740 } else if (Name == "barrier.sync") {

2741 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},

2743 } else if (Name == "barrier.sync.cnt") {

2744 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},

2746 } else {

2749 F->getReturnType()->getScalarType()->isBFloatTy()) {

2753 for (size_t I = 0; I < NewFn->arg_size(); ++I) {

2757 Args.push_back(

2759 ? Builder.CreateBitCast(Arg, NewType)

2760 : Arg);

2761 }

2762 Rep = Builder.CreateCall(NewFn, Args);

2763 if (F->getReturnType()->isIntegerTy())

2764 Rep = Builder.CreateBitCast(Rep, F->getReturnType());

2765 }

2766 }

2767

2768 return Rep;

2769}

2770

2774 Value *Rep = nullptr;

2775

2776 if (Name.starts_with("sse4a.movnt.")) {

2781

2784

2785

2786

2787 Value *Extract =

2788 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

2789

2790 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));

2791 SI->setMetadata(LLVMContext::MD_nontemporal, Node);

2792 } else if (Name.starts_with("avx.movnt.") ||

2793 Name.starts_with("avx512.storent.")) {

2798

2801

2802 StoreInst *SI = Builder.CreateAlignedStore(

2803 Arg1, Arg0,

2805 SI->setMetadata(LLVMContext::MD_nontemporal, Node);

2806 } else if (Name == "sse2.storel.dq") {

2809

2811 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

2812 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);

2813 Builder.CreateAlignedStore(Elt, Arg0, Align(1));

2814 } else if (Name.starts_with("sse.storeu.") ||

2815 Name.starts_with("sse2.storeu.") ||

2816 Name.starts_with("avx.storeu.")) {

2819 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));

2820 } else if (Name == "avx512.mask.store.ss") {

2821 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));

2823 Mask, false);

2824 } else if (Name.starts_with("avx512.mask.store")) {

2825

2826 bool Aligned = Name[17] != 'u';

2829 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {

2830

2831

2832 bool CmpEq = Name[9] == 'e';

2835 Rep = Builder.CreateSExt(Rep, CI->getType(), "");

2836 } else if (Name.starts_with("avx512.broadcastm")) {

2842 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);

2843 Rep = Builder.CreateVectorSplat(NumElts, Rep);

2844 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {

2846 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);

2847 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);

2848 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);

2849 } else if (Name.starts_with("avx.sqrt.p") ||

2850 Name.starts_with("sse2.sqrt.p") ||

2851 Name.starts_with("sse.sqrt.p")) {

2852 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),

2853 {CI->getArgOperand(0)});

2854 } else if (Name.starts_with("avx512.mask.sqrt.p")) {

2858 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512

2859 : Intrinsic::x86_avx512_sqrt_pd_512;

2860

2862 Rep = Builder.CreateIntrinsic(IID, Args);

2863 } else {

2864 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),

2865 {CI->getArgOperand(0)});

2866 }

2867 Rep =

2869 } else if (Name.starts_with("avx512.ptestm") ||

2870 Name.starts_with("avx512.ptestnm")) {

2874 Rep = Builder.CreateAnd(Op0, Op1);

2880 Rep = Builder.CreateICmp(Pred, Rep, Zero);

2882 } else if (Name.starts_with("avx512.mask.pbroadcast")) {

2884 ->getNumElements();

2885 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));

2886 Rep =

2888 } else if (Name.starts_with("avx512.kunpck")) {

2892 int Indices[64];

2893 for (unsigned i = 0; i != NumElts; ++i)

2894 Indices[i] = i;

2895

2896

2897

2898 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));

2899 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));

2900

2901

2902 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));

2903 Rep = Builder.CreateBitCast(Rep, CI->getType());

2904 } else if (Name == "avx512.kand.w") {

2907 Rep = Builder.CreateAnd(LHS, RHS);

2908 Rep = Builder.CreateBitCast(Rep, CI->getType());

2909 } else if (Name == "avx512.kandn.w") {

2912 LHS = Builder.CreateNot(LHS);

2913 Rep = Builder.CreateAnd(LHS, RHS);

2914 Rep = Builder.CreateBitCast(Rep, CI->getType());

2915 } else if (Name == "avx512.kor.w") {

2918 Rep = Builder.CreateOr(LHS, RHS);

2919 Rep = Builder.CreateBitCast(Rep, CI->getType());

2920 } else if (Name == "avx512.kxor.w") {

2923 Rep = Builder.CreateXor(LHS, RHS);

2924 Rep = Builder.CreateBitCast(Rep, CI->getType());

2925 } else if (Name == "avx512.kxnor.w") {

2928 LHS = Builder.CreateNot(LHS);

2929 Rep = Builder.CreateXor(LHS, RHS);

2930 Rep = Builder.CreateBitCast(Rep, CI->getType());

2931 } else if (Name == "avx512.knot.w") {

2933 Rep = Builder.CreateNot(Rep);

2934 Rep = Builder.CreateBitCast(Rep, CI->getType());

2935 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {

2938 Rep = Builder.CreateOr(LHS, RHS);

2939 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());

2941 if (Name[14] == 'c')

2943 else

2945 Rep = Builder.CreateICmpEQ(Rep, C);

2946 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());

2947 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||

2948 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||

2949 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||

2950 Name == "sse.div.ss" || Name == "sse2.div.sd") {

2953 ConstantInt::get(I32Ty, 0));

2955 ConstantInt::get(I32Ty, 0));

2957 if (Name.contains(".add."))

2958 EltOp = Builder.CreateFAdd(Elt0, Elt1);

2959 else if (Name.contains(".sub."))

2960 EltOp = Builder.CreateFSub(Elt0, Elt1);

2961 else if (Name.contains(".mul."))

2962 EltOp = Builder.CreateFMul(Elt0, Elt1);

2963 else

2964 EltOp = Builder.CreateFDiv(Elt0, Elt1);

2965 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,

2966 ConstantInt::get(I32Ty, 0));

2967 } else if (Name.starts_with("avx512.mask.pcmp")) {

2968

2969 bool CmpEq = Name[16] == 'e';

2971 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {

2975 switch (VecWidth) {

2976 default:

2978 case 128:

2979 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;

2980 break;

2981 case 256:

2982 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;

2983 break;

2984 case 512:

2985 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;

2986 break;

2987 }

2988

2989 Rep =

2992 } else if (Name.starts_with("avx512.mask.fpclass.p")) {

2997 if (VecWidth == 128 && EltWidth == 32)

2998 IID = Intrinsic::x86_avx512_fpclass_ps_128;

2999 else if (VecWidth == 256 && EltWidth == 32)

3000 IID = Intrinsic::x86_avx512_fpclass_ps_256;

3001 else if (VecWidth == 512 && EltWidth == 32)

3002 IID = Intrinsic::x86_avx512_fpclass_ps_512;

3003 else if (VecWidth == 128 && EltWidth == 64)

3004 IID = Intrinsic::x86_avx512_fpclass_pd_128;

3005 else if (VecWidth == 256 && EltWidth == 64)

3006 IID = Intrinsic::x86_avx512_fpclass_pd_256;

3007 else if (VecWidth == 512 && EltWidth == 64)

3008 IID = Intrinsic::x86_avx512_fpclass_pd_512;

3009 else

3011

3012 Rep =

3015 } else if (Name.starts_with("avx512.cmp.p")) {

3017 Type *OpTy = Args[0]->getType();

3021 if (VecWidth == 128 && EltWidth == 32)

3022 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;

3023 else if (VecWidth == 256 && EltWidth == 32)

3024 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;

3025 else if (VecWidth == 512 && EltWidth == 32)

3026 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;

3027 else if (VecWidth == 128 && EltWidth == 64)

3028 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;

3029 else if (VecWidth == 256 && EltWidth == 64)

3030 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;

3031 else if (VecWidth == 512 && EltWidth == 64)

3032 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;

3033 else

3035

3037 if (VecWidth == 512)

3039 Args.push_back(Mask);

3040

3041 Rep = Builder.CreateIntrinsic(IID, Args);

3042 } else if (Name.starts_with("avx512.mask.cmp.")) {

3043

3046 } else if (Name.starts_with("avx512.mask.ucmp.")) {

3049 } else if (Name.starts_with("avx512.cvtb2mask.") ||

3050 Name.starts_with("avx512.cvtw2mask.") ||

3051 Name.starts_with("avx512.cvtd2mask.") ||

3052 Name.starts_with("avx512.cvtq2mask.")) {

3057 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||

3058 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||

3059 Name.starts_with("avx512.mask.pabs")) {

3061 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||

3062 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||

3063 Name.starts_with("avx512.mask.pmaxs")) {

3065 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||

3066 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||

3067 Name.starts_with("avx512.mask.pmaxu")) {

3069 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||

3070 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||

3071 Name.starts_with("avx512.mask.pmins")) {

3073 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||

3074 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||

3075 Name.starts_with("avx512.mask.pminu")) {

3077 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||

3078 Name == "avx512.pmulu.dq.512" ||

3079 Name.starts_with("avx512.mask.pmulu.dq.")) {

3080 Rep = upgradePMULDQ(Builder, *CI, false);

3081 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||

3082 Name == "avx512.pmul.dq.512" ||

3083 Name.starts_with("avx512.mask.pmul.dq.")) {

3084 Rep = upgradePMULDQ(Builder, *CI, true);

3085 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||

3086 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {

3087 Rep =

3091 } else if (Name == "avx512.cvtusi2sd") {

3092 Rep =

3096 } else if (Name == "sse2.cvtss2sd") {

3098 Rep = Builder.CreateFPExt(

3101 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||

3102 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||

3103 Name.starts_with("avx512.mask.cvtdq2pd.") ||

3104 Name.starts_with("avx512.mask.cvtudq2pd.") ||

3105 Name.starts_with("avx512.mask.cvtdq2ps.") ||

3106 Name.starts_with("avx512.mask.cvtudq2ps.") ||

3107 Name.starts_with("avx512.mask.cvtqq2pd.") ||

3108 Name.starts_with("avx512.mask.cvtuqq2pd.") ||

3109 Name == "avx512.mask.cvtqq2ps.256" ||

3110 Name == "avx512.mask.cvtqq2ps.512" ||

3111 Name == "avx512.mask.cvtuqq2ps.256" ||

3112 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||

3113 Name == "avx.cvt.ps2.pd.256" ||

3114 Name == "avx512.mask.cvtps2pd.128" ||

3115 Name == "avx512.mask.cvtps2pd.256") {

3119

3120 unsigned NumDstElts = DstTy->getNumElements();

3122 assert(NumDstElts == 2 && "Unexpected vector size");

3123 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef{0, 1});

3124 }

3125

3126 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();

3127 bool IsUnsigned = Name.contains("cvtu");

3128 if (IsPS2PD)

3129 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");

3130 else if (CI->arg_size() == 4 &&

3133 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round

3134 : Intrinsic::x86_avx512_sitofp_round;

3135 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},

3137 } else {

3138 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")

3139 : Builder.CreateSIToFP(Rep, DstTy, "cvt");

3140 }

3141

3145 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||

3146 Name.starts_with("vcvtph2ps.")) {

3150 unsigned NumDstElts = DstTy->getNumElements();

3151 if (NumDstElts != SrcTy->getNumElements()) {

3152 assert(NumDstElts == 4 && "Unexpected vector size");

3153 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef{0, 1, 2, 3});

3154 }

3155 Rep = Builder.CreateBitCast(

3157 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");

3161 } else if (Name.starts_with("avx512.mask.load")) {

3162

3163 bool Aligned = Name[16] != 'u';

3166 } else if (Name.starts_with("avx512.mask.expand.load.")) {

3169 ResultTy->getNumElements());

3170

3171 Rep = Builder.CreateIntrinsic(

3172 Intrinsic::masked_expandload, ResultTy,

3174 } else if (Name.starts_with("avx512.mask.compress.store.")) {

3176 Value *MaskVec =

3179

3180 Rep = Builder.CreateIntrinsic(

3181 Intrinsic::masked_compressstore, ResultTy,

3183 } else if (Name.starts_with("avx512.mask.compress.") ||

3184 Name.starts_with("avx512.mask.expand.")) {

3186

3188 ResultTy->getNumElements());

3189

3190 bool IsCompress = Name[12] == 'c';

3191 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress

3192 : Intrinsic::x86_avx512_mask_expand;

3193 Rep = Builder.CreateIntrinsic(

3195 } else if (Name.starts_with("xop.vpcom")) {

3196 bool IsSigned;

3197 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||

3198 Name.ends_with("uq"))

3199 IsSigned = false;

3200 else if (Name.ends_with("b") || Name.ends_with("w") ||

3201 Name.ends_with("d") || Name.ends_with("q"))

3202 IsSigned = true;

3203 else

3205

3206 unsigned Imm;

3209 } else {

3210 Name = Name.substr(9);

3211 if (Name.starts_with("lt"))

3212 Imm = 0;

3213 else if (Name.starts_with("le"))

3214 Imm = 1;

3215 else if (Name.starts_with("gt"))

3216 Imm = 2;

3217 else if (Name.starts_with("ge"))

3218 Imm = 3;

3219 else if (Name.starts_with("eq"))

3220 Imm = 4;

3221 else if (Name.starts_with("ne"))

3222 Imm = 5;

3223 else if (Name.starts_with("false"))

3224 Imm = 6;

3225 else if (Name.starts_with("true"))

3226 Imm = 7;

3227 else

3229 }

3230

3232 } else if (Name.starts_with("xop.vpcmov")) {

3234 Value *NotSel = Builder.CreateNot(Sel);

3237 Rep = Builder.CreateOr(Sel0, Sel1);

3238 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||

3239 Name.starts_with("avx512.mask.prol")) {

3241 } else if (Name.starts_with("avx512.pror") ||

3242 Name.starts_with("avx512.mask.pror")) {

3244 } else if (Name.starts_with("avx512.vpshld.") ||

3245 Name.starts_with("avx512.mask.vpshld") ||

3246 Name.starts_with("avx512.maskz.vpshld")) {

3247 bool ZeroMask = Name[11] == 'z';

3249 } else if (Name.starts_with("avx512.vpshrd.") ||

3250 Name.starts_with("avx512.mask.vpshrd") ||

3251 Name.starts_with("avx512.maskz.vpshrd")) {

3252 bool ZeroMask = Name[11] == 'z';

3254 } else if (Name == "sse42.crc32.64.8") {

3257 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,

3259 Rep = Builder.CreateZExt(Rep, CI->getType(), "");

3260 } else if (Name.starts_with("avx.vbroadcast.s") ||

3261 Name.starts_with("avx512.vbroadcast.s")) {

3262

3264 Type *EltTy = VecTy->getElementType();

3265 unsigned EltNum = VecTy->getNumElements();

3269 for (unsigned I = 0; I < EltNum; ++I)

3270 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));

3271 } else if (Name.starts_with("sse41.pmovsx") ||

3272 Name.starts_with("sse41.pmovzx") ||

3273 Name.starts_with("avx2.pmovsx") ||

3274 Name.starts_with("avx2.pmovzx") ||

3275 Name.starts_with("avx512.mask.pmovsx") ||

3276 Name.starts_with("avx512.mask.pmovzx")) {

3278 unsigned NumDstElts = DstTy->getNumElements();

3279

3280

3282 for (unsigned i = 0; i != NumDstElts; ++i)

3283 ShuffleMask[i] = i;

3284

3285 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);

3286

3287 bool DoSext = Name.contains("pmovsx");

3288 Rep =

3289 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);

3290

3294 } else if (Name == "avx512.mask.pmov.qd.256" ||

3295 Name == "avx512.mask.pmov.qd.512" ||

3296 Name == "avx512.mask.pmov.wb.256" ||

3297 Name == "avx512.mask.pmov.wb.512") {

3299 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);

3300 Rep =

3302 } else if (Name.starts_with("avx.vbroadcastf128") ||

3303 Name == "avx2.vbroadcasti128") {

3304

3309 if (NumSrcElts == 2)

3310 Rep = Builder.CreateShuffleVector(Load, ArrayRef{0, 1, 0, 1});

3311 else

3312 Rep = Builder.CreateShuffleVector(Load,

3314 } else if (Name.starts_with("avx512.mask.shuf.i") ||

3315 Name.starts_with("avx512.mask.shuf.f")) {

3320 unsigned ControlBitsMask = NumLanes - 1;

3321 unsigned NumControlBits = NumLanes / 2;

3323

3324 for (unsigned l = 0; l != NumLanes; ++l) {

3325 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;

3326

3327 if (l >= NumLanes / 2)

3328 LaneMask += NumLanes;

3329 for (unsigned i = 0; i != NumElementsInLane; ++i)

3330 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);

3331 }

3332 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),

3334 Rep =

3336 } else if (Name.starts_with("avx512.mask.broadcastf") ||

3337 Name.starts_with("avx512.mask.broadcasti")) {

3339 ->getNumElements();

3340 unsigned NumDstElts =

3342

3344 for (unsigned i = 0; i != NumDstElts; ++i)

3345 ShuffleMask[i] = i % NumSrcElts;

3346

3347 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),

3349 Rep =

3351 } else if (Name.starts_with("avx2.pbroadcast") ||

3352 Name.starts_with("avx2.vbroadcast") ||

3353 Name.starts_with("avx512.pbroadcast") ||

3354 Name.starts_with("avx512.mask.broadcast.s")) {

3355

3361 Rep = Builder.CreateShuffleVector(Op, M);

3362

3366 } else if (Name.starts_with("sse2.padds.") ||

3367 Name.starts_with("avx2.padds.") ||

3368 Name.starts_with("avx512.padds.") ||

3369 Name.starts_with("avx512.mask.padds.")) {

3371 } else if (Name.starts_with("sse2.psubs.") ||

3372 Name.starts_with("avx2.psubs.") ||

3373 Name.starts_with("avx512.psubs.") ||

3374 Name.starts_with("avx512.mask.psubs.")) {

3376 } else if (Name.starts_with("sse2.paddus.") ||

3377 Name.starts_with("avx2.paddus.") ||

3378 Name.starts_with("avx512.mask.paddus.")) {

3380 } else if (Name.starts_with("sse2.psubus.") ||

3381 Name.starts_with("avx2.psubus.") ||

3382 Name.starts_with("avx512.mask.psubus.")) {

3384 } else if (Name.starts_with("avx512.mask.palignr.")) {

3388 false);

3389 } else if (Name.starts_with("avx512.mask.valign.")) {

3393 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {

3394

3397 Shift / 8);

3398 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {

3399

3402 Shift / 8);

3403 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||

3404 Name == "avx512.psll.dq.512") {

3405

3408 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||

3409 Name == "avx512.psrl.dq.512") {

3410

3413 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||

3414 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||

3415 Name.starts_with("avx2.pblendd.")) {

3420 unsigned NumElts = VecTy->getNumElements();

3421

3423 for (unsigned i = 0; i != NumElts; ++i)

3424 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

3425

3426 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

3427 } else if (Name.starts_with("avx.vinsertf128.") ||

3428 Name == "avx2.vinserti128" ||

3429 Name.starts_with("avx512.mask.insert")) {

3433 unsigned DstNumElts =

3435 unsigned SrcNumElts =

3437 unsigned Scale = DstNumElts / SrcNumElts;

3438

3439

3440 Imm = Imm % Scale;

3441

3442

3444 for (unsigned i = 0; i != SrcNumElts; ++i)

3445 Idxs[i] = i;

3446 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)

3447 Idxs[i] = SrcNumElts;

3448 Rep = Builder.CreateShuffleVector(Op1, Idxs);

3449

3450

3451

3452

3453

3454

3455

3456

3457

3458

3459

3460

3461

3462 for (unsigned i = 0; i != DstNumElts; ++i)

3463 Idxs[i] = i;

3464

3465 for (unsigned i = 0; i != SrcNumElts; ++i)

3466 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;

3467 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

3468

3469

3473 } else if (Name.starts_with("avx.vextractf128.") ||

3474 Name == "avx2.vextracti128" ||

3475 Name.starts_with("avx512.mask.vextract")) {

3478 unsigned DstNumElts =

3480 unsigned SrcNumElts =

3482 unsigned Scale = SrcNumElts / DstNumElts;

3483

3484

3485 Imm = Imm % Scale;

3486

3487

3489 for (unsigned i = 0; i != DstNumElts; ++i) {

3490 Idxs[i] = i + (Imm * DstNumElts);

3491 }

3492 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3493

3494

3498 } else if (Name.starts_with("avx512.mask.perm.df.") ||

3499 Name.starts_with("avx512.mask.perm.di.")) {

3503 unsigned NumElts = VecTy->getNumElements();

3504

3506 for (unsigned i = 0; i != NumElts; ++i)

3507 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

3508

3509 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3510

3514 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {

3515

3516

3517

3518

3519

3520

3521

3522

3524

3526 unsigned HalfSize = NumElts / 2;

3528

3529

3532

3533

3536

3537

3538 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;

3539 for (unsigned i = 0; i < HalfSize; ++i)

3540 ShuffleMask[i] = StartIndex + i;

3541

3542

3543 StartIndex = (Imm & 0x10) ? HalfSize : 0;

3544 for (unsigned i = 0; i < HalfSize; ++i)

3545 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

3546

3547 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);

3548

3549 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||

3550 Name.starts_with("avx512.mask.vpermil.p") ||

3551 Name.starts_with("avx512.mask.pshuf.d.")) {

3555 unsigned NumElts = VecTy->getNumElements();

3556

3557 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();

3558 unsigned IdxMask = ((1 << IdxSize) - 1);

3559

3561

3562

3563

3564 for (unsigned i = 0; i != NumElts; ++i)

3565 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

3566

3567 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3568

3572 } else if (Name == "sse2.pshufl.w" ||

3573 Name.starts_with("avx512.mask.pshufl.w.")) {

3577

3579 for (unsigned l = 0; l != NumElts; l += 8) {

3580 for (unsigned i = 0; i != 4; ++i)

3581 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;

3582 for (unsigned i = 4; i != 8; ++i)

3583 Idxs[i + l] = i + l;

3584 }

3585

3586 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3587

3591 } else if (Name == "sse2.pshufh.w" ||

3592 Name.starts_with("avx512.mask.pshufh.w.")) {

3596

3598 for (unsigned l = 0; l != NumElts; l += 8) {

3599 for (unsigned i = 0; i != 4; ++i)

3600 Idxs[i + l] = i + l;

3601 for (unsigned i = 0; i != 4; ++i)

3602 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;

3603 }

3604

3605 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3606

3610 } else if (Name.starts_with("avx512.mask.shuf.p")) {

3615

3617 unsigned HalfLaneElts = NumLaneElts / 2;

3618

3620 for (unsigned i = 0; i != NumElts; ++i) {

3621

3622 Idxs[i] = i - (i % NumLaneElts);

3623

3624 if ((i % NumLaneElts) >= HalfLaneElts)

3625 Idxs[i] += NumElts;

3626

3627

3628 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);

3629 }

3630

3631 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

3632

3633 Rep =

3635 } else if (Name.starts_with("avx512.mask.movddup") ||

3636 Name.starts_with("avx512.mask.movshdup") ||

3637 Name.starts_with("avx512.mask.movsldup")) {

3641

3642 unsigned Offset = 0;

3643 if (Name.starts_with("avx512.mask.movshdup."))

3645

3647 for (unsigned l = 0; l != NumElts; l += NumLaneElts)

3648 for (unsigned i = 0; i != NumLaneElts; i += 2) {

3649 Idxs[i + l + 0] = i + l + Offset;

3650 Idxs[i + l + 1] = i + l + Offset;

3651 }

3652

3653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

3654

3655 Rep =

3657 } else if (Name.starts_with("avx512.mask.punpckl") ||

3658 Name.starts_with("avx512.mask.unpckl.")) {

3663

3665 for (int l = 0; l != NumElts; l += NumLaneElts)

3666 for (int i = 0; i != NumLaneElts; ++i)

3667 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

3668

3669 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

3670

3671 Rep =

3673 } else if (Name.starts_with("avx512.mask.punpckh") ||

3674 Name.starts_with("avx512.mask.unpckh.")) {

3679

3681 for (int l = 0; l != NumElts; l += NumLaneElts)

3682 for (int i = 0; i != NumLaneElts; ++i)

3683 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

3684

3685 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

3686

3687 Rep =

3689 } else if (Name.starts_with("avx512.mask.and.") ||

3690 Name.starts_with("avx512.mask.pand.")) {

3693 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),

3694 Builder.CreateBitCast(CI->getArgOperand(1), ITy));

3695 Rep = Builder.CreateBitCast(Rep, FTy);

3696 Rep =

3698 } else if (Name.starts_with("avx512.mask.andn.") ||

3699 Name.starts_with("avx512.mask.pandn.")) {

3702 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));

3703 Rep = Builder.CreateAnd(Rep,

3704 Builder.CreateBitCast(CI->getArgOperand(1), ITy));

3705 Rep = Builder.CreateBitCast(Rep, FTy);

3706 Rep =

3708 } else if (Name.starts_with("avx512.mask.or.") ||

3709 Name.starts_with("avx512.mask.por.")) {

3712 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),

3713 Builder.CreateBitCast(CI->getArgOperand(1), ITy));

3714 Rep = Builder.CreateBitCast(Rep, FTy);

3715 Rep =

3717 } else if (Name.starts_with("avx512.mask.xor.") ||

3718 Name.starts_with("avx512.mask.pxor.")) {

3721 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),

3722 Builder.CreateBitCast(CI->getArgOperand(1), ITy));

3723 Rep = Builder.CreateBitCast(Rep, FTy);

3724 Rep =

3726 } else if (Name.starts_with("avx512.mask.padd.")) {

3728 Rep =

3730 } else if (Name.starts_with("avx512.mask.psub.")) {

3732 Rep =

3734 } else if (Name.starts_with("avx512.mask.pmull.")) {

3736 Rep =

3738 } else if (Name.starts_with("avx512.mask.add.p")) {

3739 if (Name.ends_with(".512")) {

3741 if (Name[17] == 's')

3742 IID = Intrinsic::x86_avx512_add_ps_512;

3743 else

3744 IID = Intrinsic::x86_avx512_add_pd_512;

3745

3746 Rep = Builder.CreateIntrinsic(

3747 IID,

3749 } else {

3751 }

3752 Rep =

3754 } else if (Name.starts_with("avx512.mask.div.p")) {

3755 if (Name.ends_with(".512")) {

3757 if (Name[17] == 's')

3758 IID = Intrinsic::x86_avx512_div_ps_512;

3759 else

3760 IID = Intrinsic::x86_avx512_div_pd_512;

3761

3762 Rep = Builder.CreateIntrinsic(

3763 IID,

3765 } else {

3767 }

3768 Rep =

3770 } else if (Name.starts_with("avx512.mask.mul.p")) {

3771 if (Name.ends_with(".512")) {

3773 if (Name[17] == 's')

3774 IID = Intrinsic::x86_avx512_mul_ps_512;

3775 else

3776 IID = Intrinsic::x86_avx512_mul_pd_512;

3777

3778 Rep = Builder.CreateIntrinsic(

3779 IID,

3781 } else {

3783 }

3784 Rep =

3786 } else if (Name.starts_with("avx512.mask.sub.p")) {

3787 if (Name.ends_with(".512")) {

3789 if (Name[17] == 's')

3790 IID = Intrinsic::x86_avx512_sub_ps_512;

3791 else

3792 IID = Intrinsic::x86_avx512_sub_pd_512;

3793

3794 Rep = Builder.CreateIntrinsic(

3795 IID,

3797 } else {

3799 }

3800 Rep =

3802 } else if ((Name.starts_with("avx512.mask.max.p") ||

3803 Name.starts_with("avx512.mask.min.p")) &&

3804 Name.drop_front(18) == ".512") {

3805 bool IsDouble = Name[17] == 'd';

3806 bool IsMin = Name[13] == 'i';

3808 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},

3809 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};

3811

3812 Rep = Builder.CreateIntrinsic(

3813 IID,

3815 Rep =

3817 } else if (Name.starts_with("avx512.mask.lzcnt.")) {

3818 Rep =

3819 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),

3820 {CI->getArgOperand(0), Builder.getInt1(false)});

3821 Rep =

3823 } else if (Name.starts_with("avx512.mask.psll")) {

3824 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');

3825 bool IsVariable = Name[16] == 'v';

3826 char Size = Name[16] == '.' ? Name[17]

3827 : Name[17] == '.' ? Name[18]

3828 : Name[18] == '.' ? Name[19]

3829 : Name[20];

3830

3832 if (IsVariable && Name[17] != '.') {

3833 if (Size == 'd' && Name[17] == '2')

3834 IID = Intrinsic::x86_avx2_psllv_q;

3835 else if (Size == 'd' && Name[17] == '4')

3836 IID = Intrinsic::x86_avx2_psllv_q_256;

3837 else if (Size == 's' && Name[17] == '4')

3838 IID = Intrinsic::x86_avx2_psllv_d;

3839 else if (Size == 's' && Name[17] == '8')

3840 IID = Intrinsic::x86_avx2_psllv_d_256;

3841 else if (Size == 'h' && Name[17] == '8')

3842 IID = Intrinsic::x86_avx512_psllv_w_128;

3843 else if (Size == 'h' && Name[17] == '1')

3844 IID = Intrinsic::x86_avx512_psllv_w_256;

3845 else if (Name[17] == '3' && Name[18] == '2')

3846 IID = Intrinsic::x86_avx512_psllv_w_512;

3847 else

3849 } else if (Name.ends_with(".128")) {

3850 if (Size == 'd')

3851 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d

3852 : Intrinsic::x86_sse2_psll_d;

3853 else if (Size == 'q')

3854 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q

3855 : Intrinsic::x86_sse2_psll_q;

3856 else if (Size == 'w')

3857 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w

3858 : Intrinsic::x86_sse2_psll_w;

3859 else

3861 } else if (Name.ends_with(".256")) {

3862 if (Size == 'd')

3863 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d

3864 : Intrinsic::x86_avx2_psll_d;

3865 else if (Size == 'q')

3866 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q

3867 : Intrinsic::x86_avx2_psll_q;

3868 else if (Size == 'w')

3869 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w

3870 : Intrinsic::x86_avx2_psll_w;

3871 else

3873 } else {

3874 if (Size == 'd')

3875 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512

3876 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512

3877 : Intrinsic::x86_avx512_psll_d_512;

3878 else if (Size == 'q')

3879 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512

3880 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512

3881 : Intrinsic::x86_avx512_psll_q_512;

3882 else if (Size == 'w')

3883 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512

3884 : Intrinsic::x86_avx512_psll_w_512;

3885 else

3887 }

3888

3890 } else if (Name.starts_with("avx512.mask.psrl")) {

3891 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');

3892 bool IsVariable = Name[16] == 'v';

3893 char Size = Name[16] == '.' ? Name[17]

3894 : Name[17] == '.' ? Name[18]

3895 : Name[18] == '.' ? Name[19]

3896 : Name[20];

3897

3899 if (IsVariable && Name[17] != '.') {

3900 if (Size == 'd' && Name[17] == '2')

3901 IID = Intrinsic::x86_avx2_psrlv_q;

3902 else if (Size == 'd' && Name[17] == '4')

3903 IID = Intrinsic::x86_avx2_psrlv_q_256;

3904 else if (Size == 's' && Name[17] == '4')

3905 IID = Intrinsic::x86_avx2_psrlv_d;

3906 else if (Size == 's' && Name[17] == '8')

3907 IID = Intrinsic::x86_avx2_psrlv_d_256;

3908 else if (Size == 'h' && Name[17] == '8')

3909 IID = Intrinsic::x86_avx512_psrlv_w_128;

3910 else if (Size == 'h' && Name[17] == '1')

3911 IID = Intrinsic::x86_avx512_psrlv_w_256;

3912 else if (Name[17] == '3' && Name[18] == '2')

3913 IID = Intrinsic::x86_avx512_psrlv_w_512;

3914 else

3916 } else if (Name.ends_with(".128")) {

3917 if (Size == 'd')

3918 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d

3919 : Intrinsic::x86_sse2_psrl_d;

3920 else if (Size == 'q')

3921 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q

3922 : Intrinsic::x86_sse2_psrl_q;

3923 else if (Size == 'w')

3924 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w

3925 : Intrinsic::x86_sse2_psrl_w;

3926 else

3928 } else if (Name.ends_with(".256")) {

3929 if (Size == 'd')

3930 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d

3931 : Intrinsic::x86_avx2_psrl_d;

3932 else if (Size == 'q')

3933 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q

3934 : Intrinsic::x86_avx2_psrl_q;

3935 else if (Size == 'w')

3936 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w

3937 : Intrinsic::x86_avx2_psrl_w;

3938 else

3940 } else {

3941 if (Size == 'd')

3942 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512

3943 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512

3944 : Intrinsic::x86_avx512_psrl_d_512;

3945 else if (Size == 'q')

3946 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512

3947 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512

3948 : Intrinsic::x86_avx512_psrl_q_512;

3949 else if (Size == 'w')

3950 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512

3951 : Intrinsic::x86_avx512_psrl_w_512;

3952 else

3954 }

3955

3957 } else if (Name.starts_with("avx512.mask.psra")) {

3958 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');

3959 bool IsVariable = Name[16] == 'v';

3960 char Size = Name[16] == '.' ? Name[17]

3961 : Name[17] == '.' ? Name[18]

3962 : Name[18] == '.' ? Name[19]

3963 : Name[20];

3964

3966 if (IsVariable && Name[17] != '.') {

3967 if (Size == 's' && Name[17] == '4')

3968 IID = Intrinsic::x86_avx2_psrav_d;

3969 else if (Size == 's' && Name[17] == '8')

3970 IID = Intrinsic::x86_avx2_psrav_d_256;

3971 else if (Size == 'h' && Name[17] == '8')

3972 IID = Intrinsic::x86_avx512_psrav_w_128;

3973 else if (Size == 'h' && Name[17] == '1')

3974 IID = Intrinsic::x86_avx512_psrav_w_256;

3975 else if (Name[17] == '3' && Name[18] == '2')

3976 IID = Intrinsic::x86_avx512_psrav_w_512;

3977 else

3979 } else if (Name.ends_with(".128")) {

3980 if (Size == 'd')

3981 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d

3982 : Intrinsic::x86_sse2_psra_d;

3983 else if (Size == 'q')

3984 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128

3985 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128

3986 : Intrinsic::x86_avx512_psra_q_128;

3987 else if (Size == 'w')

3988 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w

3989 : Intrinsic::x86_sse2_psra_w;

3990 else

3992 } else if (Name.ends_with(".256")) {

3993 if (Size == 'd')

3994 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d

3995 : Intrinsic::x86_avx2_psra_d;

3996 else if (Size == 'q')

3997 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256

3998 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256

3999 : Intrinsic::x86_avx512_psra_q_256;

4000 else if (Size == 'w')

4001 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w

4002 : Intrinsic::x86_avx2_psra_w;

4003 else

4005 } else {

4006 if (Size == 'd')

4007 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512

4008 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512

4009 : Intrinsic::x86_avx512_psra_d_512;

4010 else if (Size == 'q')

4011 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512

4012 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512

4013 : Intrinsic::x86_avx512_psra_q_512;

4014 else if (Size == 'w')

4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512

4016 : Intrinsic::x86_avx512_psra_w_512;

4017 else

4019 }

4020

4022 } else if (Name.starts_with("avx512.mask.move.s")) {

4024 } else if (Name.starts_with("avx512.cvtmask2")) {

4026 } else if (Name.ends_with(".movntdqa")) {

4029

4030 LoadInst *LI = Builder.CreateAlignedLoad(

4034 Rep = LI;

4035 } else if (Name.starts_with("fma.vfmadd.") ||

4036 Name.starts_with("fma.vfmsub.") ||

4037 Name.starts_with("fma.vfnmadd.") ||

4038 Name.starts_with("fma.vfnmsub.")) {

4039 bool NegMul = Name[6] == 'n';

4040 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';

4041 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

4042

4045

4046 if (IsScalar) {

4047 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);

4048 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);

4049 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

4050 }

4051

4052 if (NegMul && !IsScalar)

4053 Ops[0] = Builder.CreateFNeg(Ops[0]);

4054 if (NegMul && IsScalar)

4055 Ops[1] = Builder.CreateFNeg(Ops[1]);

4056 if (NegAcc)

4057 Ops[2] = Builder.CreateFNeg(Ops[2]);

4058

4059 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

4060

4061 if (IsScalar)

4063 } else if (Name.starts_with("fma4.vfmadd.s")) {

4066

4067 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);

4068 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);

4069 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

4070

4071 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);

4072

4075 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||

4076 Name.starts_with("avx512.maskz.vfmadd.s") ||

4077 Name.starts_with("avx512.mask3.vfmadd.s") ||

4078 Name.starts_with("avx512.mask3.vfmsub.s") ||

4079 Name.starts_with("avx512.mask3.vfnmsub.s")) {

4080 bool IsMask3 = Name[11] == '3';

4081 bool IsMaskZ = Name[11] == 'z';

4082

4083 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);

4084 bool NegMul = Name[2] == 'n';

4085 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

4086

4090

4091 if (NegMul && (IsMask3 || IsMaskZ))

4092 A = Builder.CreateFNeg(A);

4093 if (NegMul && !(IsMask3 || IsMaskZ))

4094 B = Builder.CreateFNeg(B);

4095 if (NegAcc)

4096 C = Builder.CreateFNeg(C);

4097

4098 A = Builder.CreateExtractElement(A, (uint64_t)0);

4099 B = Builder.CreateExtractElement(B, (uint64_t)0);

4100 C = Builder.CreateExtractElement(C, (uint64_t)0);

4101

4105

4107 if (Name.back() == 'd')

4108 IID = Intrinsic::x86_avx512_vfmadd_f64;

4109 else

4110 IID = Intrinsic::x86_avx512_vfmadd_f32;

4111 Rep = Builder.CreateIntrinsic(IID, Ops);

4112 } else {

4113 Rep = Builder.CreateFMA(A, B, C);

4114 }

4115

4117 : IsMask3 ? C

4118 : A;

4119

4120

4121

4122 if (NegAcc && IsMask3)

4123 PassThru =

4125

4127 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,

4129 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||

4130 Name.starts_with("avx512.mask.vfnmadd.p") ||

4131 Name.starts_with("avx512.mask.vfnmsub.p") ||

4132 Name.starts_with("avx512.mask3.vfmadd.p") ||

4133 Name.starts_with("avx512.mask3.vfmsub.p") ||

4134 Name.starts_with("avx512.mask3.vfnmsub.p") ||

4135 Name.starts_with("avx512.maskz.vfmadd.p")) {

4136 bool IsMask3 = Name[11] == '3';

4137 bool IsMaskZ = Name[11] == 'z';

4138

4139 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);

4140 bool NegMul = Name[2] == 'n';

4141 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

4142

4146

4147 if (NegMul && (IsMask3 || IsMaskZ))

4148 A = Builder.CreateFNeg(A);

4149 if (NegMul && !(IsMask3 || IsMaskZ))

4150 B = Builder.CreateFNeg(B);

4151 if (NegAcc)

4152 C = Builder.CreateFNeg(C);

4153

4158

4159 if (Name[Name.size() - 5] == 's')

4160 IID = Intrinsic::x86_avx512_vfmadd_ps_512;

4161 else

4162 IID = Intrinsic::x86_avx512_vfmadd_pd_512;

4163

4164 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});

4165 } else {

4166 Rep = Builder.CreateFMA(A, B, C);

4167 }

4168

4172

4174 } else if (Name.starts_with("fma.vfmsubadd.p")) {

4178 if (VecWidth == 128 && EltWidth == 32)

4179 IID = Intrinsic::x86_fma_vfmaddsub_ps;

4180 else if (VecWidth == 256 && EltWidth == 32)

4181 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;

4182 else if (VecWidth == 128 && EltWidth == 64)

4183 IID = Intrinsic::x86_fma_vfmaddsub_pd;

4184 else if (VecWidth == 256 && EltWidth == 64)

4185 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;

4186 else

4188

4191 Ops[2] = Builder.CreateFNeg(Ops[2]);

4192 Rep = Builder.CreateIntrinsic(IID, Ops);

4193 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||

4194 Name.starts_with("avx512.mask3.vfmaddsub.p") ||

4195 Name.starts_with("avx512.maskz.vfmaddsub.p") ||

4196 Name.starts_with("avx512.mask3.vfmsubadd.p")) {

4197 bool IsMask3 = Name[11] == '3';

4198 bool IsMaskZ = Name[11] == 'z';

4199

4200 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);

4201 bool IsSubAdd = Name[3] == 's';

4204

4205 if (Name[Name.size() - 5] == 's')

4206 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;

4207 else

4208 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;

4209

4212 if (IsSubAdd)

4213 Ops[2] = Builder.CreateFNeg(Ops[2]);

4214

4215 Rep = Builder.CreateIntrinsic(IID, Ops);

4216 } else {

4218

4221

4223 CI->getModule(), Intrinsic::fma, Ops[0]->getType());

4224 Value *Odd = Builder.CreateCall(FMA, Ops);

4225 Ops[2] = Builder.CreateFNeg(Ops[2]);

4226 Value *Even = Builder.CreateCall(FMA, Ops);

4227

4228 if (IsSubAdd)

4230

4232 for (int i = 0; i != NumElts; ++i)

4233 Idxs[i] = i + (i % 2) * NumElts;

4234

4235 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);

4236 }

4237

4241

4243 } else if (Name.starts_with("avx512.mask.pternlog.") ||

4244 Name.starts_with("avx512.maskz.pternlog.")) {

4245 bool ZeroMask = Name[11] == 'z';

4249 if (VecWidth == 128 && EltWidth == 32)

4250 IID = Intrinsic::x86_avx512_pternlog_d_128;

4251 else if (VecWidth == 256 && EltWidth == 32)

4252 IID = Intrinsic::x86_avx512_pternlog_d_256;

4253 else if (VecWidth == 512 && EltWidth == 32)

4254 IID = Intrinsic::x86_avx512_pternlog_d_512;

4255 else if (VecWidth == 128 && EltWidth == 64)

4256 IID = Intrinsic::x86_avx512_pternlog_q_128;

4257 else if (VecWidth == 256 && EltWidth == 64)

4258 IID = Intrinsic::x86_avx512_pternlog_q_256;

4259 else if (VecWidth == 512 && EltWidth == 64)

4260 IID = Intrinsic::x86_avx512_pternlog_q_512;

4261 else

4263

4266 Rep = Builder.CreateIntrinsic(IID, Args);

4270 } else if (Name.starts_with("avx512.mask.vpmadd52") ||

4271 Name.starts_with("avx512.maskz.vpmadd52")) {

4272 bool ZeroMask = Name[11] == 'z';

4273 bool High = Name[20] == 'h' || Name[21] == 'h';

// Choose the intrinsic from the vector width and the low/high variant.
// High is true when the name carries an 'h' at index 20 or 21, so the
// "vpmadd52l" arms must test !High; testing High there would shadow the
// "vpmadd52h" arms below and leave the low-half names without a match.
4276 if (VecWidth == 128 && !High)

4277 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;

4278 else if (VecWidth == 256 && !High)

4279 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;

4280 else if (VecWidth == 512 && !High)

4281 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;

4282 else if (VecWidth == 128 && High)

4283 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;

4284 else if (VecWidth == 256 && High)

4285 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;

4286 else if (VecWidth == 512 && High)

4287 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;

4288 else

4290

4293 Rep = Builder.CreateIntrinsic(IID, Args);

4297 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||

4298 Name.starts_with("avx512.mask.vpermt2var.") ||

4299 Name.starts_with("avx512.maskz.vpermt2var.")) {

4300 bool ZeroMask = Name[11] == 'z';

4301 bool IndexForm = Name[17] == 'i';

4303 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||

4304 Name.starts_with("avx512.maskz.vpdpbusd.") ||

4305 Name.starts_with("avx512.mask.vpdpbusds.") ||

4306 Name.starts_with("avx512.maskz.vpdpbusds.")) {

4307 bool ZeroMask = Name[11] == 'z';

4308 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';

4311 if (VecWidth == 128 && !IsSaturating)

4312 IID = Intrinsic::x86_avx512_vpdpbusd_128;

4313 else if (VecWidth == 256 && !IsSaturating)

4314 IID = Intrinsic::x86_avx512_vpdpbusd_256;

4315 else if (VecWidth == 512 && !IsSaturating)

4316 IID = Intrinsic::x86_avx512_vpdpbusd_512;

4317 else if (VecWidth == 128 && IsSaturating)

4318 IID = Intrinsic::x86_avx512_vpdpbusds_128;

4319 else if (VecWidth == 256 && IsSaturating)

4320 IID = Intrinsic::x86_avx512_vpdpbusds_256;

4321 else if (VecWidth == 512 && IsSaturating)

4322 IID = Intrinsic::x86_avx512_vpdpbusds_512;

4323 else

4325

4328

4329

4330

4331

4332 if (Args[1]->getType()->isVectorTy() &&

4334 ->getElementType()

4335 ->isIntegerTy(32) &&

4336 Args[2]->getType()->isVectorTy() &&

4338 ->getElementType()

4339 ->isIntegerTy(32)) {

4340 Type *NewArgType = nullptr;

4341 if (VecWidth == 128)

4342 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);

4343 else if (VecWidth == 256)

4344 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);

4345 else if (VecWidth == 512)

4346 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);

4347 else

4349

4350 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);

4351 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

4352 }

4353

4354 Rep = Builder.CreateIntrinsic(IID, Args);

4358 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||

4359 Name.starts_with("avx512.maskz.vpdpwssd.") ||

4360 Name.starts_with("avx512.mask.vpdpwssds.") ||

4361 Name.starts_with("avx512.maskz.vpdpwssds.")) {

4362 bool ZeroMask = Name[11] == 'z';

4363 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';

4366 if (VecWidth == 128 && !IsSaturating)

4367 IID = Intrinsic::x86_avx512_vpdpwssd_128;

4368 else if (VecWidth == 256 && !IsSaturating)

4369 IID = Intrinsic::x86_avx512_vpdpwssd_256;

4370 else if (VecWidth == 512 && !IsSaturating)

4371 IID = Intrinsic::x86_avx512_vpdpwssd_512;

4372 else if (VecWidth == 128 && IsSaturating)

4373 IID = Intrinsic::x86_avx512_vpdpwssds_128;

4374 else if (VecWidth == 256 && IsSaturating)

4375 IID = Intrinsic::x86_avx512_vpdpwssds_256;

4376 else if (VecWidth == 512 && IsSaturating)

4377 IID = Intrinsic::x86_avx512_vpdpwssds_512;

4378 else

4380

4383

4384

4385

4386

4387 if (Args[1]->getType()->isVectorTy() &&

4389 ->getElementType()

4390 ->isIntegerTy(32) &&

4391 Args[2]->getType()->isVectorTy() &&

4393 ->getElementType()

4394 ->isIntegerTy(32)) {

4395 Type *NewArgType = nullptr;

4396 if (VecWidth == 128)

4397 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);

4398 else if (VecWidth == 256)

4399 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);

4400 else if (VecWidth == 512)

4401 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);

4402 else

4404

4405 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);

4406 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

4407 }

4408

4409 Rep = Builder.CreateIntrinsic(IID, Args);

4413 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||

4414 Name == "addcarry.u32" || Name == "addcarry.u64" ||

4415 Name == "subborrow.u32" || Name == "subborrow.u64") {

4417 if (Name[0] == 'a' && Name.back() == '2')

4418 IID = Intrinsic::x86_addcarry_32;

4419 else if (Name[0] == 'a' && Name.back() == '4')

4420 IID = Intrinsic::x86_addcarry_64;

4421 else if (Name[0] == 's' && Name.back() == '2')

4422 IID = Intrinsic::x86_subborrow_32;

4423 else if (Name[0] == 's' && Name.back() == '4')

4424 IID = Intrinsic::x86_subborrow_64;

4425 else

4427

4428

4431 Value *NewCall = Builder.CreateIntrinsic(IID, Args);

4432

4433

4434 Value *Data = Builder.CreateExtractValue(NewCall, 1);

4436

4437 Value *CF = Builder.CreateExtractValue(NewCall, 0);

4438

4440 Rep = nullptr;

4441 } else if (Name.starts_with("avx512.mask.") &&

4443

4444 }

4445

4446 return Rep;

4447}

4448

// Rewrites calls to old AArch64 intrinsics, selected by Name: the
// "neon.bfcvt*" family becomes fptrunc / shufflevector sequences and the
// "sve.fcvt*" family becomes a call to a "_v2" intrinsic.
// NOTE(review): this listing omits the signature and several statements, so
// only the steps that are visible are described below.
4451 if (Name.starts_with("neon.bfcvt")) {

4452 if (Name.starts_with("neon.bfcvtn2")) {

// LoMask and ConcatMask are both filled with 0, 1, 2, ... by std::iota.
4454 std::iota(LoMask.begin(), LoMask.end(), 0);

4456 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

// Inactive is operand 0 shuffled by LoMask; the result shuffles Inactive
// together with Trunc (defined on a line missing from this listing).
4457 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);

4460 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);

4461 } else if (Name.starts_with("neon.bfcvtn")) {

4463 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

4464 Type *V4BF16 =

// Truncate operand 0 to V4BF16 and return a shuffle built from it. No
// diagnostic output is written here: an upgrade must not print to the debug
// stream on every call it rewrites.
4466 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);

4468 return Builder.CreateShuffleVector(

4470 } else {

// Remaining "neon.bfcvt" names: a plain fptrunc of operand 0.
4471 return Builder.CreateFPTrunc(CI->getOperand(0),

4473 }

4474 } else if (Name.starts_with("sve.fcvt")) {
// The old "sve.fcvt.bf16f32" and "sve.fcvtnt.bf16f32" names map to the
// corresponding "_v2" intrinsic IDs.

4477 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)

4478 .Case("sve.fcvtnt.bf16f32",

4479 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)

4483

4485

4486

4487

4490

4491 if (Args[1]->getType() != BadPredTy)

4493

// Re-type the predicate operand: Args[1] is passed through
// convert_to_svbool (overloaded on BadPredTy) and then through
// convert_from_svbool (overloaded on GoodPredTy).
4494 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,

4495 BadPredTy, Args[1]);

4496 Args[1] = Builder.CreateIntrinsic(

4497 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);

4498

4499 return Builder.CreateIntrinsic(NewID, Args, nullptr,

4501 }

4502

4504}

4505

// Rewrites calls to old ARM MVE / CDE intrinsics, selected by exact Name.
// Any Name not listed below reaches the llvm_unreachable at the end.
// NOTE(review): this listing omits the signature and several statements, so
// only the steps that are visible are described.
4508 if (Name == "mve.vctp64.old") {

4509

4510

// Call arm_mve_vctp64, feed its result to arm_mve_pred_v2i (overloaded on a
// two-element i1 vector type), and return an arm_mve_pred_i2v call.
4511 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},

4513 nullptr, CI->getName());

4514 Value *C1 = Builder.CreateIntrinsic(

4515 Intrinsic::arm_mve_pred_v2i,

4516 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);

4517 return Builder.CreateIntrinsic(

4518 Intrinsic::arm_mve_pred_i2v,

4520 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||

4521 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||

4522 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||

4523 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||

4524 Name ==

4525 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||

4526 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||

4527 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||

4528 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||

4529 Name ==

4530 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||

4531 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||

4532 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||

4533 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||

4534 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||

4535 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||

4536 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||

4537 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {

// Tys collects the overload types for the replacement call; which types are
// used depends on the intrinsic ID.
// NOTE(review): the assignments inside each case are on lines missing from
// this listing; only the trailing "V2I1Ty}" of one of them is visible.
4538 std::vector<Type *> Tys;

4541 switch (ID) {

4542 case Intrinsic::arm_mve_mull_int_predicated:

4543 case Intrinsic::arm_mve_vqdmull_predicated:

4544 case Intrinsic::arm_mve_vldr_gather_base_predicated:

4546 break;

4547 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:

4548 case Intrinsic::arm_mve_vstr_scatter_base_predicated:

4549 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:

4551 V2I1Ty};

4552 break;

4553 case Intrinsic::arm_mve_vldr_gather_offset_predicated:

4556 break;

4557 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:

4560 break;

4561 case Intrinsic::arm_cde_vcx1q_predicated:

4562 case Intrinsic::arm_cde_vcx1qa_predicated:

4563 case Intrinsic::arm_cde_vcx2q_predicated:

4564 case Intrinsic::arm_cde_vcx2qa_predicated:

4565 case Intrinsic::arm_cde_vcx3q_predicated:

4566 case Intrinsic::arm_cde_vcx3qa_predicated:

4568 break;

4569 default:

4571 }

4572

// Build the operand list for the replacement call. An operand whose scalar
// type is one bit wide is first converted with arm_mve_pred_v2i and then
// with arm_mve_pred_i2v (overloaded on V2I1Ty); every operand, converted or
// not, is appended to Ops.
4573 std::vector<Value *> Ops;

4575 Type *Ty = Op->getType();

4576 if (Ty->getScalarSizeInBits() == 1) {

4577 Value *C1 = Builder.CreateIntrinsic(

4578 Intrinsic::arm_mve_pred_v2i,

4580 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);

4581 }

4582 Ops.push_back(Op);

4583 }

4584

4585 return Builder.CreateIntrinsic(ID, Tys, Ops, nullptr,

4587 }

// Reaching this point means Name matched none of the cases above.
4588 llvm_unreachable("Unknown function for ARM CallBase upgrade.");

4589}

4590

4591

4592

4593

4594

4595

4613

// Tail of a routine that replaces a call with an atomicrmw instruction and
// returns the (bitcast) result, or nullptr when the call cannot be handled.
// NOTE(review): this listing omits the signature and many declarations
// (PtrTy, Order, Val, RMW, ...), so only the visible steps are described.

// A call with fewer than three operands is not upgraded.
4615 if (NumOperands < 3)

4616 return nullptr;

4617

// Likewise when PtrTy is null.
4620 if (!PtrTy)

4621 return nullptr;

4622

4625 return nullptr;

4626

// Volatility defaults to false and is only reconsidered when the call has
// more than five operands.
4628 bool IsVolatile = false;

4629

4630

4631

4632 if (NumOperands > 3)

4634

4635

4636

// With more than five operands the operation is treated as volatile unless
// VolatileArg is non-null and zero.
4637 if (NumOperands > 5) {

4639 IsVolatile = !VolatileArg || !VolatileArg->isZero();

4640 }

4641

4647

4649

4650

// A vector value whose elements are 16-bit integers is bitcast to AsBF16
// before being used as the atomicrmw operand.
4653 if (VT->getElementType()->isIntegerTy(16)) {

4656 Val = Builder.CreateBitCast(Val, AsBF16);

4657 }

4658 }

4659

4660

4661

// The atomicrmw is created with the "agent" synchronization scope and no
// explicit alignment (std::nullopt).
4662 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");

4664 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

4665

// Metadata attached below depends on conditions involving AddrSpace.
// NOTE(review): those conditions are on lines missing from this listing.
4666 unsigned AddrSpace = PtrTy->getAddressSpace();

4669 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);

4671 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);

4672 }

4673

4676 MDNode *RangeNotPrivate =

4679 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);

4680 }

4681

4682 if (IsVolatile)

4684

// The atomicrmw result is bitcast to RetTy for the caller.
4685 return Builder.CreateBitCast(RMW, RetTy);

4686}

4687

4688

4689

4690

4695 Metadata *MD = MAV->getMetadata();

4697 }

4698 }

4699 return nullptr;

4700}

4701

4702

4706 return MAV->getMetadata();

4707 return nullptr;

4708}

4709

4711

4712

4713 return I->getDebugLoc().getAsMDNode();

4714}

4715

4716

4717

4718

// Dispatches on Name ("label", "assign", "declare", "addr", "value") to
// build a debug record DR for the call; the final assert requires that one
// of the arms produced it.
// NOTE(review): this listing omits the signature and the statements that
// construct the record in each arm, so only the visible pieces are described.
4721 if (Name == "label") {

4724 } else if (Name == "assign") {

4729

4731 } else if (Name == "declare") {

4736 } else if (Name == "addr") {

4737

4739

4740

4743 }

4746 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,

4748 } else if (Name == "value") {

4749

// Default operand positions of the variable and the expression. They move to
// 2 and 3 inside the guarded block below, which can also return early.
// NOTE(review): the guarding conditions are not visible in this listing.
4750 unsigned VarOp = 1;

4751 unsigned ExprOp = 2;

4754

4756 return;

4757 VarOp = 2;

4758 ExprOp = 3;

4759 }

4764 }

// Every Name handled above must have set DR.
4765 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");

4767}

4768

4769

4770

4772

4773

4774

// Nothing to upgrade when there is no callee Function. The guard has to fire
// on a null F: the code that follows compares F against NewFn and
// dereferences it (e.g. F->getParent()), so it must only run when F is set.
4776 if (!F)

4777 return;

4778

4782

4783 if (!NewFn) {

4784

4786

4787 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");

4788 Name = Name.substr(5);

4789

4790 bool IsX86 = Name.consume_front("x86.");

4791 bool IsNVVM = Name.consume_front("nvvm.");

4792 bool IsAArch64 = Name.consume_front("aarch64.");

4793 bool IsARM = Name.consume_front("arm.");

4794 bool IsAMDGCN = Name.consume_front("amdgcn.");

4795 bool IsDbg = Name.consume_front("dbg.");

4796 Value *Rep = nullptr;

4797

4798 if (!IsX86 && Name == "stackprotectorcheck") {

4799 Rep = nullptr;

4800 } else if (IsNVVM) {

4802 } else if (IsX86) {

4804 } else if (IsAArch64) {

4806 } else if (IsARM) {

4808 } else if (IsAMDGCN) {

4810 } else if (IsDbg) {

4812 } else {

4814 }

4815

4816 if (Rep)

4819 return;

4820 }

4821

4822 const auto &DefaultCase = [&]() -> void {

4823 if (F == NewFn)

4824 return;

4825

4827

4830 "Unknown function for CallBase upgrade and isn't just a name change");

4832 return;

4833 }

4834

4835

4838 "Return type must have changed");

4839 assert(OldST->getNumElements() ==

4841 "Must have same number of elements");

4842

4844 CallInst *NewCI = Builder.CreateCall(NewFn, Args);

4847 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {

4848 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);

4849 Res = Builder.CreateInsertValue(Res, Elem, Idx);

4850 }

4853 return;

4854 }

4855

4856

4857

4860 return;

4861 };

4862 CallInst *NewCall = nullptr;

4864 default: {

4865 DefaultCase();

4866 return;

4867 }

4868 case Intrinsic::arm_neon_vst1:

4869 case Intrinsic::arm_neon_vst2:

4870 case Intrinsic::arm_neon_vst3:

4871 case Intrinsic::arm_neon_vst4:

4872 case Intrinsic::arm_neon_vst2lane:

4873 case Intrinsic::arm_neon_vst3lane:

4874 case Intrinsic::arm_neon_vst4lane: {

4876 NewCall = Builder.CreateCall(NewFn, Args);

4877 break;

4878 }

4879 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:

4880 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:

4881 case Intrinsic::aarch64_sve_bfdot_lane_v2: {

4882 LLVMContext &Ctx = F->getParent()->getContext();

4886 NewCall = Builder.CreateCall(NewFn, Args);

4887 break;

4888 }

4889 case Intrinsic::aarch64_sve_ld3_sret:

4890 case Intrinsic::aarch64_sve_ld4_sret:

4891 case Intrinsic::aarch64_sve_ld2_sret: {

4893 Name = Name.substr(5);

4900 unsigned MinElts = RetTy->getMinNumElements() / N;

4902 Value *NewLdCall = Builder.CreateCall(NewFn, Args);

4904 for (unsigned I = 0; I < N; I++) {

4905 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);

4906 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);

4907 }

4909 break;

4910 }

4911

4912 case Intrinsic::coro_end: {

4915 NewCall = Builder.CreateCall(NewFn, Args);

4916 break;

4917 }

4918

4919 case Intrinsic::vector_extract: {

4921 Name = Name.substr(5);

4922 if (!Name.starts_with("aarch64.sve.tuple.get")) {

4923 DefaultCase();

4924 return;

4925 }

4927 unsigned MinElts = RetTy->getMinNumElements();

4930 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});

4931 break;

4932 }

4933

4934 case Intrinsic::vector_insert: {

4936 Name = Name.substr(5);

4937 if (!Name.starts_with("aarch64.sve.tuple")) {

4938 DefaultCase();

4939 return;

4940 }

4941 if (Name.starts_with("aarch64.sve.tuple.set")) {

4946 NewCall = Builder.CreateCall(

4948 break;

4949 }

4950 if (Name.starts_with("aarch64.sve.tuple.create")) {

4952 .StartsWith("aarch64.sve.tuple.create2", 2)

4953 .StartsWith("aarch64.sve.tuple.create3", 3)

4954 .StartsWith("aarch64.sve.tuple.create4", 4)

4956 assert(N > 1 && "Create is expected to be between 2-4");

4959 unsigned MinElts = RetTy->getMinNumElements() / N;

4960 for (unsigned I = 0; I < N; I++) {

4962 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);

4963 }

4965 }

4966 break;

4967 }

4968

4969 case Intrinsic::arm_neon_bfdot:

4970 case Intrinsic::arm_neon_bfmmla:

4971 case Intrinsic::arm_neon_bfmlalb:

4972 case Intrinsic::arm_neon_bfmlalt:

4973 case Intrinsic::aarch64_neon_bfdot:

4974 case Intrinsic::aarch64_neon_bfmmla:

4975 case Intrinsic::aarch64_neon_bfmlalb:

4976 case Intrinsic::aarch64_neon_bfmlalt: {

4979 "Mismatch between function args and call args");

4980 size_t OperandWidth =

4982 assert((OperandWidth == 64 || OperandWidth == 128) &&

4983 "Unexpected operand width");

4985 auto Iter = CI->args().begin();

4986 Args.push_back(*Iter++);

4987 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));

4988 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));

4989 NewCall = Builder.CreateCall(NewFn, Args);

4990 break;

4991 }

4992

4993 case Intrinsic::bitreverse:

4994 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});

4995 break;

4996

4997 case Intrinsic::ctlz:

4998 case Intrinsic::cttz:

5000 "Mismatch between function args and call args");

5001 NewCall =

5002 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});

5003 break;

5004

5005 case Intrinsic::objectsize: {

5006 Value *NullIsUnknownSize =

5010 NewCall = Builder.CreateCall(

5012 break;

5013 }

5014

5015 case Intrinsic::ctpop:

5016 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});

5017 break;

5018

5019 case Intrinsic::convert_from_fp16:

5020 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});

5021 break;

5022

5023 case Intrinsic::dbg_value: {

5025 Name = Name.substr(5);

5026

5027 if (Name.starts_with("dbg.addr")) {

5031 NewCall =

5034 break;

5035 }

5036

5037

5039

5041 if (Offset->isZeroValue()) {

5042 NewCall = Builder.CreateCall(

5043 NewFn,

5045 break;

5046 }

5048 return;

5049 }

5050

5051 case Intrinsic::ptr_annotation:

5052

5054 DefaultCase();

5055 return;

5056 }

5057

5058

5059 NewCall = Builder.CreateCall(

5060 NewFn,

5066 return;

5067

5068 case Intrinsic::var_annotation:

5069

5071 DefaultCase();

5072 return;

5073 }

5074

5075 NewCall = Builder.CreateCall(

5076 NewFn,

5082 return;

5083

5084 case Intrinsic::riscv_aes32dsi:

5085 case Intrinsic::riscv_aes32dsmi:

5086 case Intrinsic::riscv_aes32esi:

5087 case Intrinsic::riscv_aes32esmi:

5088 case Intrinsic::riscv_sm4ks:

5089 case Intrinsic::riscv_sm4ed: {

5090

5091

5094 return;

5095

5099 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());

5100 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());

5101 }

5102

5105

5106 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});

5107 Value *Res = NewCall;

5109 Res = Builder.CreateIntCast(NewCall, CI->getType(), true);

5113 return;

5114 }

5115 case Intrinsic::nvvm_mapa_shared_cluster: {

5116

5117 NewCall =

5119 Value *Res = NewCall;

5120 Res = Builder.CreateAddrSpaceCast(

5125 return;

5126 }

5127 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:

5128 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {

5129

5131 Args[0] = Builder.CreateAddrSpaceCast(

5133

5134 NewCall = Builder.CreateCall(NewFn, Args);

5138 return;

5139 }

5140 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:

5141 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:

5142 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:

5143 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:

5144 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:

5145 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:

5146 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:

5147 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {

5149

5150

5151

5154 Args[0] = Builder.CreateAddrSpaceCast(

5156

5157

5158

5159

5160 size_t NumArgs = CI->arg_size();

5163 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));

5164

5165 NewCall = Builder.CreateCall(NewFn, Args);

5169 return;

5170 }

5171 case Intrinsic::riscv_sha256sig0:

5172 case Intrinsic::riscv_sha256sig1:

5173 case Intrinsic::riscv_sha256sum0:

5174 case Intrinsic::riscv_sha256sum1:

5175 case Intrinsic::riscv_sm3p0:

5176 case Intrinsic::riscv_sm3p1: {

5177

5178

5180 return;

5181

5183 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

5184

5185 NewCall = Builder.CreateCall(NewFn, Arg);

5187 Builder.CreateIntCast(NewCall, CI->getType(), true);

5191 return;

5192 }

5193

5194 case Intrinsic::x86_xop_vfrcz_ss:

5195 case Intrinsic::x86_xop_vfrcz_sd:

5196 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});

5197 break;

5198

5199 case Intrinsic::x86_xop_vpermil2pd:

5200 case Intrinsic::x86_xop_vpermil2ps:

5201 case Intrinsic::x86_xop_vpermil2pd_256:

5202 case Intrinsic::x86_xop_vpermil2ps_256: {

5206 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);

5207 NewCall = Builder.CreateCall(NewFn, Args);

5208 break;

5209 }

5210

5211 case Intrinsic::x86_sse41_ptestc:

5212 case Intrinsic::x86_sse41_ptestz:

5213 case Intrinsic::x86_sse41_ptestnzc: {

5214

5215

5216

5217

5220 return;

5221

5222

5224

5226

5227 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");

5228 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

5229

5230 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});

5231 break;

5232 }

5233

5234 case Intrinsic::x86_rdtscp: {

5235

5236

5238 return;

5239

5240 NewCall = Builder.CreateCall(NewFn);

5241

5242 Value *Data = Builder.CreateExtractValue(NewCall, 1);

5244

5245 Value *TSC = Builder.CreateExtractValue(NewCall, 0);

5246

5250 return;

5251 }

5252

5253 case Intrinsic::x86_sse41_insertps:

5254 case Intrinsic::x86_sse41_dppd:

5255 case Intrinsic::x86_sse41_dpps:

5256 case Intrinsic::x86_sse41_mpsadbw:

5257 case Intrinsic::x86_avx_dp_ps_256:

5258 case Intrinsic::x86_avx2_mpsadbw: {

5259

5260

5262

5263

5264 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

5265 NewCall = Builder.CreateCall(NewFn, Args);

5266 break;

5267 }

5268

5269 case Intrinsic::x86_avx512_mask_cmp_pd_128:

5270 case Intrinsic::x86_avx512_mask_cmp_pd_256:

5271 case Intrinsic::x86_avx512_mask_cmp_pd_512:

5272 case Intrinsic::x86_avx512_mask_cmp_ps_128:

5273 case Intrinsic::x86_avx512_mask_cmp_ps_256:

5274 case Intrinsic::x86_avx512_mask_cmp_ps_512: {

5276 unsigned NumElts =

5278 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

5279

5280 NewCall = Builder.CreateCall(NewFn, Args);

5282

5286 return;

5287 }

5288

5289 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:

5290 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:

5291 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:

5292 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:

5293 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:

5294 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {

5298 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)

5299 Args[1] = Builder.CreateBitCast(

5301

5302 NewCall = Builder.CreateCall(NewFn, Args);

5303 Value *Res = Builder.CreateBitCast(

5305

5309 return;

5310 }

5311 case Intrinsic::x86_avx512bf16_dpbf16ps_128:

5312 case Intrinsic::x86_avx512bf16_dpbf16ps_256:

5313 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{

5315 unsigned NumElts =

5317 Args[1] = Builder.CreateBitCast(

5319 Args[2] = Builder.CreateBitCast(

5321

5322 NewCall = Builder.CreateCall(NewFn, Args);

5323 break;

5324 }

5325

5326 case Intrinsic::thread_pointer: {

5327 NewCall = Builder.CreateCall(NewFn, {});

5328 break;

5329 }

5330

5331 case Intrinsic::memcpy:

5332 case Intrinsic::memmove:

5333 case Intrinsic::memset: {

5334

5335

5336

5337

5338

5339

5340

5342 DefaultCase();

5343 return;

5344 }

5345

5346

5349 NewCall = Builder.CreateCall(NewFn, Args);

5351 AttributeList NewAttrs = AttributeList::get(

5352 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),

5353 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),

5354 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});

5357

5359 MemCI->setDestAlignment(Align->getMaybeAlignValue());

5360

5362 MTI->setSourceAlignment(Align->getMaybeAlignValue());

5363 break;

5364 }

5365

5366 case Intrinsic::masked_load:

5367 case Intrinsic::masked_gather:

5368 case Intrinsic::masked_store:

5369 case Intrinsic::masked_scatter: {

5371 DefaultCase();

5372 return;

5373 }

5374

5375 auto GetMaybeAlign = [](Value *Op) {

5377 uint64_t Val = CI->getZExtValue();

5378 if (Val == 0)

5382 }

5384 };

5385 auto GetAlign = [&](Value *Op) {

5390 };

5391

5394 case Intrinsic::masked_load:

5395 NewCall = Builder.CreateMaskedLoad(

5398 break;

5399 case Intrinsic::masked_gather:

5400 NewCall = Builder.CreateMaskedGather(

5402 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),

5405 break;

5406 case Intrinsic::masked_store:

5407 NewCall = Builder.CreateMaskedStore(

5410 break;

5411 case Intrinsic::masked_scatter:

5412 NewCall = Builder.CreateMaskedScatter(

5414 DL.getValueOrABITypeAlignment(

5418 break;

5419 default:

5421 }

5422

5425 break;

5426 }

5427

5428 case Intrinsic::lifetime_start:

5429 case Intrinsic::lifetime_end: {

5431 DefaultCase();

5432 return;

5433 }

5434

5436

5439

5440 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)

5441 NewCall = Builder.CreateLifetimeStart(Ptr);

5442 else

5443 NewCall = Builder.CreateLifetimeEnd(Ptr);

5444 break;

5445 }

5446

5447

5449 return;

5450 }

5451

5452 case Intrinsic::x86_avx512_vpdpbusd_128:

5453 case Intrinsic::x86_avx512_vpdpbusd_256:

5454 case Intrinsic::x86_avx512_vpdpbusd_512:

5455 case Intrinsic::x86_avx512_vpdpbusds_128:

5456 case Intrinsic::x86_avx512_vpdpbusds_256:

5457 case Intrinsic::x86_avx512_vpdpbusds_512:

5458 case Intrinsic::x86_avx2_vpdpbssd_128:

5459 case Intrinsic::x86_avx2_vpdpbssd_256:

5460 case Intrinsic::x86_avx10_vpdpbssd_512:

5461 case Intrinsic::x86_avx2_vpdpbssds_128:

5462 case Intrinsic::x86_avx2_vpdpbssds_256:

5463 case Intrinsic::x86_avx10_vpdpbssds_512:

5464 case Intrinsic::x86_avx2_vpdpbsud_128:

5465 case Intrinsic::x86_avx2_vpdpbsud_256:

5466 case Intrinsic::x86_avx10_vpdpbsud_512:

5467 case Intrinsic::x86_avx2_vpdpbsuds_128:

5468 case Intrinsic::x86_avx2_vpdpbsuds_256:

5469 case Intrinsic::x86_avx10_vpdpbsuds_512:

5470 case Intrinsic::x86_avx2_vpdpbuud_128:

5471 case Intrinsic::x86_avx2_vpdpbuud_256:

5472 case Intrinsic::x86_avx10_vpdpbuud_512:

5473 case Intrinsic::x86_avx2_vpdpbuuds_128:

5474 case Intrinsic::x86_avx2_vpdpbuuds_256:

5475 case Intrinsic::x86_avx10_vpdpbuuds_512: {

5479 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);

5480 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);

5481 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

5482

5483 NewCall = Builder.CreateCall(NewFn, Args);

5484 break;

5485 }

5486 case Intrinsic::x86_avx512_vpdpwssd_128:

5487 case Intrinsic::x86_avx512_vpdpwssd_256:

5488 case Intrinsic::x86_avx512_vpdpwssd_512:

5489 case Intrinsic::x86_avx512_vpdpwssds_128:

5490 case Intrinsic::x86_avx512_vpdpwssds_256:

5491 case Intrinsic::x86_avx512_vpdpwssds_512:

5492 case Intrinsic::x86_avx2_vpdpwsud_128:

5493 case Intrinsic::x86_avx2_vpdpwsud_256:

5494 case Intrinsic::x86_avx10_vpdpwsud_512:

5495 case Intrinsic::x86_avx2_vpdpwsuds_128:

5496 case Intrinsic::x86_avx2_vpdpwsuds_256:

5497 case Intrinsic::x86_avx10_vpdpwsuds_512:

5498 case Intrinsic::x86_avx2_vpdpwusd_128:

5499 case Intrinsic::x86_avx2_vpdpwusd_256:

5500 case Intrinsic::x86_avx10_vpdpwusd_512:

5501 case Intrinsic::x86_avx2_vpdpwusds_128:

5502 case Intrinsic::x86_avx2_vpdpwusds_256:

5503 case Intrinsic::x86_avx10_vpdpwusds_512:

5504 case Intrinsic::x86_avx2_vpdpwuud_128:

5505 case Intrinsic::x86_avx2_vpdpwuud_256:

5506 case Intrinsic::x86_avx10_vpdpwuud_512:

5507 case Intrinsic::x86_avx2_vpdpwuuds_128:

5508 case Intrinsic::x86_avx2_vpdpwuuds_256:

5509 case Intrinsic::x86_avx10_vpdpwuuds_512:

5513 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);

5514 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);

5515 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);

5516

5517 NewCall = Builder.CreateCall(NewFn, Args);

5518 break;

5519 }

5520 assert(NewCall && "Should have either set this variable or returned through "

5521 "the default case");

5525}

5526

5528 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

5529

5530

5531

5534

5535

5539

5540

5541 if (F != NewFn)

5542 F->eraseFromParent();

5543 }

5544}

5545

5548 if (NumOperands == 0)

5549 return &MD;

5550

5551

5553 return &MD;

5554

5556 if (NumOperands == 3) {

5559

5560 Metadata *Elts2[] = {ScalarType, ScalarType,

5565 }

5566

5570}

5571

5574 if (Opc != Instruction::BitCast)

5575 return nullptr;

5576

5577 Temp = nullptr;

5578 Type *SrcTy = V->getType();

5582

5583

5584

5587

5588 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);

5589 }

5590

5591 return nullptr;

5592}

5593

5595 if (Opc != Instruction::BitCast)

5596 return nullptr;

5597

5598 Type *SrcTy = C->getType();

5602

5603

5604

5606

5608 DestTy);

5609 }

5610

5611 return nullptr;

5612}

5613

5614

5615

5618 return false;

5619

5621

5622

5623

5625 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {

5626 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {

5627 if (Flag->getNumOperands() < 3)

5628 return false;

5629 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))

5630 return K->getString() == "Debug Info Version";

5631 return false;

5632 });

5633 if (OpIt != ModFlags->op_end()) {

5634 const MDOperand &ValOp = (*OpIt)->getOperand(2);

5636 Version = CI->getZExtValue();

5637 }

5638 }

5639

5641 bool BrokenDebugInfo = false;

5644 if (!BrokenDebugInfo)

5645

5646 return false;

5647 else {

5648

5650 M.getContext().diagnose(Diag);

5651 }

5652 }

5655

5657 M.getContext().diagnose(DiagVersion);

5658 }

5660}

5661

5665

5667 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};

5668 unsigned Length = 0;

5669

5670 if (F->hasFnAttribute(Attr)) {

5671

5672

5673 StringRef S = F->getFnAttribute(Attr).getValueAsString();

5675 auto [Part, Rest] = S.split(',');

5677 S = Rest;

5678 }

5679 }

5680

5681 const unsigned Dim = DimC - 'x';

5682 assert(Dim < 3 && "Unexpected dim char");

5683

5685

5686

5687 const std::string VStr = llvm::utostr(VInt);

5688 Vect3[Dim] = VStr;

5690

5692 F->addFnAttr(Attr, NewAttr);

5693}

5694

5696 return S == "x" || S == "y" || S == "z";

5697}

5698

5701 if (K == "kernel") {

5704 return true;

5705 }

5706 if (K == "align") {

5707

5708

5709

5710

5711 const uint64_t AlignIdxValuePair =

5713 const unsigned Idx = (AlignIdxValuePair >> 16);

5714 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);

5717 return true;

5718 }

5719 if (K == "maxclusterrank" || K == "cluster_max_blocks") {

5722 return true;

5723 }

5724 if (K == "minctasm") {

5727 return true;

5728 }

5729 if (K == "maxnreg") {

5732 return true;

5733 }

5734 if (K.consume_front("maxntid") && isXYZ(K)) {

5736 return true;

5737 }

5738 if (K.consume_front("reqntid") && isXYZ(K)) {

5740 return true;

5741 }

5742 if (K.consume_front("cluster_dim_") && isXYZ(K)) {

5744 return true;

5745 }

5746 if (K == "grid_constant") {

5748 for (const auto &Op : cast<MDNode>(V)->operands()) {

5749

5750

5753 }

5754 return true;

5755 }

5756

5757 return false;

5758}

5759

5761 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");

5762 if (!NamedMD)

5763 return;

5764

5768 if (!SeenNodes.insert(MD).second)

5769 continue;

5770

5772 if (!GV)

5773 continue;

5774

5775 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

5776

5778

5779

5780

5781

5782 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {

5784 const MDOperand &V = MD->getOperand(j + 1);

5786 if (!Upgraded)

5787 NewOperands.append({K, V});

5788 }

5789

5790 if (NewOperands.size() > 1)

5792 }

5793

5795 for (MDNode *N : NewNodes)

5797}

5798

5799

5800

5803 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";

5804 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);

5805 if (ModRetainReleaseMarker) {

5807 if (Op) {

5809 if (ID) {

5811 ID->getString().split(ValueComp, "#");

5812 if (ValueComp.size() == 2) {

5813 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();

5815 }

5817 M.eraseNamedMetadata(ModRetainReleaseMarker);

5819 }

5820 }

5821 }

5823}

5824

5826

5827

5828 auto UpgradeToIntrinsic = [&](const char *OldFunc,

5831

5832 if (!Fn)

5833 return;

5834

5837

5841 continue;

5842

5846

5847

5848

5852 continue;

5853

5854 bool InvalidCast = false;

5855

5856 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {

5858

5859

5860

5862

5863

5866 InvalidCast = true;

5867 break;

5868 }

5869 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));

5870 }

5871 Args.push_back(Arg);

5872 }

5873

5874 if (InvalidCast)

5875 continue;

5876

5877

5878 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);

5881

5882

5883 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

5884

5888 }

5889

5892 };

5893

5894

5895

5896 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

5897

5898

5899

5900

5902 return;

5903

5904 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {

5905 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},

5906 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},

5907 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},

5908 {"objc_autoreleaseReturnValue",

5909 llvm::Intrinsic::objc_autoreleaseReturnValue},

5910 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},

5911 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},

5912 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},

5913 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},

5914 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},

5915 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},

5916 {"objc_release", llvm::Intrinsic::objc_release},

5917 {"objc_retain", llvm::Intrinsic::objc_retain},

5918 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},

5919 {"objc_retainAutoreleaseReturnValue",

5920 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},

5921 {"objc_retainAutoreleasedReturnValue",

5922 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},

5923 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},

5924 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},

5925 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},

5926 {"objc_unsafeClaimAutoreleasedReturnValue",

5927 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},

5928 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},

5929 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},

5930 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},

5931 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},

5932 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},

5933 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},

5934 {"objc_arc_annotation_topdown_bbstart",

5935 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},

5936 {"objc_arc_annotation_topdown_bbend",

5937 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},

5938 {"objc_arc_annotation_bottomup_bbstart",

5939 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},

5940 {"objc_arc_annotation_bottomup_bbend",

5941 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

5942

5943 for (auto &I : RuntimeFuncs)

5944 UpgradeToIntrinsic(I.first, I.second);

5945}

5946

5948 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();

5949 if (!ModFlags)

5950 return false;

5951

5952 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;

5953 bool HasSwiftVersionFlag = false;

5954 uint8_t SwiftMajorVersion, SwiftMinorVersion;

5958

5959 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {

5961 if (Op->getNumOperands() != 3)

5962 continue;

5964 if (!ID)

5965 continue;

5970 Op->getOperand(2)};

5973 };

5974

5975 if (ID->getString() == "Objective-C Image Info Version")

5976 HasObjCFlag = true;

5977 if (ID->getString() == "Objective-C Class Properties")

5978 HasClassProperties = true;

5979

5980 if (ID->getString() == "PIC Level") {

5981 if (auto *Behavior =

5983 uint64_t V = Behavior->getLimitedValue();

5986 }

5987 }

5988

5989 if (ID->getString() == "PIE Level")

5990 if (auto *Behavior =

5992 if (Behavior->getLimitedValue() == Module::Error)

5994

5995

5996

5997 if (ID->getString() == "branch-target-enforcement" ||

5998 ID->getString().starts_with("sign-return-address")) {

5999 if (auto *Behavior =

6001 if (Behavior->getLimitedValue() == Module::Error) {

6005 Op->getOperand(1), Op->getOperand(2)};

6008 }

6009 }

6010 }

6011

6012

6013

6014

6015 if (ID->getString() == "Objective-C Image Info Section") {

6018 Value->getString().split(ValueComp, " ");

6019 if (ValueComp.size() != 1) {

6020 std::string NewValue;

6021 for (auto &S : ValueComp)

6022 NewValue += S.str();

6023 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),

6027 }

6028 }

6029 }

6030

6031

6032

6033 if (ID->getString() == "Objective-C Garbage Collection") {

6035 if (Md) {

6036 assert(Md->getValue() && "Expected non-empty metadata");

6037 auto Type = Md->getValue()->getType();

6038 if (Type == Int8Ty)

6039 continue;

6040 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();

6041 if ((Val & 0xff) != Val) {

6042 HasSwiftVersionFlag = true;

6043 SwiftABIVersion = (Val & 0xff00) >> 8;

6044 SwiftMajorVersion = (Val & 0xff000000) >> 24;

6045 SwiftMinorVersion = (Val & 0xff0000) >> 16;

6046 }

6049 Op->getOperand(1),

6053 }

6054 }

6055

6056 if (ID->getString() == "amdgpu_code_object_version") {

6058 Op->getOperand(0),

6059 MDString::get(M.getContext(), "amdhsa_code_object_version"),

6060 Op->getOperand(2)};

6063 }

6064 }

6065

6066

6067

6068

6069

6070

6071 if (HasObjCFlag && !HasClassProperties) {

6075 }

6076

6077 if (HasSwiftVersionFlag) {

6078 M.addModuleFlag(Module::Error, "Swift ABI Version",

6079 SwiftABIVersion);

6080 M.addModuleFlag(Module::Error, "Swift Major Version",

6081 ConstantInt::get(Int8Ty, SwiftMajorVersion));

6082 M.addModuleFlag(Module::Error, "Swift Minor Version",

6083 ConstantInt::get(Int8Ty, SwiftMinorVersion));

6085 }

6086

6088}

6089

6091 auto TrimSpaces = [](StringRef Section) -> std::string {

6093 Section.split(Components, ',');

6094

6097

6098 for (auto Component : Components)

6099 OS << ',' << Component.trim();

6100

6101 return std::string(OS.str().substr(1));

6102 };

6103

6104 for (auto &GV : M.globals()) {

6105 if (!GV.hasSection())

6106 continue;

6107

6108 StringRef Section = GV.getSection();

6109

6110 if (!Section.starts_with("__DATA, __objc_catlist"))

6111 continue;

6112

6113

6114

6115 GV.setSection(TrimSpaces(Section));

6116 }

6117}

6118

6119namespace {

6120

6121

6122

6123

6124

6125

6126

6127

6128

6129

6130

6131struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {

6132 StrictFPUpgradeVisitor() = default;

6133

6135 if (!Call.isStrictFP())

6136 return;

6138 return;

6139

6140

6141 Call.removeFnAttr(Attribute::StrictFP);

6142 Call.addFnAttr(Attribute::NoBuiltin);

6143 }

6144};

6145

6146

6147struct AMDGPUUnsafeFPAtomicsUpgradeVisitor

6148 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {

6149 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;

6150

6151 void visitAtomicRMWInst(AtomicRMWInst &RMW) {

6153 return;

6154

6156 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);

6159 }

6160};

6161}

6162

6164

6165

6166 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {

6167 StrictFPUpgradeVisitor SFPV;

6168 SFPV.visit(F);

6169 }

6170

6171

6172 F.removeRetAttrs(AttributeFuncs::typeIncompatible(

6173 F.getReturnType(), F.getAttributes().getRetAttrs()));

6174 for (auto &Arg : F.args())

6175 Arg.removeAttrs(

6176 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

6177

6178

6179

6180 if (Attribute A = F.getFnAttribute("implicit-section-name");

6181 A.isValid() && A.isStringAttribute()) {

6182 F.setSection(A.getValueAsString());

6183 F.removeFnAttr("implicit-section-name");

6184 }

6185

6186 if (!F.empty()) {

6187

6188

6189

6190 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");

6191 A.isValid()) {

6192

6193 if (A.getValueAsBool()) {

6194 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;

6195 Visitor.visit(F);

6196 }

6197

6198

6199

6200 F.removeFnAttr("amdgpu-unsafe-fp-atomics");

6201 }

6202 }

6203}

6204

6205

6208 if (!F.hasFnAttribute(FnAttrName))

6209 F.addFnAttr(FnAttrName, Value);

6210}

6211

6212

6213

6214

6216 if (!F.hasFnAttribute(FnAttrName)) {

6217 if (Set)

6218 F.addFnAttr(FnAttrName);

6219 } else {

6220 auto A = F.getFnAttribute(FnAttrName);

6221 if ("false" == A.getValueAsString())

6222 F.removeFnAttr(FnAttrName);

6223 else if ("true" == A.getValueAsString()) {

6224 F.removeFnAttr(FnAttrName);

6225 F.addFnAttr(FnAttrName);

6226 }

6227 }

6228}

6229

6231 Triple T(M.getTargetTriple());

6232 if (!T.isThumb() && !T.isARM() && !T.isAArch64())

6233 return;

6234

6241

6242 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();

6243 if (ModFlags) {

6244 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {

6246 if (Op->getNumOperands() != 3)

6247 continue;

6248

6251 if (!ID || !CI)

6252 continue;

6253

6255 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue

6256 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue

6257 : IDStr == "guarded-control-stack" ? &GCSValue

6258 : IDStr == "sign-return-address" ? &SRAValue

6259 : IDStr == "sign-return-address-all" ? &SRAALLValue

6260 : IDStr == "sign-return-address-with-bkey"

6261 ? &SRABKeyValue

6262 : nullptr;

6263 if (!ValPtr)

6264 continue;

6265

6266 *ValPtr = CI->getZExtValue();

6267 if (*ValPtr == 2)

6268 return;

6269 }

6270 }

6271

6272 bool BTE = BTEValue == 1;

6273 bool BPPLR = BPPLRValue == 1;

6274 bool GCS = GCSValue == 1;

6275 bool SRA = SRAValue == 1;

6276

6277 StringRef SignTypeValue = "non-leaf";

6278 if (SRA && SRAALLValue == 1)

6279 SignTypeValue = "all";

6280

6281 StringRef SignKeyValue = "a_key";

6282 if (SRA && SRABKeyValue == 1)

6283 SignKeyValue = "b_key";

6284

6285 for (Function &F : M.getFunctionList()) {

6286 if (F.isDeclaration())

6287 continue;

6288

6289 if (SRA) {

6292 } else {

6293 if (auto A = F.getFnAttribute("sign-return-address");

6294 A.isValid() && "none" == A.getValueAsString()) {

6295 F.removeFnAttr("sign-return-address");

6296 F.removeFnAttr("sign-return-address-key");

6297 }

6298 }

6302 }

6303

6304 if (BTE)

6305 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);

6306 if (BPPLR)

6307 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);

6308 if (GCS)

6310 if (SRA) {

6312 if (SRAALLValue == 1)

6313 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);

6314 if (SRABKeyValue == 1)

6315 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);

6316 }

6317}

6318

6321 if (!T)

6322 return false;

6323 if (T->getNumOperands() < 1)

6324 return false;

6326 if (!S)

6327 return false;

6328 return S->getString().starts_with("llvm.vectorizer.");

6329}

6330

6332 StringRef OldPrefix = "llvm.vectorizer.";

6334

6335 if (OldTag == "llvm.vectorizer.unroll")

6336 return MDString::get(C, "llvm.loop.interleave.count");

6337

6340 .str());

6341}

6342

6345 if (!T)

6346 return MD;

6347 if (T->getNumOperands() < 1)

6348 return MD;

6350 if (!OldTag)

6351 return MD;

6352 if (!OldTag->getString().starts_with("llvm.vectorizer."))

6353 return MD;

6354

6355

6357 Ops.reserve(T->getNumOperands());

6358 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));

6359 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)

6360 Ops.push_back(T->getOperand(I));

6361

6363}

6364

6367 if (!T)

6368 return &N;

6369

6371 return &N;

6372

6374 Ops.reserve(T->getNumOperands());

6375 for (Metadata *MD : T->operands())

6377

6379}

6380

6383

6384

6385 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&

6386 !DL.contains("-G") && !DL.starts_with("G")) {

6387 return DL.empty() ? std::string("G1") : (DL + "-G1").str();

6388 }

6389

6390 if (T.isLoongArch64() || T.isRISCV64()) {

6391

6392 auto I = DL.find("-n64-");

6394 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();

6395 return DL.str();

6396 }

6397

6398

6399 std::string Res = DL.str();

6400 if (T.isAMDGPU()) {

6401

6402 if (!DL.contains("-G") && !DL.starts_with("G"))

6403 Res.append(Res.empty() ? "G1" : "-G1");

6404

6405

6406 if (T.isAMDGCN()) {

6407

6408

6409

6410

6411 if (!DL.contains("-ni") && !DL.starts_with("ni"))

6412 Res.append("-ni:7:8:9");

6413

6414 if (DL.ends_with("ni:7"))

6415 Res.append(":8:9");

6416 if (DL.ends_with("ni:7:8"))

6417 Res.append(":9");

6418

6419

6420

6421 if (!DL.contains("-p7") && !DL.starts_with("p7"))

6422 Res.append("-p7:160:256:256:32");

6423 if (!DL.contains("-p8") && !DL.starts_with("p8"))

6424 Res.append("-p8:128:128:128:48");

6425 constexpr StringRef OldP8("-p8:128:128-");

6426 if (DL.contains(OldP8))

6427 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");

6428 if (!DL.contains("-p9") && !DL.starts_with("p9"))

6429 Res.append("-p9:192:256:256:32");

6430 }

6431

6432

6433 if (!DL.contains("m:e"))

6434 Res = Res.empty() ? "m:e" : "m:e-" + Res;

6435

6436 return Res;

6437 }

6438

6439 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {

6440

6441

6442 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};

6443 if (!DL.contains(AddrSpaces)) {

6445 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");

6446 if (R.match(Res, &Groups))

6447 Res = (Groups[1] + AddrSpaces + Groups[3]).str();

6448 }

6449 };

6450

6451

6452 if (T.isAArch64()) {

6453

6454 if (!DL.empty() && !DL.contains("-Fn32"))

6455 Res.append("-Fn32");

6456 AddPtr32Ptr64AddrSpaces();

6457 return Res;

6458 }

6459

6460 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||

6461 T.isWasm()) {

6462

6463

6464 std::string I64 = "-i64:64";

6465 std::string I128 = "-i128:128";

6467 size_t Pos = Res.find(I64);

6468 if (Pos != size_t(-1))

6469 Res.insert(Pos + I64.size(), I128);

6470 }

6471 }

6472

6473 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {

6474 size_t Pos = Res.find("-S128");

6476 Pos = Res.size();

6477 Res.insert(Pos, "-f64:32:64");

6478 }

6479

6480 if (!T.isX86())

6481 return Res;

6482

6483 AddPtr32Ptr64AddrSpaces();

6484

6485

6486

6487

6488

6489

6490

6491 if (!T.isOSIAMCU()) {

6492 std::string I128 = "-i128:128";

6495 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");

6496 if (R.match(Res, &Groups))

6498 }

6499 }

6500

6501

6502

6503

6504 if (T.isWindowsMSVCEnvironment() && T.isArch64Bit()) {

6506 auto I = Ref.find("-f80:32-");

6508 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();

6509 }

6510

6511 return Res;

6512}

6513

6516 Attribute A = B.getAttribute("no-frame-pointer-elim");

6517 if (A.isValid()) {

6518

6519 FramePointer = A.getValueAsString() == "true" ? "all" : "none";

6520 B.removeAttribute("no-frame-pointer-elim");

6521 }

6522 if (B.contains("no-frame-pointer-elim-non-leaf")) {

6523

6524 if (FramePointer != "all")

6525 FramePointer = "non-leaf";

6526 B.removeAttribute("no-frame-pointer-elim-non-leaf");

6527 }

6528 if (!FramePointer.empty())

6529 B.addAttribute("frame-pointer", FramePointer);

6530

6531 A = B.getAttribute("null-pointer-is-valid");

6532 if (A.isValid()) {

6533

6534 bool NullPointerIsValid = A.getValueAsString() == "true";

6535 B.removeAttribute("null-pointer-is-valid");

6536 if (NullPointerIsValid)

6537 B.addAttribute(Attribute::NullPointerIsValid);

6538 }

6539}

6540

6542

6543

6544

6545

6547 return OBD.getTag() == "clang.arc.attachedcall" &&

6548 OBD.inputs().empty();

6549 });

6550}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPU address space definition.

AMDGPU Register Bank Select

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file contains the simple types necessary to represent the attributes associated with functions a...

static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)

Definition AutoUpgrade.cpp:2025

static Metadata * upgradeLoopArgument(Metadata *MD)

Definition AutoUpgrade.cpp:6343

static bool isXYZ(StringRef S)

Definition AutoUpgrade.cpp:5695

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)

Definition AutoUpgrade.cpp:1248

static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)

Definition AutoUpgrade.cpp:1865

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)

Definition AutoUpgrade.cpp:1143

static bool upgradeRetainReleaseMarker(Module &M)

This checks for objc retain release marker which should be upgraded.

Definition AutoUpgrade.cpp:5801

static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)

Definition AutoUpgrade.cpp:2126

static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)

Definition AutoUpgrade.cpp:2352

static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)

Definition AutoUpgrade.cpp:2100

static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:112

static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)

Definition AutoUpgrade.cpp:6206

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)

Definition AutoUpgrade.cpp:1168

static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)

Definition AutoUpgrade.cpp:5699

static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)

Helper to unwrap intrinsic call MetadataAsValue operands.

Definition AutoUpgrade.cpp:4691

static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)

Definition AutoUpgrade.cpp:6331

static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)

Definition AutoUpgrade.cpp:5662

static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:99

static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)

Definition AutoUpgrade.cpp:1980

static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)

Definition AutoUpgrade.cpp:2237

static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)

Definition AutoUpgrade.cpp:1951

static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)

Definition AutoUpgrade.cpp:4449

static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)

Definition AutoUpgrade.cpp:2338

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)

Definition AutoUpgrade.cpp:533

static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)

Definition AutoUpgrade.cpp:2277

static bool consumeNVVMPtrAddrSpace(StringRef &Name)

Definition AutoUpgrade.cpp:1242

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)

Definition AutoUpgrade.cpp:166

static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)

Definition AutoUpgrade.cpp:1899

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)

Definition AutoUpgrade.cpp:1096

static bool isOldLoopArgument(Metadata *MD)

Definition AutoUpgrade.cpp:6319

static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)

Definition AutoUpgrade.cpp:4506

static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:83

static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)

Definition AutoUpgrade.cpp:4596

static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)

Definition AutoUpgrade.cpp:2216

static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)

Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.

Definition AutoUpgrade.cpp:4703

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:146

static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)

Definition AutoUpgrade.cpp:771

static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)

Definition AutoUpgrade.cpp:1931

static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)

Definition AutoUpgrade.cpp:1963

static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)

Definition AutoUpgrade.cpp:2165

static void rename(GlobalValue *GV)

Definition AutoUpgrade.cpp:63

static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:67

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:156

static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))

static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)

Definition AutoUpgrade.cpp:2299

static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)

Definition AutoUpgrade.cpp:2085

static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)

Definition AutoUpgrade.cpp:2609

static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)

Definition AutoUpgrade.cpp:2331

static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)

Definition AutoUpgrade.cpp:2361

static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)

Convert debug intrinsic calls to non-instruction debug records.

Definition AutoUpgrade.cpp:4719

static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)

Definition AutoUpgrade.cpp:6215

static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)

Definition AutoUpgrade.cpp:2247

static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)

Definition AutoUpgrade.cpp:2198

static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)

Definition AutoUpgrade.cpp:130

static MDNode * getDebugLocSafe(const Instruction *I)

Definition AutoUpgrade.cpp:4710

static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)

Definition AutoUpgrade.cpp:2771

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file contains the declarations for the subclasses of Constant, which represent the different fla...

This file contains constants used for implementing Dwarf debug support.

Module.h This file contains the declarations for the Module class.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)

NVPTX address space definition.

static unsigned getNumElements(Type *Ty)

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...

static SymbolRef::Type getType(const Symbol *Sym)

LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty

static const X86InstrFMA3Group Groups[]

Class for arbitrary precision integers.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Class to represent array types.

static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)

This static method is the primary way to construct an ArrayType.

Type * getElementType() const

An instruction that atomically reads a memory location, combines it with another value,...

void setVolatile(bool V)

Specify whether this is a volatile RMW or not.

BinOp

This enumeration lists the possible modifications atomicrmw can make.

@ USubCond

Subtract only if no unsigned overflow.

@ USubSat

*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.

@ UIncWrap

Increment one up to a maximum value.

@ FMin

*p = minnum(old, v) minnum matches the behavior of llvm.minnum.

@ FMax

*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.

@ UDecWrap

Decrement one until a minimum value or zero.

bool isFloatingPointOperation() const

Functions, function parameters, and return types can have attributes to indicate how they should be t...

static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)

static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)

Return a uniquified Attribute object.

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

Value * getCalledOperand() const

void setAttributes(AttributeList A)

Set the attributes for this call.

Value * getArgOperand(unsigned i) const

FunctionType * getFunctionType() const

LLVM_ABI Intrinsic::ID getIntrinsicID() const

Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...

iterator_range< User::op_iterator > args()

Iteration adapter for range-for loops.

void setCalledOperand(Value *V)

unsigned arg_size() const

AttributeList getAttributes() const

Return the attributes for this call.

void setCalledFunction(Function *Fn)

Sets the function called, including updating the function type.

This class represents a function call, abstracting a target machine's calling convention.

void setTailCallKind(TailCallKind TCK)

static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...

static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)

This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ ICMP_SLT

signed less than

@ ICMP_SLE

signed less or equal

@ ICMP_UGE

unsigned greater or equal

@ ICMP_UGT

unsigned greater than

@ ICMP_SGT

signed greater than

@ ICMP_ULT

unsigned less than

@ ICMP_SGE

signed greater or equal

@ ICMP_ULE

unsigned less or equal

static LLVM_ABI ConstantAggregateZero * get(Type *Ty)

static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)

static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)

static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)

Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.

static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)

This is the shared class of boolean and integer constants.

bool isZero() const

This is just a convenience method to make client code smaller for a common case.

uint64_t getZExtValue() const

Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...

static LLVM_ABI ConstantPointerNull * get(PointerType *T)

Static factory methods - Return objects of the specified value.

static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)

static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)

Return the ConstantTokenNone.

This is an important base class in LLVM.

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)

Append the opcodes Ops to DIExpr.

A parsed version of the target data layout string, and methods for querying it.

static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)

For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.

Base class for non-instruction debug metadata records that have positions within IR.

static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)

Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...

Convenience struct for specifying and reasoning about fast-math flags.

void setApproxFunc(bool B=true)

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

Class to represent function types.

Type * getParamType(unsigned i) const

Parameter type accessors.

Type * getReturnType() const

static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)

This static method is the primary way of constructing a FunctionType.

static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)

FunctionType * getFunctionType() const

Returns the FunctionType for me.

Intrinsic::ID getIntrinsicID() const LLVM_READONLY

getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...

const Function & getFunction() const

void eraseFromParent()

eraseFromParent - This method unlinks 'this' from the containing module and deletes it.

Type * getReturnType() const

Returns the type of the ret val.

Argument * getArg(unsigned i) const

LinkageTypes getLinkage() const

Type * getValueType() const

const Constant * getInitializer() const

getInitializer - Return the initializer for this global variable.

bool hasInitializer() const

Definitions have initializers, declarations don't.

PointerType * getPtrTy(unsigned AddrSpace=0)

Fetch the type representing a pointer.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Base class for instruction visitors.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI const Module * getModule() const

Return the module owning the function this instruction belongs to or nullptr if the function does not...

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())

Copy metadata from SrcInst to this instruction.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)

Return metadata describing the range [Lo, Hi).

const MDOperand & getOperand(unsigned I) const

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

unsigned getNumOperands() const

Return number of MDNode operands.

LLVMContext & getContext() const

Tracking metadata reference owned by Metadata.

static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

A Module instance is used to store all the information related to an LLVM module.

ModFlagBehavior

This enumeration defines the supported behaviors of module flags.

@ Override

Uses the specified value, regardless of the behavior or value of the other module.

@ Error

Emits an error if two values disagree, otherwise the resulting value is that of the operands.

@ Min

Takes the min of the two values, which are required to be integers.

@ Max

Takes the max of the two values, which are required to be integers.

LLVM_ABI void setOperand(unsigned I, MDNode *New)

LLVM_ABI MDNode * getOperand(unsigned i) const

LLVM_ABI unsigned getNumOperands() const

LLVM_ABI void clearOperands()

Drop all references to this node's operands.

iterator_range< op_iterator > operands()

LLVM_ABI void addOperand(MDNode *M)

ArrayRef< InputTy > inputs() const

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const

matches - Match the regex against a given String.

static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)

ArrayRef< int > getShuffleMask() const

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

void append(ItTy in_start, ItTy in_end)

Add the specified range to the end of the SmallVector.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...

StringRef - Represent a constant reference to a string, i.e.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

static constexpr size_t npos

constexpr StringRef substr(size_t Start, size_t N=npos) const

Return a reference to the substring from [Start, Start + N).

bool starts_with(StringRef Prefix) const

Check if this string starts with the given Prefix.

constexpr bool empty() const

empty - Check if the string is empty.

StringRef drop_front(size_t N=1) const

Return a StringRef equal to 'this' but with the first N elements dropped.

constexpr size_t size() const

size - Get the string size.

StringRef trim(char Char) const

Return string with consecutive Char characters starting from the left and right removed.

A switch()-like statement whose cases are string literals.

StringSwitch & Case(StringLiteral S, T Value)

StringSwitch & StartsWith(StringLiteral S, T Value)

StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)

Class to represent struct types.

static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)

This static method is the primary way to create a literal StructType.

unsigned getNumElements() const

Random access to the elements.

Type * getElementType(unsigned N) const

The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.

Triple - Helper class for working with autoconf configuration names.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

bool isVectorTy() const

True if this is an instance of VectorType.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isFloatTy() const

Return true if this is 'float', a 32-bit IEEE fp type.

bool isBFloatTy() const

Return true if this is 'bfloat', a 16-bit bfloat type.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isPtrOrPtrVectorTy() const

Return true if this is a pointer type or a vector of pointer types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

bool isFPOrFPVectorTy() const

Return true if this is a FP type or a vector of FP.

static LLVM_ABI Type * getFloatTy(LLVMContext &C)

static LLVM_ABI Type * getBFloatTy(LLVMContext &C)

static LLVM_ABI Type * getHalfTy(LLVMContext &C)

Value * getOperand(unsigned i) const

unsigned getNumOperands() const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void setName(const Twine &Name)

Change the name of the value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

LLVM_ABI const Value * stripPointerCasts() const

Strip off pointer casts, all-zero GEPs and address space casts.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

LLVM_ABI void takeName(Value *V)

Transfer the name from V to this value.

Base class of all SIMD vector types.

static VectorType * getInteger(VectorType *VTy)

This static method gets a VectorType with the same number of elements as the input type,...

static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct a VectorType.

constexpr ScalarTy getFixedValue() const

const ParentTy * getParent() const

self_iterator getIterator()

A raw_ostream that writes to an SmallVector or SmallString.

StringRef str() const

Return a StringRef for the vector contents.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ LOCAL_ADDRESS

Address space for local memory.

@ FLAT_ADDRESS

Address space for flat memory.

@ PRIVATE_ADDRESS

Address space for private memory.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ PTX_Kernel

Call to a PTX kernel. Passes all arguments in parameter space.

@ C

The default llvm calling convention, compatible with C.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)

Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.

LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)

LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)

Return the attributes for an intrinsic.

LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)

Gets the type arguments of an intrinsic call by matching type constraints specified by the ....

@ ADDRESS_SPACE_SHARED_CLUSTER

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)

Extract a Value from Metadata, if any, allowing null.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)

Extract a Value from Metadata, if any.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)

This is the complement to the above, replacing a specific call to an intrinsic function with a call t...

Definition AutoUpgrade.cpp:4771

LLVM_ABI void UpgradeSectionAttributes(Module &M)

Definition AutoUpgrade.cpp:6090

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)

Upgrade comment in call to inline asm that represents an objc retain release marker.

Definition AutoUpgrade.cpp:2600

bool isValidAtomicOrdering(Int I)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty

LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)

This is a more granular function that simply checks an intrinsic function for upgrading,...

Definition AutoUpgrade.cpp:1813

LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)

Upgrade the loop attachment metadata node.

Definition AutoUpgrade.cpp:6365

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...

LLVM_ABI void UpgradeAttributes(AttrBuilder &B)

Upgrade attributes that changed format or kind.

Definition AutoUpgrade.cpp:6514

LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)

This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...

Definition AutoUpgrade.cpp:5527

LLVM_ABI void UpgradeNVVMAnnotations(Module &M)

Convert legacy nvvm.annotations metadata to appropriate function attributes.

Definition AutoUpgrade.cpp:5760

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

LLVM_ABI bool UpgradeModuleFlags(Module &M)

This checks for module flags which should be upgraded.

Definition AutoUpgrade.cpp:5947

std::string utostr(uint64_t X, bool isNeg=false)

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

void copyModuleAttrToFunctions(Module &M)

Copies module attributes to the functions in the module.

Definition AutoUpgrade.cpp:6230

LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)

Upgrade operand bundles (without knowing about their user instruction).

Definition AutoUpgrade.cpp:6541

LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)

This is an auto-upgrade for bitcast constant expression between pointers with different address space...

Definition AutoUpgrade.cpp:5594

auto dyn_cast_or_null(const Y &Val)

FunctionAddr VTableAddr uintptr_t uintptr_t Version

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)

Upgrade the datalayout string by adding a section for address space pointers.

Definition AutoUpgrade.cpp:6381

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)

This checks for global variables which should be upgraded.

Definition AutoUpgrade.cpp:1832

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

LLVM_ABI bool StripDebugInfo(Module &M)

Strip debug info in the module if it exists.

AtomicOrdering

Atomic ordering for LLVM's memory model.

@ Ref

The access may reference the value stored in memory.

std::string join(IteratorT Begin, IteratorT End, StringRef Separator)

Joins the strings in the range [Begin, End), adding Separator between the elements.

FunctionAddr VTableAddr uintptr_t uintptr_t Data

OperandBundleDefT< Value * > OperandBundleDef

LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)

This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...

Definition AutoUpgrade.cpp:5572

DWARFExpression::Operation Op

@ Dynamic

Denotes mode unknown at compile time.

ArrayRef(const T &OneElt) -> ArrayRef< T >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

LLVM_ABI bool UpgradeDebugInfo(Module &M)

Check the debug info version number, if it is out-dated, drop the debug info.

Definition AutoUpgrade.cpp:5616

LLVM_ABI void UpgradeFunctionAttributes(Function &F)

Correct any IR that is relying on old function attribute behavior.

Definition AutoUpgrade.cpp:6163

@ Default

The result values are uniform if and only if all operands are uniform.

LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)

If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...

Definition AutoUpgrade.cpp:5546

LLVM_ABI void UpgradeARCRuntime(Module &M)

Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...

Definition AutoUpgrade.cpp:5825

LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)

Check a module for errors.

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.