LLVM: lib/IR/AutoUpgrade.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsARM.h"
36#include "llvm/IR/IntrinsicsNVPTX.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
38#include "llvm/IR/IntrinsicsWebAssembly.h"
39#include "llvm/IR/IntrinsicsX86.h"
53#include
54#include
55#include
56
57using namespace llvm;
58
61 cl::desc("Disable autoupgrade of debug info"));
62
64
65
66
69
70
71 Type *Arg0Type = F->getFunctionType()->getParamType(0);
73 return false;
74
75
78 return true;
79}
80
81
82
85
86 Type *LastArgType = F->getFunctionType()->getParamType(
87 F->getFunctionType()->getNumParams() - 1);
89 return false;
90
91
94 return true;
95}
96
97
98
101
102 if (F->getReturnType()->isVectorTy())
103 return false;
104
107 return true;
108}
109
110
111
114
115 Type *Arg1Type = F->getFunctionType()->getParamType(1);
116 Type *Arg2Type = F->getFunctionType()->getParamType(2);
118 cast(Arg1Type)->getElementType()->isIntegerTy(8) &&
120 cast(Arg2Type)->getElementType()->isIntegerTy(8))
121 return false;
122
125 return true;
126}
127
128
129
132
133 Type *Arg1Type = F->getFunctionType()->getParamType(1);
134 Type *Arg2Type = F->getFunctionType()->getParamType(2);
136 cast(Arg1Type)->getElementType()->isIntegerTy(16) &&
138 cast(Arg2Type)->getElementType()->isIntegerTy(16))
139 return false;
140
143 return true;
144}
145
148 if (F->getReturnType()->getScalarType()->isBFloatTy())
149 return false;
150
153 return true;
154}
155
158 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
159 return false;
160
163 return true;
164}
165
167
168
169
170
171
172 if (Name.consume_front("avx."))
173 return (Name.starts_with("blend.p") ||
174 Name == "cvt.ps2.pd.256" ||
175 Name == "cvtdq2.pd.256" ||
176 Name == "cvtdq2.ps.256" ||
177 Name.starts_with("movnt.") ||
178 Name.starts_with("sqrt.p") ||
179 Name.starts_with("storeu.") ||
180 Name.starts_with("vbroadcast.s") ||
181 Name.starts_with("vbroadcastf128") ||
182 Name.starts_with("vextractf128.") ||
183 Name.starts_with("vinsertf128.") ||
184 Name.starts_with("vperm2f128.") ||
185 Name.starts_with("vpermil."));
186
187 if (Name.consume_front("avx2."))
188 return (Name == "movntdqa" ||
189 Name.starts_with("pabs.") ||
190 Name.starts_with("padds.") ||
191 Name.starts_with("paddus.") ||
192 Name.starts_with("pblendd.") ||
193 Name == "pblendw" ||
194 Name.starts_with("pbroadcast") ||
195 Name.starts_with("pcmpeq.") ||
196 Name.starts_with("pcmpgt.") ||
197 Name.starts_with("pmax") ||
198 Name.starts_with("pmin") ||
199 Name.starts_with("pmovsx") ||
200 Name.starts_with("pmovzx") ||
201 Name == "pmul.dq" ||
202 Name == "pmulu.dq" ||
203 Name.starts_with("psll.dq") ||
204 Name.starts_with("psrl.dq") ||
205 Name.starts_with("psubs.") ||
206 Name.starts_with("psubus.") ||
207 Name.starts_with("vbroadcast") ||
208 Name == "vbroadcasti128" ||
209 Name == "vextracti128" ||
210 Name == "vinserti128" ||
211 Name == "vperm2i128");
212
213 if (Name.consume_front("avx512.")) {
214 if (Name.consume_front("mask."))
215
216 return (Name.starts_with("add.p") ||
217 Name.starts_with("and.") ||
218 Name.starts_with("andn.") ||
219 Name.starts_with("broadcast.s") ||
220 Name.starts_with("broadcastf32x4.") ||
221 Name.starts_with("broadcastf32x8.") ||
222 Name.starts_with("broadcastf64x2.") ||
223 Name.starts_with("broadcastf64x4.") ||
224 Name.starts_with("broadcasti32x4.") ||
225 Name.starts_with("broadcasti32x8.") ||
226 Name.starts_with("broadcasti64x2.") ||
227 Name.starts_with("broadcasti64x4.") ||
228 Name.starts_with("cmp.b") ||
229 Name.starts_with("cmp.d") ||
230 Name.starts_with("cmp.q") ||
231 Name.starts_with("cmp.w") ||
232 Name.starts_with("compress.b") ||
233 Name.starts_with("compress.d") ||
234 Name.starts_with("compress.p") ||
235 Name.starts_with("compress.q") ||
236 Name.starts_with("compress.store.") ||
237 Name.starts_with("compress.w") ||
238 Name.starts_with("conflict.") ||
239 Name.starts_with("cvtdq2pd.") ||
240 Name.starts_with("cvtdq2ps.") ||
241 Name == "cvtpd2dq.256" ||
242 Name == "cvtpd2ps.256" ||
243 Name == "cvtps2pd.128" ||
244 Name == "cvtps2pd.256" ||
245 Name.starts_with("cvtqq2pd.") ||
246 Name == "cvtqq2ps.256" ||
247 Name == "cvtqq2ps.512" ||
248 Name == "cvttpd2dq.256" ||
249 Name == "cvttps2dq.128" ||
250 Name == "cvttps2dq.256" ||
251 Name.starts_with("cvtudq2pd.") ||
252 Name.starts_with("cvtudq2ps.") ||
253 Name.starts_with("cvtuqq2pd.") ||
254 Name == "cvtuqq2ps.256" ||
255 Name == "cvtuqq2ps.512" ||
256 Name.starts_with("dbpsadbw.") ||
257 Name.starts_with("div.p") ||
258 Name.starts_with("expand.b") ||
259 Name.starts_with("expand.d") ||
260 Name.starts_with("expand.load.") ||
261 Name.starts_with("expand.p") ||
262 Name.starts_with("expand.q") ||
263 Name.starts_with("expand.w") ||
264 Name.starts_with("fpclass.p") ||
265 Name.starts_with("insert") ||
266 Name.starts_with("load.") ||
267 Name.starts_with("loadu.") ||
268 Name.starts_with("lzcnt.") ||
269 Name.starts_with("max.p") ||
270 Name.starts_with("min.p") ||
271 Name.starts_with("movddup") ||
272 Name.starts_with("move.s") ||
273 Name.starts_with("movshdup") ||
274 Name.starts_with("movsldup") ||
275 Name.starts_with("mul.p") ||
276 Name.starts_with("or.") ||
277 Name.starts_with("pabs.") ||
278 Name.starts_with("packssdw.") ||
279 Name.starts_with("packsswb.") ||
280 Name.starts_with("packusdw.") ||
281 Name.starts_with("packuswb.") ||
282 Name.starts_with("padd.") ||
283 Name.starts_with("padds.") ||
284 Name.starts_with("paddus.") ||
285 Name.starts_with("palignr.") ||
286 Name.starts_with("pand.") ||
287 Name.starts_with("pandn.") ||
288 Name.starts_with("pavg") ||
289 Name.starts_with("pbroadcast") ||
290 Name.starts_with("pcmpeq.") ||
291 Name.starts_with("pcmpgt.") ||
292 Name.starts_with("perm.df.") ||
293 Name.starts_with("perm.di.") ||
294 Name.starts_with("permvar.") ||
295 Name.starts_with("pmaddubs.w.") ||
296 Name.starts_with("pmaddw.d.") ||
297 Name.starts_with("pmax") ||
298 Name.starts_with("pmin") ||
299 Name == "pmov.qd.256" ||
300 Name == "pmov.qd.512" ||
301 Name == "pmov.wb.256" ||
302 Name == "pmov.wb.512" ||
303 Name.starts_with("pmovsx") ||
304 Name.starts_with("pmovzx") ||
305 Name.starts_with("pmul.dq.") ||
306 Name.starts_with("pmul.hr.sw.") ||
307 Name.starts_with("pmulh.w.") ||
308 Name.starts_with("pmulhu.w.") ||
309 Name.starts_with("pmull.") ||
310 Name.starts_with("pmultishift.qb.") ||
311 Name.starts_with("pmulu.dq.") ||
312 Name.starts_with("por.") ||
313 Name.starts_with("prol.") ||
314 Name.starts_with("prolv.") ||
315 Name.starts_with("pror.") ||
316 Name.starts_with("prorv.") ||
317 Name.starts_with("pshuf.b.") ||
318 Name.starts_with("pshuf.d.") ||
319 Name.starts_with("pshufh.w.") ||
320 Name.starts_with("pshufl.w.") ||
321 Name.starts_with("psll.d") ||
322 Name.starts_with("psll.q") ||
323 Name.starts_with("psll.w") ||
324 Name.starts_with("pslli") ||
325 Name.starts_with("psllv") ||
326 Name.starts_with("psra.d") ||
327 Name.starts_with("psra.q") ||
328 Name.starts_with("psra.w") ||
329 Name.starts_with("psrai") ||
330 Name.starts_with("psrav") ||
331 Name.starts_with("psrl.d") ||
332 Name.starts_with("psrl.q") ||
333 Name.starts_with("psrl.w") ||
334 Name.starts_with("psrli") ||
335 Name.starts_with("psrlv") ||
336 Name.starts_with("psub.") ||
337 Name.starts_with("psubs.") ||
338 Name.starts_with("psubus.") ||
339 Name.starts_with("pternlog.") ||
340 Name.starts_with("punpckh") ||
341 Name.starts_with("punpckl") ||
342 Name.starts_with("pxor.") ||
343 Name.starts_with("shuf.f") ||
344 Name.starts_with("shuf.i") ||
345 Name.starts_with("shuf.p") ||
346 Name.starts_with("sqrt.p") ||
347 Name.starts_with("store.b.") ||
348 Name.starts_with("store.d.") ||
349 Name.starts_with("store.p") ||
350 Name.starts_with("store.q.") ||
351 Name.starts_with("store.w.") ||
352 Name == "store.ss" ||
353 Name.starts_with("storeu.") ||
354 Name.starts_with("sub.p") ||
355 Name.starts_with("ucmp.") ||
356 Name.starts_with("unpckh.") ||
357 Name.starts_with("unpckl.") ||
358 Name.starts_with("valign.") ||
359 Name == "vcvtph2ps.128" ||
360 Name == "vcvtph2ps.256" ||
361 Name.starts_with("vextract") ||
362 Name.starts_with("vfmadd.") ||
363 Name.starts_with("vfmaddsub.") ||
364 Name.starts_with("vfnmadd.") ||
365 Name.starts_with("vfnmsub.") ||
366 Name.starts_with("vpdpbusd.") ||
367 Name.starts_with("vpdpbusds.") ||
368 Name.starts_with("vpdpwssd.") ||
369 Name.starts_with("vpdpwssds.") ||
370 Name.starts_with("vpermi2var.") ||
371 Name.starts_with("vpermil.p") ||
372 Name.starts_with("vpermilvar.") ||
373 Name.starts_with("vpermt2var.") ||
374 Name.starts_with("vpmadd52") ||
375 Name.starts_with("vpshld.") ||
376 Name.starts_with("vpshldv.") ||
377 Name.starts_with("vpshrd.") ||
378 Name.starts_with("vpshrdv.") ||
379 Name.starts_with("vpshufbitqmb.") ||
380 Name.starts_with("xor."));
381
382 if (Name.consume_front("mask3."))
383
384 return (Name.starts_with("vfmadd.") ||
385 Name.starts_with("vfmaddsub.") ||
386 Name.starts_with("vfmsub.") ||
387 Name.starts_with("vfmsubadd.") ||
388 Name.starts_with("vfnmsub."));
389
390 if (Name.consume_front("maskz."))
391
392 return (Name.starts_with("pternlog.") ||
393 Name.starts_with("vfmadd.") ||
394 Name.starts_with("vfmaddsub.") ||
395 Name.starts_with("vpdpbusd.") ||
396 Name.starts_with("vpdpbusds.") ||
397 Name.starts_with("vpdpwssd.") ||
398 Name.starts_with("vpdpwssds.") ||
399 Name.starts_with("vpermt2var.") ||
400 Name.starts_with("vpmadd52") ||
401 Name.starts_with("vpshldv.") ||
402 Name.starts_with("vpshrdv."));
403
404
405 return (Name == "movntdqa" ||
406 Name == "pmul.dq.512" ||
407 Name == "pmulu.dq.512" ||
408 Name.starts_with("broadcastm") ||
409 Name.starts_with("cmp.p") ||
410 Name.starts_with("cvtb2mask.") ||
411 Name.starts_with("cvtd2mask.") ||
412 Name.starts_with("cvtmask2") ||
413 Name.starts_with("cvtq2mask.") ||
414 Name == "cvtusi2sd" ||
415 Name.starts_with("cvtw2mask.") ||
416 Name == "kand.w" ||
417 Name == "kandn.w" ||
418 Name == "knot.w" ||
419 Name == "kor.w" ||
420 Name == "kortestc.w" ||
421 Name == "kortestz.w" ||
422 Name.starts_with("kunpck") ||
423 Name == "kxnor.w" ||
424 Name == "kxor.w" ||
425 Name.starts_with("padds.") ||
426 Name.starts_with("pbroadcast") ||
427 Name.starts_with("prol") ||
428 Name.starts_with("pror") ||
429 Name.starts_with("psll.dq") ||
430 Name.starts_with("psrl.dq") ||
431 Name.starts_with("psubs.") ||
432 Name.starts_with("ptestm") ||
433 Name.starts_with("ptestnm") ||
434 Name.starts_with("storent.") ||
435 Name.starts_with("vbroadcast.s") ||
436 Name.starts_with("vpshld.") ||
437 Name.starts_with("vpshrd."));
438 }
439
440 if (Name.consume_front("fma."))
441 return (Name.starts_with("vfmadd.") ||
442 Name.starts_with("vfmsub.") ||
443 Name.starts_with("vfmsubadd.") ||
444 Name.starts_with("vfnmadd.") ||
445 Name.starts_with("vfnmsub."));
446
447 if (Name.consume_front("fma4."))
448 return Name.starts_with("vfmadd.s");
449
450 if (Name.consume_front("sse."))
451 return (Name == "add.ss" ||
452 Name == "cvtsi2ss" ||
453 Name == "cvtsi642ss" ||
454 Name == "div.ss" ||
455 Name == "mul.ss" ||
456 Name.starts_with("sqrt.p") ||
457 Name == "sqrt.ss" ||
458 Name.starts_with("storeu.") ||
459 Name == "sub.ss");
460
461 if (Name.consume_front("sse2."))
462 return (Name == "add.sd" ||
463 Name == "cvtdq2pd" ||
464 Name == "cvtdq2ps" ||
465 Name == "cvtps2pd" ||
466 Name == "cvtsi2sd" ||
467 Name == "cvtsi642sd" ||
468 Name == "cvtss2sd" ||
469 Name == "div.sd" ||
470 Name == "mul.sd" ||
471 Name.starts_with("padds.") ||
472 Name.starts_with("paddus.") ||
473 Name.starts_with("pcmpeq.") ||
474 Name.starts_with("pcmpgt.") ||
475 Name == "pmaxs.w" ||
476 Name == "pmaxu.b" ||
477 Name == "pmins.w" ||
478 Name == "pminu.b" ||
479 Name == "pmulu.dq" ||
480 Name.starts_with("pshuf") ||
481 Name.starts_with("psll.dq") ||
482 Name.starts_with("psrl.dq") ||
483 Name.starts_with("psubs.") ||
484 Name.starts_with("psubus.") ||
485 Name.starts_with("sqrt.p") ||
486 Name == "sqrt.sd" ||
487 Name == "storel.dq" ||
488 Name.starts_with("storeu.") ||
489 Name == "sub.sd");
490
491 if (Name.consume_front("sse41."))
492 return (Name.starts_with("blendp") ||
493 Name == "movntdqa" ||
494 Name == "pblendw" ||
495 Name == "pmaxsb" ||
496 Name == "pmaxsd" ||
497 Name == "pmaxud" ||
498 Name == "pmaxuw" ||
499 Name == "pminsb" ||
500 Name == "pminsd" ||
501 Name == "pminud" ||
502 Name == "pminuw" ||
503 Name.starts_with("pmovsx") ||
504 Name.starts_with("pmovzx") ||
505 Name == "pmuldq");
506
507 if (Name.consume_front("sse42."))
508 return Name == "crc32.64.8";
509
510 if (Name.consume_front("sse4a."))
511 return Name.starts_with("movnt.");
512
513 if (Name.consume_front("ssse3."))
514 return (Name == "pabs.b.128" ||
515 Name == "pabs.d.128" ||
516 Name == "pabs.w.128");
517
518 if (Name.consume_front("xop."))
519 return (Name == "vpcmov" ||
520 Name == "vpcmov.256" ||
521 Name.starts_with("vpcom") ||
522 Name.starts_with("vprot"));
523
524 return (Name == "addcarry.u32" ||
525 Name == "addcarry.u64" ||
526 Name == "addcarryx.u32" ||
527 Name == "addcarryx.u64" ||
528 Name == "subborrow.u32" ||
529 Name == "subborrow.u64" ||
530 Name.starts_with("vcvtph2ps."));
531}
532
535
536 if (!Name.consume_front("x86."))
537 return false;
538
540 NewFn = nullptr;
541 return true;
542 }
543
544 if (Name == "rdtscp") {
545
546 if (F->getFunctionType()->getNumParams() == 0)
547 return false;
548
551 Intrinsic::x86_rdtscp);
552 return true;
553 }
554
556
557
558 if (Name.consume_front("sse41.ptest")) {
560 .Case("c", Intrinsic::x86_sse41_ptestc)
561 .Case("z", Intrinsic::x86_sse41_ptestz)
562 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
566
567 return false;
568 }
569
570
571
572
573
575 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
576 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
577 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
578 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
579 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
580 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
584
585 if (Name.consume_front("avx512.")) {
586 if (Name.consume_front("mask.cmp.")) {
587
589 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
590 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
591 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
592 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
593 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
594 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
598 } else if (Name.starts_with("vpdpbusd.") ||
599 Name.starts_with("vpdpbusds.")) {
600
602 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
603 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
604 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
605 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
606 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
607 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
611 } else if (Name.starts_with("vpdpwssd.") ||
612 Name.starts_with("vpdpwssds.")) {
613
615 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
616 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
617 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
618 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
619 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
620 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
624 }
625 return false;
626 }
627
628 if (Name.consume_front("avx2.")) {
629 if (Name.consume_front("vpdpb")) {
630
632 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
633 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
634 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
635 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
636 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
637 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
638 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
639 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
640 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
641 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
642 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
643 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
647 } else if (Name.consume_front("vpdpw")) {
648
650 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
651 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
652 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
653 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
654 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
655 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
656 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
657 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
658 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
659 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
660 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
661 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
665 }
666 return false;
667 }
668
669 if (Name.consume_front("avx10.")) {
670 if (Name.consume_front("vpdpb")) {
671
673 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
674 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
675 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
676 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
677 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
678 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
682 } else if (Name.consume_front("vpdpw")) {
684 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
685 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
686 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
687 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
693 }
694 return false;
695 }
696
697 if (Name.consume_front("avx512bf16.")) {
698
700 .Case("cvtne2ps2bf16.128",
701 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
702 .Case("cvtne2ps2bf16.256",
703 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
704 .Case("cvtne2ps2bf16.512",
705 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
706 .Case("mask.cvtneps2bf16.128",
707 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
708 .Case("cvtneps2bf16.256",
709 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
710 .Case("cvtneps2bf16.512",
711 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
715
716
718 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
719 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
720 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
724 return false;
725 }
726
727 if (Name.consume_front("xop.")) {
729 if (Name.starts_with("vpermil2")) {
730
731
732 auto Idx = F->getFunctionType()->getParamType(2);
733 if (Idx->isFPOrFPVectorTy()) {
734 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
735 unsigned EltSize = Idx->getScalarSizeInBits();
736 if (EltSize == 64 && IdxSize == 128)
737 ID = Intrinsic::x86_xop_vpermil2pd;
738 else if (EltSize == 32 && IdxSize == 128)
739 ID = Intrinsic::x86_xop_vpermil2ps;
740 else if (EltSize == 64 && IdxSize == 256)
741 ID = Intrinsic::x86_xop_vpermil2pd_256;
742 else
743 ID = Intrinsic::x86_xop_vpermil2ps_256;
744 }
745 } else if (F->arg_size() == 2)
746
748 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
749 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
751
755 return true;
756 }
757 return false;
758 }
759
760 if (Name == "seh.recoverfp") {
762 Intrinsic::eh_recoverfp);
763 return true;
764 }
765
766 return false;
767}
768
769
770
774 if (Name.starts_with("rbit")) {
775
777 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
778 return true;
779 }
780
781 if (Name == "thread.pointer") {
782
784 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
785 return true;
786 }
787
788 bool Neon = Name.consume_front("neon.");
789 if (Neon) {
790
791
792
793 if (Name.consume_front("bfdot.")) {
794
797 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
802 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
803 assert((OperandWidth == 64 || OperandWidth == 128) &&
804 "Unexpected operand width");
805 LLVMContext &Ctx = F->getParent()->getContext();
806 std::array<Type *, 2> Tys{
807 {F->getReturnType(),
810 return true;
811 }
812 return false;
813 }
814
815
816
817 if (Name.consume_front("bfm")) {
818
819 if (Name.consume_back(".v4f32.v16i8")) {
820
824 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
826 .Case("lalb",
827 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
828 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
829 .Case("lalt",
830 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
831 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
835 return true;
836 }
837 return false;
838 }
839 return false;
840 }
841
842 }
843
844
845 if (IsArm) {
846
847 if (Neon) {
848
850 .StartsWith("vclz.", Intrinsic::ctlz)
851 .StartsWith("vcnt.", Intrinsic::ctpop)
852 .StartsWith("vqadds.", Intrinsic::sadd_sat)
853 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
854 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
855 .StartsWith("vqsubu.", Intrinsic::usub_sat)
856 .StartsWith("vrinta.", Intrinsic::round)
857 .StartsWith("vrintn.", Intrinsic::roundeven)
858 .StartsWith("vrintm.", Intrinsic::floor)
859 .StartsWith("vrintp.", Intrinsic::ceil)
860 .StartsWith("vrintx.", Intrinsic::rint)
861 .StartsWith("vrintz.", Intrinsic::trunc)
865 F->arg_begin()->getType());
866 return true;
867 }
868
869 if (Name.consume_front("vst")) {
870
871 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
875 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
876 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
877
879 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
880 Intrinsic::arm_neon_vst4lane};
881
882 auto fArgs = F->getFunctionType()->params();
883 Type *Tys[] = {fArgs[0], fArgs[1]};
886 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
887 else
889 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
890 return true;
891 }
892 return false;
893 }
894
895 return false;
896 }
897
898 if (Name.consume_front("mve.")) {
899
900 if (Name == "vctp64") {
902
903
905 return true;
906 }
907 return false;
908 }
909
910 if (Name.starts_with("vrintn.v")) {
912 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
913 return true;
914 }
915
916
917 if (Name.consume_back(".v4i1")) {
918
919 if (Name.consume_back(".predicated.v2i64.v4i32"))
920
921 return Name == "mull.int" || Name == "vqdmull";
922
923 if (Name.consume_back(".v2i64")) {
924
925 bool IsGather = Name.consume_front("vldr.gather.");
926 if (IsGather || Name.consume_front("vstr.scatter.")) {
927 if (Name.consume_front("base.")) {
928
929 Name.consume_front("wb.");
930
931
932 return Name == "predicated.v2i64";
933 }
934
935 if (Name.consume_front("offset.predicated."))
936 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
937 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
938
939
940 return false;
941 }
942
943 return false;
944 }
945 return false;
946 }
947 return false;
948 }
949
950 if (Name.consume_front("cde.vcx")) {
951
952 if (Name.consume_back(".predicated.v2i64.v4i1"))
953
954 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
955 Name == "3q" || Name == "3qa";
956
957 return false;
958 }
959 } else {
960
961 if (Neon) {
962
964 .StartsWith("frintn", Intrinsic::roundeven)
965 .StartsWith("rbit", Intrinsic::bitreverse)
969 F->arg_begin()->getType());
970 return true;
971 }
972
973 if (Name.starts_with("addp")) {
974
975 if (F->arg_size() != 2)
976 return false;
978 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
980 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
981 return true;
982 }
983 }
984
985
986 if (Name.starts_with("bfcvt")) {
987 NewFn = nullptr;
988 return true;
989 }
990
991 return false;
992 }
993 if (Name.consume_front("sve.")) {
994
995 if (Name.consume_front("bf")) {
996 if (Name.consume_back(".lane")) {
997
1000 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1001 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1002 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1006 return true;
1007 }
1008 return false;
1009 }
1010 return false;
1011 }
1012
1013
1014 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1015 NewFn = nullptr;
1016 return true;
1017 }
1018
1019 if (Name.consume_front("addqv")) {
1020
1021 if (->getReturnType()->isFPOrFPVectorTy())
1022 return false;
1023
1024 auto Args = F->getFunctionType()->params();
1025 Type *Tys[] = {F->getReturnType(), Args[1]};
1027 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1028 return true;
1029 }
1030
1031 if (Name.consume_front("ld")) {
1032
1033 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1034 if (LdRegex.match(Name)) {
1035 Type *ScalarTy =
1041 Intrinsic::aarch64_sve_ld2_sret,
1042 Intrinsic::aarch64_sve_ld3_sret,
1043 Intrinsic::aarch64_sve_ld4_sret,
1044 };
1046 LoadIDs[Name[0] - '2'], Ty);
1047 return true;
1048 }
1049 return false;
1050 }
1051
1052 if (Name.consume_front("tuple.")) {
1053
1054 if (Name.starts_with("get")) {
1055
1056 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1058 F->getParent(), Intrinsic::vector_extract, Tys);
1059 return true;
1060 }
1061
1062 if (Name.starts_with("set")) {
1063
1064 auto Args = F->getFunctionType()->params();
1065 Type *Tys[] = {Args[0], Args[2], Args[1]};
1067 F->getParent(), Intrinsic::vector_insert, Tys);
1068 return true;
1069 }
1070
1071 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1072 if (CreateTupleRegex.match(Name)) {
1073
1074 auto Args = F->getFunctionType()->params();
1075 Type *Tys[] = {F->getReturnType(), Args[1]};
1077 F->getParent(), Intrinsic::vector_insert, Tys);
1078 return true;
1079 }
1080 return false;
1081 }
1082
1083 if (Name.starts_with("rev.nxv")) {
1084
1086 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1087 return true;
1088 }
1089
1090 return false;
1091 }
1092 }
1093 return false;
1094}
1095
1098 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1101 .Case("im2col.3d",
1102 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1103 .Case("im2col.4d",
1104 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1105 .Case("im2col.5d",
1106 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1107 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1108 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1109 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1110 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1111 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1113
1115 return ID;
1116
1117
1118
1119
1120 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1122 return ID;
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1135 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1137 return ID;
1138 }
1139
1141}
1142
1145 if (Name.consume_front("mapa.shared.cluster"))
1146 if (F->getReturnType()->getPointerAddressSpace() ==
1148 return Intrinsic::nvvm_mapa_shared_cluster;
1149
1150 if (Name.consume_front("cp.async.bulk.")) {
1153 .Case("global.to.shared.cluster",
1154 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1155 .Case("shared.cta.to.cluster",
1156 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1158
1160 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1162 return ID;
1163 }
1164
1166}
1167
1169 if (Name.consume_front("fma.rn."))
1171 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1172 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1173 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
1174 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
1175 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
1176 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
1177 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
1178 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
1179 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1180 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1181 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
1182 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
1184
1185 if (Name.consume_front("fmax."))
1187 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1188 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1189 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1190 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1191 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1192 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1193 .Case("ftz.nan.xorsign.abs.bf16",
1194 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1195 .Case("ftz.nan.xorsign.abs.bf16x2",
1196 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1197 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1198 .Case("ftz.xorsign.abs.bf16x2",
1199 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1200 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1201 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1202 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1203 .Case("nan.xorsign.abs.bf16x2",
1204 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1205 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1206 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1208
1209 if (Name.consume_front("fmin."))
1211 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1212 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1213 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1214 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1215 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1216 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1217 .Case("ftz.nan.xorsign.abs.bf16",
1218 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1219 .Case("ftz.nan.xorsign.abs.bf16x2",
1220 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1221 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1222 .Case("ftz.xorsign.abs.bf16x2",
1223 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1224 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1225 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1226 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1227 .Case("nan.xorsign.abs.bf16x2",
1228 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1229 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1230 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1232
1233 if (Name.consume_front("neg."))
1235 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1236 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1238
1240}
1241
1243 return Name.consume_front("local") || Name.consume_front("shared") ||
1244 Name.consume_front("global") || Name.consume_front("constant") ||
1245 Name.consume_front("param");
1246}
1247
1249 bool CanUpgradeDebugIntrinsicsToRecords) {
1250 assert(F && "Illegal to upgrade a non-existent Function.");
1251
1253
1254
1255 if (!Name.consume_front("llvm.") || Name.empty())
1256 return false;
1257
1258 switch (Name[0]) {
1259 default: break;
1260 case 'a': {
1261 bool IsArm = Name.consume_front("arm.");
1262 if (IsArm || Name.consume_front("aarch64.")) {
1264 return true;
1265 break;
1266 }
1267
1268 if (Name.consume_front("amdgcn.")) {
1269 if (Name == "alignbit") {
1270
1272 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1273 return true;
1274 }
1275
1276 if (Name.consume_front("atomic.")) {
1277 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1278 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1279
1280
1281 NewFn = nullptr;
1282 return true;
1283 }
1284 break;
1285 }
1286
1287 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1288 Name.consume_front("flat.atomic.")) {
1289 if (Name.starts_with("fadd") ||
1290
1291 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1292 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1293
1294
1295 NewFn = nullptr;
1296 return true;
1297 }
1298 }
1299
1300 if (Name.starts_with("ldexp.")) {
1301
1303 F->getParent(), Intrinsic::ldexp,
1304 {F->getReturnType(), F->getArg(1)->getType()});
1305 return true;
1306 }
1307 break;
1308 }
1309
1310 break;
1311 }
1312 case 'c': {
1313 if (F->arg_size() == 1) {
1315 .StartsWith("ctlz.", Intrinsic::ctlz)
1316 .StartsWith("cttz.", Intrinsic::cttz)
1321 F->arg_begin()->getType());
1322 return true;
1323 }
1324 }
1325
1326 if (F->arg_size() == 2 && Name == "coro.end") {
1329 Intrinsic::coro_end);
1330 return true;
1331 }
1332
1333 break;
1334 }
1335 case 'd':
1336 if (Name.consume_front("dbg.")) {
1337
1338 if (CanUpgradeDebugIntrinsicsToRecords) {
1339 if (Name == "addr" || Name == "value" || Name == "assign" ||
1340 Name == "declare" || Name == "label") {
1341
1342 NewFn = nullptr;
1343
1344 return true;
1345 }
1346 }
1347
1348
1349 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1352 Intrinsic::dbg_value);
1353 return true;
1354 }
1355 break;
1356 }
1357 break;
1358 case 'e':
1359 if (Name.consume_front("experimental.vector.")) {
1362
1363
1365 .StartsWith("extract.", Intrinsic::vector_extract)
1366 .StartsWith("insert.", Intrinsic::vector_insert)
1367 .StartsWith("splice.", Intrinsic::vector_splice)
1368 .StartsWith("reverse.", Intrinsic::vector_reverse)
1369 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1370 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1372 Intrinsic::vector_partial_reduce_add)
1375 const auto *FT = F->getFunctionType();
1377 if (ID == Intrinsic::vector_extract ||
1378 ID == Intrinsic::vector_interleave2)
1379
1380 Tys.push_back(FT->getReturnType());
1381 if (ID != Intrinsic::vector_interleave2)
1382 Tys.push_back(FT->getParamType(0));
1383 if (ID == Intrinsic::vector_insert ||
1384 ID == Intrinsic::vector_partial_reduce_add)
1385
1386 Tys.push_back(FT->getParamType(1));
1389 return true;
1390 }
1391
1392 if (Name.consume_front("reduce.")) {
1394 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1395 if (R.match(Name, &Groups))
1397 .Case("add", Intrinsic::vector_reduce_add)
1398 .Case("mul", Intrinsic::vector_reduce_mul)
1399 .Case("and", Intrinsic::vector_reduce_and)
1400 .Case("or", Intrinsic::vector_reduce_or)
1401 .Case("xor", Intrinsic::vector_reduce_xor)
1402 .Case("smax", Intrinsic::vector_reduce_smax)
1403 .Case("smin", Intrinsic::vector_reduce_smin)
1404 .Case("umax", Intrinsic::vector_reduce_umax)
1405 .Case("umin", Intrinsic::vector_reduce_umin)
1406 .Case("fmax", Intrinsic::vector_reduce_fmax)
1407 .Case("fmin", Intrinsic::vector_reduce_fmin)
1409
1410 bool V2 = false;
1412 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1414 V2 = true;
1415 if (R2.match(Name, &Groups))
1417 .Case("fadd", Intrinsic::vector_reduce_fadd)
1418 .Case("fmul", Intrinsic::vector_reduce_fmul)
1420 }
1423 auto Args = F->getFunctionType()->params();
1425 {Args[V2 ? 1 : 0]});
1426 return true;
1427 }
1428 break;
1429 }
1430 break;
1431 }
1432 if (Name.consume_front("experimental.stepvector.")) {
1436 F->getParent(), ID, F->getFunctionType()->getReturnType());
1437 return true;
1438 }
1439 break;
1440 case 'f':
1441 if (Name.starts_with("flt.rounds")) {
1444 Intrinsic::get_rounding);
1445 return true;
1446 }
1447 break;
1448 case 'i':
1449 if (Name.starts_with("invariant.group.barrier")) {
1450
1451 auto Args = F->getFunctionType()->params();
1452 Type* ObjectPtr[1] = {Args[0]};
1455 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1456 return true;
1457 }
1458 break;
1459 case 'l':
1460 if ((Name.starts_with("lifetime.start") ||
1461 Name.starts_with("lifetime.end")) &&
1462 F->arg_size() == 2) {
1463 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1464 ? Intrinsic::lifetime_start
1465 : Intrinsic::lifetime_end;
1468 F->getArg(0)->getType());
1469 return true;
1470 }
1471 break;
1472 case 'm': {
1473
1474
1475
1477 .StartsWith("memcpy.", Intrinsic::memcpy)
1478 .StartsWith("memmove.", Intrinsic::memmove)
1480 if (F->arg_size() == 5) {
1482
1484 F->getFunctionType()->params().slice(0, 3);
1485 NewFn =
1487 return true;
1488 }
1489 }
1490 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1492
1493 const auto *FT = F->getFunctionType();
1494 Type *ParamTypes[2] = {
1495 FT->getParamType(0),
1496 FT->getParamType(2)
1497 };
1499 Intrinsic::memset, ParamTypes);
1500 return true;
1501 }
1502
1503 unsigned MaskedID =
1505 .StartsWith("masked.load", Intrinsic::masked_load)
1506 .StartsWith("masked.gather", Intrinsic::masked_gather)
1507 .StartsWith("masked.store", Intrinsic::masked_store)
1508 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1510 if (MaskedID && F->arg_size() == 4) {
1512 if (MaskedID == Intrinsic::masked_load ||
1513 MaskedID == Intrinsic::masked_gather) {
1515 F->getParent(), MaskedID,
1516 {F->getReturnType(), F->getArg(0)->getType()});
1517 return true;
1518 }
1520 F->getParent(), MaskedID,
1521 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1522 return true;
1523 }
1524 break;
1525 }
1526 case 'n': {
1527 if (Name.consume_front("nvvm.")) {
1528
1529 if (F->arg_size() == 1) {
1532 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1533 .Case("clz.i", Intrinsic::ctlz)
1534 .Case("popc.i", Intrinsic::ctpop)
1538 {F->getReturnType()});
1539 return true;
1540 }
1541 }
1542
1543
1544 if (->getReturnType()->getScalarType()->isBFloatTy()) {
1547 NewFn = nullptr;
1548 return true;
1549 }
1550 }
1551
1552
1557 return true;
1558 }
1559
1560
1565 return true;
1566 }
1567
1568
1569
1570
1571
1572 bool Expand = false;
1573 if (Name.consume_front("abs."))
1574
1575 Expand =
1576 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1577 else if (Name.consume_front("fabs."))
1578
1579 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1580 else if (Name.consume_front("ex2.approx."))
1581
1582 Expand =
1583 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1584 else if (Name.consume_front("max.") || Name.consume_front("min."))
1585
1586 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1587 Name == "ui" || Name == "ull";
1588 else if (Name.consume_front("atomic.load."))
1589
1590
1597 else if (Name.consume_front("bitcast."))
1598
1599 Expand =
1600 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1601 else if (Name.consume_front("rotate."))
1602
1603 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1604 else if (Name.consume_front("ptr.gen.to."))
1605
1607 else if (Name.consume_front("ptr."))
1608
1610 else if (Name.consume_front("ldg.global."))
1611
1612 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1613 Name.starts_with("p."));
1614 else
1616 .Case("barrier0", true)
1617 .Case("barrier.n", true)
1618 .Case("barrier.sync.cnt", true)
1619 .Case("barrier.sync", true)
1620 .Case("barrier", true)
1621 .Case("bar.sync", true)
1622 .Case("clz.ll", true)
1623 .Case("popc.ll", true)
1624 .Case("h2f", true)
1625 .Case("swap.lo.hi.b64", true)
1626 .Case("tanh.approx.f32", true)
1628
1629 if (Expand) {
1630 NewFn = nullptr;
1631 return true;
1632 }
1633 break;
1634 }
1635 break;
1636 }
1637 case 'o':
1638 if (Name.starts_with("objectsize.")) {
1639 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1640 if (F->arg_size() == 2 || F->arg_size() == 3) {
1643 Intrinsic::objectsize, Tys);
1644 return true;
1645 }
1646 }
1647 break;
1648
1649 case 'p':
1650 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1653 F->getParent(), Intrinsic::ptr_annotation,
1654 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1655 return true;
1656 }
1657 break;
1658
1659 case 'r': {
1660 if (Name.consume_front("riscv.")) {
1663 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1664 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1665 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1666 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1669 if (->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1672 return true;
1673 }
1674 break;
1675 }
1676
1678 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1679 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1682 if (->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1683 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1686 return true;
1687 }
1688 break;
1689 }
1690
1692 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1693 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1694 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1695 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1696 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1697 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1700 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1703 return true;
1704 }
1705 break;
1706 }
1707 break;
1708 }
1709 } break;
1710
1711 case 's':
1712 if (Name == "stackprotectorcheck") {
1713 NewFn = nullptr;
1714 return true;
1715 }
1716 break;
1717
1718 case 't':
1719 if (Name == "thread.pointer") {
1721 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1722 return true;
1723 }
1724 break;
1725
1726 case 'v': {
1727 if (Name == "var.annotation" && F->arg_size() == 4) {
1730 F->getParent(), Intrinsic::var_annotation,
1731 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1732 return true;
1733 }
1734 break;
1735 }
1736
1737 case 'w':
1738 if (Name.consume_front("wasm.")) {
1741 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1742 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1743 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1748 F->getReturnType());
1749 return true;
1750 }
1751
1752 if (Name.consume_front("dot.i8x16.i7x16.")) {
1754 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1755 .Case("add.signed",
1756 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1761 return true;
1762 }
1763 break;
1764 }
1765 break;
1766 }
1767 break;
1768
1769 case 'x':
1771 return true;
1772 }
1773
1775 if (ST && (->isLiteral() || ST->isPacked()) &&
1777
1778
1779
1780
1784 auto *FT = F->getFunctionType();
1786 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1787 std::string Name = F->getName().str();
1789 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1790 Name, F->getParent());
1791
1792
1795 return true;
1796 }
1797 }
1798
1799
1801 if (Result != std::nullopt) {
1803 return true;
1804 }
1805
1806
1807
1808
1809
1810 return false;
1811}
1812
1814 bool CanUpgradeDebugIntrinsicsToRecords) {
1815 NewFn = nullptr;
1816 bool Upgraded =
1818
1819
1820 if (NewFn)
1821 F = NewFn;
1823
1826 F->setAttributes(
1828 }
1829 return Upgraded;
1830}
1831
1833 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1834 GV->getName() == "llvm.global_dtors")) ||
1836 return nullptr;
1838 if (!ATy)
1839 return nullptr;
1842 return nullptr;
1843
1849 unsigned N = Init->getNumOperands();
1850 std::vector<Constant *> NewCtors(N);
1851 for (unsigned i = 0; i != N; ++i) {
1854 Ctor->getAggregateElement(1),
1856 }
1858
1860 NewInit, GV->getName());
1861}
1862
1863
1864
1866 unsigned Shift) {
1868 unsigned NumElts = ResultTy->getNumElements() * 8;
1869
1870
1872 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1873
1874
1876
1877
1878
1879 if (Shift < 16) {
1880 int Idxs[64];
1881
1882 for (unsigned l = 0; l != NumElts; l += 16)
1883 for (unsigned i = 0; i != 16; ++i) {
1884 unsigned Idx = NumElts + i - Shift;
1885 if (Idx < NumElts)
1886 Idx -= NumElts - 16;
1887 Idxs[l + i] = Idx + l;
1888 }
1889
1890 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1891 }
1892
1893
1894 return Builder.CreateBitCast(Res, ResultTy, "cast");
1895}
1896
1897
1898
1900 unsigned Shift) {
1902 unsigned NumElts = ResultTy->getNumElements() * 8;
1903
1904
1906 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1907
1908
1910
1911
1912
1913 if (Shift < 16) {
1914 int Idxs[64];
1915
1916 for (unsigned l = 0; l != NumElts; l += 16)
1917 for (unsigned i = 0; i != 16; ++i) {
1918 unsigned Idx = i + Shift;
1919 if (Idx >= 16)
1920 Idx += NumElts - 16;
1921 Idxs[l + i] = Idx + l;
1922 }
1923
1924 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1925 }
1926
1927
1928 return Builder.CreateBitCast(Res, ResultTy, "cast");
1929}
1930
1932 unsigned NumElts) {
1935 Builder.getInt1Ty(), cast(Mask->getType())->getBitWidth());
1936 Mask = Builder.CreateBitCast(Mask, MaskTy);
1937
1938
1939
1940 if (NumElts <= 4) {
1941 int Indices[4];
1942 for (unsigned i = 0; i != NumElts; ++i)
1943 Indices[i] = i;
1944 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1945 "extract");
1946 }
1947
1948 return Mask;
1949}
1950
1953
1955 if (C->isAllOnesValue())
1956 return Op0;
1957
1960 return Builder.CreateSelect(Mask, Op0, Op1);
1961}
1962
1965
1967 if (C->isAllOnesValue())
1968 return Op0;
1969
1971 Mask->getType()->getIntegerBitWidth());
1972 Mask = Builder.CreateBitCast(Mask, MaskTy);
1973 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1974 return Builder.CreateSelect(Mask, Op0, Op1);
1975}
1976
1977
1978
1979
1983 bool IsVALIGN) {
1985
1987 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1988 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1990
1991
1992 if (IsVALIGN)
1993 ShiftVal &= (NumElts - 1);
1994
1995
1996
1997 if (ShiftVal >= 32)
1999
2000
2001
2002 if (ShiftVal > 16) {
2003 ShiftVal -= 16;
2004 Op1 = Op0;
2006 }
2007
2008 int Indices[64];
2009
2010 for (unsigned l = 0; l < NumElts; l += 16) {
2011 for (unsigned i = 0; i != 16; ++i) {
2012 unsigned Idx = ShiftVal + i;
2013 if (!IsVALIGN && Idx >= 16)
2014 Idx += NumElts - 16;
2015 Indices[l + i] = Idx + l;
2016 }
2017 }
2018
2019 Value *Align = Builder.CreateShuffleVector(
2020 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2021
2023}
2024
2026 bool ZeroMask, bool IndexForm) {
2029 unsigned EltWidth = Ty->getScalarSizeInBits();
2030 bool IsFloat = Ty->isFPOrFPVectorTy();
2032 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2033 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2034 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2035 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2036 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2037 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2038 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2039 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2040 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2041 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2042 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2043 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2044 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2045 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2046 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2047 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2048 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2049 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2050 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2051 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2052 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2053 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2054 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2055 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2056 else if (VecWidth == 128 && EltWidth == 16)
2057 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2058 else if (VecWidth == 256 && EltWidth == 16)
2059 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2060 else if (VecWidth == 512 && EltWidth == 16)
2061 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2062 else if (VecWidth == 128 && EltWidth == 8)
2063 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2064 else if (VecWidth == 256 && EltWidth == 8)
2065 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2066 else if (VecWidth == 512 && EltWidth == 8)
2067 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2068 else
2070
2073
2074
2075 if (!IndexForm)
2077
2078 Value *V = Builder.CreateIntrinsic(IID, Args);
2081 Ty);
2083}
2084
2090 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2091
2092 if (CI.arg_size() == 4) {
2095 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2096 }
2097 return Res;
2098}
2099
2101 bool IsRotateRight) {
2105
2106
2107
2108
2109 if (Amt->getType() != Ty) {
2111 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2112 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2113 }
2114
2115 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2116 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2117
2118 if (CI.arg_size() == 4) {
2121 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2122 }
2123 return Res;
2124}
2125
2127 bool IsSigned) {
2131
2133 switch (Imm) {
2134 case 0x0:
2136 break;
2137 case 0x1:
2139 break;
2140 case 0x2:
2142 break;
2143 case 0x3:
2145 break;
2146 case 0x4:
2148 break;
2149 case 0x5:
2151 break;
2152 case 0x6:
2154 case 0x7:
2156 default:
2158 }
2159
2160 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2161 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2162 return Ext;
2163}
2164
2166 bool IsShiftRight, bool ZeroMask) {
2171
2172 if (IsShiftRight)
2174
2175
2176
2177
2178 if (Amt->getType() != Ty) {
2180 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2181 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2182 }
2183
2184 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2185 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2186
2187 unsigned NumArgs = CI.arg_size();
2188 if (NumArgs >= 4) {
2193 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2194 }
2195 return Res;
2196}
2197
2200 const Align Alignment =
2202 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2204
2205
2207 if (C->isAllOnesValue())
2208 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2209
2210
2213 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2214}
2215
2219 const Align Alignment =
2223 8)
2225
2226
2228 if (C->isAllOnesValue())
2229 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2230
2231
2234 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2235}
2236
2240 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2241 {Op0, Builder.getInt1(false)});
2244 return Res;
2245}
2246
2249
2250
2253
2254 if (IsSigned) {
2255
2256 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2257 LHS = Builder.CreateShl(LHS, ShiftAmt);
2258 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2259 RHS = Builder.CreateShl(RHS, ShiftAmt);
2260 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2261 } else {
2262
2263 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2264 LHS = Builder.CreateAnd(LHS, Mask);
2265 RHS = Builder.CreateAnd(RHS, Mask);
2266 }
2267
2268 Value *Res = Builder.CreateMul(LHS, RHS);
2269
2272
2273 return Res;
2274}
2275
2276
2280 if (Mask) {
2282 if ( ||
->isAllOnesValue())
2283 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2284 }
2285
2286 if (NumElts < 8) {
2287 int Indices[8];
2288 for (unsigned i = 0; i != NumElts; ++i)
2289 Indices[i] = i;
2290 for (unsigned i = NumElts; i != 8; ++i)
2291 Indices[i] = NumElts + i % NumElts;
2292 Vec = Builder.CreateShuffleVector(Vec,
2294 Indices);
2295 }
2296 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2297}
2298
2300 unsigned CC, bool Signed) {
2303
2305 if (CC == 3) {
2308 } else if (CC == 7) {
2311 } else {
2313 switch (CC) {
2321 }
2322 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2323 }
2324
2326
2328}
2329
2330
2337
2343
2344 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2345 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2346 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2347 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2348 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2349 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2350}
2351
2357 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2358}
2359
2360
2363 Name = Name.substr(12);
2364
2368 if (Name.starts_with("max.p")) {
2369 if (VecWidth == 128 && EltWidth == 32)
2370 IID = Intrinsic::x86_sse_max_ps;
2371 else if (VecWidth == 128 && EltWidth == 64)
2372 IID = Intrinsic::x86_sse2_max_pd;
2373 else if (VecWidth == 256 && EltWidth == 32)
2374 IID = Intrinsic::x86_avx_max_ps_256;
2375 else if (VecWidth == 256 && EltWidth == 64)
2376 IID = Intrinsic::x86_avx_max_pd_256;
2377 else
2379 } else if (Name.starts_with("min.p")) {
2380 if (VecWidth == 128 && EltWidth == 32)
2381 IID = Intrinsic::x86_sse_min_ps;
2382 else if (VecWidth == 128 && EltWidth == 64)
2383 IID = Intrinsic::x86_sse2_min_pd;
2384 else if (VecWidth == 256 && EltWidth == 32)
2385 IID = Intrinsic::x86_avx_min_ps_256;
2386 else if (VecWidth == 256 && EltWidth == 64)
2387 IID = Intrinsic::x86_avx_min_pd_256;
2388 else
2390 } else if (Name.starts_with("pshuf.b.")) {
2391 if (VecWidth == 128)
2392 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2393 else if (VecWidth == 256)
2394 IID = Intrinsic::x86_avx2_pshuf_b;
2395 else if (VecWidth == 512)
2396 IID = Intrinsic::x86_avx512_pshuf_b_512;
2397 else
2399 } else if (Name.starts_with("pmul.hr.sw.")) {
2400 if (VecWidth == 128)
2401 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2402 else if (VecWidth == 256)
2403 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2404 else if (VecWidth == 512)
2405 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2406 else
2408 } else if (Name.starts_with("pmulh.w.")) {
2409 if (VecWidth == 128)
2410 IID = Intrinsic::x86_sse2_pmulh_w;
2411 else if (VecWidth == 256)
2412 IID = Intrinsic::x86_avx2_pmulh_w;
2413 else if (VecWidth == 512)
2414 IID = Intrinsic::x86_avx512_pmulh_w_512;
2415 else
2417 } else if (Name.starts_with("pmulhu.w.")) {
2418 if (VecWidth == 128)
2419 IID = Intrinsic::x86_sse2_pmulhu_w;
2420 else if (VecWidth == 256)
2421 IID = Intrinsic::x86_avx2_pmulhu_w;
2422 else if (VecWidth == 512)
2423 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2424 else
2426 } else if (Name.starts_with("pmaddw.d.")) {
2427 if (VecWidth == 128)
2428 IID = Intrinsic::x86_sse2_pmadd_wd;
2429 else if (VecWidth == 256)
2430 IID = Intrinsic::x86_avx2_pmadd_wd;
2431 else if (VecWidth == 512)
2432 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2433 else
2435 } else if (Name.starts_with("pmaddubs.w.")) {
2436 if (VecWidth == 128)
2437 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2438 else if (VecWidth == 256)
2439 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2440 else if (VecWidth == 512)
2441 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2442 else
2444 } else if (Name.starts_with("packsswb.")) {
2445 if (VecWidth == 128)
2446 IID = Intrinsic::x86_sse2_packsswb_128;
2447 else if (VecWidth == 256)
2448 IID = Intrinsic::x86_avx2_packsswb;
2449 else if (VecWidth == 512)
2450 IID = Intrinsic::x86_avx512_packsswb_512;
2451 else
2453 } else if (Name.starts_with("packssdw.")) {
2454 if (VecWidth == 128)
2455 IID = Intrinsic::x86_sse2_packssdw_128;
2456 else if (VecWidth == 256)
2457 IID = Intrinsic::x86_avx2_packssdw;
2458 else if (VecWidth == 512)
2459 IID = Intrinsic::x86_avx512_packssdw_512;
2460 else
2462 } else if (Name.starts_with("packuswb.")) {
2463 if (VecWidth == 128)
2464 IID = Intrinsic::x86_sse2_packuswb_128;
2465 else if (VecWidth == 256)
2466 IID = Intrinsic::x86_avx2_packuswb;
2467 else if (VecWidth == 512)
2468 IID = Intrinsic::x86_avx512_packuswb_512;
2469 else
2471 } else if (Name.starts_with("packusdw.")) {
2472 if (VecWidth == 128)
2473 IID = Intrinsic::x86_sse41_packusdw;
2474 else if (VecWidth == 256)
2475 IID = Intrinsic::x86_avx2_packusdw;
2476 else if (VecWidth == 512)
2477 IID = Intrinsic::x86_avx512_packusdw_512;
2478 else
2480 } else if (Name.starts_with("vpermilvar.")) {
2481 if (VecWidth == 128 && EltWidth == 32)
2482 IID = Intrinsic::x86_avx_vpermilvar_ps;
2483 else if (VecWidth == 128 && EltWidth == 64)
2484 IID = Intrinsic::x86_avx_vpermilvar_pd;
2485 else if (VecWidth == 256 && EltWidth == 32)
2486 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2487 else if (VecWidth == 256 && EltWidth == 64)
2488 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2489 else if (VecWidth == 512 && EltWidth == 32)
2490 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2491 else if (VecWidth == 512 && EltWidth == 64)
2492 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2493 else
2495 } else if (Name == "cvtpd2dq.256") {
2496 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2497 } else if (Name == "cvtpd2ps.256") {
2498 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2499 } else if (Name == "cvttpd2dq.256") {
2500 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2501 } else if (Name == "cvttps2dq.128") {
2502 IID = Intrinsic::x86_sse2_cvttps2dq;
2503 } else if (Name == "cvttps2dq.256") {
2504 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2505 } else if (Name.starts_with("permvar.")) {
2507 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2508 IID = Intrinsic::x86_avx2_permps;
2509 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2510 IID = Intrinsic::x86_avx2_permd;
2511 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2512 IID = Intrinsic::x86_avx512_permvar_df_256;
2513 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2514 IID = Intrinsic::x86_avx512_permvar_di_256;
2515 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2516 IID = Intrinsic::x86_avx512_permvar_sf_512;
2517 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2518 IID = Intrinsic::x86_avx512_permvar_si_512;
2519 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2520 IID = Intrinsic::x86_avx512_permvar_df_512;
2521 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2522 IID = Intrinsic::x86_avx512_permvar_di_512;
2523 else if (VecWidth == 128 && EltWidth == 16)
2524 IID = Intrinsic::x86_avx512_permvar_hi_128;
2525 else if (VecWidth == 256 && EltWidth == 16)
2526 IID = Intrinsic::x86_avx512_permvar_hi_256;
2527 else if (VecWidth == 512 && EltWidth == 16)
2528 IID = Intrinsic::x86_avx512_permvar_hi_512;
2529 else if (VecWidth == 128 && EltWidth == 8)
2530 IID = Intrinsic::x86_avx512_permvar_qi_128;
2531 else if (VecWidth == 256 && EltWidth == 8)
2532 IID = Intrinsic::x86_avx512_permvar_qi_256;
2533 else if (VecWidth == 512 && EltWidth == 8)
2534 IID = Intrinsic::x86_avx512_permvar_qi_512;
2535 else
2537 } else if (Name.starts_with("dbpsadbw.")) {
2538 if (VecWidth == 128)
2539 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2540 else if (VecWidth == 256)
2541 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2542 else if (VecWidth == 512)
2543 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2544 else
2546 } else if (Name.starts_with("pmultishift.qb.")) {
2547 if (VecWidth == 128)
2548 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2549 else if (VecWidth == 256)
2550 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2551 else if (VecWidth == 512)
2552 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2553 else
2555 } else if (Name.starts_with("conflict.")) {
2556 if (Name[9] == 'd' && VecWidth == 128)
2557 IID = Intrinsic::x86_avx512_conflict_d_128;
2558 else if (Name[9] == 'd' && VecWidth == 256)
2559 IID = Intrinsic::x86_avx512_conflict_d_256;
2560 else if (Name[9] == 'd' && VecWidth == 512)
2561 IID = Intrinsic::x86_avx512_conflict_d_512;
2562 else if (Name[9] == 'q' && VecWidth == 128)
2563 IID = Intrinsic::x86_avx512_conflict_q_128;
2564 else if (Name[9] == 'q' && VecWidth == 256)
2565 IID = Intrinsic::x86_avx512_conflict_q_256;
2566 else if (Name[9] == 'q' && VecWidth == 512)
2567 IID = Intrinsic::x86_avx512_conflict_q_512;
2568 else
2570 } else if (Name.starts_with("pavg.")) {
2571 if (Name[5] == 'b' && VecWidth == 128)
2572 IID = Intrinsic::x86_sse2_pavg_b;
2573 else if (Name[5] == 'b' && VecWidth == 256)
2574 IID = Intrinsic::x86_avx2_pavg_b;
2575 else if (Name[5] == 'b' && VecWidth == 512)
2576 IID = Intrinsic::x86_avx512_pavg_b_512;
2577 else if (Name[5] == 'w' && VecWidth == 128)
2578 IID = Intrinsic::x86_sse2_pavg_w;
2579 else if (Name[5] == 'w' && VecWidth == 256)
2580 IID = Intrinsic::x86_avx2_pavg_w;
2581 else if (Name[5] == 'w' && VecWidth == 512)
2582 IID = Intrinsic::x86_avx512_pavg_w_512;
2583 else
2585 } else
2586 return false;
2587
2589 Args.pop_back();
2590 Args.pop_back();
2591 Rep = Builder.CreateIntrinsic(IID, Args);
2592 unsigned NumArgs = CI.arg_size();
2595 return true;
2596}
2597
2598
2599
2601 size_t Pos;
2602 if (AsmStr->find("mov\tfp") == 0 &&
2603 AsmStr->find("objc_retainAutoreleaseReturnValue") != std:🧵:npos &&
2604 (Pos = AsmStr->find("# marker")) != std:🧵:npos) {
2605 AsmStr->replace(Pos, 1, ";");
2606 }
2607}
2608
2611 Value *Rep = nullptr;
2612
2613 if (Name == "abs.i" || Name == "abs.ll") {
2615 Value *Neg = Builder.CreateNeg(Arg, "neg");
2616 Value *Cmp = Builder.CreateICmpSGE(
2618 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2619 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2620 Type *Ty = (Name == "abs.bf16")
2624 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2625 Rep = Builder.CreateBitCast(Abs, CI->getType());
2626 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2627 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2628 : Intrinsic::nvvm_fabs;
2629 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2630 } else if (Name.consume_front("ex2.approx.")) {
2631
2632 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2633 : Intrinsic::nvvm_ex2_approx;
2634 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2635 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2636 Name.starts_with("atomic.load.add.f64.p")) {
2641 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2642 Name.starts_with("atomic.load.dec.32.p")) {
2647 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2649 } else if (Name.consume_front("max.") &&
2650 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2651 Name == "ui" || Name == "ull")) {
2654 Value *Cmp = Name.starts_with("u")
2655 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2656 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2657 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2658 } else if (Name.consume_front("min.") &&
2659 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2660 Name == "ui" || Name == "ull")) {
2663 Value *Cmp = Name.starts_with("u")
2664 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2665 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2666 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2667 } else if (Name == "clz.ll") {
2668
2670 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2671 {Arg, Builder.getFalse()},
2672 nullptr, "ctlz");
2673 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2674 } else if (Name == "popc.ll") {
2675
2676
2678 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2679 Arg, nullptr, "ctpop");
2680 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2681 } else if (Name == "h2f") {
2682 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2684 nullptr, "h2f");
2685 } else if (Name.consume_front("bitcast.") &&
2686 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2687 Name == "d2ll")) {
2689 } else if (Name == "rotate.b32") {
2692 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2693 {Arg, Arg, ShiftAmt});
2694 } else if (Name == "rotate.b64") {
2697 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2698 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2699 {Arg, Arg, ZExtShiftAmt});
2700 } else if (Name == "rotate.right.b64") {
2703 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2704 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2705 {Arg, Arg, ZExtShiftAmt});
2706 } else if (Name == "swap.lo.hi.b64") {
2709 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2710 {Arg, Arg, Builder.getInt64(32)});
2711 } else if ((Name.consume_front("ptr.gen.to.") &&
2714 Name.starts_with(".to.gen"))) {
2716 } else if (Name.consume_front("ldg.global")) {
2719
2720 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2721 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2723 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2724 return LD;
2725 } else if (Name == "tanh.approx.f32") {
2726
2729 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2730 FMF);
2731 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2733 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2734 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2735 {}, {Arg});
2736 } else if (Name == "barrier") {
2737 Rep = Builder.CreateIntrinsic(
2738 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2740 } else if (Name == "barrier.sync") {
2741 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2743 } else if (Name == "barrier.sync.cnt") {
2744 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2746 } else {
2749 ->getReturnType()->getScalarType()->isBFloatTy()) {
2753 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2757 Args.push_back(
2759 ? Builder.CreateBitCast(Arg, NewType)
2760 : Arg);
2761 }
2762 Rep = Builder.CreateCall(NewFn, Args);
2763 if (F->getReturnType()->isIntegerTy())
2764 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2765 }
2766 }
2767
2768 return Rep;
2769}
2770
2774 Value *Rep = nullptr;
2775
2776 if (Name.starts_with("sse4a.movnt.")) {
2781
2784
2785
2786
2787 Value *Extract =
2788 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2789
2790 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2791 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2792 } else if (Name.starts_with("avx.movnt.") ||
2793 Name.starts_with("avx512.storent.")) {
2798
2801
2802 StoreInst *SI = Builder.CreateAlignedStore(
2803 Arg1, Arg0,
2805 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2806 } else if (Name == "sse2.storel.dq") {
2809
2811 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2812 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2813 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2814 } else if (Name.starts_with("sse.storeu.") ||
2815 Name.starts_with("sse2.storeu.") ||
2816 Name.starts_with("avx.storeu.")) {
2819 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2820 } else if (Name == "avx512.mask.store.ss") {
2821 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2823 Mask, false);
2824 } else if (Name.starts_with("avx512.mask.store")) {
2825
2826 bool Aligned = Name[17] != 'u';
2829 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2830
2831
2832 bool CmpEq = Name[9] == 'e';
2835 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2836 } else if (Name.starts_with("avx512.broadcastm")) {
2842 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2843 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2844 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2846 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2847 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2848 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2849 } else if (Name.starts_with("avx.sqrt.p") ||
2850 Name.starts_with("sse2.sqrt.p") ||
2851 Name.starts_with("sse.sqrt.p")) {
2852 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2853 {CI->getArgOperand(0)});
2854 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2858 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2859 : Intrinsic::x86_avx512_sqrt_pd_512;
2860
2862 Rep = Builder.CreateIntrinsic(IID, Args);
2863 } else {
2864 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2865 {CI->getArgOperand(0)});
2866 }
2867 Rep =
2869 } else if (Name.starts_with("avx512.ptestm") ||
2870 Name.starts_with("avx512.ptestnm")) {
2874 Rep = Builder.CreateAnd(Op0, Op1);
2880 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2882 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2884 ->getNumElements();
2885 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2886 Rep =
2888 } else if (Name.starts_with("avx512.kunpck")) {
2892 int Indices[64];
2893 for (unsigned i = 0; i != NumElts; ++i)
2894 Indices[i] = i;
2895
2896
2897
2898 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2899 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2900
2901
2902 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2903 Rep = Builder.CreateBitCast(Rep, CI->getType());
2904 } else if (Name == "avx512.kand.w") {
2907 Rep = Builder.CreateAnd(LHS, RHS);
2908 Rep = Builder.CreateBitCast(Rep, CI->getType());
2909 } else if (Name == "avx512.kandn.w") {
2912 LHS = Builder.CreateNot(LHS);
2913 Rep = Builder.CreateAnd(LHS, RHS);
2914 Rep = Builder.CreateBitCast(Rep, CI->getType());
2915 } else if (Name == "avx512.kor.w") {
2918 Rep = Builder.CreateOr(LHS, RHS);
2919 Rep = Builder.CreateBitCast(Rep, CI->getType());
2920 } else if (Name == "avx512.kxor.w") {
2923 Rep = Builder.CreateXor(LHS, RHS);
2924 Rep = Builder.CreateBitCast(Rep, CI->getType());
2925 } else if (Name == "avx512.kxnor.w") {
2928 LHS = Builder.CreateNot(LHS);
2929 Rep = Builder.CreateXor(LHS, RHS);
2930 Rep = Builder.CreateBitCast(Rep, CI->getType());
2931 } else if (Name == "avx512.knot.w") {
2933 Rep = Builder.CreateNot(Rep);
2934 Rep = Builder.CreateBitCast(Rep, CI->getType());
2935 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2938 Rep = Builder.CreateOr(LHS, RHS);
2939 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2941 if (Name[14] == 'c')
2943 else
2945 Rep = Builder.CreateICmpEQ(Rep, C);
2946 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2947 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2948 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2949 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2950 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2953 ConstantInt::get(I32Ty, 0));
2955 ConstantInt::get(I32Ty, 0));
2957 if (Name.contains(".add."))
2958 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2959 else if (Name.contains(".sub."))
2960 EltOp = Builder.CreateFSub(Elt0, Elt1);
2961 else if (Name.contains(".mul."))
2962 EltOp = Builder.CreateFMul(Elt0, Elt1);
2963 else
2964 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2965 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2966 ConstantInt::get(I32Ty, 0));
2967 } else if (Name.starts_with("avx512.mask.pcmp")) {
2968
2969 bool CmpEq = Name[16] == 'e';
2971 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2975 switch (VecWidth) {
2976 default:
2978 case 128:
2979 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2980 break;
2981 case 256:
2982 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2983 break;
2984 case 512:
2985 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2986 break;
2987 }
2988
2989 Rep =
2992 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2997 if (VecWidth == 128 && EltWidth == 32)
2998 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2999 else if (VecWidth == 256 && EltWidth == 32)
3000 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3001 else if (VecWidth == 512 && EltWidth == 32)
3002 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3003 else if (VecWidth == 128 && EltWidth == 64)
3004 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3005 else if (VecWidth == 256 && EltWidth == 64)
3006 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3007 else if (VecWidth == 512 && EltWidth == 64)
3008 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3009 else
3011
3012 Rep =
3015 } else if (Name.starts_with("avx512.cmp.p")) {
3017 Type *OpTy = Args[0]->getType();
3021 if (VecWidth == 128 && EltWidth == 32)
3022 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3023 else if (VecWidth == 256 && EltWidth == 32)
3024 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3025 else if (VecWidth == 512 && EltWidth == 32)
3026 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3027 else if (VecWidth == 128 && EltWidth == 64)
3028 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3029 else if (VecWidth == 256 && EltWidth == 64)
3030 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3031 else if (VecWidth == 512 && EltWidth == 64)
3032 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3033 else
3035
3037 if (VecWidth == 512)
3039 Args.push_back(Mask);
3040
3041 Rep = Builder.CreateIntrinsic(IID, Args);
3042 } else if (Name.starts_with("avx512.mask.cmp.")) {
3043
3046 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3049 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3050 Name.starts_with("avx512.cvtw2mask.") ||
3051 Name.starts_with("avx512.cvtd2mask.") ||
3052 Name.starts_with("avx512.cvtq2mask.")) {
3057 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3058 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3059 Name.starts_with("avx512.mask.pabs")) {
3061 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3062 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3063 Name.starts_with("avx512.mask.pmaxs")) {
3065 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3066 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3067 Name.starts_with("avx512.mask.pmaxu")) {
3069 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3070 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3071 Name.starts_with("avx512.mask.pmins")) {
3073 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3074 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3075 Name.starts_with("avx512.mask.pminu")) {
3077 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3078 Name == "avx512.pmulu.dq.512" ||
3079 Name.starts_with("avx512.mask.pmulu.dq.")) {
3080 Rep = upgradePMULDQ(Builder, *CI, false);
3081 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3082 Name == "avx512.pmul.dq.512" ||
3083 Name.starts_with("avx512.mask.pmul.dq.")) {
3084 Rep = upgradePMULDQ(Builder, *CI, true);
3085 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3086 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3087 Rep =
3091 } else if (Name == "avx512.cvtusi2sd") {
3092 Rep =
3096 } else if (Name == "sse2.cvtss2sd") {
3098 Rep = Builder.CreateFPExt(
3101 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3102 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3103 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3104 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3105 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3106 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3107 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3108 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3109 Name == "avx512.mask.cvtqq2ps.256" ||
3110 Name == "avx512.mask.cvtqq2ps.512" ||
3111 Name == "avx512.mask.cvtuqq2ps.256" ||
3112 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3113 Name == "avx.cvt.ps2.pd.256" ||
3114 Name == "avx512.mask.cvtps2pd.128" ||
3115 Name == "avx512.mask.cvtps2pd.256") {
3119
3120 unsigned NumDstElts = DstTy->getNumElements();
3122 assert(NumDstElts == 2 && "Unexpected vector size");
3123 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef{0, 1});
3124 }
3125
3126 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3127 bool IsUnsigned = Name.contains("cvtu");
3128 if (IsPS2PD)
3129 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3130 else if (CI->arg_size() == 4 &&
3133 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3134 : Intrinsic::x86_avx512_sitofp_round;
3135 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3137 } else {
3138 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3139 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3140 }
3141
3145 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3146 Name.starts_with("vcvtph2ps.")) {
3150 unsigned NumDstElts = DstTy->getNumElements();
3151 if (NumDstElts != SrcTy->getNumElements()) {
3152 assert(NumDstElts == 4 && "Unexpected vector size");
3153 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef{0, 1, 2, 3});
3154 }
3155 Rep = Builder.CreateBitCast(
3157 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3161 } else if (Name.starts_with("avx512.mask.load")) {
3162
3163 bool Aligned = Name[16] != 'u';
3166 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3169 ResultTy->getNumElements());
3170
3171 Rep = Builder.CreateIntrinsic(
3172 Intrinsic::masked_expandload, ResultTy,
3174 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3176 Value *MaskVec =
3179
3180 Rep = Builder.CreateIntrinsic(
3181 Intrinsic::masked_compressstore, ResultTy,
3183 } else if (Name.starts_with("avx512.mask.compress.") ||
3184 Name.starts_with("avx512.mask.expand.")) {
3186
3188 ResultTy->getNumElements());
3189
3190 bool IsCompress = Name[12] == 'c';
3191 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3192 : Intrinsic::x86_avx512_mask_expand;
3193 Rep = Builder.CreateIntrinsic(
3195 } else if (Name.starts_with("xop.vpcom")) {
3196 bool IsSigned;
3197 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3198 Name.ends_with("uq"))
3199 IsSigned = false;
3200 else if (Name.ends_with("b") || Name.ends_with("w") ||
3201 Name.ends_with("d") || Name.ends_with("q"))
3202 IsSigned = true;
3203 else
3205
3206 unsigned Imm;
3209 } else {
3210 Name = Name.substr(9);
3211 if (Name.starts_with("lt"))
3212 Imm = 0;
3213 else if (Name.starts_with("le"))
3214 Imm = 1;
3215 else if (Name.starts_with("gt"))
3216 Imm = 2;
3217 else if (Name.starts_with("ge"))
3218 Imm = 3;
3219 else if (Name.starts_with("eq"))
3220 Imm = 4;
3221 else if (Name.starts_with("ne"))
3222 Imm = 5;
3223 else if (Name.starts_with("false"))
3224 Imm = 6;
3225 else if (Name.starts_with("true"))
3226 Imm = 7;
3227 else
3229 }
3230
3232 } else if (Name.starts_with("xop.vpcmov")) {
3234 Value *NotSel = Builder.CreateNot(Sel);
3237 Rep = Builder.CreateOr(Sel0, Sel1);
3238 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3239 Name.starts_with("avx512.mask.prol")) {
3241 } else if (Name.starts_with("avx512.pror") ||
3242 Name.starts_with("avx512.mask.pror")) {
3244 } else if (Name.starts_with("avx512.vpshld.") ||
3245 Name.starts_with("avx512.mask.vpshld") ||
3246 Name.starts_with("avx512.maskz.vpshld")) {
3247 bool ZeroMask = Name[11] == 'z';
3249 } else if (Name.starts_with("avx512.vpshrd.") ||
3250 Name.starts_with("avx512.mask.vpshrd") ||
3251 Name.starts_with("avx512.maskz.vpshrd")) {
3252 bool ZeroMask = Name[11] == 'z';
3254 } else if (Name == "sse42.crc32.64.8") {
3257 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3259 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3260 } else if (Name.starts_with("avx.vbroadcast.s") ||
3261 Name.starts_with("avx512.vbroadcast.s")) {
3262
3264 Type *EltTy = VecTy->getElementType();
3265 unsigned EltNum = VecTy->getNumElements();
3269 for (unsigned I = 0; I < EltNum; ++I)
3270 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3271 } else if (Name.starts_with("sse41.pmovsx") ||
3272 Name.starts_with("sse41.pmovzx") ||
3273 Name.starts_with("avx2.pmovsx") ||
3274 Name.starts_with("avx2.pmovzx") ||
3275 Name.starts_with("avx512.mask.pmovsx") ||
3276 Name.starts_with("avx512.mask.pmovzx")) {
3278 unsigned NumDstElts = DstTy->getNumElements();
3279
3280
3282 for (unsigned i = 0; i != NumDstElts; ++i)
3283 ShuffleMask[i] = i;
3284
3285 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3286
3287 bool DoSext = Name.contains("pmovsx");
3288 Rep =
3289 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3290
3294 } else if (Name == "avx512.mask.pmov.qd.256" ||
3295 Name == "avx512.mask.pmov.qd.512" ||
3296 Name == "avx512.mask.pmov.wb.256" ||
3297 Name == "avx512.mask.pmov.wb.512") {
3299 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3300 Rep =
3302 } else if (Name.starts_with("avx.vbroadcastf128") ||
3303 Name == "avx2.vbroadcasti128") {
3304
3309 if (NumSrcElts == 2)
3310 Rep = Builder.CreateShuffleVector(Load, ArrayRef{0, 1, 0, 1});
3311 else
3312 Rep = Builder.CreateShuffleVector(Load,
3314 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3315 Name.starts_with("avx512.mask.shuf.f")) {
3320 unsigned ControlBitsMask = NumLanes - 1;
3321 unsigned NumControlBits = NumLanes / 2;
3323
3324 for (unsigned l = 0; l != NumLanes; ++l) {
3325 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3326
3327 if (l >= NumLanes / 2)
3328 LaneMask += NumLanes;
3329 for (unsigned i = 0; i != NumElementsInLane; ++i)
3330 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3331 }
3332 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3334 Rep =
3336 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3337 Name.starts_with("avx512.mask.broadcasti")) {
3339 ->getNumElements();
3340 unsigned NumDstElts =
3342
3344 for (unsigned i = 0; i != NumDstElts; ++i)
3345 ShuffleMask[i] = i % NumSrcElts;
3346
3347 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3349 Rep =
3351 } else if (Name.starts_with("avx2.pbroadcast") ||
3352 Name.starts_with("avx2.vbroadcast") ||
3353 Name.starts_with("avx512.pbroadcast") ||
3354 Name.starts_with("avx512.mask.broadcast.s")) {
3355
3361 Rep = Builder.CreateShuffleVector(Op, M);
3362
3366 } else if (Name.starts_with("sse2.padds.") ||
3367 Name.starts_with("avx2.padds.") ||
3368 Name.starts_with("avx512.padds.") ||
3369 Name.starts_with("avx512.mask.padds.")) {
3371 } else if (Name.starts_with("sse2.psubs.") ||
3372 Name.starts_with("avx2.psubs.") ||
3373 Name.starts_with("avx512.psubs.") ||
3374 Name.starts_with("avx512.mask.psubs.")) {
3376 } else if (Name.starts_with("sse2.paddus.") ||
3377 Name.starts_with("avx2.paddus.") ||
3378 Name.starts_with("avx512.mask.paddus.")) {
3380 } else if (Name.starts_with("sse2.psubus.") ||
3381 Name.starts_with("avx2.psubus.") ||
3382 Name.starts_with("avx512.mask.psubus.")) {
3384 } else if (Name.starts_with("avx512.mask.palignr.")) {
3388 false);
3389 } else if (Name.starts_with("avx512.mask.valign.")) {
3393 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3394
3397 Shift / 8);
3398 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3399
3402 Shift / 8);
3403 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3404 Name == "avx512.psll.dq.512") {
3405
3408 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3409 Name == "avx512.psrl.dq.512") {
3410
3413 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3414 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3415 Name.starts_with("avx2.pblendd.")) {
3420 unsigned NumElts = VecTy->getNumElements();
3421
3423 for (unsigned i = 0; i != NumElts; ++i)
3424 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3425
3426 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3427 } else if (Name.starts_with("avx.vinsertf128.") ||
3428 Name == "avx2.vinserti128" ||
3429 Name.starts_with("avx512.mask.insert")) {
3433 unsigned DstNumElts =
3435 unsigned SrcNumElts =
3437 unsigned Scale = DstNumElts / SrcNumElts;
3438
3439
3440 Imm = Imm % Scale;
3441
3442
3444 for (unsigned i = 0; i != SrcNumElts; ++i)
3445 Idxs[i] = i;
3446 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3447 Idxs[i] = SrcNumElts;
3448 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462 for (unsigned i = 0; i != DstNumElts; ++i)
3463 Idxs[i] = i;
3464
3465 for (unsigned i = 0; i != SrcNumElts; ++i)
3466 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3467 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3468
3469
3473 } else if (Name.starts_with("avx.vextractf128.") ||
3474 Name == "avx2.vextracti128" ||
3475 Name.starts_with("avx512.mask.vextract")) {
3478 unsigned DstNumElts =
3480 unsigned SrcNumElts =
3482 unsigned Scale = SrcNumElts / DstNumElts;
3483
3484
3485 Imm = Imm % Scale;
3486
3487
3489 for (unsigned i = 0; i != DstNumElts; ++i) {
3490 Idxs[i] = i + (Imm * DstNumElts);
3491 }
3492 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3493
3494
3498 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3499 Name.starts_with("avx512.mask.perm.di.")) {
3503 unsigned NumElts = VecTy->getNumElements();
3504
3506 for (unsigned i = 0; i != NumElts; ++i)
3507 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3508
3509 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3510
3514 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3515
3516
3517
3518
3519
3520
3521
3522
3524
3526 unsigned HalfSize = NumElts / 2;
3528
3529
3532
3533
3536
3537
3538 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3539 for (unsigned i = 0; i < HalfSize; ++i)
3540 ShuffleMask[i] = StartIndex + i;
3541
3542
3543 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3544 for (unsigned i = 0; i < HalfSize; ++i)
3545 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3546
3547 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3548
3549 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3550 Name.starts_with("avx512.mask.vpermil.p") ||
3551 Name.starts_with("avx512.mask.pshuf.d.")) {
3555 unsigned NumElts = VecTy->getNumElements();
3556
3557 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3558 unsigned IdxMask = ((1 << IdxSize) - 1);
3559
3561
3562
3563
3564 for (unsigned i = 0; i != NumElts; ++i)
3565 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3566
3567 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3568
3572 } else if (Name == "sse2.pshufl.w" ||
3573 Name.starts_with("avx512.mask.pshufl.w.")) {
3577
3579 for (unsigned l = 0; l != NumElts; l += 8) {
3580 for (unsigned i = 0; i != 4; ++i)
3581 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3582 for (unsigned i = 4; i != 8; ++i)
3583 Idxs[i + l] = i + l;
3584 }
3585
3586 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3587
3591 } else if (Name == "sse2.pshufh.w" ||
3592 Name.starts_with("avx512.mask.pshufh.w.")) {
3596
3598 for (unsigned l = 0; l != NumElts; l += 8) {
3599 for (unsigned i = 0; i != 4; ++i)
3600 Idxs[i + l] = i + l;
3601 for (unsigned i = 0; i != 4; ++i)
3602 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3603 }
3604
3605 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3606
3610 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3615
3617 unsigned HalfLaneElts = NumLaneElts / 2;
3618
3620 for (unsigned i = 0; i != NumElts; ++i) {
3621
3622 Idxs[i] = i - (i % NumLaneElts);
3623
3624 if ((i % NumLaneElts) >= HalfLaneElts)
3625 Idxs[i] += NumElts;
3626
3627
3628 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3629 }
3630
3631 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3632
3633 Rep =
3635 } else if (Name.starts_with("avx512.mask.movddup") ||
3636 Name.starts_with("avx512.mask.movshdup") ||
3637 Name.starts_with("avx512.mask.movsldup")) {
3641
3642 unsigned Offset = 0;
3643 if (Name.starts_with("avx512.mask.movshdup."))
3645
3647 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3648 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3649 Idxs[i + l + 0] = i + l + Offset;
3650 Idxs[i + l + 1] = i + l + Offset;
3651 }
3652
3653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3654
3655 Rep =
3657 } else if (Name.starts_with("avx512.mask.punpckl") ||
3658 Name.starts_with("avx512.mask.unpckl.")) {
3663
3665 for (int l = 0; l != NumElts; l += NumLaneElts)
3666 for (int i = 0; i != NumLaneElts; ++i)
3667 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3668
3669 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3670
3671 Rep =
3673 } else if (Name.starts_with("avx512.mask.punpckh") ||
3674 Name.starts_with("avx512.mask.unpckh.")) {
3679
3681 for (int l = 0; l != NumElts; l += NumLaneElts)
3682 for (int i = 0; i != NumLaneElts; ++i)
3683 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3684
3685 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3686
3687 Rep =
3689 } else if (Name.starts_with("avx512.mask.and.") ||
3690 Name.starts_with("avx512.mask.pand.")) {
3693 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3694 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3695 Rep = Builder.CreateBitCast(Rep, FTy);
3696 Rep =
3698 } else if (Name.starts_with("avx512.mask.andn.") ||
3699 Name.starts_with("avx512.mask.pandn.")) {
3702 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3703 Rep = Builder.CreateAnd(Rep,
3704 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3705 Rep = Builder.CreateBitCast(Rep, FTy);
3706 Rep =
3708 } else if (Name.starts_with("avx512.mask.or.") ||
3709 Name.starts_with("avx512.mask.por.")) {
3712 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3713 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3714 Rep = Builder.CreateBitCast(Rep, FTy);
3715 Rep =
3717 } else if (Name.starts_with("avx512.mask.xor.") ||
3718 Name.starts_with("avx512.mask.pxor.")) {
3721 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3722 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3723 Rep = Builder.CreateBitCast(Rep, FTy);
3724 Rep =
3726 } else if (Name.starts_with("avx512.mask.padd.")) {
3728 Rep =
3730 } else if (Name.starts_with("avx512.mask.psub.")) {
3732 Rep =
3734 } else if (Name.starts_with("avx512.mask.pmull.")) {
3736 Rep =
3738 } else if (Name.starts_with("avx512.mask.add.p")) {
3739 if (Name.ends_with(".512")) {
3741 if (Name[17] == 's')
3742 IID = Intrinsic::x86_avx512_add_ps_512;
3743 else
3744 IID = Intrinsic::x86_avx512_add_pd_512;
3745
3746 Rep = Builder.CreateIntrinsic(
3747 IID,
3749 } else {
3751 }
3752 Rep =
3754 } else if (Name.starts_with("avx512.mask.div.p")) {
3755 if (Name.ends_with(".512")) {
3757 if (Name[17] == 's')
3758 IID = Intrinsic::x86_avx512_div_ps_512;
3759 else
3760 IID = Intrinsic::x86_avx512_div_pd_512;
3761
3762 Rep = Builder.CreateIntrinsic(
3763 IID,
3765 } else {
3767 }
3768 Rep =
3770 } else if (Name.starts_with("avx512.mask.mul.p")) {
3771 if (Name.ends_with(".512")) {
3773 if (Name[17] == 's')
3774 IID = Intrinsic::x86_avx512_mul_ps_512;
3775 else
3776 IID = Intrinsic::x86_avx512_mul_pd_512;
3777
3778 Rep = Builder.CreateIntrinsic(
3779 IID,
3781 } else {
3783 }
3784 Rep =
3786 } else if (Name.starts_with("avx512.mask.sub.p")) {
3787 if (Name.ends_with(".512")) {
3789 if (Name[17] == 's')
3790 IID = Intrinsic::x86_avx512_sub_ps_512;
3791 else
3792 IID = Intrinsic::x86_avx512_sub_pd_512;
3793
3794 Rep = Builder.CreateIntrinsic(
3795 IID,
3797 } else {
3799 }
3800 Rep =
3802 } else if ((Name.starts_with("avx512.mask.max.p") ||
3803 Name.starts_with("avx512.mask.min.p")) &&
3804 Name.drop_front(18) == ".512") {
3805 bool IsDouble = Name[17] == 'd';
3806 bool IsMin = Name[13] == 'i';
3808 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3809 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3811
3812 Rep = Builder.CreateIntrinsic(
3813 IID,
3815 Rep =
3817 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3818 Rep =
3819 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3820 {CI->getArgOperand(0), Builder.getInt1(false)});
3821 Rep =
3823 } else if (Name.starts_with("avx512.mask.psll")) {
3824 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3825 bool IsVariable = Name[16] == 'v';
3826 char Size = Name[16] == '.' ? Name[17]
3827 : Name[17] == '.' ? Name[18]
3828 : Name[18] == '.' ? Name[19]
3829 : Name[20];
3830
3832 if (IsVariable && Name[17] != '.') {
3833 if (Size == 'd' && Name[17] == '2')
3834 IID = Intrinsic::x86_avx2_psllv_q;
3835 else if (Size == 'd' && Name[17] == '4')
3836 IID = Intrinsic::x86_avx2_psllv_q_256;
3837 else if (Size == 's' && Name[17] == '4')
3838 IID = Intrinsic::x86_avx2_psllv_d;
3839 else if (Size == 's' && Name[17] == '8')
3840 IID = Intrinsic::x86_avx2_psllv_d_256;
3841 else if (Size == 'h' && Name[17] == '8')
3842 IID = Intrinsic::x86_avx512_psllv_w_128;
3843 else if (Size == 'h' && Name[17] == '1')
3844 IID = Intrinsic::x86_avx512_psllv_w_256;
3845 else if (Name[17] == '3' && Name[18] == '2')
3846 IID = Intrinsic::x86_avx512_psllv_w_512;
3847 else
3849 } else if (Name.ends_with(".128")) {
3850 if (Size == 'd')
3851 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3852 : Intrinsic::x86_sse2_psll_d;
3853 else if (Size == 'q')
3854 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3855 : Intrinsic::x86_sse2_psll_q;
3856 else if (Size == 'w')
3857 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3858 : Intrinsic::x86_sse2_psll_w;
3859 else
3861 } else if (Name.ends_with(".256")) {
3862 if (Size == 'd')
3863 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3864 : Intrinsic::x86_avx2_psll_d;
3865 else if (Size == 'q')
3866 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3867 : Intrinsic::x86_avx2_psll_q;
3868 else if (Size == 'w')
3869 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3870 : Intrinsic::x86_avx2_psll_w;
3871 else
3873 } else {
3874 if (Size == 'd')
3875 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3876 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3877 : Intrinsic::x86_avx512_psll_d_512;
3878 else if (Size == 'q')
3879 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3880 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3881 : Intrinsic::x86_avx512_psll_q_512;
3882 else if (Size == 'w')
3883 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3884 : Intrinsic::x86_avx512_psll_w_512;
3885 else
3887 }
3888
3890 } else if (Name.starts_with("avx512.mask.psrl")) {
3891 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3892 bool IsVariable = Name[16] == 'v';
3893 char Size = Name[16] == '.' ? Name[17]
3894 : Name[17] == '.' ? Name[18]
3895 : Name[18] == '.' ? Name[19]
3896 : Name[20];
3897
3899 if (IsVariable && Name[17] != '.') {
3900 if (Size == 'd' && Name[17] == '2')
3901 IID = Intrinsic::x86_avx2_psrlv_q;
3902 else if (Size == 'd' && Name[17] == '4')
3903 IID = Intrinsic::x86_avx2_psrlv_q_256;
3904 else if (Size == 's' && Name[17] == '4')
3905 IID = Intrinsic::x86_avx2_psrlv_d;
3906 else if (Size == 's' && Name[17] == '8')
3907 IID = Intrinsic::x86_avx2_psrlv_d_256;
3908 else if (Size == 'h' && Name[17] == '8')
3909 IID = Intrinsic::x86_avx512_psrlv_w_128;
3910 else if (Size == 'h' && Name[17] == '1')
3911 IID = Intrinsic::x86_avx512_psrlv_w_256;
3912 else if (Name[17] == '3' && Name[18] == '2')
3913 IID = Intrinsic::x86_avx512_psrlv_w_512;
3914 else
3916 } else if (Name.ends_with(".128")) {
3917 if (Size == 'd')
3918 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3919 : Intrinsic::x86_sse2_psrl_d;
3920 else if (Size == 'q')
3921 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3922 : Intrinsic::x86_sse2_psrl_q;
3923 else if (Size == 'w')
3924 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3925 : Intrinsic::x86_sse2_psrl_w;
3926 else
3928 } else if (Name.ends_with(".256")) {
3929 if (Size == 'd')
3930 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3931 : Intrinsic::x86_avx2_psrl_d;
3932 else if (Size == 'q')
3933 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3934 : Intrinsic::x86_avx2_psrl_q;
3935 else if (Size == 'w')
3936 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3937 : Intrinsic::x86_avx2_psrl_w;
3938 else
3940 } else {
3941 if (Size == 'd')
3942 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3943 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3944 : Intrinsic::x86_avx512_psrl_d_512;
3945 else if (Size == 'q')
3946 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3947 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3948 : Intrinsic::x86_avx512_psrl_q_512;
3949 else if (Size == 'w')
3950 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3951 : Intrinsic::x86_avx512_psrl_w_512;
3952 else
3954 }
3955
3957 } else if (Name.starts_with("avx512.mask.psra")) {
3958 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3959 bool IsVariable = Name[16] == 'v';
3960 char Size = Name[16] == '.' ? Name[17]
3961 : Name[17] == '.' ? Name[18]
3962 : Name[18] == '.' ? Name[19]
3963 : Name[20];
3964
3966 if (IsVariable && Name[17] != '.') {
3967 if (Size == 's' && Name[17] == '4')
3968 IID = Intrinsic::x86_avx2_psrav_d;
3969 else if (Size == 's' && Name[17] == '8')
3970 IID = Intrinsic::x86_avx2_psrav_d_256;
3971 else if (Size == 'h' && Name[17] == '8')
3972 IID = Intrinsic::x86_avx512_psrav_w_128;
3973 else if (Size == 'h' && Name[17] == '1')
3974 IID = Intrinsic::x86_avx512_psrav_w_256;
3975 else if (Name[17] == '3' && Name[18] == '2')
3976 IID = Intrinsic::x86_avx512_psrav_w_512;
3977 else
3979 } else if (Name.ends_with(".128")) {
3980 if (Size == 'd')
3981 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3982 : Intrinsic::x86_sse2_psra_d;
3983 else if (Size == 'q')
3984 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3985 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3986 : Intrinsic::x86_avx512_psra_q_128;
3987 else if (Size == 'w')
3988 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3989 : Intrinsic::x86_sse2_psra_w;
3990 else
3992 } else if (Name.ends_with(".256")) {
3993 if (Size == 'd')
3994 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3995 : Intrinsic::x86_avx2_psra_d;
3996 else if (Size == 'q')
3997 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3998 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3999 : Intrinsic::x86_avx512_psra_q_256;
4000 else if (Size == 'w')
4001 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4002 : Intrinsic::x86_avx2_psra_w;
4003 else
4005 } else {
4006 if (Size == 'd')
4007 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4008 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4009 : Intrinsic::x86_avx512_psra_d_512;
4010 else if (Size == 'q')
4011 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4012 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4013 : Intrinsic::x86_avx512_psra_q_512;
4014 else if (Size == 'w')
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4016 : Intrinsic::x86_avx512_psra_w_512;
4017 else
4019 }
4020
4022 } else if (Name.starts_with("avx512.mask.move.s")) {
4024 } else if (Name.starts_with("avx512.cvtmask2")) {
4026 } else if (Name.ends_with(".movntdqa")) {
4029
4030 LoadInst *LI = Builder.CreateAlignedLoad(
4034 Rep = LI;
4035 } else if (Name.starts_with("fma.vfmadd.") ||
4036 Name.starts_with("fma.vfmsub.") ||
4037 Name.starts_with("fma.vfnmadd.") ||
4038 Name.starts_with("fma.vfnmsub.")) {
4039 bool NegMul = Name[6] == 'n';
4040 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4041 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4042
4045
4046 if (IsScalar) {
4047 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4048 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4049 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4050 }
4051
4052 if (NegMul && !IsScalar)
4053 Ops[0] = Builder.CreateFNeg(Ops[0]);
4054 if (NegMul && IsScalar)
4055 Ops[1] = Builder.CreateFNeg(Ops[1]);
4056 if (NegAcc)
4057 Ops[2] = Builder.CreateFNeg(Ops[2]);
4058
4059 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4060
4061 if (IsScalar)
4063 } else if (Name.starts_with("fma4.vfmadd.s")) {
4066
4067 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4068 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4069 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4070
4071 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4072
4075 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4076 Name.starts_with("avx512.maskz.vfmadd.s") ||
4077 Name.starts_with("avx512.mask3.vfmadd.s") ||
4078 Name.starts_with("avx512.mask3.vfmsub.s") ||
4079 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4080 bool IsMask3 = Name[11] == '3';
4081 bool IsMaskZ = Name[11] == 'z';
4082
4083 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4084 bool NegMul = Name[2] == 'n';
4085 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4086
4090
4091 if (NegMul && (IsMask3 || IsMaskZ))
4092 A = Builder.CreateFNeg(A);
4093 if (NegMul && !(IsMask3 || IsMaskZ))
4094 B = Builder.CreateFNeg(B);
4095 if (NegAcc)
4096 C = Builder.CreateFNeg(C);
4097
4098 A = Builder.CreateExtractElement(A, (uint64_t)0);
4099 B = Builder.CreateExtractElement(B, (uint64_t)0);
4100 C = Builder.CreateExtractElement(C, (uint64_t)0);
4101
4105
4107 if (Name.back() == 'd')
4108 IID = Intrinsic::x86_avx512_vfmadd_f64;
4109 else
4110 IID = Intrinsic::x86_avx512_vfmadd_f32;
4111 Rep = Builder.CreateIntrinsic(IID, Ops);
4112 } else {
4113 Rep = Builder.CreateFMA(A, B, C);
4114 }
4115
4117 : IsMask3 ? C
4118 : A;
4119
4120
4121
4122 if (NegAcc && IsMask3)
4123 PassThru =
4125
4127 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4129 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4130 Name.starts_with("avx512.mask.vfnmadd.p") ||
4131 Name.starts_with("avx512.mask.vfnmsub.p") ||
4132 Name.starts_with("avx512.mask3.vfmadd.p") ||
4133 Name.starts_with("avx512.mask3.vfmsub.p") ||
4134 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4135 Name.starts_with("avx512.maskz.vfmadd.p")) {
4136 bool IsMask3 = Name[11] == '3';
4137 bool IsMaskZ = Name[11] == 'z';
4138
4139 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4140 bool NegMul = Name[2] == 'n';
4141 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4142
4146
4147 if (NegMul && (IsMask3 || IsMaskZ))
4148 A = Builder.CreateFNeg(A);
4149 if (NegMul && !(IsMask3 || IsMaskZ))
4150 B = Builder.CreateFNeg(B);
4151 if (NegAcc)
4152 C = Builder.CreateFNeg(C);
4153
4158
4159 if (Name[Name.size() - 5] == 's')
4160 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4161 else
4162 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4163
4164 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4165 } else {
4166 Rep = Builder.CreateFMA(A, B, C);
4167 }
4168
4172
4174 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4178 if (VecWidth == 128 && EltWidth == 32)
4179 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4180 else if (VecWidth == 256 && EltWidth == 32)
4181 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4182 else if (VecWidth == 128 && EltWidth == 64)
4183 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4184 else if (VecWidth == 256 && EltWidth == 64)
4185 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4186 else
4188
4191 Ops[2] = Builder.CreateFNeg(Ops[2]);
4192 Rep = Builder.CreateIntrinsic(IID, Ops);
4193 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4194 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4195 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4196 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4197 bool IsMask3 = Name[11] == '3';
4198 bool IsMaskZ = Name[11] == 'z';
4199
4200 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4201 bool IsSubAdd = Name[3] == 's';
4204
4205 if (Name[Name.size() - 5] == 's')
4206 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4207 else
4208 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4209
4212 if (IsSubAdd)
4213 Ops[2] = Builder.CreateFNeg(Ops[2]);
4214
4215 Rep = Builder.CreateIntrinsic(IID, Ops);
4216 } else {
4218
4221
4223 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4224 Value *Odd = Builder.CreateCall(FMA, Ops);
4225 Ops[2] = Builder.CreateFNeg(Ops[2]);
4226 Value *Even = Builder.CreateCall(FMA, Ops);
4227
4228 if (IsSubAdd)
4230
4232 for (int i = 0; i != NumElts; ++i)
4233 Idxs[i] = i + (i % 2) * NumElts;
4234
4235 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4236 }
4237
4241
4243 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4244 Name.starts_with("avx512.maskz.pternlog.")) {
4245 bool ZeroMask = Name[11] == 'z';
4249 if (VecWidth == 128 && EltWidth == 32)
4250 IID = Intrinsic::x86_avx512_pternlog_d_128;
4251 else if (VecWidth == 256 && EltWidth == 32)
4252 IID = Intrinsic::x86_avx512_pternlog_d_256;
4253 else if (VecWidth == 512 && EltWidth == 32)
4254 IID = Intrinsic::x86_avx512_pternlog_d_512;
4255 else if (VecWidth == 128 && EltWidth == 64)
4256 IID = Intrinsic::x86_avx512_pternlog_q_128;
4257 else if (VecWidth == 256 && EltWidth == 64)
4258 IID = Intrinsic::x86_avx512_pternlog_q_256;
4259 else if (VecWidth == 512 && EltWidth == 64)
4260 IID = Intrinsic::x86_avx512_pternlog_q_512;
4261 else
4263
4266 Rep = Builder.CreateIntrinsic(IID, Args);
4270 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4271 Name.starts_with("avx512.maskz.vpmadd52")) {
4272 bool ZeroMask = Name[11] == 'z';
4273 bool High = Name[20] == 'h' || Name[21] == 'h';
4276 if (VecWidth == 128 && )
4277 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4278 else if (VecWidth == 256 && )
4279 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4280 else if (VecWidth == 512 && )
4281 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4282 else if (VecWidth == 128 && High)
4283 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4284 else if (VecWidth == 256 && High)
4285 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4286 else if (VecWidth == 512 && High)
4287 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4288 else
4290
4293 Rep = Builder.CreateIntrinsic(IID, Args);
4297 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4298 Name.starts_with("avx512.mask.vpermt2var.") ||
4299 Name.starts_with("avx512.maskz.vpermt2var.")) {
4300 bool ZeroMask = Name[11] == 'z';
4301 bool IndexForm = Name[17] == 'i';
4303 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4304 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4305 Name.starts_with("avx512.mask.vpdpbusds.") ||
4306 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4307 bool ZeroMask = Name[11] == 'z';
4308 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4311 if (VecWidth == 128 && !IsSaturating)
4312 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4313 else if (VecWidth == 256 && !IsSaturating)
4314 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4315 else if (VecWidth == 512 && !IsSaturating)
4316 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4317 else if (VecWidth == 128 && IsSaturating)
4318 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4319 else if (VecWidth == 256 && IsSaturating)
4320 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4321 else if (VecWidth == 512 && IsSaturating)
4322 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4323 else
4325
4328
4329
4330
4331
4332 if (Args[1]->getType()->isVectorTy() &&
4334 ->getElementType()
4335 ->isIntegerTy(32) &&
4336 Args[2]->getType()->isVectorTy() &&
4338 ->getElementType()
4339 ->isIntegerTy(32)) {
4340 Type *NewArgType = nullptr;
4341 if (VecWidth == 128)
4342 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4343 else if (VecWidth == 256)
4344 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4345 else if (VecWidth == 512)
4346 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4347 else
4349
4350 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4351 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4352 }
4353
4354 Rep = Builder.CreateIntrinsic(IID, Args);
4358 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4359 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4360 Name.starts_with("avx512.mask.vpdpwssds.") ||
4361 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4362 bool ZeroMask = Name[11] == 'z';
4363 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4366 if (VecWidth == 128 && !IsSaturating)
4367 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4368 else if (VecWidth == 256 && !IsSaturating)
4369 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4370 else if (VecWidth == 512 && !IsSaturating)
4371 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4372 else if (VecWidth == 128 && IsSaturating)
4373 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4374 else if (VecWidth == 256 && IsSaturating)
4375 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4376 else if (VecWidth == 512 && IsSaturating)
4377 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4378 else
4380
4383
4384
4385
4386
4387 if (Args[1]->getType()->isVectorTy() &&
4389 ->getElementType()
4390 ->isIntegerTy(32) &&
4391 Args[2]->getType()->isVectorTy() &&
4393 ->getElementType()
4394 ->isIntegerTy(32)) {
4395 Type *NewArgType = nullptr;
4396 if (VecWidth == 128)
4397 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4398 else if (VecWidth == 256)
4399 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4400 else if (VecWidth == 512)
4401 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4402 else
4404
4405 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4406 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4407 }
4408
4409 Rep = Builder.CreateIntrinsic(IID, Args);
4413 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4414 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4415 Name == "subborrow.u32" || Name == "subborrow.u64") {
4417 if (Name[0] == 'a' && Name.back() == '2')
4418 IID = Intrinsic::x86_addcarry_32;
4419 else if (Name[0] == 'a' && Name.back() == '4')
4420 IID = Intrinsic::x86_addcarry_64;
4421 else if (Name[0] == 's' && Name.back() == '2')
4422 IID = Intrinsic::x86_subborrow_32;
4423 else if (Name[0] == 's' && Name.back() == '4')
4424 IID = Intrinsic::x86_subborrow_64;
4425 else
4427
4428
4431 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4432
4433
4434 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4436
4437 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4438
4440 Rep = nullptr;
4441 } else if (Name.starts_with("avx512.mask.") &&
4443
4444 }
4445
4446 return Rep;
4447}
4448
4451 if (Name.starts_with("neon.bfcvt")) {
4452 if (Name.starts_with("neon.bfcvtn2")) {
4454 std::iota(LoMask.begin(), LoMask.end(), 0);
4456 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4457 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4460 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4461 } else if (Name.starts_with("neon.bfcvtn")) {
4463 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4464 Type *V4BF16 =
4466 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4467 dbgs() << "Trunc: " << *Trunc << "\n";
4468 return Builder.CreateShuffleVector(
4470 } else {
4471 return Builder.CreateFPTrunc(CI->getOperand(0),
4473 }
4474 } else if (Name.starts_with("sve.fcvt")) {
4477 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4478 .Case("sve.fcvtnt.bf16f32",
4479 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4483
4485
4486
4487
4490
4491 if (Args[1]->getType() != BadPredTy)
4493
4494 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4495 BadPredTy, Args[1]);
4496 Args[1] = Builder.CreateIntrinsic(
4497 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4498
4499 return Builder.CreateIntrinsic(NewID, Args, nullptr,
4501 }
4502
4504}
4505
// upgradeARMIntrinsicCall (fragment): rewrites old-style ARM MVE/CDE
// intrinsic calls to the current intrinsics.  The legacy names used a
// <4 x i1> predicate for 64-bit-element operations; the current ones take
// <2 x i1>, so predicate operands are round-tripped through
// arm.mve.pred.{v2i,i2v} below.
// NOTE(review): this rendering lost several source lines (the declarations
// of ID / V2I1Ty / general types, the Tys initializers per switch case, and
// the llvm_unreachable fallthroughs), so the fragment is not compilable as
// shown — compare against the upstream file.
4508 if (Name == "mve.vctp64.old") {
4509
4510
// Emit the current vctp64, convert its predicate to an int with pred.v2i,
// then back with pred.i2v so users see the new two-lane predicate type.
4511 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4513 nullptr, CI->getName());
4514 Value *C1 = Builder.CreateIntrinsic(
4515 Intrinsic::arm_mve_pred_v2i,
4516 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4517 return Builder.CreateIntrinsic(
4518 Intrinsic::arm_mve_pred_i2v,
4520 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4521 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4522 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4523 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4524 Name ==
4525 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4526 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4527 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4528 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4529 Name ==
4530 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4531 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4532 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4533 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4534 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4535 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4536 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4537 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4538 std::vector<Type *> Tys;
// Select the overload types for each upgraded intrinsic (the Tys
// initializer lines for each case were stripped from this rendering).
4541 switch (ID) {
4542 case Intrinsic::arm_mve_mull_int_predicated:
4543 case Intrinsic::arm_mve_vqdmull_predicated:
4544 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4546 break;
4547 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4548 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4549 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4551 V2I1Ty};
4552 break;
4553 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4556 break;
4557 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4560 break;
4561 case Intrinsic::arm_cde_vcx1q_predicated:
4562 case Intrinsic::arm_cde_vcx1qa_predicated:
4563 case Intrinsic::arm_cde_vcx2q_predicated:
4564 case Intrinsic::arm_cde_vcx2qa_predicated:
4565 case Intrinsic::arm_cde_vcx3q_predicated:
4566 case Intrinsic::arm_cde_vcx3qa_predicated:
4568 break;
4569 default:
4571 }
4572
// Rebuild the operand list, converting any i1-vector (predicate) operand
// to the new <2 x i1> form via pred.v2i / pred.i2v.
4573 std::vector<Value *> Ops;
4575 Type *Ty = Op->getType();
4576 if (Ty->getScalarSizeInBits() == 1) {
4577 Value *C1 = Builder.CreateIntrinsic(
4578 Intrinsic::arm_mve_pred_v2i,
4580 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4581 }
4583 }
4584
4585 return Builder.CreateIntrinsic(ID, Tys, Ops, nullptr,
4587 }
4588 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4589}
4590
4591
4592
4593
4594
4595
4613
// AMDGPU legacy atomic upgrade (fragment): lowers an old atomic intrinsic
// call to a native atomicrmw instruction with "agent" sync scope, tagging
// it with AMDGPU-specific metadata; returns nullptr when the call shape is
// not the expected legacy form (caller then leaves it alone).
// NOTE(review): many lines (operand/Ptr/Val extraction, RMWOp and ordering
// selection, RetTy setup, the volatile setter) were lost in this rendering;
// comments below describe only what is visible — verify against upstream.
4615 if (NumOperands < 3)
4616 return nullptr;
4617
4620 if (!PtrTy)
4621 return nullptr;
4622
4625 return nullptr;
4626
4628 bool IsVolatile = false;
4629
4630
4631
4632 if (NumOperands > 3)
4634
4635
4636
// Older signatures carried an extra volatile flag operand; a missing or
// non-zero constant is treated as volatile.
4637 if (NumOperands > 5) {
4639 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4640 }
4641
4647
4649
4650
// i16-element vectors here presumably represent bf16 payloads and are
// bitcast accordingly — TODO confirm (the surrounding decl lines are
// missing from this rendering).
4653 if (VT->getElementType()->isIntegerTy(16)) {
4656 Val = Builder.CreateBitCast(Val, AsBF16);
4657 }
4658 }
4659
4660
// The legacy intrinsics implicitly operated at device ("agent") scope.
4661
4662 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4664 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4665
// Metadata below matches the legacy intrinsics' assumptions: no
// fine-grained remote memory, denormal mode ignored, and (apparently
// gated on address space — guard lines missing) never private.
4666 unsigned AddrSpace = PtrTy->getAddressSpace();
4669 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4671 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4672 }
4673
4676 MDNode *RangeNotPrivate =
4679 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4680 }
4681
4682 if (IsVolatile)
4684
// Cast back so the replacement has the original call's return type.
4685 return Builder.CreateBitCast(RMW, RetTy);
4687
4688
4689
4690
// unwrapMAVOp (fragment): extracts the Metadata wrapped in a
// MetadataAsValue call operand; returns nullptr when the operand is absent
// or is not metadata.  (The signature and the dyn_cast guard lines were
// lost in this rendering.)
4695 Metadata *MD = MAV->getMetadata();
4697 }
4698 }
4699 return nullptr;
4701
4702
// unwrapMAVMetadataOp (fragment): returns the raw Metadata of a
// MetadataAsValue operand, or nullptr when the operand is not one.
// (Signature and the dyn_cast guard were lost in this rendering.)
4706 return MAV->getMetadata();
4707 return nullptr;
4708}
4709
4711
4712
// getDebugLocSafe (fragment): returns the instruction's DebugLoc as an
// MDNode; may be null when no debug location is attached.
4713 return I->getDebugLoc().getAsMDNode();
4714}
4715
4716
4717
4718
// upgradeDbgIntrinsicToDbgRecord (fragment): converts a call to one of the
// legacy llvm.dbg.* intrinsics (label / assign / declare / addr / value)
// into the corresponding non-instruction DbgRecord.
// NOTE(review): the lines that actually construct each DbgRecord were lost
// in this rendering; only the name dispatch and operand-index bookkeeping
// remain visible — verify against upstream.
4721 if (Name == "label") {
4724 } else if (Name == "assign") {
4729
4731 } else if (Name == "declare") {
4736 } else if (Name == "addr") {
// dbg.addr is upgraded as a dbg.value with a DW_OP_deref expression
// (presumably — the expression-building lines are missing; confirm).
4737
4739
4740
4743 }
4746 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4748 } else if (Name == "value") {
4749
// Default operand layout: variable at index 1, expression at index 2.
4750 unsigned VarOp = 1;
4751 unsigned ExprOp = 2;
4754
// Legacy dbg.value signatures carried an extra offset operand; when it is
// present (and apparently required to be zero — guard lines missing) the
// variable/expression indices shift by one.
4756 return;
4757 VarOp = 2;
4758 ExprOp = 3;
4759 }
4764 }
4765 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4767}
4768
4769
4770
4772
4773
4774
4776 if ()
4777 return;
4778
4782
4783 if (!NewFn) {
4784
4786
4787 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4788 Name = Name.substr(5);
4789
4790 bool IsX86 = Name.consume_front("x86.");
4791 bool IsNVVM = Name.consume_front("nvvm.");
4792 bool IsAArch64 = Name.consume_front("aarch64.");
4793 bool IsARM = Name.consume_front("arm.");
4794 bool IsAMDGCN = Name.consume_front("amdgcn.");
4795 bool IsDbg = Name.consume_front("dbg.");
4796 Value *Rep = nullptr;
4797
4798 if (!IsX86 && Name == "stackprotectorcheck") {
4799 Rep = nullptr;
4800 } else if (IsNVVM) {
4802 } else if (IsX86) {
4804 } else if (IsAArch64) {
4806 } else if (IsARM) {
4808 } else if (IsAMDGCN) {
4810 } else if (IsDbg) {
4812 } else {
4814 }
4815
4816 if (Rep)
4819 return;
4820 }
4821
4822 const auto &DefaultCase = [&]() -> void {
4823 if (F == NewFn)
4824 return;
4825
4827
4830 "Unknown function for CallBase upgrade and isn't just a name change");
4832 return;
4833 }
4834
4835
4838 "Return type must have changed");
4839 assert(OldST->getNumElements() ==
4841 "Must have same number of elements");
4842
4844 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4847 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4848 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4849 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4850 }
4853 return;
4854 }
4855
4856
4857
4860 return;
4861 };
4862 CallInst *NewCall = nullptr;
4864 default: {
4865 DefaultCase();
4866 return;
4867 }
4868 case Intrinsic::arm_neon_vst1:
4869 case Intrinsic::arm_neon_vst2:
4870 case Intrinsic::arm_neon_vst3:
4871 case Intrinsic::arm_neon_vst4:
4872 case Intrinsic::arm_neon_vst2lane:
4873 case Intrinsic::arm_neon_vst3lane:
4874 case Intrinsic::arm_neon_vst4lane: {
4876 NewCall = Builder.CreateCall(NewFn, Args);
4877 break;
4878 }
4879 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4880 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4881 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4882 LLVMContext &Ctx = F->getParent()->getContext();
4886 NewCall = Builder.CreateCall(NewFn, Args);
4887 break;
4888 }
4889 case Intrinsic::aarch64_sve_ld3_sret:
4890 case Intrinsic::aarch64_sve_ld4_sret:
4891 case Intrinsic::aarch64_sve_ld2_sret: {
4893 Name = Name.substr(5);
4900 unsigned MinElts = RetTy->getMinNumElements() / N;
4902 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4904 for (unsigned I = 0; I < N; I++) {
4905 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4906 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4907 }
4909 break;
4910 }
4911
4912 case Intrinsic::coro_end: {
4915 NewCall = Builder.CreateCall(NewFn, Args);
4916 break;
4917 }
4918
4919 case Intrinsic::vector_extract: {
4921 Name = Name.substr(5);
4922 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4923 DefaultCase();
4924 return;
4925 }
4927 unsigned MinElts = RetTy->getMinNumElements();
4930 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4931 break;
4932 }
4933
4934 case Intrinsic::vector_insert: {
4936 Name = Name.substr(5);
4937 if (!Name.starts_with("aarch64.sve.tuple")) {
4938 DefaultCase();
4939 return;
4940 }
4941 if (Name.starts_with("aarch64.sve.tuple.set")) {
4946 NewCall = Builder.CreateCall(
4948 break;
4949 }
4950 if (Name.starts_with("aarch64.sve.tuple.create")) {
4952 .StartsWith("aarch64.sve.tuple.create2", 2)
4953 .StartsWith("aarch64.sve.tuple.create3", 3)
4954 .StartsWith("aarch64.sve.tuple.create4", 4)
4956 assert(N > 1 && "Create is expected to be between 2-4");
4959 unsigned MinElts = RetTy->getMinNumElements() / N;
4960 for (unsigned I = 0; I < N; I++) {
4962 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4963 }
4965 }
4966 break;
4967 }
4968
4969 case Intrinsic::arm_neon_bfdot:
4970 case Intrinsic::arm_neon_bfmmla:
4971 case Intrinsic::arm_neon_bfmlalb:
4972 case Intrinsic::arm_neon_bfmlalt:
4973 case Intrinsic::aarch64_neon_bfdot:
4974 case Intrinsic::aarch64_neon_bfmmla:
4975 case Intrinsic::aarch64_neon_bfmlalb:
4976 case Intrinsic::aarch64_neon_bfmlalt: {
4979 "Mismatch between function args and call args");
4980 size_t OperandWidth =
4982 assert((OperandWidth == 64 || OperandWidth == 128) &&
4983 "Unexpected operand width");
4985 auto Iter = CI->args().begin();
4986 Args.push_back(*Iter++);
4987 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4988 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4989 NewCall = Builder.CreateCall(NewFn, Args);
4990 break;
4991 }
4992
4993 case Intrinsic::bitreverse:
4994 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4995 break;
4996
4997 case Intrinsic::ctlz:
4998 case Intrinsic::cttz:
5000 "Mismatch between function args and call args");
5001 NewCall =
5002 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5003 break;
5004
5005 case Intrinsic::objectsize: {
5006 Value *NullIsUnknownSize =
5010 NewCall = Builder.CreateCall(
5012 break;
5013 }
5014
5015 case Intrinsic::ctpop:
5016 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5017 break;
5018
5019 case Intrinsic::convert_from_fp16:
5020 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5021 break;
5022
5023 case Intrinsic::dbg_value: {
5025 Name = Name.substr(5);
5026
5027 if (Name.starts_with("dbg.addr")) {
5031 NewCall =
5034 break;
5035 }
5036
5037
5039
5041 if (Offset->isZeroValue()) {
5042 NewCall = Builder.CreateCall(
5043 NewFn,
5045 break;
5046 }
5048 return;
5049 }
5050
5051 case Intrinsic::ptr_annotation:
5052
5054 DefaultCase();
5055 return;
5056 }
5057
5058
5059 NewCall = Builder.CreateCall(
5060 NewFn,
5066 return;
5067
5068 case Intrinsic::var_annotation:
5069
5071 DefaultCase();
5072 return;
5073 }
5074
5075 NewCall = Builder.CreateCall(
5076 NewFn,
5082 return;
5083
5084 case Intrinsic::riscv_aes32dsi:
5085 case Intrinsic::riscv_aes32dsmi:
5086 case Intrinsic::riscv_aes32esi:
5087 case Intrinsic::riscv_aes32esmi:
5088 case Intrinsic::riscv_sm4ks:
5089 case Intrinsic::riscv_sm4ed: {
5090
5091
5094 return;
5095
5099 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5100 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5101 }
5102
5105
5106 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5107 Value *Res = NewCall;
5109 Res = Builder.CreateIntCast(NewCall, CI->getType(), true);
5113 return;
5114 }
5115 case Intrinsic::nvvm_mapa_shared_cluster: {
5116
5117 NewCall =
5119 Value *Res = NewCall;
5120 Res = Builder.CreateAddrSpaceCast(
5125 return;
5126 }
5127 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5128 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5129
5131 Args[0] = Builder.CreateAddrSpaceCast(
5133
5134 NewCall = Builder.CreateCall(NewFn, Args);
5138 return;
5139 }
5140 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5141 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5142 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5143 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5144 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5145 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5146 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5147 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5149
5150
5151
5154 Args[0] = Builder.CreateAddrSpaceCast(
5156
5157
5158
5159
5160 size_t NumArgs = CI->arg_size();
5163 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5164
5165 NewCall = Builder.CreateCall(NewFn, Args);
5169 return;
5170 }
5171 case Intrinsic::riscv_sha256sig0:
5172 case Intrinsic::riscv_sha256sig1:
5173 case Intrinsic::riscv_sha256sum0:
5174 case Intrinsic::riscv_sha256sum1:
5175 case Intrinsic::riscv_sm3p0:
5176 case Intrinsic::riscv_sm3p1: {
5177
5178
5180 return;
5181
5183 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5184
5185 NewCall = Builder.CreateCall(NewFn, Arg);
5187 Builder.CreateIntCast(NewCall, CI->getType(), true);
5191 return;
5192 }
5193
5194 case Intrinsic::x86_xop_vfrcz_ss:
5195 case Intrinsic::x86_xop_vfrcz_sd:
5196 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5197 break;
5198
5199 case Intrinsic::x86_xop_vpermil2pd:
5200 case Intrinsic::x86_xop_vpermil2ps:
5201 case Intrinsic::x86_xop_vpermil2pd_256:
5202 case Intrinsic::x86_xop_vpermil2ps_256: {
5206 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5207 NewCall = Builder.CreateCall(NewFn, Args);
5208 break;
5209 }
5210
5211 case Intrinsic::x86_sse41_ptestc:
5212 case Intrinsic::x86_sse41_ptestz:
5213 case Intrinsic::x86_sse41_ptestnzc: {
5214
5215
5216
5217
5220 return;
5221
5222
5224
5226
5227 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5228 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5229
5230 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5231 break;
5232 }
5233
5234 case Intrinsic::x86_rdtscp: {
5235
5236
5238 return;
5239
5240 NewCall = Builder.CreateCall(NewFn);
5241
5242 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5244
5245 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5246
5250 return;
5251 }
5252
5253 case Intrinsic::x86_sse41_insertps:
5254 case Intrinsic::x86_sse41_dppd:
5255 case Intrinsic::x86_sse41_dpps:
5256 case Intrinsic::x86_sse41_mpsadbw:
5257 case Intrinsic::x86_avx_dp_ps_256:
5258 case Intrinsic::x86_avx2_mpsadbw: {
5259
5260
5262
5263
5264 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5265 NewCall = Builder.CreateCall(NewFn, Args);
5266 break;
5267 }
5268
5269 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5270 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5271 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5272 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5273 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5274 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5276 unsigned NumElts =
5278 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5279
5280 NewCall = Builder.CreateCall(NewFn, Args);
5282
5286 return;
5287 }
5288
5289 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5290 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5291 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5292 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5293 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5294 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5298 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5299 Args[1] = Builder.CreateBitCast(
5301
5302 NewCall = Builder.CreateCall(NewFn, Args);
5303 Value *Res = Builder.CreateBitCast(
5305
5309 return;
5310 }
5311 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5312 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5313 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5315 unsigned NumElts =
5317 Args[1] = Builder.CreateBitCast(
5319 Args[2] = Builder.CreateBitCast(
5321
5322 NewCall = Builder.CreateCall(NewFn, Args);
5323 break;
5324 }
5325
5326 case Intrinsic::thread_pointer: {
5327 NewCall = Builder.CreateCall(NewFn, {});
5328 break;
5329 }
5330
5331 case Intrinsic::memcpy:
5332 case Intrinsic::memmove:
5333 case Intrinsic::memset: {
5334
5335
5336
5337
5338
5339
5340
5342 DefaultCase();
5343 return;
5344 }
5345
5346
5349 NewCall = Builder.CreateCall(NewFn, Args);
5351 AttributeList NewAttrs = AttributeList::get(
5352 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5353 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5354 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5357
5359 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5360
5362 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5363 break;
5364 }
5365
5366 case Intrinsic::masked_load:
5367 case Intrinsic::masked_gather:
5368 case Intrinsic::masked_store:
5369 case Intrinsic::masked_scatter: {
5371 DefaultCase();
5372 return;
5373 }
5374
5375 auto GetMaybeAlign = [](Value *Op) {
5377 uint64_t Val = CI->getZExtValue();
5378 if (Val == 0)
5382 }
5384 };
5385 auto GetAlign = [&](Value *Op) {
5390 };
5391
5394 case Intrinsic::masked_load:
5395 NewCall = Builder.CreateMaskedLoad(
5398 break;
5399 case Intrinsic::masked_gather:
5400 NewCall = Builder.CreateMaskedGather(
5402 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5405 break;
5406 case Intrinsic::masked_store:
5407 NewCall = Builder.CreateMaskedStore(
5410 break;
5411 case Intrinsic::masked_scatter:
5412 NewCall = Builder.CreateMaskedScatter(
5414 DL.getValueOrABITypeAlignment(
5418 break;
5419 default:
5421 }
5422
5425 break;
5426 }
5427
5428 case Intrinsic::lifetime_start:
5429 case Intrinsic::lifetime_end: {
5431 DefaultCase();
5432 return;
5433 }
5434
5436
5439
5440 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5441 NewCall = Builder.CreateLifetimeStart(Ptr);
5442 else
5443 NewCall = Builder.CreateLifetimeEnd(Ptr);
5444 break;
5445 }
5446
5447
5449 return;
5450 }
5451
5452 case Intrinsic::x86_avx512_vpdpbusd_128:
5453 case Intrinsic::x86_avx512_vpdpbusd_256:
5454 case Intrinsic::x86_avx512_vpdpbusd_512:
5455 case Intrinsic::x86_avx512_vpdpbusds_128:
5456 case Intrinsic::x86_avx512_vpdpbusds_256:
5457 case Intrinsic::x86_avx512_vpdpbusds_512:
5458 case Intrinsic::x86_avx2_vpdpbssd_128:
5459 case Intrinsic::x86_avx2_vpdpbssd_256:
5460 case Intrinsic::x86_avx10_vpdpbssd_512:
5461 case Intrinsic::x86_avx2_vpdpbssds_128:
5462 case Intrinsic::x86_avx2_vpdpbssds_256:
5463 case Intrinsic::x86_avx10_vpdpbssds_512:
5464 case Intrinsic::x86_avx2_vpdpbsud_128:
5465 case Intrinsic::x86_avx2_vpdpbsud_256:
5466 case Intrinsic::x86_avx10_vpdpbsud_512:
5467 case Intrinsic::x86_avx2_vpdpbsuds_128:
5468 case Intrinsic::x86_avx2_vpdpbsuds_256:
5469 case Intrinsic::x86_avx10_vpdpbsuds_512:
5470 case Intrinsic::x86_avx2_vpdpbuud_128:
5471 case Intrinsic::x86_avx2_vpdpbuud_256:
5472 case Intrinsic::x86_avx10_vpdpbuud_512:
5473 case Intrinsic::x86_avx2_vpdpbuuds_128:
5474 case Intrinsic::x86_avx2_vpdpbuuds_256:
5475 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5479 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5480 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5481 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5482
5483 NewCall = Builder.CreateCall(NewFn, Args);
5484 break;
5485 }
5486 case Intrinsic::x86_avx512_vpdpwssd_128:
5487 case Intrinsic::x86_avx512_vpdpwssd_256:
5488 case Intrinsic::x86_avx512_vpdpwssd_512:
5489 case Intrinsic::x86_avx512_vpdpwssds_128:
5490 case Intrinsic::x86_avx512_vpdpwssds_256:
5491 case Intrinsic::x86_avx512_vpdpwssds_512:
5492 case Intrinsic::x86_avx2_vpdpwsud_128:
5493 case Intrinsic::x86_avx2_vpdpwsud_256:
5494 case Intrinsic::x86_avx10_vpdpwsud_512:
5495 case Intrinsic::x86_avx2_vpdpwsuds_128:
5496 case Intrinsic::x86_avx2_vpdpwsuds_256:
5497 case Intrinsic::x86_avx10_vpdpwsuds_512:
5498 case Intrinsic::x86_avx2_vpdpwusd_128:
5499 case Intrinsic::x86_avx2_vpdpwusd_256:
5500 case Intrinsic::x86_avx10_vpdpwusd_512:
5501 case Intrinsic::x86_avx2_vpdpwusds_128:
5502 case Intrinsic::x86_avx2_vpdpwusds_256:
5503 case Intrinsic::x86_avx10_vpdpwusds_512:
5504 case Intrinsic::x86_avx2_vpdpwuud_128:
5505 case Intrinsic::x86_avx2_vpdpwuud_256:
5506 case Intrinsic::x86_avx10_vpdpwuud_512:
5507 case Intrinsic::x86_avx2_vpdpwuuds_128:
5508 case Intrinsic::x86_avx2_vpdpwuuds_256:
5509 case Intrinsic::x86_avx10_vpdpwuuds_512:
5513 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5514 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5515 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5516
5517 NewCall = Builder.CreateCall(NewFn, Args);
5518 break;
5519 }
5520 assert(NewCall && "Should have either set this variable or returned through "
5521 "the default case");
5525}
5526
5528 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5529
5530
5531
5534
5535
5539
5540
5541 if (F != NewFn)
5542 F->eraseFromParent();
5543 }
5544}
5545
5548 if (NumOperands == 0)
5549 return &MD;
5550
5551
5553 return &MD;
5554
5556 if (NumOperands == 3) {
5559
5560 Metadata *Elts2[] = {ScalarType, ScalarType,
5565 }
5566
5570}
5571
5574 if (Opc != Instruction::BitCast)
5575 return nullptr;
5576
5577 Temp = nullptr;
5578 Type *SrcTy = V->getType();
5582
5583
5584
5587
5588 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5589 }
5590
5591 return nullptr;
5592}
5593
5595 if (Opc != Instruction::BitCast)
5596 return nullptr;
5597
5598 Type *SrcTy = C->getType();
5602
5603
5604
5606
5608 DestTy);
5609 }
5610
5611 return nullptr;
5612}
5613
5614
5615
5618 return false;
5619
5621
5622
5623
5625 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5626 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5627 if (Flag->getNumOperands() < 3)
5628 return false;
5629 if (MDString *K = dyn_cast_or_null(Flag->getOperand(1)))
5630 return K->getString() == "Debug Info Version";
5631 return false;
5632 });
5633 if (OpIt != ModFlags->op_end()) {
5634 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5636 Version = CI->getZExtValue();
5637 }
5638 }
5639
5641 bool BrokenDebugInfo = false;
5644 if (!BrokenDebugInfo)
5645
5646 return false;
5647 else {
5648
5650 M.getContext().diagnose(Diag);
5651 }
5652 }
5655
5657 M.getContext().diagnose(DiagVersion);
5658 }
5660}
5661
5665
5667 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5668 unsigned Length = 0;
5669
5670 if (F->hasFnAttribute(Attr)) {
5671
5672
5673 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5675 auto [Part, Rest] = S.split(',');
5677 S = Rest;
5678 }
5679 }
5680
5681 const unsigned Dim = DimC - 'x';
5682 assert(Dim < 3 && "Unexpected dim char");
5683
5685
5686
5687 const std::string VStr = llvm::utostr(VInt);
5688 Vect3[Dim] = VStr;
5690
5692 F->addFnAttr(Attr, NewAttr);
5693}
5694
5696 return S == "x" || S == "y" || S == "z";
5697}
5698
5701 if (K == "kernel") {
5704 return true;
5705 }
5706 if (K == "align") {
5707
5708
5709
5710
5711 const uint64_t AlignIdxValuePair =
5713 const unsigned Idx = (AlignIdxValuePair >> 16);
5714 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5717 return true;
5718 }
5719 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5722 return true;
5723 }
5724 if (K == "minctasm") {
5727 return true;
5728 }
5729 if (K == "maxnreg") {
5732 return true;
5733 }
5734 if (K.consume_front("maxntid") && isXYZ(K)) {
5736 return true;
5737 }
5738 if (K.consume_front("reqntid") && isXYZ(K)) {
5740 return true;
5741 }
5742 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5744 return true;
5745 }
5746 if (K == "grid_constant") {
5748 for (const auto &Op : cast(V)->operands()) {
5749
5750
5753 }
5754 return true;
5755 }
5756
5757 return false;
5758}
5759
5761 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5762 if (!NamedMD)
5763 return;
5764
5768 if (!SeenNodes.insert(MD).second)
5769 continue;
5770
5772 if (!GV)
5773 continue;
5774
5775 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5776
5778
5779
5780
5781
5782 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5784 const MDOperand &V = MD->getOperand(j + 1);
5786 if (!Upgraded)
5787 NewOperands.append({K, V});
5788 }
5789
5790 if (NewOperands.size() > 1)
5792 }
5793
5795 for (MDNode *N : NewNodes)
5797}
5798
5799
5800
5803 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5804 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5805 if (ModRetainReleaseMarker) {
5807 if (Op) {
5809 if (ID) {
5811 ID->getString().split(ValueComp, "#");
5812 if (ValueComp.size() == 2) {
5813 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5815 }
5817 M.eraseNamedMetadata(ModRetainReleaseMarker);
5819 }
5820 }
5821 }
5823}
5824
5826
5827
5828 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5831
5832 if (!Fn)
5833 return;
5834
5837
5841 continue;
5842
5846
5847
5848
5852 continue;
5853
5854 bool InvalidCast = false;
5855
5856 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5858
5859
5860
5862
5863
5866 InvalidCast = true;
5867 break;
5868 }
5869 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5870 }
5871 Args.push_back(Arg);
5872 }
5873
5874 if (InvalidCast)
5875 continue;
5876
5877
5878 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5881
5882
5883 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5884
5888 }
5889
5892 };
5893
5894
5895
5896 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5897
5898
5899
5900
5902 return;
5903
5904 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5905 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5906 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5907 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5908 {"objc_autoreleaseReturnValue",
5909 llvm::Intrinsic::objc_autoreleaseReturnValue},
5910 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5911 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5912 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5913 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5914 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5915 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5916 {"objc_release", llvm::Intrinsic::objc_release},
5917 {"objc_retain", llvm::Intrinsic::objc_retain},
5918 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5919 {"objc_retainAutoreleaseReturnValue",
5920 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5921 {"objc_retainAutoreleasedReturnValue",
5922 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5923 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5924 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5925 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5926 {"objc_unsafeClaimAutoreleasedReturnValue",
5927 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5928 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5929 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5930 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5931 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5932 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5933 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5934 {"objc_arc_annotation_topdown_bbstart",
5935 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5936 {"objc_arc_annotation_topdown_bbend",
5937 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5938 {"objc_arc_annotation_bottomup_bbstart",
5939 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5940 {"objc_arc_annotation_bottomup_bbend",
5941 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5942
5943 for (auto &I : RuntimeFuncs)
5944 UpgradeToIntrinsic(I.first, I.second);
5945}
5946
5948 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5949 if (!ModFlags)
5950 return false;
5951
5952 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5953 bool HasSwiftVersionFlag = false;
5954 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5958
5959 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5961 if (Op->getNumOperands() != 3)
5962 continue;
5964 if ()
5965 continue;
5970 Op->getOperand(2)};
5973 };
5974
5975 if (ID->getString() == "Objective-C Image Info Version")
5976 HasObjCFlag = true;
5977 if (ID->getString() == "Objective-C Class Properties")
5978 HasClassProperties = true;
5979
5980 if (ID->getString() == "PIC Level") {
5981 if (auto *Behavior =
5983 uint64_t V = Behavior->getLimitedValue();
5986 }
5987 }
5988
5989 if (ID->getString() == "PIE Level")
5990 if (auto *Behavior =
5992 if (Behavior->getLimitedValue() == Module::Error)
5994
5995
5996
5997 if (ID->getString() == "branch-target-enforcement" ||
5998 ID->getString().starts_with("sign-return-address")) {
5999 if (auto *Behavior =
6001 if (Behavior->getLimitedValue() == Module::Error) {
6005 Op->getOperand(1), Op->getOperand(2)};
6008 }
6009 }
6010 }
6011
6012
6013
6014
6015 if (ID->getString() == "Objective-C Image Info Section") {
6018 Value->getString().split(ValueComp, " ");
6019 if (ValueComp.size() != 1) {
6020 std::string NewValue;
6021 for (auto &S : ValueComp)
6022 NewValue += S.str();
6023 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6027 }
6028 }
6029 }
6030
6031
6032
6033 if (ID->getString() == "Objective-C Garbage Collection") {
6035 if (Md) {
6036 assert(Md->getValue() && "Expected non-empty metadata");
6037 auto Type = Md->getValue()->getType();
6038 if (Type == Int8Ty)
6039 continue;
6040 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6041 if ((Val & 0xff) != Val) {
6042 HasSwiftVersionFlag = true;
6043 SwiftABIVersion = (Val & 0xff00) >> 8;
6044 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6045 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6046 }
6049 Op->getOperand(1),
6053 }
6054 }
6055
6056 if (ID->getString() == "amdgpu_code_object_version") {
6058 Op->getOperand(0),
6059 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6060 Op->getOperand(2)};
6063 }
6064 }
6065
6066
6067
6068
6069
6070
6071 if (HasObjCFlag && !HasClassProperties) {
6075 }
6076
6077 if (HasSwiftVersionFlag) {
6078 M.addModuleFlag(Module::Error, "Swift ABI Version",
6079 SwiftABIVersion);
6080 M.addModuleFlag(Module::Error, "Swift Major Version",
6081 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6082 M.addModuleFlag(Module::Error, "Swift Minor Version",
6083 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6085 }
6086
6088}
6089
6091 auto TrimSpaces = [](StringRef Section) -> std::string {
6093 Section.split(Components, ',');
6094
6097
6098 for (auto Component : Components)
6099 OS << ',' << Component.trim();
6100
6101 return std::string(OS.str().substr(1));
6102 };
6103
6104 for (auto &GV : M.globals()) {
6105 if (!GV.hasSection())
6106 continue;
6107
6108 StringRef Section = GV.getSection();
6109
6110 if (!Section.starts_with("__DATA, __objc_catlist"))
6111 continue;
6112
6113
6114
6115 GV.setSection(TrimSpaces(Section));
6116 }
6117}
6118
6119namespace {
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131struct StrictFPUpgradeVisitor : public InstVisitor {
6132 StrictFPUpgradeVisitor() = default;
6133
6135 if (.isStrictFP())
6136 return;
6138 return;
6139
6140
6141 Call.removeFnAttr(Attribute::StrictFP);
6142 Call.addFnAttr(Attribute::NoBuiltin);
6143 }
6144};
6145
6146
6147struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6148 : public InstVisitor {
6149 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6150
6151 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6153 return;
6154
6156 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6159 }
6160};
6161}
6162
6164
6165
6166 if (.isDeclaration() &&
.hasFnAttribute(Attribute::StrictFP)) {
6167 StrictFPUpgradeVisitor SFPV;
6168 SFPV.visit(F);
6169 }
6170
6171
6172 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6173 F.getReturnType(), F.getAttributes().getRetAttrs()));
6174 for (auto &Arg : F.args())
6175 Arg.removeAttrs(
6176 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6177
6178
6179
6180 if (Attribute A = F.getFnAttribute("implicit-section-name");
6181 A.isValid() && A.isStringAttribute()) {
6182 F.setSection(A.getValueAsString());
6183 F.removeFnAttr("implicit-section-name");
6184 }
6185
6186 if (.empty()) {
6187
6188
6189
6190 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6191 A.isValid()) {
6192
6193 if (A.getValueAsBool()) {
6194 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6195 Visitor.visit(F);
6196 }
6197
6198
6199
6200 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6201 }
6202 }
6203}
6204
6205
6208 if (.hasFnAttribute(FnAttrName))
6209 F.addFnAttr(FnAttrName, Value);
6210}
6211
6212
6213
6214
6216 if (.hasFnAttribute(FnAttrName)) {
6217 if (Set)
6218 F.addFnAttr(FnAttrName);
6219 } else {
6220 auto A = F.getFnAttribute(FnAttrName);
6221 if ("false" == A.getValueAsString())
6222 F.removeFnAttr(FnAttrName);
6223 else if ("true" == A.getValueAsString()) {
6224 F.removeFnAttr(FnAttrName);
6225 F.addFnAttr(FnAttrName);
6226 }
6227 }
6228}
6229
6231 Triple T(M.getTargetTriple());
6232 if (.isThumb() &&
.isARM() &&
.isAArch64())
6233 return;
6234
6241
6242 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6243 if (ModFlags) {
6244 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6246 if (Op->getNumOperands() != 3)
6247 continue;
6248
6251 if ( || !CI)
6252 continue;
6253
6255 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6256 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6257 : IDStr == "guarded-control-stack" ? &GCSValue
6258 : IDStr == "sign-return-address" ? &SRAValue
6259 : IDStr == "sign-return-address-all" ? &SRAALLValue
6260 : IDStr == "sign-return-address-with-bkey"
6261 ? &SRABKeyValue
6262 : nullptr;
6263 if (!ValPtr)
6264 continue;
6265
6266 *ValPtr = CI->getZExtValue();
6267 if (*ValPtr == 2)
6268 return;
6269 }
6270 }
6271
6272 bool BTE = BTEValue == 1;
6273 bool BPPLR = BPPLRValue == 1;
6274 bool GCS = GCSValue == 1;
6275 bool SRA = SRAValue == 1;
6276
6277 StringRef SignTypeValue = "non-leaf";
6278 if (SRA && SRAALLValue == 1)
6279 SignTypeValue = "all";
6280
6281 StringRef SignKeyValue = "a_key";
6282 if (SRA && SRABKeyValue == 1)
6283 SignKeyValue = "b_key";
6284
6285 for (Function &F : M.getFunctionList()) {
6286 if (F.isDeclaration())
6287 continue;
6288
6289 if (SRA) {
6292 } else {
6293 if (auto A = F.getFnAttribute("sign-return-address");
6294 A.isValid() && "none" == A.getValueAsString()) {
6295 F.removeFnAttr("sign-return-address");
6296 F.removeFnAttr("sign-return-address-key");
6297 }
6298 }
6302 }
6303
6304 if (BTE)
6305 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6306 if (BPPLR)
6307 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6308 if (GCS)
6310 if (SRA) {
6312 if (SRAALLValue == 1)
6313 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6314 if (SRABKeyValue == 1)
6315 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6316 }
6317}
6318
6321 if ()
6322 return false;
6323 if (T->getNumOperands() < 1)
6324 return false;
6326 if (!S)
6327 return false;
6328 return S->getString().starts_with("llvm.vectorizer.");
6329}
6330
6332 StringRef OldPrefix = "llvm.vectorizer.";
6334
6335 if (OldTag == "llvm.vectorizer.unroll")
6336 return MDString::get(C, "llvm.loop.interleave.count");
6337
6340 .str());
6341}
6342
6345 if ()
6346 return MD;
6347 if (T->getNumOperands() < 1)
6348 return MD;
6350 if (!OldTag)
6351 return MD;
6352 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6353 return MD;
6354
6355
6357 Ops.reserve(T->getNumOperands());
6358 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6359 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6360 Ops.push_back(T->getOperand(I));
6361
6363}
6364
6367 if ()
6368 return &N;
6369
6371 return &N;
6372
6374 Ops.reserve(T->getNumOperands());
6375 for (Metadata *MD : T->operands())
6377
6379}
6380
6383
6384
6385 if ((T.isSPIR() || (T.isSPIRV() && .isSPIRVLogical())) &&
6386 .contains("-G") &&
.starts_with("G")) {
6387 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6388 }
6389
6390 if (T.isLoongArch64() || T.isRISCV64()) {
6391
6392 auto I = DL.find("-n64-");
6394 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6395 return DL.str();
6396 }
6397
6398
6399 std::string Res = DL.str();
6400 if (T.isAMDGPU()) {
6401
6402 if (.contains("-G") &&
.starts_with("G"))
6403 Res.append(Res.empty() ? "G1" : "-G1");
6404
6405
6406 if (T.isAMDGCN()) {
6407
6408
6409
6410
6411 if (.contains("-ni") &&
.starts_with("ni"))
6412 Res.append("-ni:7:8:9");
6413
6414 if (DL.ends_with("ni:7"))
6415 Res.append(":8:9");
6416 if (DL.ends_with("ni:7:8"))
6417 Res.append(":9");
6418
6419
6420
6421 if (.contains("-p7") &&
.starts_with("p7"))
6422 Res.append("-p7:160:256:256:32");
6423 if (.contains("-p8") &&
.starts_with("p8"))
6424 Res.append("-p8:128:128:128:48");
6425 constexpr StringRef OldP8("-p8:128:128-");
6426 if (DL.contains(OldP8))
6427 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6428 if (.contains("-p9") &&
.starts_with("p9"))
6429 Res.append("-p9:192:256:256:32");
6430 }
6431
6432
6433 if (.contains("m:e"))
6434 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6435
6436 return Res;
6437 }
6438
6439 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6440
6441
6442 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6443 if (.contains(AddrSpaces)) {
6445 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6446 if (R.match(Res, &Groups))
6447 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6448 }
6449 };
6450
6451
6452 if (T.isAArch64()) {
6453
6454 if (.empty() &&
.contains("-Fn32"))
6455 Res.append("-Fn32");
6456 AddPtr32Ptr64AddrSpaces();
6457 return Res;
6458 }
6459
6460 if (T.isSPARC() || (T.isMIPS64() && .contains("m:m")) || T.isPPC64() ||
6461 T.isWasm()) {
6462
6463
6464 std::string I64 = "-i64:64";
6465 std::string I128 = "-i128:128";
6467 size_t Pos = Res.find(I64);
6468 if (Pos != size_t(-1))
6469 Res.insert(Pos + I64.size(), I128);
6470 }
6471 }
6472
6473 if (T.isPPC() && T.isOSAIX() && .contains("f64:32:64") &&
.empty()) {
6474 size_t Pos = Res.find("-S128");
6476 Pos = Res.size();
6477 Res.insert(Pos, "-f64:32:64");
6478 }
6479
6480 if (.isX86())
6481 return Res;
6482
6483 AddPtr32Ptr64AddrSpaces();
6484
6485
6486
6487
6488
6489
6490
6491 if (.isOSIAMCU()) {
6492 std::string I128 = "-i128:128";
6495 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6496 if (R.match(Res, &Groups))
6498 }
6499 }
6500
6501
6502
6503
6504 if (T.isWindowsMSVCEnvironment() && .isArch64Bit()) {
6506 auto I = Ref.find("-f80:32-");
6508 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6509 }
6510
6511 return Res;
6512}
6513
6516 Attribute A = B.getAttribute("no-frame-pointer-elim");
6517 if (A.isValid()) {
6518
6519 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6520 B.removeAttribute("no-frame-pointer-elim");
6521 }
6522 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6523
6524 if (FramePointer != "all")
6525 FramePointer = "non-leaf";
6526 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6527 }
6528 if (!FramePointer.empty())
6529 B.addAttribute("frame-pointer", FramePointer);
6530
6531 A = B.getAttribute("null-pointer-is-valid");
6532 if (A.isValid()) {
6533
6534 bool NullPointerIsValid = A.getValueAsString() == "true";
6535 B.removeAttribute("null-pointer-is-valid");
6536 if (NullPointerIsValid)
6537 B.addAttribute(Attribute::NullPointerIsValid);
6538 }
6539}
6540
6542
6543
6544
6545
6547 return OBD.getTag() == "clang.arc.attachedcall" &&
6548 OBD.inputs().empty();
6549 });
6550}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
Definition AutoUpgrade.cpp:2025
static Metadata * upgradeLoopArgument(Metadata *MD)
Definition AutoUpgrade.cpp:6343
static bool isXYZ(StringRef S)
Definition AutoUpgrade.cpp:5695
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
Definition AutoUpgrade.cpp:1248
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
Definition AutoUpgrade.cpp:1865
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
Definition AutoUpgrade.cpp:1143
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
Definition AutoUpgrade.cpp:5801
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
Definition AutoUpgrade.cpp:2126
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
Definition AutoUpgrade.cpp:2352
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
Definition AutoUpgrade.cpp:2100
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:112
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
Definition AutoUpgrade.cpp:6206
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
Definition AutoUpgrade.cpp:1168
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
Definition AutoUpgrade.cpp:5699
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
Definition AutoUpgrade.cpp:4691
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
Definition AutoUpgrade.cpp:6331
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
Definition AutoUpgrade.cpp:5662
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:99
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
Definition AutoUpgrade.cpp:1980
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
Definition AutoUpgrade.cpp:2237
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
Definition AutoUpgrade.cpp:1951
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
Definition AutoUpgrade.cpp:4449
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
Definition AutoUpgrade.cpp:2338
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
Definition AutoUpgrade.cpp:533
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
Definition AutoUpgrade.cpp:2277
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
Definition AutoUpgrade.cpp:1242
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
Definition AutoUpgrade.cpp:166
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
Definition AutoUpgrade.cpp:1899
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
Definition AutoUpgrade.cpp:1096
static bool isOldLoopArgument(Metadata *MD)
Definition AutoUpgrade.cpp:6319
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
Definition AutoUpgrade.cpp:4506
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:83
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
Definition AutoUpgrade.cpp:4596
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
Definition AutoUpgrade.cpp:2216
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
Definition AutoUpgrade.cpp:4703
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:146
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
Definition AutoUpgrade.cpp:771
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
Definition AutoUpgrade.cpp:1931
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
Definition AutoUpgrade.cpp:1963
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
Definition AutoUpgrade.cpp:2165
static void rename(GlobalValue *GV)
Definition AutoUpgrade.cpp:63
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:67
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:156
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
Definition AutoUpgrade.cpp:2299
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
Definition AutoUpgrade.cpp:2085
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
Definition AutoUpgrade.cpp:2609
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
Definition AutoUpgrade.cpp:2331
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
Definition AutoUpgrade.cpp:2361
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
Definition AutoUpgrade.cpp:4719
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
Definition AutoUpgrade.cpp:6215
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
Definition AutoUpgrade.cpp:2247
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
Definition AutoUpgrade.cpp:2198
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition AutoUpgrade.cpp:130
static MDNode * getDebugLocSafe(const Instruction *I)
Definition AutoUpgrade.cpp:4710
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
Definition AutoUpgrade.cpp:2771
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
NVPTX address space definition.
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
Functions, function parameters, and return types can have attributes to indicate how they should be t...
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Convenience struct for specifying and reasoning about fast-math flags.
void setApproxFunc(bool B=true)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Type * getReturnType() const
Returns the type of the ret val.
Argument * getArg(unsigned i) const
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
A Module instance is used to store all the information related to an LLVM module.
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
@ Min
Takes the min of the two values, which are required to be integers.
@ Max
Takes the max of the two values, which are required to be integers.
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
constexpr size_t size() const
size - Get the string size.
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
self_iterator getIterator()
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
@ ADDRESS_SPACE_SHARED_CLUSTER
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
Definition AutoUpgrade.cpp:4771
LLVM_ABI void UpgradeSectionAttributes(Module &M)
Definition AutoUpgrade.cpp:6090
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
Definition AutoUpgrade.cpp:2600
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
Definition AutoUpgrade.cpp:1813
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
Definition AutoUpgrade.cpp:6365
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
Definition AutoUpgrade.cpp:6514
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
Definition AutoUpgrade.cpp:5527
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
Definition AutoUpgrade.cpp:5760
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Definition AutoUpgrade.cpp:5947
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Definition AutoUpgrade.cpp:6230
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Definition AutoUpgrade.cpp:6541
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
Definition AutoUpgrade.cpp:5594
auto dyn_cast_or_null(const Y &Val)
FunctionAddr VTableAddr uintptr_t uintptr_t Version
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
Definition AutoUpgrade.cpp:6381
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
Definition AutoUpgrade.cpp:1832
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
OperandBundleDefT< Value * > OperandBundleDef
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
Definition AutoUpgrade.cpp:5572
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
Definition AutoUpgrade.cpp:5616
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
Definition AutoUpgrade.cpp:6163
@ Default
The result values are uniform if and only if all operands are uniform.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
Definition AutoUpgrade.cpp:5546
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
Definition AutoUpgrade.cpp:5825
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.