LLVM: lib/Target/X86/X86FixupInstTuning.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
30
31using namespace llvm;
32
33#define DEBUG_TYPE "x86-fixup-inst-tuning"
34
35STATISTIC(NumInstChanges, "Number of instructions changes");
36
37namespace {
39public:
40 static char ID;
41
43
44 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
45
46 bool runOnMachineFunction(MachineFunction &MF) override;
47 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
49
50
51 MachineFunctionProperties getRequiredProperties() const override {
52 return MachineFunctionProperties().setNoVRegs();
53 }
54
55private:
56 const X86InstrInfo *TII = nullptr;
57 const X86Subtarget *ST = nullptr;
58 const MCSchedModel *SM = nullptr;
59};
60}
61
62char X86FixupInstTuningPass::ID = 0;
63
65
67 return new X86FixupInstTuningPass();
68}
69
// Compares two optional values of the same type. Yields whether NewVal is
// strictly less than CurVal when both hold a value and the values differ;
// yields std::nullopt when either one is empty or the two are equal, which
// lets a caller move on to its next criterion.
// NOTE(review): the signature line is absent from this listing; the index at
// the end of the page gives it as
// `static std::optional<bool> CmpOptionals(T NewVal, T CurVal)`.
70template
72 if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)
73 return *NewVal < *CurVal;
74
// No decision possible: a value is missing or both values are identical.
75 return std::nullopt;
76}
77
78bool X86FixupInstTuningPass::processInstruction(
82 unsigned Opc = MI.getOpcode();
83 unsigned NumOperands = MI.getDesc().getNumOperands();
85
86 auto GetInstTput = [&](unsigned Opcode) -> std::optional {
87
90 };
91
92 auto GetInstLat = [&](unsigned Opcode) -> std::optional {
93
96 };
97
98 auto GetInstSize = [&](unsigned Opcode) -> std::optional {
101
102 return std::nullopt;
103 };
104
// Decides whether MI's current opcode (Opc) should be replaced by NewOpc.
// Criteria are tried in order and the first one that produces a decision
// wins; if none does, the ReplaceInTie argument is returned.
// NOTE(review): listing lines 108 and 114 are absent here, so the opening of
// the braced region closed at line 117 and the assignment feeding the second
// `Res` check are not visible — presumably a GetInstLat comparison; confirm
// against the real file.
105 auto NewOpcPreferable = [&](unsigned NewOpc,
106 bool ReplaceInTie = true) -> bool {
107 std::optional Res;
109
// First criterion: compare the GetInstTput values, new opcode vs. current.
110 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
111 if (Res.has_value())
112 return *Res;
113
115 if (Res.has_value())
116 return *Res;
117 }
118
// Last criterion: compare the GetInstSize values.
// NOTE(review): the arguments are passed as (Opc, NewOpc), the reverse of the
// throughput call above, so a decision here would favour NewOpc when the
// current opcode's size is the smaller one — confirm this order is intended.
119 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
120 if (Res.has_value())
121 return *Res;
122
123
124
// Nothing separated the two opcodes: fall back to the caller's tie policy.
125 return ReplaceInTie;
126 };
127
128
129
130
131
// Rewrites MI in place to NewOpc when NewOpcPreferable(NewOpc) accepts it
// (ties count as acceptance, since the default ReplaceInTie is used). The
// opcode switch below calls this for the VPERMILPD*ri* opcodes with the
// matching VSHUFPD*rri* opcode.
// Returns true when MI was rewritten, false when it was left alone.
132 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
133 if (!NewOpcPreferable(NewOpc))
134 return false;
136 {
// Save the trailing immediate before dropping it from the operand list.
137 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
138 MI.removeOperand(NumOperands - 1);
// Append a copy of the operand that is now last (index NumOperands - 2).
139 MI.addOperand(MI.getOperand(NumOperands - 2));
140 MI.setDesc(TII->get(NewOpc));
// NOTE(review): listing line 141 is absent; MaskImm is otherwise unused in
// the visible lines, so that line presumably re-adds an immediate — confirm.
142 }
144 return true;
145 };
146
147
148
149
150
// Rewrites MI in place to NewOpc when NewOpcPreferable(NewOpc) accepts it
// (ties count as acceptance, since the default ReplaceInTie is used). The
// opcode switch below calls this for the VPERMILPS*ri* opcodes with the
// matching VSHUFPS*rri* opcode.
// Returns true when MI was rewritten, false when it was left alone.
151 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
152 if (!NewOpcPreferable(NewOpc))
153 return false;
155 {
// Save the trailing immediate before dropping it from the operand list.
156 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
157 MI.removeOperand(NumOperands - 1);
// Append a copy of the operand that is now last (index NumOperands - 2).
158 MI.addOperand(MI.getOperand(NumOperands - 2));
159 MI.setDesc(TII->get(NewOpc));
// NOTE(review): listing line 160 is absent; MaskImm is otherwise unused in
// the visible lines, so that line presumably re-adds an immediate — confirm.
161 }
163 return true;
164 };
165
166
167
168
169 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
170
171
173 !NewOpcPreferable(NewOpc, false))
174 return false;
176 {
177 MI.setDesc(TII->get(NewOpc));
178 }
180 return true;
181 };
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// Switches MI to NewOpc, but only when NewOpcPreferable reports a strict
// preference: ReplaceInTie is passed as false, so a tie leaves MI unchanged.
// Returns true when MI was rewritten.
// NOTE(review): MaskImm is not used in the visible lines; listing line 203 is
// absent and presumably appends it as an immediate operand — confirm.
197 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
198 if (!NewOpcPreferable(NewOpc, false))
199 return false;
201 {
202 MI.setDesc(TII->get(NewOpc));
204 }
206 return true;
207 };
208
209 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
210
211
212
214 !NewOpcPreferable(NewOpc, false))
215 return false;
217 {
218 MI.setDesc(TII->get(NewOpc));
219 }
221 return true;
222 };
223
224 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
225 unsigned NewOpc) -> bool {
226 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
227 return true;
228 return ProcessUNPCK(NewOpc, 0x00);
229 };
230 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
231 unsigned NewOpc) -> bool {
232 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
233 return true;
234 return ProcessUNPCK(NewOpc, 0xff);
235 };
236
237 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
238 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
239 };
240
241 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
242 return ProcessUNPCKToIntDomain(NewOpc);
243 };
244
245 auto ProcessBLENDWToBLENDD = [&](unsigned MovOpc, unsigned NumElts) -> bool {
246 if (!ST->hasAVX2() || !NewOpcPreferable(MovOpc))
247 return false;
248
249 APInt MaskW =
250 APInt(8, MI.getOperand(NumOperands - 1).getImm(), false);
253 return false;
256 {
257 MI.setDesc(TII->get(MovOpc));
258 MI.removeOperand(NumOperands - 1);
260 }
262 return true;
263 };
264
// Rewrites a blend MI to MovOpc when its trailing immediate, restricted to
// the bits in Mask, equals MovImm. The opcode switch below uses it to map
// (V)BLENDPD/(V)BLENDPS onto (V)MOVSD/(V)MOVSS.
// Returns true when MI was rewritten, false when it was left alone.
265 auto ProcessBLENDToMOV = [&](unsigned MovOpc, unsigned Mask,
266 unsigned MovImm) -> bool {
// Only one specific immediate pattern qualifies for this rewrite.
267 if ((MI.getOperand(NumOperands - 1).getImm() & Mask) != MovImm)
268 return false;
// When OptSize is set the preference check is skipped entirely.
// NOTE(review): OptSize is declared on a line absent from this listing;
// presumably it reflects the function's optimize-for-size setting — confirm.
269 if (!OptSize && !NewOpcPreferable(MovOpc))
270 return false;
272 {
273 MI.setDesc(TII->get(MovOpc));
// The trailing immediate is dropped after switching to MovOpc.
274 MI.removeOperand(NumOperands - 1);
275 }
277 return true;
278 };
279
280
281 auto ProcessShiftLeftToAdd = [&](unsigned AddOpc) -> bool {
282 if (MI.getOperand(NumOperands - 1).getImm() != 1)
283 return false;
284 if (!NewOpcPreferable(AddOpc, true))
285 return false;
287 {
288 MI.setDesc(TII->get(AddOpc));
289 MI.removeOperand(NumOperands - 1);
290 MI.addOperand(MI.getOperand(NumOperands - 2));
291 }
293 return false;
294 };
295
296 switch (Opc) {
297 case X86::BLENDPDrri:
298 return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
299 case X86::VBLENDPDrri:
300 return ProcessBLENDToMOV(X86::VMOVSDrr, 0x3, 0x1);
301
302 case X86::BLENDPSrri:
303 return ProcessBLENDToMOV(X86::MOVSSrr, 0xF, 0x1) ||
304 ProcessBLENDToMOV(X86::MOVSDrr, 0xF, 0x3);
305 case X86::VBLENDPSrri:
306 return ProcessBLENDToMOV(X86::VMOVSSrr, 0xF, 0x1) ||
307 ProcessBLENDToMOV(X86::VMOVSDrr, 0xF, 0x3);
308
309 case X86::VPBLENDWrri:
310
311
312
313 return ProcessBLENDWToBLENDD(X86::VPBLENDDrri, 4);
314
315 case X86::VPERMILPDri:
316 return ProcessVPERMILPDri(X86::VSHUFPDrri);
317 case X86::VPERMILPDYri:
318 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
319 case X86::VPERMILPDZ128ri:
320 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
321 case X86::VPERMILPDZ256ri:
322 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
323 case X86::VPERMILPDZri:
324 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
325 case X86::VPERMILPDZ128rikz:
326 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
327 case X86::VPERMILPDZ256rikz:
328 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
329 case X86::VPERMILPDZrikz:
330 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
331 case X86::VPERMILPDZ128rik:
332 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
333 case X86::VPERMILPDZ256rik:
334 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
335 case X86::VPERMILPDZrik:
336 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
337
338 case X86::VPERMILPSri:
339 return ProcessVPERMILPSri(X86::VSHUFPSrri);
340 case X86::VPERMILPSYri:
341 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
342 case X86::VPERMILPSZ128ri:
343 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
344 case X86::VPERMILPSZ256ri:
345 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
346 case X86::VPERMILPSZri:
347 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
348 case X86::VPERMILPSZ128rikz:
349 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
350 case X86::VPERMILPSZ256rikz:
351 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
352 case X86::VPERMILPSZrikz:
353 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
354 case X86::VPERMILPSZ128rik:
355 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
356 case X86::VPERMILPSZ256rik:
357 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
358 case X86::VPERMILPSZrik:
359 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
360 case X86::VPERMILPSmi:
361 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
362 case X86::VPERMILPSYmi:
363
364
365 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
366 case X86::VPERMILPSZ128mi:
367 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
368 case X86::VPERMILPSZ256mi:
369 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
370 case X86::VPERMILPSZmi:
371 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
372 case X86::VPERMILPSZ128mikz:
373 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
374 case X86::VPERMILPSZ256mikz:
375 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
376 case X86::VPERMILPSZmikz:
377 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
378 case X86::VPERMILPSZ128mik:
379 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
380 case X86::VPERMILPSZ256mik:
381 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
382 case X86::VPERMILPSZmik:
383 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
384
385 case X86::MOVLHPSrr:
386 case X86::UNPCKLPDrr:
387 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
388 case X86::VMOVLHPSrr:
389 case X86::VUNPCKLPDrr:
390 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
391 case X86::VUNPCKLPDYrr:
392 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
393
394 case X86::VMOVLHPSZrr:
395 case X86::VUNPCKLPDZ128rr:
396 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
397 case X86::VUNPCKLPDZ256rr:
398 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
399 case X86::VUNPCKLPDZrr:
400 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
401 case X86::VUNPCKLPDZ128rrk:
402 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
403 case X86::VUNPCKLPDZ256rrk:
404 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
405 case X86::VUNPCKLPDZrrk:
406 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
407 case X86::VUNPCKLPDZ128rrkz:
408 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
409 case X86::VUNPCKLPDZ256rrkz:
410 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
411 case X86::VUNPCKLPDZrrkz:
412 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
413 case X86::UNPCKHPDrr:
414 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
415 case X86::VUNPCKHPDrr:
416 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
417 case X86::VUNPCKHPDYrr:
418 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
419 case X86::VUNPCKHPDZ128rr:
420 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
421 case X86::VUNPCKHPDZ256rr:
422 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
423 case X86::VUNPCKHPDZrr:
424 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
425 case X86::VUNPCKHPDZ128rrk:
426 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
427 case X86::VUNPCKHPDZ256rrk:
428 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
429 case X86::VUNPCKHPDZrrk:
430 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
431 case X86::VUNPCKHPDZ128rrkz:
432 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
433 case X86::VUNPCKHPDZ256rrkz:
434 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
435 case X86::VUNPCKHPDZrrkz:
436 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
437 case X86::UNPCKLPDrm:
438 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
439 case X86::VUNPCKLPDrm:
440 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
441 case X86::VUNPCKLPDYrm:
442 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
443 case X86::VUNPCKLPDZ128rm:
444 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
445 case X86::VUNPCKLPDZ256rm:
446 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
447 case X86::VUNPCKLPDZrm:
448 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
449 case X86::VUNPCKLPDZ128rmk:
450 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
451 case X86::VUNPCKLPDZ256rmk:
452 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
453 case X86::VUNPCKLPDZrmk:
454 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
455 case X86::VUNPCKLPDZ128rmkz:
456 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
457 case X86::VUNPCKLPDZ256rmkz:
458 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
459 case X86::VUNPCKLPDZrmkz:
460 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
461 case X86::UNPCKHPDrm:
462 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
463 case X86::VUNPCKHPDrm:
464 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
465 case X86::VUNPCKHPDYrm:
466 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
467 case X86::VUNPCKHPDZ128rm:
468 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
469 case X86::VUNPCKHPDZ256rm:
470 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
471 case X86::VUNPCKHPDZrm:
472 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
473 case X86::VUNPCKHPDZ128rmk:
474 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
475 case X86::VUNPCKHPDZ256rmk:
476 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
477 case X86::VUNPCKHPDZrmk:
478 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
479 case X86::VUNPCKHPDZ128rmkz:
480 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
481 case X86::VUNPCKHPDZ256rmkz:
482 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
483 case X86::VUNPCKHPDZrmkz:
484 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
485
486 case X86::UNPCKLPSrr:
487 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
488 case X86::VUNPCKLPSrr:
489 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
490 case X86::VUNPCKLPSYrr:
491 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
492 case X86::VUNPCKLPSZ128rr:
493 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
494 case X86::VUNPCKLPSZ256rr:
495 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
496 case X86::VUNPCKLPSZrr:
497 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
498 case X86::VUNPCKLPSZ128rrk:
499 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
500 case X86::VUNPCKLPSZ256rrk:
501 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
502 case X86::VUNPCKLPSZrrk:
503 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
504 case X86::VUNPCKLPSZ128rrkz:
505 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
506 case X86::VUNPCKLPSZ256rrkz:
507 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
508 case X86::VUNPCKLPSZrrkz:
509 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
510 case X86::UNPCKHPSrr:
511 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
512 case X86::VUNPCKHPSrr:
513 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
514 case X86::VUNPCKHPSYrr:
515 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
516 case X86::VUNPCKHPSZ128rr:
517 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
518 case X86::VUNPCKHPSZ256rr:
519 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
520 case X86::VUNPCKHPSZrr:
521 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
522 case X86::VUNPCKHPSZ128rrk:
523 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
524 case X86::VUNPCKHPSZ256rrk:
525 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
526 case X86::VUNPCKHPSZrrk:
527 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
528 case X86::VUNPCKHPSZ128rrkz:
529 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
530 case X86::VUNPCKHPSZ256rrkz:
531 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
532 case X86::VUNPCKHPSZrrkz:
533 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
534 case X86::UNPCKLPSrm:
535 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
536 case X86::VUNPCKLPSrm:
537 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
538 case X86::VUNPCKLPSYrm:
539 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
540 case X86::VUNPCKLPSZ128rm:
541 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
542 case X86::VUNPCKLPSZ256rm:
543 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
544 case X86::VUNPCKLPSZrm:
545 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
546 case X86::VUNPCKLPSZ128rmk:
547 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
548 case X86::VUNPCKLPSZ256rmk:
549 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
550 case X86::VUNPCKLPSZrmk:
551 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
552 case X86::VUNPCKLPSZ128rmkz:
553 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
554 case X86::VUNPCKLPSZ256rmkz:
555 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
556 case X86::VUNPCKLPSZrmkz:
557 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
558 case X86::UNPCKHPSrm:
559 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
560 case X86::VUNPCKHPSrm:
561 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
562 case X86::VUNPCKHPSYrm:
563 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
564 case X86::VUNPCKHPSZ128rm:
565 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
566 case X86::VUNPCKHPSZ256rm:
567 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
568 case X86::VUNPCKHPSZrm:
569 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
570 case X86::VUNPCKHPSZ128rmk:
571 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
572 case X86::VUNPCKHPSZ256rmk:
573 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
574 case X86::VUNPCKHPSZrmk:
575 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
576 case X86::VUNPCKHPSZ128rmkz:
577 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
578 case X86::VUNPCKHPSZ256rmkz:
579 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
580 case X86::VUNPCKHPSZrmkz:
581 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
582
583 case X86::PSLLWri:
584 return ProcessShiftLeftToAdd(X86::PADDWrr);
585 case X86::VPSLLWri:
586 return ProcessShiftLeftToAdd(X86::VPADDWrr);
587 case X86::VPSLLWYri:
588 return ProcessShiftLeftToAdd(X86::VPADDWYrr);
589 case X86::VPSLLWZ128ri:
590 return ProcessShiftLeftToAdd(X86::VPADDWZ128rr);
591 case X86::VPSLLWZ256ri:
592 return ProcessShiftLeftToAdd(X86::VPADDWZ256rr);
593 case X86::VPSLLWZri:
594 return ProcessShiftLeftToAdd(X86::VPADDWZrr);
595 case X86::PSLLDri:
596 return ProcessShiftLeftToAdd(X86::PADDDrr);
597 case X86::VPSLLDri:
598 return ProcessShiftLeftToAdd(X86::VPADDDrr);
599 case X86::VPSLLDYri:
600 return ProcessShiftLeftToAdd(X86::VPADDDYrr);
601 case X86::VPSLLDZ128ri:
602 return ProcessShiftLeftToAdd(X86::VPADDDZ128rr);
603 case X86::VPSLLDZ256ri:
604 return ProcessShiftLeftToAdd(X86::VPADDDZ256rr);
605 case X86::VPSLLDZri:
606 return ProcessShiftLeftToAdd(X86::VPADDDZrr);
607 case X86::PSLLQri:
608 return ProcessShiftLeftToAdd(X86::PADDQrr);
609 case X86::VPSLLQri:
610 return ProcessShiftLeftToAdd(X86::VPADDQrr);
611 case X86::VPSLLQYri:
612 return ProcessShiftLeftToAdd(X86::VPADDQYrr);
613 case X86::VPSLLQZ128ri:
614 return ProcessShiftLeftToAdd(X86::VPADDQZ128rr);
615 case X86::VPSLLQZ256ri:
616 return ProcessShiftLeftToAdd(X86::VPADDQZ256rr);
617 case X86::VPSLLQZri:
618 return ProcessShiftLeftToAdd(X86::VPADDQZrr);
619
620 default:
621 return false;
622 }
623}
624
625bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
630 SM = &ST->getSchedModel();
631
632 for (MachineBasicBlock &MBB : MF) {
634 if (processInstruction(MF, MBB, I)) {
635 ++NumInstChanges;
637 }
638 }
639 }
642}
Function Alias Analysis false
const HexagonInstrInfo * TII
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static std::optional< bool > CmpOptionals(T NewVal, T CurVal)
Definition X86FixupInstTuning.cpp:71
uint64_t getZExtValue() const
Get zero extended value.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
unsigned getSize(const MachineInstr &MI) const
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
static MachineOperand CreateImm(int64_t Val)
bool hasNoDomainDelayShuffle() const
const X86InstrInfo * getInstrInfo() const override
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createX86FixupInstTuning()
Return a pass that replaces equivalent slower instructions with faster ones.
Definition X86FixupInstTuning.cpp:66
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
static LLVM_ABI int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)