[AArch64][DAG] Allow fptos/ui.sat to scalarized. (#126799) · llvm/llvm-project@cc60c22 (original) (raw)
`@@ -5548,3 +5548,151 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
`
5548
5548
`%x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f)
`
5549
5549
`ret <16 x i16> %x
`
5550
5550
`}
`
``
5551
+
``
5552
`+
; Saturating signed conversion of a <2 x fp128> vector to <2 x i64> through
; @llvm.fptosi.sat.v2f128.v2i64.
;
; The expected output below handles the two lanes one after the other, and
; every fp128 operation is a library call:
;   __getf2 / __gttf2 - comparisons against the values loaded from
;                       .LCPI86_0 and .LCPI86_1
;   __fixtfdi         - the fp128 -> i64 conversion
;   __unordtf2        - called with the input in both q0 and q1; a non-zero
;                       result makes csel pick xzr for that lane
; In the SD output the integer result of __fixtfdi is adjusted afterwards
; (0x8000000000000000 selected on "lt", 0x7fffffffffffffff on "gt").
; In the GI output csel chooses between the two 64-bit halves of the input
; and constants after each comparison, before __fixtfdi is called.
;
; NOTE(review): the SD and GI prefixes presumably correspond to the
; SelectionDAG and GlobalISel llc invocations, which are outside this
; excerpt - confirm against the top of the test file.
; NOTE(review): the assertion lines look autogenerated; if so, regenerate
; them with the test-update script instead of editing them by hand - confirm.
define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
`
``
5553
`+
; CHECK-SD-LABEL: test_signed_v2f128_v2i64:
`
``
5554
`+
; CHECK-SD: // %bb.0:
`
``
5555
`+
; CHECK-SD-NEXT: sub sp, sp, #96
`
``
5556
`+
; CHECK-SD-NEXT: stp x30, x21, [sp, #64] // 16-byte Folded Spill
`
``
5557
`+
; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
`
``
5558
`+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
`
``
5559
`+
; CHECK-SD-NEXT: .cfi_offset w19, -8
`
``
5560
`+
; CHECK-SD-NEXT: .cfi_offset w20, -16
`
``
5561
`+
; CHECK-SD-NEXT: .cfi_offset w21, -24
`
``
5562
`+
; CHECK-SD-NEXT: .cfi_offset w30, -32
`
``
5563
`+
; CHECK-SD-NEXT: mov v2.16b, v1.16b
`
``
5564
`+
; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
`
``
5565
`+
; CHECK-SD-NEXT: adrp x8, .LCPI86_0
`
``
5566
`+
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
`
``
5567
`+
; CHECK-SD-NEXT: mov v0.16b, v2.16b
`
``
5568
`+
; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
`
``
5569
`+
; CHECK-SD-NEXT: bl __getf2
`
``
5570
`+
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
`
``
5571
`+
; CHECK-SD-NEXT: mov w19, w0
`
``
5572
`+
; CHECK-SD-NEXT: bl __fixtfdi
`
``
5573
`+
; CHECK-SD-NEXT: adrp x8, .LCPI86_1
`
``
5574
`+
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
`
``
5575
`+
; CHECK-SD-NEXT: cmp w19, #0
`
``
5576
`+
; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1]
`
``
5577
`+
; CHECK-SD-NEXT: mov x20, #-9223372036854775808 // =0x8000000000000000
`
``
5578
`+
; CHECK-SD-NEXT: csel x19, x20, x0, lt
`
``
5579
`+
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill
`
``
5580
`+
; CHECK-SD-NEXT: bl __gttf2
`
``
5581
`+
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
`
``
5582
`+
; CHECK-SD-NEXT: mov x21, #9223372036854775807 // =0x7fffffffffffffff
`
``
5583
`+
; CHECK-SD-NEXT: cmp w0, #0
`
``
5584
`+
; CHECK-SD-NEXT: csel x19, x21, x19, gt
`
``
5585
`+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
`
``
5586
`+
; CHECK-SD-NEXT: bl __unordtf2
`
``
5587
`+
; CHECK-SD-NEXT: cmp w0, #0
`
``
5588
`+
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
`
``
5589
`+
; CHECK-SD-NEXT: csel x8, xzr, x19, ne
`
``
5590
`+
; CHECK-SD-NEXT: fmov d0, x8
`
``
5591
`+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
`
``
5592
`+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5593
`+
; CHECK-SD-NEXT: bl __getf2
`
``
5594
`+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5595
`+
; CHECK-SD-NEXT: mov w19, w0
`
``
5596
`+
; CHECK-SD-NEXT: bl __fixtfdi
`
``
5597
`+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5598
`+
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
`
``
5599
`+
; CHECK-SD-NEXT: cmp w19, #0
`
``
5600
`+
; CHECK-SD-NEXT: csel x19, x20, x0, lt
`
``
5601
`+
; CHECK-SD-NEXT: bl __gttf2
`
``
5602
`+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5603
`+
; CHECK-SD-NEXT: cmp w0, #0
`
``
5604
`+
; CHECK-SD-NEXT: csel x19, x21, x19, gt
`
``
5605
`+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
`
``
5606
`+
; CHECK-SD-NEXT: bl __unordtf2
`
``
5607
`+
; CHECK-SD-NEXT: cmp w0, #0
`
``
5608
`+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
`
``
5609
`+
; CHECK-SD-NEXT: csel x8, xzr, x19, ne
`
``
5610
`+
; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
`
``
5611
`+
; CHECK-SD-NEXT: fmov d0, x8
`
``
5612
`+
; CHECK-SD-NEXT: ldp x30, x21, [sp, #64] // 16-byte Folded Reload
`
``
5613
`+
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
`
``
5614
`+
; CHECK-SD-NEXT: add sp, sp, #96
`
``
5615
`+
; CHECK-SD-NEXT: ret
`
``
5616
`+
;
`
``
5617
`+
; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
`
``
5618
`+
; CHECK-GI: // %bb.0:
`
``
5619
`+
; CHECK-GI-NEXT: sub sp, sp, #112
`
``
5620
`+
; CHECK-GI-NEXT: stp x30, x23, [sp, #64] // 16-byte Folded Spill
`
``
5621
`+
; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
`
``
5622
`+
; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
`
``
5623
`+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
`
``
5624
`+
; CHECK-GI-NEXT: .cfi_offset w19, -8
`
``
5625
`+
; CHECK-GI-NEXT: .cfi_offset w20, -16
`
``
5626
`+
; CHECK-GI-NEXT: .cfi_offset w21, -24
`
``
5627
`+
; CHECK-GI-NEXT: .cfi_offset w22, -32
`
``
5628
`+
; CHECK-GI-NEXT: .cfi_offset w23, -40
`
``
5629
`+
; CHECK-GI-NEXT: .cfi_offset w30, -48
`
``
5630
`+
; CHECK-GI-NEXT: adrp x8, .LCPI86_1
`
``
5631
`+
; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
`
``
5632
`+
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1]
`
``
5633
`+
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
`
``
5634
`+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
`
``
5635
`+
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
`
``
5636
`+
; CHECK-GI-NEXT: bl __getf2
`
``
5637
`+
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
`
``
5638
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5639
`+
; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000
`
``
5640
`+
; CHECK-GI-NEXT: fmov x8, d0
`
``
5641
`+
; CHECK-GI-NEXT: csel x19, x8, xzr, lt
`
``
5642
`+
; CHECK-GI-NEXT: mov x8, v0.d[1]
`
``
5643
`+
; CHECK-GI-NEXT: mov v0.d[0], x19
`
``
5644
`+
; CHECK-GI-NEXT: csel x21, x8, x20, lt
`
``
5645
`+
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
`
``
5646
`+
; CHECK-GI-NEXT: mov v0.d[1], x21
`
``
5647
`+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
`
``
5648
`+
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
`
``
5649
`+
; CHECK-GI-NEXT: bl __gttf2
`
``
5650
`+
; CHECK-GI-NEXT: mov x22, #-1125899906842624 // =0xfffc000000000000
`
``
5651
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5652
`+
; CHECK-GI-NEXT: mov x23, #4629137466983448575 // =0x403dffffffffffff
`
``
5653
`+
; CHECK-GI-NEXT: csel x8, x19, x22, gt
`
``
5654
`+
; CHECK-GI-NEXT: mov v0.d[0], x8
`
``
5655
`+
; CHECK-GI-NEXT: csel x8, x21, x23, gt
`
``
5656
`+
; CHECK-GI-NEXT: mov v0.d[1], x8
`
``
5657
`+
; CHECK-GI-NEXT: bl __fixtfdi
`
``
5658
`+
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
`
``
5659
`+
; CHECK-GI-NEXT: mov x19, x0
`
``
5660
`+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
`
``
5661
`+
; CHECK-GI-NEXT: bl __unordtf2
`
``
5662
`+
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5663
`+
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
`
``
5664
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5665
`+
; CHECK-GI-NEXT: csel x21, xzr, x19, ne
`
``
5666
`+
; CHECK-GI-NEXT: bl __getf2
`
``
5667
`+
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5668
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5669
`+
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
`
``
5670
`+
; CHECK-GI-NEXT: fmov x8, d0
`
``
5671
`+
; CHECK-GI-NEXT: csel x19, x8, xzr, lt
`
``
5672
`+
; CHECK-GI-NEXT: mov x8, v0.d[1]
`
``
5673
`+
; CHECK-GI-NEXT: mov v0.d[0], x19
`
``
5674
`+
; CHECK-GI-NEXT: csel x20, x8, x20, lt
`
``
5675
`+
; CHECK-GI-NEXT: mov v0.d[1], x20
`
``
5676
`+
; CHECK-GI-NEXT: bl __gttf2
`
``
5677
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5678
`+
; CHECK-GI-NEXT: csel x8, x19, x22, gt
`
``
5679
`+
; CHECK-GI-NEXT: mov v0.d[0], x8
`
``
5680
`+
; CHECK-GI-NEXT: csel x8, x20, x23, gt
`
``
5681
`+
; CHECK-GI-NEXT: mov v0.d[1], x8
`
``
5682
`+
; CHECK-GI-NEXT: bl __fixtfdi
`
``
5683
`+
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
`
``
5684
`+
; CHECK-GI-NEXT: mov x19, x0
`
``
5685
`+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
`
``
5686
`+
; CHECK-GI-NEXT: bl __unordtf2
`
``
5687
`+
; CHECK-GI-NEXT: mov v0.d[0], x21
`
``
5688
`+
; CHECK-GI-NEXT: cmp w0, #0
`
``
5689
`+
; CHECK-GI-NEXT: csel x8, xzr, x19, ne
`
``
5690
`+
; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
`
``
5691
`+
; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
`
``
5692
`+
; CHECK-GI-NEXT: ldp x30, x23, [sp, #64] // 16-byte Folded Reload
`
``
5693
`+
; CHECK-GI-NEXT: mov v0.d[1], x8
`
``
5694
`+
; CHECK-GI-NEXT: add sp, sp, #112
`
``
5695
`+
; CHECK-GI-NEXT: ret
`
``
5696
`+
%x = call <2 x i64> @llvm.fptosi.sat.v2f128.v2i64(<2 x fp128> %f)
`
``
5697
`+
ret <2 x i64> %x
`
``
5698
`+
}
`