[AArch64] Add BE test coverage for popcount. NFC · llvm/llvm-project@05be3ca (original) (raw)
`@@ -2,6 +2,7 @@
`
2
2
`; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
`
3
3
`; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
`
4
4
`; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
`
``
5
`+
; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
`
5
6
``
6
7
`define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
`
7
8
`; CHECK-LABEL: cnt32_advsimd:
`
`@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
`
32
33
`; CHECK-CSSC: // %bb.0:
`
33
34
`; CHECK-CSSC-NEXT: cnt w0, w0
`
34
35
`; CHECK-CSSC-NEXT: ret
`
``
36
`+
;
`
``
37
`+
; CHECK-BE-LABEL: cnt32_advsimd:
`
``
38
`+
; CHECK-BE: // %bb.0:
`
``
39
`+
; CHECK-BE-NEXT: fmov s0, w0
`
``
40
`+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
`
``
41
`+
; CHECK-BE-NEXT: addv b0, v0.8b
`
``
42
`+
; CHECK-BE-NEXT: fmov w0, s0
`
``
43
`+
; CHECK-BE-NEXT: ret
`
35
44
`%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
`
36
45
`ret i32 %cnt
`
37
46
`}
`
`@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
`
69
78
`; CHECK-CSSC-NEXT: fmov w8, s0
`
70
79
`; CHECK-CSSC-NEXT: cnt w0, w8
`
71
80
`; CHECK-CSSC-NEXT: ret
`
``
81
`+
;
`
``
82
`+
; CHECK-BE-LABEL: cnt32_advsimd_2:
`
``
83
`+
; CHECK-BE: // %bb.0:
`
``
84
`+
; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
`
``
85
`+
; CHECK-BE-NEXT: fmov w8, s0
`
``
86
`+
; CHECK-BE-NEXT: fmov s0, w8
`
``
87
`+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
`
``
88
`+
; CHECK-BE-NEXT: addv b0, v0.8b
`
``
89
`+
; CHECK-BE-NEXT: fmov w0, s0
`
``
90
`+
; CHECK-BE-NEXT: ret
`
72
91
`%1 = extractelement <2 x i32> %x, i64 0
`
73
92
`%2 = tail call i32 @llvm.ctpop.i32(i32 %1)
`
74
93
`ret i32 %2
`
`@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
`
103
122
`; CHECK-CSSC: // %bb.0:
`
104
123
`; CHECK-CSSC-NEXT: cnt x0, x0
`
105
124
`; CHECK-CSSC-NEXT: ret
`
``
125
`+
;
`
``
126
`+
; CHECK-BE-LABEL: cnt64_advsimd:
`
``
127
`+
; CHECK-BE: // %bb.0:
`
``
128
`+
; CHECK-BE-NEXT: fmov d0, x0
`
``
129
`+
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
`
``
130
`+
; CHECK-BE-NEXT: cnt v0.8b, v0.8b
`
``
131
`+
; CHECK-BE-NEXT: addv b0, v0.8b
`
``
132
`+
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
`
``
133
`+
; CHECK-BE-NEXT: fmov x0, d0
`
``
134
`+
; CHECK-BE-NEXT: ret
`
106
135
`%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
`
107
136
`ret i64 %cnt
`
108
137
`}
`
`@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
`
147
176
`; CHECK-CSSC: // %bb.0:
`
148
177
`; CHECK-CSSC-NEXT: cnt w0, w0
`
149
178
`; CHECK-CSSC-NEXT: ret
`
``
179
`+
;
`
``
180
`+
; CHECK-BE-LABEL: cnt32:
`
``
181
`+
; CHECK-BE: // %bb.0:
`
``
182
`+
; CHECK-BE-NEXT: lsr w9, w0, #1
`
``
183
`+
; CHECK-BE-NEXT: mov w8, #16843009 // =0x1010101
`
``
184
`+
; CHECK-BE-NEXT: and w9, w9, #0x55555555
`
``
185
`+
; CHECK-BE-NEXT: sub w9, w0, w9
`
``
186
`+
; CHECK-BE-NEXT: lsr w10, w9, #2
`
``
187
`+
; CHECK-BE-NEXT: and w9, w9, #0x33333333
`
``
188
`+
; CHECK-BE-NEXT: and w10, w10, #0x33333333
`
``
189
`+
; CHECK-BE-NEXT: add w9, w9, w10
`
``
190
`+
; CHECK-BE-NEXT: add w9, w9, w9, lsr #4
`
``
191
`+
; CHECK-BE-NEXT: and w9, w9, #0xf0f0f0f
`
``
192
`+
; CHECK-BE-NEXT: mul w8, w9, w8
`
``
193
`+
; CHECK-BE-NEXT: lsr w0, w8, #24
`
``
194
`+
; CHECK-BE-NEXT: ret
`
150
195
`%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
`
151
196
`ret i32 %cnt
`
152
197
`}
`
`@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
`
188
233
`; CHECK-CSSC: // %bb.0:
`
189
234
`; CHECK-CSSC-NEXT: cnt x0, x0
`
190
235
`; CHECK-CSSC-NEXT: ret
`
``
236
`+
;
`
``
237
`+
; CHECK-BE-LABEL: cnt64:
`
``
238
`+
; CHECK-BE: // %bb.0:
`
``
239
`+
; CHECK-BE-NEXT: lsr x9, x0, #1
`
``
240
`+
; CHECK-BE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
`
``
241
`+
; CHECK-BE-NEXT: and x9, x9, #0x5555555555555555
`
``
242
`+
; CHECK-BE-NEXT: sub x9, x0, x9
`
``
243
`+
; CHECK-BE-NEXT: lsr x10, x9, #2
`
``
244
`+
; CHECK-BE-NEXT: and x9, x9, #0x3333333333333333
`
``
245
`+
; CHECK-BE-NEXT: and x10, x10, #0x3333333333333333
`
``
246
`+
; CHECK-BE-NEXT: add x9, x9, x10
`
``
247
`+
; CHECK-BE-NEXT: add x9, x9, x9, lsr #4
`
``
248
`+
; CHECK-BE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
`
``
249
`+
; CHECK-BE-NEXT: mul x8, x9, x8
`
``
250
`+
; CHECK-BE-NEXT: lsr x0, x8, #56
`
``
251
`+
; CHECK-BE-NEXT: ret
`
191
252
`%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
`
192
253
`ret i64 %cnt
`
193
254
`}
`
`@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
`
215
276
`; CHECK-CSSC-NEXT: cmp x8, #1
`
216
277
`; CHECK-CSSC-NEXT: cset w0, eq
`
217
278
`; CHECK-CSSC-NEXT: ret
`
``
279
`+
;
`
``
280
`+
; CHECK-BE-LABEL: ctpop_eq_one:
`
``
281
`+
; CHECK-BE: // %bb.0:
`
``
282
`+
; CHECK-BE-NEXT: sub x8, x0, #1
`
``
283
`+
; CHECK-BE-NEXT: eor x9, x0, x8
`
``
284
`+
; CHECK-BE-NEXT: cmp x9, x8
`
``
285
`+
; CHECK-BE-NEXT: cset w0, hi
`
``
286
`+
; CHECK-BE-NEXT: ret
`
218
287
`%count = tail call i64 @llvm.ctpop.i64(i64 %x)
`
219
288
`%cmp = icmp eq i64 %count, 1
`
220
289
`%conv = zext i1 %cmp to i32
`
`@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
`
244
313
`; CHECK-CSSC-NEXT: cmp x8, #1
`
245
314
`; CHECK-CSSC-NEXT: cset w0, ne
`
246
315
`; CHECK-CSSC-NEXT: ret
`
``
316
`+
;
`
``
317
`+
; CHECK-BE-LABEL: ctpop_ne_one:
`
``
318
`+
; CHECK-BE: // %bb.0:
`
``
319
`+
; CHECK-BE-NEXT: sub x8, x0, #1
`
``
320
`+
; CHECK-BE-NEXT: eor x9, x0, x8
`
``
321
`+
; CHECK-BE-NEXT: cmp x9, x8
`
``
322
`+
; CHECK-BE-NEXT: cset w0, ls
`
``
323
`+
; CHECK-BE-NEXT: ret
`
247
324
`%count = tail call i64 @llvm.ctpop.i64(i64 %x)
`
248
325
`%cmp = icmp ne i64 %count, 1
`
249
326
`%conv = zext i1 %cmp to i32
`
`@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
`
273
350
`; CHECK-CSSC-NEXT: cmp w8, #1
`
274
351
`; CHECK-CSSC-NEXT: cset w0, ne
`
275
352
`; CHECK-CSSC-NEXT: ret
`
``
353
`+
;
`
``
354
`+
; CHECK-BE-LABEL: ctpop32_ne_one:
`
``
355
`+
; CHECK-BE: // %bb.0:
`
``
356
`+
; CHECK-BE-NEXT: sub w8, w0, #1
`
``
357
`+
; CHECK-BE-NEXT: eor w9, w0, w8
`
``
358
`+
; CHECK-BE-NEXT: cmp w9, w8
`
``
359
`+
; CHECK-BE-NEXT: cset w0, ls
`
``
360
`+
; CHECK-BE-NEXT: ret
`
276
361
`%count = tail call i32 @llvm.ctpop.i32(i32 %x)
`
277
362
`%cmp = icmp ne i32 %count, 1
`
278
363
`ret i1 %cmp
`
`@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
`
299
384
`; CHECK-CSSC-NEXT: tst w0, w8
`
300
385
`; CHECK-CSSC-NEXT: cset w0, eq
`
301
386
`; CHECK-CSSC-NEXT: ret
`
``
387
`+
;
`
``
388
`+
; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
`
``
389
`+
; CHECK-BE: // %bb.0: // %entry
`
``
390
`+
; CHECK-BE-NEXT: sub w8, w0, #1
`
``
391
`+
; CHECK-BE-NEXT: tst w0, w8
`
``
392
`+
; CHECK-BE-NEXT: cset w0, eq
`
``
393
`+
; CHECK-BE-NEXT: ret
`
302
394
`entry:
`
303
395
`%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
`
304
396
`%cmp = icmp eq i32 %popcnt, 1
`
`@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
`
326
418
`; CHECK-CSSC-NEXT: tst w0, w8
`
327
419
`; CHECK-CSSC-NEXT: cset w0, ne
`
328
420
`; CHECK-CSSC-NEXT: ret
`
``
421
`+
;
`
``
422
`+
; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
`
``
423
`+
; CHECK-BE: // %bb.0: // %entry
`
``
424
`+
; CHECK-BE-NEXT: sub w8, w0, #1
`
``
425
`+
; CHECK-BE-NEXT: tst w0, w8
`
``
426
`+
; CHECK-BE-NEXT: cset w0, ne
`
``
427
`+
; CHECK-BE-NEXT: ret
`
329
428
`entry:
`
330
429
`%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
`
331
430
`%cmp = icmp ne i32 %popcnt, 1
`
332
431
`ret i1 %cmp
`
333
432
`}
`
334
433
``
``
434
`+
; cnt128: population count of an i128 argument via @llvm.ctpop.i128; the
; result is returned as i128. The directive groups below give the expected llc
; output for each RUN configuration at the top of the file (default NEON with
; Apple syntax, NEON disabled, +cssc, and big-endian aarch64_be).
define i128 @cnt128(i128 %x) nounwind readnone {
`
``
435
`+
; CHECK-LABEL: cnt128:
`
``
436
`+
; CHECK: // %bb.0:
`
``
437
`+
; CHECK-NEXT: fmov d0, x0
`
``
438
`+
; CHECK-NEXT: mov.d v0[1], x1
`
``
439
`+
; CHECK-NEXT: cnt.16b v0, v0
`
``
440
`+
; CHECK-NEXT: addv.16b b0, v0
`
``
441
`+
; CHECK-NEXT: mov.d x1, v0[1]
`
``
442
`+
; CHECK-NEXT: fmov x0, d0
`
``
443
`+
; CHECK-NEXT: ret
`
``
444
`+
;
`
``
445
`+
; CHECK-NONEON-LABEL: cnt128:
`
``
446
`+
; CHECK-NONEON: // %bb.0:
`
``
447
`+
; CHECK-NONEON-NEXT: lsr x9, x0, #1
`
``
448
`+
; CHECK-NONEON-NEXT: lsr x10, x1, #1
`
``
449
`+
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
`
``
450
`+
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
`
``
451
`+
; CHECK-NONEON-NEXT: and x10, x10, #0x5555555555555555
`
``
452
`+
; CHECK-NONEON-NEXT: sub x9, x0, x9
`
``
453
`+
; CHECK-NONEON-NEXT: sub x10, x1, x10
`
``
454
`+
; CHECK-NONEON-NEXT: mov x1, xzr
`
``
455
`+
; CHECK-NONEON-NEXT: lsr x11, x9, #2
`
``
456
`+
; CHECK-NONEON-NEXT: lsr x12, x10, #2
`
``
457
`+
; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
`
``
458
`+
; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
`
``
459
`+
; CHECK-NONEON-NEXT: and x11, x11, #0x3333333333333333
`
``
460
`+
; CHECK-NONEON-NEXT: add x9, x9, x11
`
``
461
`+
; CHECK-NONEON-NEXT: and x11, x12, #0x3333333333333333
`
``
462
`+
; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
`
``
463
`+
; CHECK-NONEON-NEXT: add x10, x10, x11
`
``
464
`+
; CHECK-NONEON-NEXT: add x10, x10, x10, lsr #4
`
``
465
`+
; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
`
``
466
`+
; CHECK-NONEON-NEXT: mul x9, x9, x8
`
``
467
`+
; CHECK-NONEON-NEXT: and x10, x10, #0xf0f0f0f0f0f0f0f
`
``
468
`+
; CHECK-NONEON-NEXT: mul x8, x10, x8
`
``
469
`+
; CHECK-NONEON-NEXT: lsr x9, x9, #56
`
``
470
`+
; CHECK-NONEON-NEXT: add x0, x9, x8, lsr #56
`
``
471
`+
; CHECK-NONEON-NEXT: ret
`
``
472
`+
;
`
``
473
`+
; CHECK-CSSC-LABEL: cnt128:
`
``
474
`+
; CHECK-CSSC: // %bb.0:
`
``
475
`+
; CHECK-CSSC-NEXT: cnt x8, x1
`
``
476
`+
; CHECK-CSSC-NEXT: cnt x9, x0
`
``
477
`+
; CHECK-CSSC-NEXT: mov x1, xzr
`
``
478
`+
; CHECK-CSSC-NEXT: add x0, x9, x8
`
``
479
`+
; CHECK-CSSC-NEXT: ret
`
``
480
`+
;
; NOTE(review): in the big-endian sequence below the second rev64 executes
; after addv has written the byte sum to b0, which looks like it would move
; the count out of the low byte before fmov x0, d0 reads it. Presumably this
; records current llc output as-is; confirm the BE lowering is intended.
`
``
481
`+
; CHECK-BE-LABEL: cnt128:
`
``
482
`+
; CHECK-BE: // %bb.0:
`
``
483
`+
; CHECK-BE-NEXT: fmov d0, x0
`
``
484
`+
; CHECK-BE-NEXT: mov v0.d[1], x1
`
``
485
`+
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
`
``
486
`+
; CHECK-BE-NEXT: cnt v0.16b, v0.16b
`
``
487
`+
; CHECK-BE-NEXT: addv b0, v0.16b
`
``
488
`+
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
`
``
489
`+
; CHECK-BE-NEXT: mov x1, v0.d[1]
`
``
490
`+
; CHECK-BE-NEXT: fmov x0, d0
`
``
491
`+
; CHECK-BE-NEXT: ret
`
``
492
`+
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
`
``
493
`+
ret i128 %cnt
`
``
494
`+
}
`
``
495
+
335
496
`declare i32 @llvm.ctpop.i32(i32) nounwind readnone
`
336
497
`declare i64 @llvm.ctpop.i64(i64) nounwind readnone
`