[AArch64] Add BE test coverage for popcount. NFC · llvm/llvm-project@05be3ca (original) (raw)

```diff
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
 ; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s
+; RUN: llc < %s -mtriple=aarch64_be-none-eabi | FileCheck %s --check-prefix=CHECK-BE
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-LABEL: cnt32_advsimd:
```

```diff
@@ -32,6 +33,14 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK-CSSC: // %bb.0:
 ; CHECK-CSSC-NEXT: cnt w0, w0
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov s0, w0
+; CHECK-BE-NEXT: cnt v0.8b, v0.8b
+; CHECK-BE-NEXT: addv b0, v0.8b
+; CHECK-BE-NEXT: fmov w0, s0
+; CHECK-BE-NEXT: ret
 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
 ret i32 %cnt
 }
```

```diff
@@ -69,6 +78,16 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-CSSC-NEXT: fmov w8, s0
 ; CHECK-CSSC-NEXT: cnt w0, w8
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt32_advsimd_2:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT: fmov w8, s0
+; CHECK-BE-NEXT: fmov s0, w8
+; CHECK-BE-NEXT: cnt v0.8b, v0.8b
+; CHECK-BE-NEXT: addv b0, v0.8b
+; CHECK-BE-NEXT: fmov w0, s0
+; CHECK-BE-NEXT: ret
 %1 = extractelement <2 x i32> %x, i64 0
 %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
 ret i32 %2
```

```diff
@@ -103,6 +122,16 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK-CSSC: // %bb.0:
 ; CHECK-CSSC-NEXT: cnt x0, x0
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt64_advsimd:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov d0, x0
+; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT: cnt v0.8b, v0.8b
+; CHECK-BE-NEXT: addv b0, v0.8b
+; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT: fmov x0, d0
+; CHECK-BE-NEXT: ret
 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
 ret i64 %cnt
 }
```

```diff
@@ -147,6 +176,22 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
 ; CHECK-CSSC: // %bb.0:
 ; CHECK-CSSC-NEXT: cnt w0, w0
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: lsr w9, w0, #1
+; CHECK-BE-NEXT: mov w8, #16843009 // =0x1010101
+; CHECK-BE-NEXT: and w9, w9, #0x55555555
+; CHECK-BE-NEXT: sub w9, w0, w9
+; CHECK-BE-NEXT: lsr w10, w9, #2
+; CHECK-BE-NEXT: and w9, w9, #0x33333333
+; CHECK-BE-NEXT: and w10, w10, #0x33333333
+; CHECK-BE-NEXT: add w9, w9, w10
+; CHECK-BE-NEXT: add w9, w9, w9, lsr #4
+; CHECK-BE-NEXT: and w9, w9, #0xf0f0f0f
+; CHECK-BE-NEXT: mul w8, w9, w8
+; CHECK-BE-NEXT: lsr w0, w8, #24
+; CHECK-BE-NEXT: ret
 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
 ret i32 %cnt
 }
```

```diff
@@ -188,6 +233,22 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
 ; CHECK-CSSC: // %bb.0:
 ; CHECK-CSSC-NEXT: cnt x0, x0
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: lsr x9, x0, #1
+; CHECK-BE-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-BE-NEXT: and x9, x9, #0x5555555555555555
+; CHECK-BE-NEXT: sub x9, x0, x9
+; CHECK-BE-NEXT: lsr x10, x9, #2
+; CHECK-BE-NEXT: and x9, x9, #0x3333333333333333
+; CHECK-BE-NEXT: and x10, x10, #0x3333333333333333
+; CHECK-BE-NEXT: add x9, x9, x10
+; CHECK-BE-NEXT: add x9, x9, x9, lsr #4
+; CHECK-BE-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
+; CHECK-BE-NEXT: mul x8, x9, x8
+; CHECK-BE-NEXT: lsr x0, x8, #56
+; CHECK-BE-NEXT: ret
 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
 ret i64 %cnt
 }
```

```diff
@@ -215,6 +276,14 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT: cmp x8, #1
 ; CHECK-CSSC-NEXT: cset w0, eq
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: ctpop_eq_one:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub x8, x0, #1
+; CHECK-BE-NEXT: eor x9, x0, x8
+; CHECK-BE-NEXT: cmp x9, x8
+; CHECK-BE-NEXT: cset w0, hi
+; CHECK-BE-NEXT: ret
 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
 %cmp = icmp eq i64 %count, 1
 %conv = zext i1 %cmp to i32
```

```diff
@@ -244,6 +313,14 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT: cmp x8, #1
 ; CHECK-CSSC-NEXT: cset w0, ne
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: ctpop_ne_one:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub x8, x0, #1
+; CHECK-BE-NEXT: eor x9, x0, x8
+; CHECK-BE-NEXT: cmp x9, x8
+; CHECK-BE-NEXT: cset w0, ls
+; CHECK-BE-NEXT: ret
 %count = tail call i64 @llvm.ctpop.i64(i64 %x)
 %cmp = icmp ne i64 %count, 1
 %conv = zext i1 %cmp to i32
```

```diff
@@ -273,6 +350,14 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
 ; CHECK-CSSC-NEXT: cmp w8, #1
 ; CHECK-CSSC-NEXT: cset w0, ne
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: ctpop32_ne_one:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub w8, w0, #1
+; CHECK-BE-NEXT: eor w9, w0, w8
+; CHECK-BE-NEXT: cmp w9, w8
+; CHECK-BE-NEXT: cset w0, ls
+; CHECK-BE-NEXT: ret
 %count = tail call i32 @llvm.ctpop.i32(i32 %x)
 %cmp = icmp ne i32 %count, 1
 ret i1 %cmp
```

```diff
@@ -299,6 +384,13 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
 ; CHECK-CSSC-NEXT: tst w0, w8
 ; CHECK-CSSC-NEXT: cset w0, eq
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: ctpop32_eq_one_nonzero:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: sub w8, w0, #1
+; CHECK-BE-NEXT: tst w0, w8
+; CHECK-BE-NEXT: cset w0, eq
+; CHECK-BE-NEXT: ret
 entry:
 %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
 %cmp = icmp eq i32 %popcnt, 1
```

```diff
@@ -326,11 +418,80 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
 ; CHECK-CSSC-NEXT: tst w0, w8
 ; CHECK-CSSC-NEXT: cset w0, ne
 ; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: ctpop32_ne_one_nonzero:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: sub w8, w0, #1
+; CHECK-BE-NEXT: tst w0, w8
+; CHECK-BE-NEXT: cset w0, ne
+; CHECK-BE-NEXT: ret
 entry:
 %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
 %cmp = icmp ne i32 %popcnt, 1
 ret i1 %cmp
 }
 
+define i128 @cnt128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cnt128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov.d v0[1], x1
+; CHECK-NEXT: cnt.16b v0, v0
+; CHECK-NEXT: addv.16b b0, v0
+; CHECK-NEXT: mov.d x1, v0[1]
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+; CHECK-NONEON-LABEL: cnt128:
+; CHECK-NONEON: // %bb.0:
+; CHECK-NONEON-NEXT: lsr x9, x0, #1
+; CHECK-NONEON-NEXT: lsr x10, x1, #1
+; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
+; CHECK-NONEON-NEXT: and x10, x10, #0x5555555555555555
+; CHECK-NONEON-NEXT: sub x9, x0, x9
+; CHECK-NONEON-NEXT: sub x10, x1, x10
+; CHECK-NONEON-NEXT: mov x1, xzr
+; CHECK-NONEON-NEXT: lsr x11, x9, #2
+; CHECK-NONEON-NEXT: lsr x12, x10, #2
+; CHECK-NONEON-NEXT: and x9, x9, #0x3333333333333333
+; CHECK-NONEON-NEXT: and x10, x10, #0x3333333333333333
+; CHECK-NONEON-NEXT: and x11, x11, #0x3333333333333333
+; CHECK-NONEON-NEXT: add x9, x9, x11
+; CHECK-NONEON-NEXT: and x11, x12, #0x3333333333333333
+; CHECK-NONEON-NEXT: add x9, x9, x9, lsr #4
+; CHECK-NONEON-NEXT: add x10, x10, x11
+; CHECK-NONEON-NEXT: add x10, x10, x10, lsr #4
+; CHECK-NONEON-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT: mul x9, x9, x8
+; CHECK-NONEON-NEXT: and x10, x10, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON-NEXT: mul x8, x10, x8
+; CHECK-NONEON-NEXT: lsr x9, x9, #56
+; CHECK-NONEON-NEXT: add x0, x9, x8, lsr #56
+; CHECK-NONEON-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cnt128:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: cnt x8, x1
+; CHECK-CSSC-NEXT: cnt x9, x0
+; CHECK-CSSC-NEXT: mov x1, xzr
+; CHECK-CSSC-NEXT: add x0, x9, x8
+; CHECK-CSSC-NEXT: ret
+;
+; CHECK-BE-LABEL: cnt128:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov d0, x0
+; CHECK-BE-NEXT: mov v0.d[1], x1
+; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT: cnt v0.16b, v0.16b
+; CHECK-BE-NEXT: addv b0, v0.16b
+; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
+; CHECK-BE-NEXT: mov x1, v0.d[1]
+; CHECK-BE-NEXT: fmov x0, d0
+; CHECK-BE-NEXT: ret
+%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
+ret i128 %cnt
+}
+
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
```