LLVM: lib/Target/ARM/ARMLatencyMutations.cpp Source File

//===- ARMLatencyMutations.cpp - ARM Latency Mutations -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the ARM definition DAG scheduling mutations
/// which change inter-instruction latencies.
//
//===----------------------------------------------------------------------===//

#include "ARMLatencyMutations.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <algorithm>
#include <array>
#include <initializer_list>
#include <memory>

namespace llvm {

namespace {
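// Records per-opcode properties that the per-core overrides below query when
// adjusting latencies: addressing-mode shape, multiply/divide/shift class,
// MVE integer MAC membership, and which FP/vector register widths an
// instruction produces or consumes.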

class InstructionInformation {
protected:
  struct IInfo {
    bool HasBRegAddr : 1;
    bool HasBRegAddrShift : 1;
    bool IsDivide : 1;
    bool IsInlineShiftALU : 1;
    bool IsMultiply : 1;
    bool IsMVEIntMAC : 1;
    bool IsNonSubwordLoad : 1;
    bool IsShift : 1;
    bool IsRev : 1;
    bool ProducesQP : 1;
    bool ProducesDP : 1;
    bool ProducesSP : 1;
    bool ConsumesQP : 1;
    bool ConsumesDP : 1;
    bool ConsumesSP : 1;
    unsigned MVEIntMACMatched;
    unsigned AddressOpMask;
    IInfo()
        : HasBRegAddr(false), HasBRegAddrShift(false), IsDivide(false),
          IsInlineShiftALU(false), IsMultiply(false), IsMVEIntMAC(false),
          IsNonSubwordLoad(false), IsShift(false), IsRev(false),
          ProducesQP(false), ProducesDP(false), ProducesSP(false),
          ConsumesQP(false), ConsumesDP(false), ConsumesSP(false),
          MVEIntMACMatched(0), AddressOpMask(0) {}
  };
  typedef std::array<IInfo, ARM::INSTRUCTION_LIST_END> IInfoArray;
  IInfoArray Info;

public:
  unsigned getAddressOpMask(unsigned Op) { return Info[Op].AddressOpMask; }
  bool hasBRegAddr(unsigned Op) { return Info[Op].HasBRegAddr; }
  bool hasBRegAddrShift(unsigned Op) { return Info[Op].HasBRegAddrShift; }
  bool isDivide(unsigned Op) { return Info[Op].IsDivide; }
  bool isInlineShiftALU(unsigned Op) { return Info[Op].IsInlineShiftALU; }
  bool isMultiply(unsigned Op) { return Info[Op].IsMultiply; }
  bool isMVEIntMAC(unsigned Op) { return Info[Op].IsMVEIntMAC; }
  bool isNonSubwordLoad(unsigned Op) { return Info[Op].IsNonSubwordLoad; }
  bool isRev(unsigned Op) { return Info[Op].IsRev; }
  bool isShift(unsigned Op) { return Info[Op].IsShift; }

  bool producesQP(unsigned Op) { return Info[Op].ProducesQP; }
  bool producesDP(unsigned Op) { return Info[Op].ProducesDP; }
  bool producesSP(unsigned Op) { return Info[Op].ProducesSP; }
  bool consumesQP(unsigned Op) { return Info[Op].ConsumesQP; }
  bool consumesDP(unsigned Op) { return Info[Op].ConsumesDP; }
  bool consumesSP(unsigned Op) { return Info[Op].ConsumesSP; }

  bool isMVEIntMACMatched(unsigned SrcOp, unsigned DstOp) {
    return SrcOp == DstOp || Info[DstOp].MVEIntMACMatched == SrcOp;
  }

  InstructionInformation(const ARMBaseInstrInfo *TII);

protected:
  void markDPProducersConsumers(const ARMBaseInstrInfo *TII);
};
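// Populate the opcode tables.  Most properties come from static lists of
// Thumb2 and MVE opcodes; AddressOpMask is derived from the Address1/2/3
// lists at the end of the constructor.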

InstructionInformation::InstructionInformation(const ARMBaseInstrInfo *TII) {
  using namespace ARM;

  std::initializer_list<unsigned> hasBRegAddrList = {
      t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
      tLDRr,  tLDRBr,  tLDRHr,  tSTRr,  tSTRBr,  tSTRHr,
  };
  for (auto op : hasBRegAddrList) {
    Info[op].HasBRegAddr = true;
  }

  std::initializer_list<unsigned> hasBRegAddrShiftList = {
      t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
  };
  for (auto op : hasBRegAddrShiftList) {
    Info[op].HasBRegAddrShift = true;
  }

  Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true;

  std::initializer_list<unsigned> isInlineShiftALUList = {
      t2ADCrs,  t2ADDSrs, t2ADDrs,  t2BICrs, t2EORrs,
      t2ORNrs,  t2RSBSrs, t2RSBrs,  t2SBCrs, t2SUBrs,
      t2SUBSrs, t2CMPrs,  t2CMNzrs, t2TEQrs, t2TSTrs,
  };
  for (auto op : isInlineShiftALUList) {
    Info[op].IsInlineShiftALU = true;
  }

  std::initializer_list<unsigned> isMultiplyList = {
      t2MUL,    t2MLA,     t2MLS,     t2SMLABB, t2SMLABT,  t2SMLAD,   t2SMLADX,
      t2SMLAL,  t2SMLALBB, t2SMLALBT, t2SMLALD, t2SMLALDX, t2SMLALTB, t2SMLALTT,
      t2SMLATB, t2SMLATT,  t2SMLAWT,  t2SMLSD,  t2SMLSDX,  t2SMLSLD,  t2SMLSLDX,
      t2SMMLA,  t2SMMLAR,  t2SMMLS,   t2SMMLSR, t2SMMUL,   t2SMMULR,  t2SMUAD,
      t2SMUADX, t2SMULBB,  t2SMULBT,  t2SMULL,  t2SMULTB,  t2SMULTT,  t2SMULWT,
      t2SMUSD,  t2SMUSDX,  t2UMAAL,   t2UMLAL,  t2UMULL,   tMUL,
  };
  for (auto op : isMultiplyList) {
    Info[op].IsMultiply = true;
  }

  std::initializer_list<unsigned> isMVEIntMACList = {
      MVE_VMLAS_qr_i16,    MVE_VMLAS_qr_i32,    MVE_VMLAS_qr_i8,
      MVE_VMLA_qr_i16,     MVE_VMLA_qr_i32,     MVE_VMLA_qr_i8,
      MVE_VQDMLAH_qrs16,   MVE_VQDMLAH_qrs32,   MVE_VQDMLAH_qrs8,
      MVE_VQDMLASH_qrs16,  MVE_VQDMLASH_qrs32,  MVE_VQDMLASH_qrs8,
      MVE_VQRDMLAH_qrs16,  MVE_VQRDMLAH_qrs32,  MVE_VQRDMLAH_qrs8,
      MVE_VQRDMLASH_qrs16, MVE_VQRDMLASH_qrs32, MVE_VQRDMLASH_qrs8,
      MVE_VQDMLADHXs16,    MVE_VQDMLADHXs32,    MVE_VQDMLADHXs8,
      MVE_VQDMLADHs16,     MVE_VQDMLADHs32,     MVE_VQDMLADHs8,
      MVE_VQDMLSDHXs16,    MVE_VQDMLSDHXs32,    MVE_VQDMLSDHXs8,
      MVE_VQDMLSDHs16,     MVE_VQDMLSDHs32,     MVE_VQDMLSDHs8,
      MVE_VQRDMLADHXs16,   MVE_VQRDMLADHXs32,   MVE_VQRDMLADHXs8,
      MVE_VQRDMLADHs16,    MVE_VQRDMLADHs32,    MVE_VQRDMLADHs8,
      MVE_VQRDMLSDHXs16,   MVE_VQRDMLSDHXs32,   MVE_VQRDMLSDHXs8,
      MVE_VQRDMLSDHs16,    MVE_VQRDMLSDHs32,    MVE_VQRDMLSDHs8,
  };
  for (auto op : isMVEIntMACList) {
    Info[op].IsMVEIntMAC = true;
  }

  std::initializer_list<unsigned> isNonSubwordLoadList = {
      t2LDRi12, t2LDRi8,  t2LDR_POST,  t2LDR_PRE,  t2LDRpci,
      t2LDRs,   t2LDRDi8, t2LDRD_POST, t2LDRD_PRE, tLDRi,
      tLDRpci,  tLDRr,    tLDRspi,
  };
  for (auto op : isNonSubwordLoadList) {
    Info[op].IsNonSubwordLoad = true;
  }

  std::initializer_list<unsigned> isRevList = {
      t2REV, t2REV16, t2REVSH, t2RBIT, tREV, tREV16, tREVSH,
  };
  for (auto op : isRevList) {
    Info[op].IsRev = true;
  }

  std::initializer_list<unsigned> isShiftList = {
      t2ASRri, t2ASRrr, t2LSLri, t2LSLrr, t2LSRri, t2LSRrr, t2RORri, t2RORrr,
      tASRri,  tASRrr,  tLSLSri, tLSLri,  tLSLrr,  tLSRri,  tLSRrr,  tROR,
  };
  for (auto op : isShiftList) {
    Info[op].IsShift = true;
  }

  std::initializer_list<unsigned> Address1List = {
      t2LDRBi12,  t2LDRBi8,   t2LDRBpci,  t2LDRBs,    t2LDRHi12,  t2LDRHi8,
      t2LDRHpci,  t2LDRHs,    t2LDRSBi12, t2LDRSBi8,  t2LDRSBpci, t2LDRSBs,
      t2LDRSHi12, t2LDRSHi8,  t2LDRSHpci, t2LDRSHs,   t2LDRi12,   t2LDRi8,
      t2LDRpci,   t2LDRs,     tLDRBi,     tLDRBr,     tLDRHi,     tLDRHr,
      tLDRSB,     tLDRSH,     tLDRi,      tLDRpci,    tLDRr,      tLDRspi,
      t2STRBi12,  t2STRBi8,   t2STRBs,    t2STRHi12,  t2STRHi8,   t2STRHs,
      t2STRi12,   t2STRi8,    t2STRs,     tSTRBi,     tSTRBr,     tSTRHi,
      tSTRHr,     tSTRi,      tSTRr,      tSTRspi,    VLDRD,      VLDRH,
      VLDRS,      VSTRD,      VSTRH,      VSTRS,
      MVE_VLD20_16,      MVE_VLD20_32,      MVE_VLD20_8,
      MVE_VLD21_16,      MVE_VLD21_32,      MVE_VLD21_8,
      MVE_VLD40_16,      MVE_VLD40_32,      MVE_VLD40_8,
      MVE_VLD41_16,      MVE_VLD41_32,      MVE_VLD41_8,
      MVE_VLD42_16,      MVE_VLD42_32,      MVE_VLD42_8,
      MVE_VLD43_16,      MVE_VLD43_32,      MVE_VLD43_8,
      MVE_VLDRBS16,      MVE_VLDRBS16_rq,   MVE_VLDRBS32,
      MVE_VLDRBS32_rq,   MVE_VLDRBU16,      MVE_VLDRBU16_rq,
      MVE_VLDRBU32,      MVE_VLDRBU32_rq,   MVE_VLDRBU8,
      MVE_VLDRBU8_rq,    MVE_VLDRDU64_qi,   MVE_VLDRDU64_rq,
      MVE_VLDRDU64_rq_u, MVE_VLDRHS32,      MVE_VLDRHS32_rq,
      MVE_VLDRHS32_rq_u, MVE_VLDRHU16,      MVE_VLDRHU16_rq,
      MVE_VLDRHU16_rq_u, MVE_VLDRHU32,      MVE_VLDRHU32_rq,
      MVE_VLDRHU32_rq_u, MVE_VLDRWU32,      MVE_VLDRWU32_qi,
      MVE_VLDRWU32_rq,   MVE_VLDRWU32_rq_u,
      MVE_VST20_16,      MVE_VST20_32,      MVE_VST20_8,
      MVE_VST21_16,      MVE_VST21_32,      MVE_VST21_8,
      MVE_VST40_16,      MVE_VST40_32,      MVE_VST40_8,
      MVE_VST41_16,      MVE_VST41_32,      MVE_VST41_8,
      MVE_VST42_16,      MVE_VST42_32,      MVE_VST42_8,
      MVE_VST43_16,      MVE_VST43_32,      MVE_VST43_8,
      MVE_VSTRB16,       MVE_VSTRB16_rq,    MVE_VSTRB32,
      MVE_VSTRB32_rq,    MVE_VSTRBU8,       MVE_VSTRB8_rq,
      MVE_VSTRD64_qi,    MVE_VSTRD64_rq,    MVE_VSTRD64_rq_u,
      MVE_VSTRH32,       MVE_VSTRH32_rq,    MVE_VSTRH32_rq_u,
      MVE_VSTRHU16,      MVE_VSTRH16_rq,    MVE_VSTRH16_rq_u,
      MVE_VSTRWU32,      MVE_VSTRW32_qi,    MVE_VSTRW32_rq,
      MVE_VSTRW32_rq_u,
  };
  std::initializer_list<unsigned> Address2List = {
      t2LDRB_POST,        t2LDRB_PRE,        t2LDRDi8,
      t2LDRH_POST,        t2LDRH_PRE,        t2LDRSB_POST,
      t2LDRSB_PRE,        t2LDRSH_POST,      t2LDRSH_PRE,
      t2LDR_POST,         t2LDR_PRE,         t2STRB_POST,
      t2STRB_PRE,         t2STRDi8,          t2STRH_POST,
      t2STRH_PRE,         t2STR_POST,        t2STR_PRE,
      MVE_VLD20_16_wb,    MVE_VLD20_32_wb,   MVE_VLD20_8_wb,
      MVE_VLD21_16_wb,    MVE_VLD21_32_wb,   MVE_VLD21_8_wb,
      MVE_VLD40_16_wb,    MVE_VLD40_32_wb,   MVE_VLD40_8_wb,
      MVE_VLD41_16_wb,    MVE_VLD41_32_wb,   MVE_VLD41_8_wb,
      MVE_VLD42_16_wb,    MVE_VLD42_32_wb,   MVE_VLD42_8_wb,
      MVE_VLD43_16_wb,    MVE_VLD43_32_wb,   MVE_VLD43_8_wb,
      MVE_VLDRBS16_post,  MVE_VLDRBS16_pre,  MVE_VLDRBS32_post,
      MVE_VLDRBS32_pre,   MVE_VLDRBU16_post, MVE_VLDRBU16_pre,
      MVE_VLDRBU32_post,  MVE_VLDRBU32_pre,  MVE_VLDRBU8_post,
      MVE_VLDRBU8_pre,    MVE_VLDRDU64_qi_pre,
      MVE_VLDRHS32_post,  MVE_VLDRHS32_pre,  MVE_VLDRHU16_post,
      MVE_VLDRHU16_pre,   MVE_VLDRHU32_post, MVE_VLDRHU32_pre,
      MVE_VLDRWU32_post,  MVE_VLDRWU32_pre,  MVE_VLDRWU32_qi_pre,
      MVE_VST20_16_wb,    MVE_VST20_32_wb,   MVE_VST20_8_wb,
      MVE_VST21_16_wb,    MVE_VST21_32_wb,   MVE_VST21_8_wb,
      MVE_VST40_16_wb,    MVE_VST40_32_wb,   MVE_VST40_8_wb,
      MVE_VST41_16_wb,    MVE_VST41_32_wb,   MVE_VST41_8_wb,
      MVE_VST42_16_wb,    MVE_VST42_32_wb,   MVE_VST42_8_wb,
      MVE_VST43_16_wb,    MVE_VST43_32_wb,   MVE_VST43_8_wb,
      MVE_VSTRB16_post,   MVE_VSTRB16_pre,   MVE_VSTRB32_post,
      MVE_VSTRB32_pre,    MVE_VSTRBU8_post,  MVE_VSTRBU8_pre,
      MVE_VSTRD64_qi_pre, MVE_VSTRH32_post,  MVE_VSTRH32_pre,
      MVE_VSTRHU16_post,  MVE_VSTRHU16_pre,  MVE_VSTRWU32_post,
      MVE_VSTRWU32_pre,   MVE_VSTRW32_qi_pre,
  };
  std::initializer_list<unsigned> Address3List = {
      t2LDRD_POST, t2LDRD_PRE, t2STRD_POST, t2STRD_PRE,
  };
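  // AddressOpMask is a bitmask over MachineInstr operand indices: bit i is
  // set when operand i participates in address generation.  0x6 covers
  // operands 1-2, 0xc covers 2-3 (pre/post-indexed forms carry an extra
  // write-back def), and 0x18 covers 3-4 (write-back load/store-double);
  // register-shifted forms also add their shift-amount operand (bit 3).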

  for (auto &op : Address1List) {
    Info[op].AddressOpMask = 0x6;
  }
  for (auto &op : Address2List) {
    Info[op].AddressOpMask = 0xc;
  }
  for (auto &op : Address3List) {
    Info[op].AddressOpMask = 0x18;
  }
  for (auto &op : hasBRegAddrShiftList) {
    Info[op].AddressOpMask |= 0x8;
  }
}
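// Scan every opcode's operand list and classify it as a producer and/or
// consumer of single (S), double (D), or quad (Q) precision registers,
// judged purely by the declared operand register classes.  Only the M85
// tables invoke this, to drive the mixed-width FP adjustments below.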

void InstructionInformation::markDPProducersConsumers(
    const ARMBaseInstrInfo *TII) {

  for (unsigned MI = 0; MI < ARM::INSTRUCTION_LIST_END; ++MI) {
    const MCInstrDesc &MID = TII->get(MI);
    auto Operands = MID.operands();
    for (unsigned OI = 0, OIE = MID.getNumOperands(); OI != OIE; ++OI) {
      bool MarkQP = false, MarkDP = false, MarkSP = false;
      switch (Operands[OI].RegClass) {
      case ARM::MQPRRegClassID:
      case ARM::DPRRegClassID:
      case ARM::DPR_8RegClassID:
      case ARM::DPR_VFP2RegClassID:
      case ARM::DPairRegClassID:
      case ARM::DPairSpcRegClassID:
      case ARM::DQuadRegClassID:
      case ARM::DQuadSpcRegClassID:
      case ARM::DTripleRegClassID:
      case ARM::DTripleSpcRegClassID:
        MarkDP = true;
        break;
      case ARM::QPRRegClassID:
      case ARM::QPR_8RegClassID:
      case ARM::QPR_VFP2RegClassID:
      case ARM::QQPRRegClassID:
      case ARM::QQQQPRRegClassID:
        MarkQP = true;
        break;
      case ARM::SPRRegClassID:
      case ARM::SPR_8RegClassID:
      case ARM::FPWithVPRRegClassID:
        MarkSP = true;
        break;
      default:
        break;
      }
      if (MarkQP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesQP = true;
        else
          Info[MI].ConsumesQP = true;
      }
      if (MarkDP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesDP = true;
        else
          Info[MI].ConsumesDP = true;
      }
      if (MarkSP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesSP = true;
        else
          Info[MI].ConsumesSP = true;
      }
    }
  }
}

} // namespace
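// Latency edges are recorded twice in the DAG: once on the producer's
// successor list and once on the consumer's predecessor list.
// setBidirLatencies updates both copies and marks the cached depth/height
// dirty so the scheduler recomputes critical paths.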

static bool hasImplicitCPSRUse(const MachineInstr *MI) {
  return MI->getDesc().hasImplicitUseOfPhysReg(ARM::CPSR);
}

static void setBidirLatencies(SUnit &SrcSU, SDep &SrcDep,
                              unsigned latency) {
  SDep Reverse = SrcDep;
  Reverse.setSUnit(&SrcSU);
  for (SDep &PDep : SrcDep.getSUnit()->Preds) {
    if (PDep == Reverse) {
      PDep.setLatency(latency);
      SrcDep.getSUnit()->setDepthDirty();
      break;
    }
  }
  SrcDep.setLatency(latency);
  SrcSU.setHeightDirty();
}
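// ARMCC encodes a condition and its inverse as adjacent codes differing
// only in bit 0 (EQ/NE, HS/LO, ...), so masking with 0xe treats a
// predicate and its complement as "matching" for IT-block purposes.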

static bool mismatchedPred(ARMCC::CondCodes a, ARMCC::CondCodes b) {
  return (a & 0xe) != (b & 0xe);
}
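// WAW edges are dropped to zero latency: these cores can issue two writers
// of the same register back to back, presumably because the older result
// is never forwarded.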

static bool zeroOutputDependences(SUnit &ISU, SDep &Dep) {
  if (Dep.getKind() == SDep::Output) {
    setBidirLatencies(ISU, Dep, 0);
    return true;
  }
  return false;
}
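// The scheduling DAG does not look inside BUNDLE instructions (used for IT
// blocks), so conservative assumptions are made for edges into or out of a
// predicated bundle; the return value tells callers which case, if any,
// was handled.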

unsigned ARMOverrideBypasses::makeBundleAssumptions(SUnit &ISU, SDep &Dep) {

  SUnit &DepSU = *Dep.getSUnit();
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  const MachineInstr *DstMI = DepSU.getInstr();
  unsigned DstOpcode = DstMI->getOpcode();

  if (DstOpcode == ARM::BUNDLE && TII->isPredicated(*DstMI)) {
    setBidirLatencies(
        ISU, Dep,
        std::max(Dep.getLatency(), hasImplicitCPSRUse(SrcMI) ? 2u : 1u));
    return 1;
  }
  if (SrcOpcode == ARM::BUNDLE && TII->isPredicated(*SrcMI) &&
      Dep.isAssignedRegDep() && Dep.getReg() != ARM::CPSR) {
    setBidirLatencies(ISU, Dep, std::max(Dep.getLatency(), 2u));
    return 2;
  }
  return 0;
}
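// Store-to-load forwarding: when a store and a following load demonstrably
// access the same location (must-alias IR values at the same offset, or
// the same fixed stack slot for spills and fills), pin the edge to the
// given forwarding latency.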

bool ARMOverrideBypasses::memoryRAWHazard(SUnit &ISU, SDep &Dep,
                                          unsigned latency) {
  if (!Dep.isNormalMemory())
    return false;
  auto &SrcInst = *ISU.getInstr();
  auto &DstInst = *Dep.getSUnit()->getInstr();
  if (!SrcInst.mayStore() || !DstInst.mayLoad())
    return false;

  auto SrcMO = *SrcInst.memoperands().begin();
  auto DstMO = *DstInst.memoperands().begin();
  auto SrcVal = SrcMO->getValue();
  auto DstVal = DstMO->getValue();
  auto SrcPseudoVal = SrcMO->getPseudoValue();
  auto DstPseudoVal = DstMO->getPseudoValue();
  if (SrcVal && DstVal && AA->alias(SrcVal, DstVal) == AliasResult::MustAlias &&
      SrcMO->getOffset() == DstMO->getOffset()) {
    setBidirLatencies(ISU, Dep, latency);
    return true;
  } else if (SrcPseudoVal && DstPseudoVal &&
             SrcPseudoVal->kind() == DstPseudoVal->kind() &&
             SrcPseudoVal->kind() == PseudoSourceValue::FixedStack) {
    // Spills and fills of the same fixed stack slot.
    auto FS0 = cast<FixedStackPseudoSourceValue>(SrcPseudoVal)->getFrameIndex();
    auto FS1 = cast<FixedStackPseudoSourceValue>(DstPseudoVal)->getFrameIndex();
    if (FS0 == FS1) {
      setBidirLatencies(ISU, Dep, latency);
      return true;
    }
  }
  return false;
}

namespace {

std::unique_ptr<InstructionInformation> II;

class CortexM7InstructionInformation : public InstructionInformation {
public:
  CortexM7InstructionInformation(const ARMBaseInstrInfo *TII)
      : InstructionInformation(TII) {}
};

class CortexM7Overrides : public ARMOverrideBypasses {
public:
  CortexM7Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
      : ARMOverrideBypasses(TII, AA) {
    if (!II)
      II.reset(new CortexM7InstructionInformation(TII));
  }

  void modifyBypasses(SUnit &) override;
};
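// Cortex-M7 specific latency overrides, applied to every successor edge of
// each node: load-to-multiply/divide and load-to-address penalties,
// multiply-to-address-generation stalls, predication mismatches, and
// shortened REV-to-shift bypasses.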

void CortexM7Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  for (SDep &Dep : ISU.Succs) {

    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 4))
      continue;

    // Only data dependences are adjusted beyond this point.
    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads feeding a multiply or divide pay an extra cycle.
    if (isNSWload && (II->isMultiply(DstOpcode) || II->isDivide(DstOpcode)))
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Word loads feeding the B-side (offset register) of a load/store
    // address also pay an extra cycle.
    if (isNSWload && II->hasBRegAddr(DstOpcode) &&
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Multiplies feeding any address-generation operand cannot use the
    // late bypass.
    if (II->isMultiply(SrcOpcode)) {
      unsigned OpMask = II->getAddressOpMask(DstOpcode) >> 1;
      for (unsigned i = 1; OpMask; ++i, OpMask >>= 1) {
        if ((OpMask & 1) && DstMI->getOperand(i).isReg() &&
            DstMI->getOperand(i).getReg() == Dep.getReg()) {
          setBidirLatencies(ISU, Dep, 4);
          break;
        }
      }
    }

    // Producer/consumer pairs under mismatched predicates forward later.
    if (TII->isPredicated(*SrcMI) && Dep.isAssignedRegDep() &&
        (SrcOpcode == ARM::BUNDLE ||
         mismatchedPred(TII->getPredicate(*SrcMI),
                        TII->getPredicate(*DstMI)))) {
      unsigned Lat = 1;
      // Operand A of an inline shift+ALU is needed even earlier.
      if (II->isInlineShiftALU(DstOpcode) && DstMI->getOperand(3).getImm() &&
          DstMI->getOperand(1).getReg() == Dep.getReg())
        Lat = 2;
      Lat = std::min(3u, Dep.getLatency() + Lat);
      setBidirLatencies(ISU, Dep, std::max(Dep.getLatency(), Lat));
    }

    // Flag-setting into a predicated consumer resolves in a single cycle.
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI))
      setBidirLatencies(ISU, Dep, 1);

    // REV feeding a shift (or inline shift+ALU) gets a shortened bypass.
    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 2);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}

class M85InstructionInformation : public InstructionInformation {
public:
  M85InstructionInformation(const ARMBaseInstrInfo *t)
      : InstructionInformation(t) {
    markDPProducersConsumers(t);
  }
};

class M85Overrides : public ARMOverrideBypasses {
public:
  M85Overrides(const ARMBaseInstrInfo *t, AAResults *a)
      : ARMOverrideBypasses(t, a) {
    if (!II)
      II.reset(new M85InstructionInformation(t));
  }

  void modifyBypasses(SUnit &) override;

private:
  unsigned computeBypassStage(const MCSchedClassDesc *SCD);
  signed modifyMixedWidthFP(const MachineInstr *SrcMI,
                            const MachineInstr *DstMI, unsigned RegID,
                            const MCSchedClassDesc *SCD);
};
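// Map a scheduling class's write latency to the pipeline stage in which
// its result first becomes available; used below to price cross-width FP
// register forwarding.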

unsigned M85Overrides::computeBypassStage(const MCSchedClassDesc *SCDesc) {
  auto SM = DAG->getSchedModel();
  unsigned DefIdx = 0;
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Look up the write latency of the instruction's first definition.
    const MCWriteLatencyEntry *WLEntry =
        SM->getSubtargetInfo()->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned Latency = WLEntry->Cycles >= 0 ? WLEntry->Cycles : 1000;
    if (Latency == 1)
      return 2;
    else if (Latency == 2)
      return 3;
    else if (Latency == 3)
      return 3;
    else
      return 4;
  }
  return 2;
}
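// Compute a latency adjustment (positive or negative) for an FP def-use
// edge whose producer and consumer disagree on register width (S vs. D
// vs. Q).  Virtual registers are matched through sub-register indices;
// physical registers are matched by computing the S/D/Q overlap
// arithmetically from the register numbers.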

signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI,
                                        const MachineInstr *DstMI,
                                        unsigned RegID,
                                        const MCSchedClassDesc *SCD) {

  if (!II->producesSP(SrcMI->getOpcode()) &&
      !II->producesDP(SrcMI->getOpcode()) &&
      !II->producesQP(SrcMI->getOpcode()))
    return 0;

  if (Register::isVirtualRegister(RegID)) {
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          return 5 - computeBypassStage(SCD) -
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          return 5 - computeBypassStage(SCD) +
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 1;
    }
  } else if (Register::isPhysicalRegister(RegID)) {
    // Physical registers: detect the odd S/D halves and their containing
    // D/Q registers by register-number arithmetic.
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD) -
                 (((OP.getReg() - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::D0 &&
            OP.getReg() <= ARM::D15 && (OP.getReg() - ARM::D0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::D0) / 2 + ARM::Q0 == RegID))
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD) +
               (((RegID - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      if (RegID >= ARM::D1 && RegID <= ARM::D15 && (RegID - ARM::D0) % 2)
        return 1;
    }
  }
  return 0;
}
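// Cortex-M85 specific latency overrides: shifted-address and MVE load
// penalties, chained MVE integer MACs that forward a cycle early,
// flag-to-predicated-consumer forwarding, mixed-width FP adjustments, and
// REV-to-shift bypasses.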

void M85Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  for (SDep &Dep : ISU.Succs) {

    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 3))
      continue;

    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads feeding the shifted base register of a register-offset
    // load/store address pay an extra cycle.
    if (isNSWload && II->hasBRegAddrShift(DstOpcode) &&
        DstMI->getOperand(3).getImm() != 0 &&
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    if (isNSWload && isMVEVectorInstruction(DstMI)) {
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
    }

    // Chained MVE integer MACs of a matching flavor forward a cycle early
    // into the accumulator.
    if (II->isMVEIntMAC(DstOpcode) &&
        II->isMVEIntMACMatched(SrcOpcode, DstOpcode) &&
        DstMI->getOperand(0).isReg() &&
        DstMI->getOperand(0).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() - 1);

    // Flags forward into a predicated consumer with no extra delay.
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI))
      setBidirLatencies(ISU, Dep, 0);

    if (signed ALat = modifyMixedWidthFP(SrcMI, DstMI, Dep.getReg(),
                                         DAG->getSchedClass(&ISU)))
      setBidirLatencies(ISU, Dep, std::max(0, signed(Dep.getLatency()) + ALat));

    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}
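// Cortex-M55 adjusts a single pattern: an MVE horizontal reduction feeding
// a following MVE vector instruction settles at a fixed latency of 3.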

class CortexM55Overrides : public ARMOverrideBypasses {
public:
  CortexM55Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
      : ARMOverrideBypasses(TII, AA) {}

  void modifyBypasses(SUnit &SU) override {
    MachineInstr *SrcMI = SU.getInstr();
    if (!(SrcMI->getDesc().TSFlags & ARMII::HorizontalReduction))
      return;

    for (SDep &Dep : SU.Succs) {
      if (Dep.getKind() != SDep::Data)
        continue;
      SUnit &DepSU = *Dep.getSUnit();
      if (DepSU.isBoundaryNode())
        continue;
      MachineInstr *DstMI = DepSU.getInstr();

      if (isMVEVectorInstruction(DstMI))
        setBidirLatencies(SU, Dep, 3);
    }
  }
};

} // namespace
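// Entry point invoked by the scheduler: stash the DAG, then let the
// subtarget-specific modifyBypasses rewrite the edges of every real
// scheduling unit, plus the region's ExitSU when it wraps an instruction.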

void ARMOverrideBypasses::apply(ScheduleDAGInstrs *DAGInstrs) {
  DAG = DAGInstrs;
  for (SUnit &ISU : DAGInstrs->SUnits) {
    if (ISU.isBoundaryNode())
      continue;
    modifyBypasses(ISU);
  }
  if (DAGInstrs->ExitSU.getInstr())
    modifyBypasses(DAGInstrs->ExitSU);
}

std::unique_ptr<ScheduleDAGMutation>
createARMLatencyMutations(const ARMSubtarget &ST, AAResults *AA) {
  if (ST.isCortexM85())
    return std::make_unique<M85Overrides>(ST.getInstrInfo(), AA);
  else if (ST.isCortexM7())
    return std::make_unique<CortexM7Overrides>(ST.getInstrInfo(), AA);
  else if (ST.isCortexM55())
    return std::make_unique<CortexM55Overrides>(ST.getInstrInfo(), AA);

  return nullptr;
}

} // namespace llvm
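A minimal usage sketch, not part of the original file: the mutation returned by createARMLatencyMutations() is meant to be attached to a scheduling DAG through the generic ScheduleDAGMI::addMutation() hook. The wrapper function below is hypothetical and only illustrates the call pattern.

// Hypothetical helper showing how a machine-scheduler setup could attach
// the ARM latency mutation (illustration only, not ARM's actual code).
static void addARMLatencyMutations(const ARMSubtarget &ST, AAResults *AA,
                                   llvm::ScheduleDAGMI *DAG) {
  // createARMLatencyMutations() returns nullptr for subtargets without
  // bypass overrides, so the result must be checked before registering.
  if (auto Mutation = llvm::createARMLatencyMutations(ST, AA))
    DAG->addMutation(std::move(Mutation));
}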
