LLVM: lib/Target/X86/X86VZeroUpper.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

36#include

37

38using namespace llvm;

39

40#define DEBUG_TYPE "x86-vzeroupper"

41

44 cl::desc("Minimize AVX to SSE transition penalty"),

46

47STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");

48

49namespace {

50

52 public:

54

55 bool runOnMachineFunction(MachineFunction &MF) override;

56

57 MachineFunctionProperties getRequiredProperties() const override {

58 return MachineFunctionProperties().setNoVRegs();

59 }

60

61 StringRef getPassName() const override { return "X86 vzeroupper inserter"; }

62

63 private:

64 void processBasicBlock(MachineBasicBlock &MBB);

66 MachineBasicBlock &MBB);

67 void addDirtySuccessor(MachineBasicBlock &MBB);

68

69 enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };

70

71 static const char* getBlockExitStateName(BlockExitState ST);

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91 struct BlockState {

92 BlockExitState ExitState = PASS_THROUGH;

93 bool AddedToDirtySuccessors = false;

95

96 BlockState() = default;

97 };

98

99 using BlockStateMap = SmallVector<BlockState, 8>;

100 using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>;

101

102 BlockStateMap BlockStates;

103 DirtySuccessorsWorkList DirtySuccessors;

104 bool EverMadeChange;

105 bool IsX86INTR;

106 const TargetInstrInfo *TII;

107

108 static char ID;

109 };

110

111}

112

113char VZeroUpperInserter::ID = 0;

114

116 return new VZeroUpperInserter();

117}

118

119#ifndef NDEBUG

120const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {

121 switch (ST) {

122 case PASS_THROUGH: return "Pass-through";

123 case EXITS_DIRTY: return "Exits-dirty";

124 case EXITS_CLEAN: return "Exits-clean";

125 }

127}

128#endif

129

130

131

133 return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||

134 (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);

135}

136

138 for (std::pair<MCRegister, Register> LI : MRI.liveins())

140 return true;

141

142 return false;

143}

144

146 for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {

148 return false;

149 }

150 for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {

152 return false;

153 }

154 return true;

155}

156

160 return true;

161 if (!MO.isReg())

162 continue;

163 if (MO.isDebug())

164 continue;

166 return true;

167 }

168 return false;

169}

170

171

173 assert(MI.isCall() && "Can only be called on call instructions.");

175 if (MO.isRegMask())

176 return true;

177 }

178 return false;

179}

180

181

183 MachineBasicBlock &MBB) {

184 BuildMI(MBB, I, I->getDebugLoc(), TII->get(X86::VZEROUPPER));

185 ++NumVZU;

186 EverMadeChange = true;

187}

188

189

190void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {

191 if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {

193 BlockStates[MBB.getNumber()].AddedToDirtySuccessors = true;

194 }

195}

196

197

198

199void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {

200

201

202 BlockExitState CurState = PASS_THROUGH;

204

205 for (MachineInstr &MI : MBB) {

206 bool IsCall = MI.isCall();

207 bool IsReturn = MI.isReturn();

208 bool IsControlFlow = IsCall || IsReturn;

209

210

211

212 if (IsX86INTR && IsReturn)

213 continue;

214

215

216 if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {

217 CurState = EXITS_CLEAN;

218 continue;

219 }

220

221

222 if (!IsControlFlow && CurState == EXITS_DIRTY)

223 continue;

224

226

227

228 CurState = EXITS_DIRTY;

229 continue;

230 }

231

232

233

234 if (!IsControlFlow)

235 continue;

236

237

238

239

240

241

243 continue;

244

245

246

247

248

249

250

251

252 if (CurState == EXITS_DIRTY) {

253

254

255

256 insertVZeroUpper(MI, MBB);

257 CurState = EXITS_CLEAN;

258 } else if (CurState == PASS_THROUGH) {

259

260

261

262

263

264 BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;

265 CurState = EXITS_CLEAN;

266 }

267 }

268

270 << getBlockExitStateName(CurState) << '\n');

271

272 if (CurState == EXITS_DIRTY)

274 addDirtySuccessor(*Succ);

275

276 BlockStates[MBB.getNumber()].ExitState = CurState;

277}

278

279

280

281bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {

283 return false;

284

285 const X86Subtarget &ST = MF.getSubtarget();

286 if (ST.hasAVX() || ST.insertVZEROUPPER())

287 return false;

288 TII = ST.getInstrInfo();

290 EverMadeChange = false;

292

294

295

296

297

298 bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;

299 for (const auto *RC : {&X86::VR256RegClass, &X86::VR512_0_15RegClass}) {

300 if (!YmmOrZmmUsed) {

302 if (MRI.reg_nodbg_empty(R)) {

303 YmmOrZmmUsed = true;

304 break;

305 }

306 }

307 }

308 }

309 if (!YmmOrZmmUsed)

310 return false;

311

313 "X86VZeroUpper state should be clear");

315

316

317

318

319 for (MachineBasicBlock &MBB : MF)

320 processBasicBlock(MBB);

321

322

323

324 if (FnHasLiveInYmmOrZmm)

325 addDirtySuccessor(MF.front());

326

327

328

329

330 while (!DirtySuccessors.empty()) {

331 MachineBasicBlock &MBB = *DirtySuccessors.back();

333 BlockState &BBState = BlockStates[MBB.getNumber()];

334

335

336

337 if (BBState.FirstUnguardedCall != MBB.end())

338 insertVZeroUpper(BBState.FirstUnguardedCall, MBB);

339

340

341

342

343 if (BBState.ExitState == PASS_THROUGH) {

345 << " was Pass-through, is now Dirty-out.\n");

347 addDirtySuccessor(*Succ);

348 }

349 }

350

351 BlockStates.clear();

352 return EverMadeChange;

353}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const HexagonInstrInfo * TII

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static bool callHasRegMask(MachineInstr &MI)

Check if given call instruction has a RegMask operand.

Definition X86VZeroUpper.cpp:172

static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI)

Definition X86VZeroUpper.cpp:137

static bool hasYmmOrZmmReg(MachineInstr &MI)

Definition X86VZeroUpper.cpp:157

static bool isYmmOrZmmReg(MCRegister Reg)

VZEROUPPER cleans state that is related to Y/ZMM0-15 only.

Definition X86VZeroUpper.cpp:132

static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO)

Definition X86VZeroUpper.cpp:145

static cl::opt< bool > UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true))

FunctionPass class - This class is used to implement most global optimizations.

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

Wrapper class representing physical registers. Should be passed by value.

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

iterator_range< succ_iterator > successors()

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

unsigned getNumBlockIDs() const

getNumBlockIDs - Return the number of MBB ID's allocated.

Representation of each machine instruction.

MachineOperand class - Representation of each machine instruction operand.

static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)

clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

void push_back(const T &Elt)

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

FunctionPass * createX86IssueVZeroUpperPass()

This pass inserts AVX vzeroupper instructions before each call to avoid transition penalty between fu...

Definition X86VZeroUpper.cpp:115

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...