[ARM] Speedups for CombineBaseUpdate. (#129725) · llvm/llvm-project@d6d1dbf (original) (raw)

`@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,

`

149

149

` cl::desc("Maximum interleave factor for MVE VLDn to generate."),

`

150

150

` cl::init(2));

`

151

151

``

``

152

`+

cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(

`

``

153

`+

"arm-max-base-updates-to-check", cl::Hidden,

`

``

154

`+

cl::desc("Maximum number of base-updates to check generating postindex."),

`

``

155

`+

cl::init(64));

`

``

156

+

152

157

`/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).

`

153

158

`constexpr MVT FlagsVT = MVT::i32;

`

154

159

``

`@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {

`

15842

15847

` unsigned ConstInc;

`

15843

15848

`};

`

15844

15849

``

``

15850

`+

static bool isValidBaseUpdate(SDNode *N, SDNode *User) {

`

``

15851

`+

// Check that the add is independent of the load/store.

`

``

15852

`+

// Otherwise, folding it would create a cycle. Search through Addr

`

``

15853

`+

// as well, since the User may not be a direct user of Addr and

`

``

15854

`+

// only share a base pointer.

`

``

15855

`+

SmallPtrSet<const SDNode *, 32> Visited;

`

``

15856

`+

SmallVector<const SDNode *, 16> Worklist;

`

``

15857

`+

Worklist.push_back(N);

`

``

15858

`+

Worklist.push_back(User);

`

``

15859

`+

const unsigned MaxSteps = 1024;

`

``

15860

`+

if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||

`

``

15861

`+

SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))

`

``

15862

`+

return false;

`

``

15863

`+

return true;

`

``

15864

`+

}

`

``

15865

+

15845

15866

`static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,

`

15846

15867

` struct BaseUpdateUser &User,

`

15847

15868

` bool SimpleConstIncOnly,

`

`@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,

`

16043

16064

` if (SimpleConstIncOnly && User.ConstInc != NumBytes)

`

16044

16065

` return false;

`

16045

16066

``

``

16067

`+

if (!isValidBaseUpdate(N, User.N))

`

``

16068

`+

return false;

`

``

16069

+

16046

16070

` // OK, we found an ADD we can fold into the base update.

`

16047

16071

` // Now, create a _UPD node, taking care of not breaking alignment.

`

16048

16072

``

`@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {

`

16191

16215

` }

`

16192

16216

`}

`

16193

16217

``

16194

``

`-

static bool isValidBaseUpdate(SDNode *N, SDNode *User) {

`

16195

``

`-

// Check that the add is independent of the load/store.

`

16196

``

`-

// Otherwise, folding it would create a cycle. Search through Addr

`

16197

``

`-

// as well, since the User may not be a direct user of Addr and

`

16198

``

`-

// only share a base pointer.

`

16199

``

`-

SmallPtrSet<const SDNode *, 32> Visited;

`

16200

``

`-

SmallVector<const SDNode *, 16> Worklist;

`

16201

``

`-

Worklist.push_back(N);

`

16202

``

`-

Worklist.push_back(User);

`

16203

``

`-

if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||

`

16204

``

`-

SDNode::hasPredecessorHelper(User, Visited, Worklist))

`

16205

``

`-

return false;

`

16206

``

`-

return true;

`

16207

``

`-

}

`

16208

``

-

16209

16218

`/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,

`

16210

16219

`/// NEON load/store intrinsics, and generic vector load/stores, to merge

`

16211

16220

`/// base address updates.

`

`@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,

`

16219

16228

` const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);

`

16220

16229

` BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};

`

16221

16230

``

``

16231

`+

// Limit the number of possible base-updates we look at to prevent degenerate

`

``

16232

`+

// cases.

`

``

16233

`+

unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;

`

``

16234

+

16222

16235

` SDValue Addr = N->getOperand(AddrOpIdx);

`

16223

16236

``

16224

16237

` SmallVector<BaseUpdateUser, 8> BaseUpdates;

`

`@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,

`

16233

16246

` unsigned ConstInc =

`

16234

16247

` getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);

`

16235

16248

``

16236

``

`-

if (ConstInc || User->getOpcode() == ISD::ADD)

`

``

16249

`+

if (ConstInc || User->getOpcode() == ISD::ADD) {

`

16237

16250

` BaseUpdates.push_back({User, Inc, ConstInc});

`

``

16251

`+

if (BaseUpdates.size() >= MaxBaseUpdates)

`

``

16252

`+

break;

`

``

16253

`+

}

`

16238

16254

` }

`

16239

16255

``

16240

16256

` // If the address is a constant pointer increment itself, find

`

`@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,

`

16261

16277

` unsigned NewConstInc = UserOffset - Offset;

`

16262

16278

` SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);

`

16263

16279

` BaseUpdates.push_back({User, NewInc, NewConstInc});

`

``

16280

`+

if (BaseUpdates.size() >= MaxBaseUpdates)

`

``

16281

`+

break;

`

16264

16282

` }

`

16265

16283

` }

`

16266

16284

``

16267

16285

` // Try to fold the load/store with an update that matches memory

`

16268

16286

` // access size. This should work well for sequential loads.

`

16269

``

`-

//

`

16270

``

`-

// Filter out invalid updates as well.

`

16271

16287

` unsigned NumValidUpd = BaseUpdates.size();

`

16272

``

`-

for (unsigned I = 0; I < NumValidUpd;) {

`

``

16288

`+

for (unsigned I = 0; I < NumValidUpd; I++) {

`

16273

16289

` BaseUpdateUser &User = BaseUpdates[I];

`

16274

``

`-

if (!isValidBaseUpdate(N, User.N)) {

`

16275

``

`-

--NumValidUpd;

`

16276

``

`-

std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);

`

16277

``

`-

continue;

`

16278

``

`-

}

`

16279

``

-

16280

16290

` if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))

`

16281

16291

` return SDValue();

`

16282

``

`-

++I;

`

16283

16292

` }

`

16284

``

`-

BaseUpdates.resize(NumValidUpd);

`

16285

16293

``

16286

16294

` // Try to fold with other users. Non-constant updates are considered

`

16287

16295

` // first, and constant updates are sorted to not break a sequence of

`

`@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,

`

16337

16345

` Visited.insert(Addr.getNode());

`

16338

16346

` Worklist.push_back(N);

`

16339

16347

` Worklist.push_back(User);

`

16340

``

`-

if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||

`

16341

``

`-

SDNode::hasPredecessorHelper(User, Visited, Worklist))

`

``

16348

`+

const unsigned MaxSteps = 1024;

`

``

16349

`+

if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||

`

``

16350

`+

SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))

`

16342

16351

` continue;

`

16343

16352

``

16344

16353

` // Find the new opcode for the updating load/store.

`