[ARM] Speedups for CombineBaseUpdate. (#129725) · llvm/llvm-project@d6d1dbf (original) (raw)
`@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
`
149
149
` cl::desc("Maximum interleave factor for MVE VLDn to generate."),
`
150
150
` cl::init(2));
`
151
151
``
``
152
`+
cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
`
``
153
`+
"arm-max-base-updates-to-check", cl::Hidden,
`
``
154
`+
cl::desc("Maximum number of base-updates to check generating postindex."),
`
``
155
`+
cl::init(64));
`
``
156
+
152
157
`/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
`
153
158
`constexpr MVT FlagsVT = MVT::i32;
`
154
159
``
`@@ -15842,6 +15847,22 @@ struct BaseUpdateUser {
`
15842
15847
` unsigned ConstInc;
`
15843
15848
`};
`
15844
15849
``
``
15850
`+
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
`
``
15851
`+
// Check that the add is independent of the load/store.
`
``
15852
`+
// Otherwise, folding it would create a cycle. Search through Addr
`
``
15853
`+
// as well, since the User may not be a direct user of Addr and
`
``
15854
`+
// only share a base pointer.
`
``
15855
`+
SmallPtrSet<const SDNode *, 32> Visited;
`
``
15856
`+
SmallVector<const SDNode *, 16> Worklist;
`
``
15857
`+
Worklist.push_back(N);
`
``
15858
`+
Worklist.push_back(User);
`
``
15859
`+
const unsigned MaxSteps = 1024;
`
``
15860
`+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
`
``
15861
`+
SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
`
``
15862
`+
return false;
`
``
15863
`+
return true;
`
``
15864
`+
}
`
``
15865
+
15845
15866
`static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
`
15846
15867
` struct BaseUpdateUser &User,
`
15847
15868
` bool SimpleConstIncOnly,
`
`@@ -16043,6 +16064,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
`
16043
16064
` if (SimpleConstIncOnly && User.ConstInc != NumBytes)
`
16044
16065
` return false;
`
16045
16066
``
``
16067
`+
if (!isValidBaseUpdate(N, User.N))
`
``
16068
`+
return false;
`
``
16069
+
16046
16070
` // OK, we found an ADD we can fold into the base update.
`
16047
16071
` // Now, create a _UPD node, taking care of not breaking alignment.
`
16048
16072
``
`@@ -16191,21 +16215,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
`
16191
16215
` }
`
16192
16216
`}
`
16193
16217
``
16194
``
`-
static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
`
16195
``
`-
// Check that the add is independent of the load/store.
`
16196
``
`-
// Otherwise, folding it would create a cycle. Search through Addr
`
16197
``
`-
// as well, since the User may not be a direct user of Addr and
`
16198
``
`-
// only share a base pointer.
`
16199
``
`-
SmallPtrSet<const SDNode *, 32> Visited;
`
16200
``
`-
SmallVector<const SDNode *, 16> Worklist;
`
16201
``
`-
Worklist.push_back(N);
`
16202
``
`-
Worklist.push_back(User);
`
16203
``
`-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
`
16204
``
`-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
`
16205
``
`-
return false;
`
16206
``
`-
return true;
`
16207
``
`-
}
`
16208
``
-
16209
16218
`/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
`
16210
16219
`/// NEON load/store intrinsics, and generic vector load/stores, to merge
`
16211
16220
`/// base address updates.
`
`@@ -16219,6 +16228,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
`
16219
16228
` const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
`
16220
16229
` BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
`
16221
16230
``
``
16231
`+
// Limit the number of possible base-updates we look at to prevent degenerate
`
``
16232
`+
// cases.
`
``
16233
`+
unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
`
``
16234
+
16222
16235
` SDValue Addr = N->getOperand(AddrOpIdx);
`
16223
16236
``
16224
16237
` SmallVector<BaseUpdateUser, 8> BaseUpdates;
`
`@@ -16233,8 +16246,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
`
16233
16246
` unsigned ConstInc =
`
16234
16247
` getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
`
16235
16248
``
16236
``
`-
if (ConstInc || User->getOpcode() == ISD::ADD)
`
``
16249
`+
if (ConstInc || User->getOpcode() == ISD::ADD) {
`
16237
16250
` BaseUpdates.push_back({User, Inc, ConstInc});
`
``
16251
`+
if (BaseUpdates.size() >= MaxBaseUpdates)
`
``
16252
`+
break;
`
``
16253
`+
}
`
16238
16254
` }
`
16239
16255
``
16240
16256
` // If the address is a constant pointer increment itself, find
`
`@@ -16261,27 +16277,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
`
16261
16277
` unsigned NewConstInc = UserOffset - Offset;
`
16262
16278
` SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
`
16263
16279
` BaseUpdates.push_back({User, NewInc, NewConstInc});
`
``
16280
`+
if (BaseUpdates.size() >= MaxBaseUpdates)
`
``
16281
`+
break;
`
16264
16282
` }
`
16265
16283
` }
`
16266
16284
``
16267
16285
` // Try to fold the load/store with an update that matches memory
`
16268
16286
` // access size. This should work well for sequential loads.
`
16269
``
`-
//
`
16270
``
`-
// Filter out invalid updates as well.
`
16271
16287
` unsigned NumValidUpd = BaseUpdates.size();
`
16272
``
`-
for (unsigned I = 0; I < NumValidUpd;) {
`
``
16288
`+
for (unsigned I = 0; I < NumValidUpd; I++) {
`
16273
16289
` BaseUpdateUser &User = BaseUpdates[I];
`
16274
``
`-
if (!isValidBaseUpdate(N, User.N)) {
`
16275
``
`-
--NumValidUpd;
`
16276
``
`-
std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
`
16277
``
`-
continue;
`
16278
``
`-
}
`
16279
``
-
16280
16290
` if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
`
16281
16291
` return SDValue();
`
16282
``
`-
++I;
`
16283
16292
` }
`
16284
``
`-
BaseUpdates.resize(NumValidUpd);
`
16285
16293
``
16286
16294
` // Try to fold with other users. Non-constant updates are considered
`
16287
16295
` // first, and constant updates are sorted to not break a sequence of
`
`@@ -16337,8 +16345,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
`
16337
16345
` Visited.insert(Addr.getNode());
`
16338
16346
` Worklist.push_back(N);
`
16339
16347
` Worklist.push_back(User);
`
16340
``
`-
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
`
16341
``
`-
SDNode::hasPredecessorHelper(User, Visited, Worklist))
`
``
16348
`+
const unsigned MaxSteps = 1024;
`
``
16349
`+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
`
``
16350
`+
SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
`
16342
16351
` continue;
`
16343
16352
``
16344
16353
` // Find the new opcode for the updating load/store.
`