LLVM: lib/Support/SuffixTree.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
17
18using namespace llvm;
19
20
22 assert(N && "Got a null node?");
23 if (auto *Internal = dyn_cast(N))
24 if (Internal->isRoot())
25 return 0;
26 return N->getEndIdx() - N->getStartIdx() + 1;
27}
28
32 Root = insertRoot();
33 Active.Node = Root;
34
35
36
37 unsigned SuffixesToAdd = 0;
38
39
40
41
42 for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
43 SuffixesToAdd++;
44 LeafEndIdx = PfxEndIdx;
45 SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
46 }
47
48
49 assert(Root && "Root node can't be nullptr!");
50 setSuffixIndices();
51
52
53
55 setLeafNodes();
56}
57
59 unsigned StartIdx, unsigned Edge) {
60 assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
61 auto *N = new (LeafNodeAllocator.Allocate())
64 return N;
65}
66
69 unsigned StartIdx, unsigned EndIdx,
70 unsigned Edge) {
71 assert(StartIdx <= EndIdx && "String can't start after it ends!");
73 "Non-root internal nodes must have parents!");
74 auto *N = new (InternalNodeAllocator.Allocate())
76 if (Parent)
78 return N;
79}
80
84}
85
86void SuffixTree::setSuffixIndices() {
87
88
90
91
93
94
95 unsigned CurrNodeLen = 0;
96 ToVisit.push_back({CurrNode, CurrNodeLen});
97 while (!ToVisit.empty()) {
98 std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
100
102 if (auto *InternalNode = dyn_cast(CurrNode))
103 for (auto &ChildPair : InternalNode->Children) {
104 assert(ChildPair.second && "Node had a null child!");
106 {ChildPair.second,
108 }
109
110 if (auto *LeafNode = dyn_cast(CurrNode))
111 LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
112 }
113}
114
115void SuffixTree::setLeafNodes() {
116
119
120
121
122 unsigned LeafCounter = 0;
123
124
125
127 std::pair<SuffixTreeNode *, SuffixTreeNode *>>
128 ChildrenMap;
129
130
131 while (!ToVisit.empty()) {
133 if (auto *CurrInternalNode = dyn_cast(CurrNode)) {
134
135 auto I = ChildrenMap.find(CurrInternalNode);
136 if (I == ChildrenMap.end()) {
137
138
139
140
141 auto J = CurrInternalNode->Children.begin();
142 if (J != CurrInternalNode->Children.end()) {
146 for (; J != CurrInternalNode->Children.end(); ++J) {
147 LastChild = J->second;
149 }
150 ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
151 }
152 } else {
153
154
155
156 auto [FirstChild, LastChild] = I->second;
157
158
159
160
162
165 "LeftLeafIdx should not be larger than RightLeafIdx");
166 }
167 } else {
168
169
172 ++LeafCounter;
173 auto *CurrLeafNode = cast(CurrNode);
174 LeafNodes.push_back(CurrLeafNode);
175 }
176 }
177}
178
179unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
181
182 while (SuffixesToAdd > 0) {
183
184
185 if (Active.Len == 0) {
186
187 Active.Idx = EndIdx;
188 }
189
190 assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
191
192
193 unsigned FirstChar = Str[Active.Idx];
194
195
196 if (Active.Node->Children.count(FirstChar) == 0) {
197
198 insertLeaf(*Active.Node, EndIdx, FirstChar);
199
200
201
202 if (NeedsLink) {
203 NeedsLink->setLink(Active.Node);
204 NeedsLink = nullptr;
205 }
206 } else {
207
208
209 SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
210
212
213
214
215 if (Active.Len >= SubstringLen) {
216
217
218 assert(isa(NextNode) &&
219 "Expected an internal node?");
220 Active.Idx += SubstringLen;
221 Active.Len -= SubstringLen;
222 Active.Node = cast(NextNode);
223 continue;
224 }
225
226
227
228 unsigned LastChar = Str[EndIdx];
229
230
231 if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
232
233
234
235 if (NeedsLink && !Active.Node->isRoot()) {
236 NeedsLink->setLink(Active.Node);
237 NeedsLink = nullptr;
238 }
239
240 Active.Len++;
241 break;
242 }
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
260 NextNode->getStartIdx() + Active.Len - 1, FirstChar);
261
262
263
264 insertLeaf(*SplitNode, EndIdx, LastChar);
265
266
267
270
271
272 if (NeedsLink)
273 NeedsLink->setLink(SplitNode);
274
275 NeedsLink = SplitNode;
276 }
277
278
279
280 SuffixesToAdd--;
281
282 if (Active.Node->isRoot()) {
283 if (Active.Len > 0) {
284 Active.Len--;
285 Active.Idx = EndIdx - SuffixesToAdd + 1;
286 }
287 } else {
288
289 Active.Node = Active.Node->getLink();
290 }
291 }
292
293 return SuffixesToAdd;
294}
295
296void SuffixTree::RepeatedSubstringIterator::advance() {
297
298
299 RS = RepeatedSubstring();
300 N = nullptr;
301
302
304
305
306 while (!InternalNodesToVisit.empty()) {
307 RepeatedSubstringStarts.clear();
308 auto *Curr = InternalNodesToVisit.back();
309 InternalNodesToVisit.pop_back();
310
311
312
313 unsigned Length = Curr->getConcatLen();
314
315
316
317 for (auto &ChildPair : Curr->Children)
318
319 if (auto *InternalChild =
320 dyn_cast(ChildPair.second))
321 InternalNodesToVisit.push_back(InternalChild);
322
323
324 if (Length < MinLength)
325 continue;
326
327
328
329 if (Curr->isRoot())
330 continue;
331
332
333 if (OutlinerLeafDescendants) {
334 for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
335 ++I)
336 RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
337 } else {
338 for (auto &ChildPair : Curr->Children)
339 if (auto *Leaf = dyn_cast(ChildPair.second))
340 RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
341 }
342
343
344 if (RepeatedSubstringStarts.size() < 2)
345 continue;
346
347
348 N = Curr;
350 for (unsigned StartIdx : RepeatedSubstringStarts)
352 break;
353 }
354
355
356
357}
This file defines the BumpPtrAllocator interface.
static cl::opt< bool > OutlinerLeafDescendants("outliner-leaf-descendants", cl::init(true), cl::Hidden, cl::desc("Consider all leaf descendants of internal nodes of the suffix " "tree as candidates for outlining (if false, only leaf children " "are considered)"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static size_t numElementsInSubstring(const SuffixTreeNode *N)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
SuffixTree(const ArrayRef< unsigned > &Str, bool OutlinerLeafDescendants=false)
Construct a suffix tree from a sequence of unsigned integers.
ArrayRef< unsigned > Str
Each element is an integer representing an instruction in the module.
bool OutlinerLeafDescendants
Whether to consider leaf descendants or only leaf children.
This is an optimization pass for GlobalISel generic memory operations.
DenseMap< unsigned, SuffixTreeNode * > Children
The children of this node.
void setLink(SuffixTreeInternalNode *L)
Sets Link to L. Assumes L is not null.
A node in a suffix tree which represents a substring or suffix.
void incrementStartIdx(unsigned Inc)
Advance this node's StartIdx by Inc.
void setConcatLen(unsigned Len)
Set the length of the string from the root to this node to Len.
unsigned getLeftLeafIdx() const
unsigned getRightLeafIdx() const
void setRightLeafIdx(unsigned Idx)
Set the index of the right most leaf node of this node to Idx.
static const unsigned EmptyIdx
Represents an undefined index in the suffix tree.
unsigned getStartIdx() const
void setLeftLeafIdx(unsigned Idx)
Set the index of the left most leaf node of this node to Idx.
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
unsigned Length
The length of the string.