LLVM: lib/Support/SuffixTree.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
17
18using namespace llvm;
19
20
22 assert(N && "Got a null node?");
24 if (Internal->isRoot())
25 return 0;
26 return N->getEndIdx() - N->getStartIdx() + 1;
27}
28
32 Root = insertRoot();
33 Active.Node = Root;
34
35
36
37 unsigned SuffixesToAdd = 0;
38
39
40
41
42 for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
43 SuffixesToAdd++;
44 LeafEndIdx = PfxEndIdx;
45 SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
46 }
47
48
49 assert(Root && "Root node can't be nullptr!");
50 setSuffixIndices();
51
52
53
55 setLeafNodes();
56}
57
59 unsigned StartIdx, unsigned Edge) {
60 assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
61 auto *N = new (LeafNodeAllocator.Allocate())
64 return N;
65}
66
69 unsigned StartIdx, unsigned EndIdx,
70 unsigned Edge) {
71 assert(StartIdx <= EndIdx && "String can't start after it ends!");
73 "Non-root internal nodes must have parents!");
74 auto *N = new (InternalNodeAllocator.Allocate())
76 if (Parent)
78 return N;
79}
80
84}
85
86void SuffixTree::setSuffixIndices() {
87
88
90
91
92 SuffixTreeNode *CurrNode = Root;
93
94
95 unsigned CurrNodeLen = 0;
96 ToVisit.push_back({CurrNode, CurrNodeLen});
97 while (!ToVisit.empty()) {
98 std::tie(CurrNode, CurrNodeLen) = ToVisit.pop_back_val();
99
102 for (auto &ChildPair : InternalNode->Children) {
103 assert(ChildPair.second && "Node had a null child!");
105 {ChildPair.second,
107 }
108
110 LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
111 }
112}
113
114void SuffixTree::setLeafNodes() {
115
118
119
120
121 unsigned LeafCounter = 0;
122
123
124
125 DenseMap<SuffixTreeInternalNode *,
126 std::pair<SuffixTreeNode *, SuffixTreeNode *>>
127 ChildrenMap;
128
129
130 while (!ToVisit.empty()) {
131 SuffixTreeNode *CurrNode = ToVisit.pop_back_val();
133
134 auto I = ChildrenMap.find(CurrInternalNode);
135 if (I == ChildrenMap.end()) {
136
137
138
139
140 auto J = CurrInternalNode->Children.begin();
141 if (J != CurrInternalNode->Children.end()) {
143 SuffixTreeNode *FirstChild = J->second;
144 SuffixTreeNode *LastChild = nullptr;
145 for (; J != CurrInternalNode->Children.end(); ++J) {
146 LastChild = J->second;
148 }
149 ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
150 }
151 } else {
152
153
154
155 auto [FirstChild, LastChild] = I->second;
156
157
158
159
161
164 "LeftLeafIdx should not be larger than RightLeafIdx");
165 }
166 } else {
167
168
171 ++LeafCounter;
173 LeafNodes.push_back(CurrLeafNode);
174 }
175 }
176}
177
178unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
179 SuffixTreeInternalNode *NeedsLink = nullptr;
180
181 while (SuffixesToAdd > 0) {
182
183
184 if (Active.Len == 0) {
185
186 Active.Idx = EndIdx;
187 }
188
189 assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
190
191
192 unsigned FirstChar = Str[Active.Idx];
193
194
195 if (auto It = Active.Node->Children.find(FirstChar);
196 It == Active.Node->Children.end()) {
197
198 insertLeaf(*Active.Node, EndIdx, FirstChar);
199
200
201
202 if (NeedsLink) {
203 NeedsLink->setLink(Active.Node);
204 NeedsLink = nullptr;
205 }
206 } else {
207
208
209 SuffixTreeNode *NextNode = It->second;
210
212
213
214
215 if (Active.Len >= SubstringLen) {
216
217
219 "Expected an internal node?");
220 Active.Idx += SubstringLen;
221 Active.Len -= SubstringLen;
223 continue;
224 }
225
226
227
228 unsigned LastChar = Str[EndIdx];
229
230
231 if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
232
233
234
235 if (NeedsLink && !Active.Node->isRoot()) {
236 NeedsLink->setLink(Active.Node);
237 NeedsLink = nullptr;
238 }
239
240 Active.Len++;
241 break;
242 }
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258 SuffixTreeInternalNode *SplitNode = insertInternalNode(
260 NextNode->getStartIdx() + Active.Len - 1, FirstChar);
261
262
263
264 insertLeaf(*SplitNode, EndIdx, LastChar);
265
266
267
270
271
272 if (NeedsLink)
273 NeedsLink->setLink(SplitNode);
274
275 NeedsLink = SplitNode;
276 }
277
278
279
280 SuffixesToAdd--;
281
282 if (Active.Node->isRoot()) {
283 if (Active.Len > 0) {
284 Active.Len--;
285 Active.Idx = EndIdx - SuffixesToAdd + 1;
286 }
287 } else {
288
289 Active.Node = Active.Node->getLink();
290 }
291 }
292
293 return SuffixesToAdd;
294}
295
296void SuffixTree::RepeatedSubstringIterator::advance() {
297
298
299 RS = RepeatedSubstring();
300 N = nullptr;
301
302
303 SmallVector RepeatedSubstringStarts;
304
305
306 while (!InternalNodesToVisit.empty()) {
307 RepeatedSubstringStarts.clear();
308 auto *Curr = InternalNodesToVisit.pop_back_val();
309
310
311
312 unsigned Length = Curr->getConcatLen();
313
314
315
316 for (auto &ChildPair : Curr->Children)
317
318 if (auto *InternalChild =
320 InternalNodesToVisit.push_back(InternalChild);
321
322
323 if (Length < MinLength)
324 continue;
325
326
327
328 if (Curr->isRoot())
329 continue;
330
331
332 if (OutlinerLeafDescendants) {
333 for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
334 ++I)
335 RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
336 } else {
337 for (auto &ChildPair : Curr->Children)
339 RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
340 }
341
342
343 if (RepeatedSubstringStarts.size() < 2)
344 continue;
345
346
347 N = Curr;
350 break;
351 }
352
353
354
355}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static size_t numElementsInSubstring(const SuffixTreeNode *N)
Definition SuffixTree.cpp:21
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void push_back(const T &Elt)
LLVM_ABI SuffixTree(const ArrayRef< unsigned > &Str, bool OutlinerLeafDescendants=false)
Construct a suffix tree from a sequence of unsigned integers.
Definition SuffixTree.cpp:29
ArrayRef< unsigned > Str
Each element is an integer representing an instruction in the module.
bool OutlinerLeafDescendants
Whether to consider leaf descendants or only leaf children.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
DenseMap< unsigned, SuffixTreeNode * > Children
The children of this node.
void setLink(SuffixTreeInternalNode *L)
Sets Link to L. Assumes L is not null.
A node in a suffix tree which represents a substring or suffix.
LLVM_ABI void incrementStartIdx(unsigned Inc)
Advance this node's StartIdx by Inc.
LLVM_ABI void setConcatLen(unsigned Len)
Set the length of the string from the root to this node to Len.
LLVM_ABI unsigned getLeftLeafIdx() const
LLVM_ABI unsigned getRightLeafIdx() const
LLVM_ABI void setRightLeafIdx(unsigned Idx)
Set the index of the right most leaf node of this node to Idx.
static const unsigned EmptyIdx
Represents an undefined index in the suffix tree.
LLVM_ABI unsigned getStartIdx() const
LLVM_ABI void setLeftLeafIdx(unsigned Idx)
Set the index of the left most leaf node of this node to Idx.