LLVM: lib/Support/SuffixTree.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

17

18using namespace llvm;

19

20

22 assert(N && "Got a null node?");

24 if (Internal->isRoot())

25 return 0;

26 return N->getEndIdx() - N->getStartIdx() + 1;

27}

28

32 Root = insertRoot();

33 Active.Node = Root;

34

35

36

37 unsigned SuffixesToAdd = 0;

38

39

40

41

42 for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {

43 SuffixesToAdd++;

44 LeafEndIdx = PfxEndIdx;

45 SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);

46 }

47

48

49 assert(Root && "Root node can't be nullptr!");

50 setSuffixIndices();

51

52

53

55 setLeafNodes();

56}

57

59 unsigned StartIdx, unsigned Edge) {

60 assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");

61 auto *N = new (LeafNodeAllocator.Allocate())

64 return N;

65}

66

69 unsigned StartIdx, unsigned EndIdx,

70 unsigned Edge) {

71 assert(StartIdx <= EndIdx && "String can't start after it ends!");

73 "Non-root internal nodes must have parents!");

74 auto *N = new (InternalNodeAllocator.Allocate())

76 if (Parent)

78 return N;

79}

80

84}

85

86void SuffixTree::setSuffixIndices() {

87

88

90

91

92 SuffixTreeNode *CurrNode = Root;

93

94

95 unsigned CurrNodeLen = 0;

96 ToVisit.push_back({CurrNode, CurrNodeLen});

97 while (!ToVisit.empty()) {

98 std::tie(CurrNode, CurrNodeLen) = ToVisit.pop_back_val();

99

102 for (auto &ChildPair : InternalNode->Children) {

103 assert(ChildPair.second && "Node had a null child!");

105 {ChildPair.second,

107 }

108

110 LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);

111 }

112}

113

114void SuffixTree::setLeafNodes() {

115

118

119

120

121 unsigned LeafCounter = 0;

122

123

124

125 DenseMap<SuffixTreeInternalNode *,

126 std::pair<SuffixTreeNode *, SuffixTreeNode *>>

127 ChildrenMap;

128

129

130 while (!ToVisit.empty()) {

131 SuffixTreeNode *CurrNode = ToVisit.pop_back_val();

133

134 auto I = ChildrenMap.find(CurrInternalNode);

135 if (I == ChildrenMap.end()) {

136

137

138

139

140 auto J = CurrInternalNode->Children.begin();

141 if (J != CurrInternalNode->Children.end()) {

143 SuffixTreeNode *FirstChild = J->second;

144 SuffixTreeNode *LastChild = nullptr;

145 for (; J != CurrInternalNode->Children.end(); ++J) {

146 LastChild = J->second;

148 }

149 ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};

150 }

151 } else {

152

153

154

155 auto [FirstChild, LastChild] = I->second;

156

157

158

159

161

164 "LeftLeafIdx should not be larger than RightLeafIdx");

165 }

166 } else {

167

168

171 ++LeafCounter;

173 LeafNodes.push_back(CurrLeafNode);

174 }

175 }

176}

177

178unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {

179 SuffixTreeInternalNode *NeedsLink = nullptr;

180

181 while (SuffixesToAdd > 0) {

182

183

184 if (Active.Len == 0) {

185

186 Active.Idx = EndIdx;

187 }

188

189 assert(Active.Idx <= EndIdx && "Start index can't be after end index!");

190

191

192 unsigned FirstChar = Str[Active.Idx];

193

194

195 if (auto It = Active.Node->Children.find(FirstChar);

196 It == Active.Node->Children.end()) {

197

198 insertLeaf(*Active.Node, EndIdx, FirstChar);

199

200

201

202 if (NeedsLink) {

203 NeedsLink->setLink(Active.Node);

204 NeedsLink = nullptr;

205 }

206 } else {

207

208

209 SuffixTreeNode *NextNode = It->second;

210

212

213

214

215 if (Active.Len >= SubstringLen) {

216

217

219 "Expected an internal node?");

220 Active.Idx += SubstringLen;

221 Active.Len -= SubstringLen;

223 continue;

224 }

225

226

227

228 unsigned LastChar = Str[EndIdx];

229

230

231 if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {

232

233

234

235 if (NeedsLink && !Active.Node->isRoot()) {

236 NeedsLink->setLink(Active.Node);

237 NeedsLink = nullptr;

238 }

239

240 Active.Len++;

241 break;

242 }

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258 SuffixTreeInternalNode *SplitNode = insertInternalNode(

260 NextNode->getStartIdx() + Active.Len - 1, FirstChar);

261

262

263

264 insertLeaf(*SplitNode, EndIdx, LastChar);

265

266

267

270

271

272 if (NeedsLink)

273 NeedsLink->setLink(SplitNode);

274

275 NeedsLink = SplitNode;

276 }

277

278

279

280 SuffixesToAdd--;

281

282 if (Active.Node->isRoot()) {

283 if (Active.Len > 0) {

284 Active.Len--;

285 Active.Idx = EndIdx - SuffixesToAdd + 1;

286 }

287 } else {

288

289 Active.Node = Active.Node->getLink();

290 }

291 }

292

293 return SuffixesToAdd;

294}

295

296void SuffixTree::RepeatedSubstringIterator::advance() {

297

298

299 RS = RepeatedSubstring();

300 N = nullptr;

301

302

303 SmallVector RepeatedSubstringStarts;

304

305

306 while (!InternalNodesToVisit.empty()) {

307 RepeatedSubstringStarts.clear();

308 auto *Curr = InternalNodesToVisit.pop_back_val();

309

310

311

312 unsigned Length = Curr->getConcatLen();

313

314

315

316 for (auto &ChildPair : Curr->Children)

317

318 if (auto *InternalChild =

320 InternalNodesToVisit.push_back(InternalChild);

321

322

323 if (Length < MinLength)

324 continue;

325

326

327

328 if (Curr->isRoot())

329 continue;

330

331

332 if (OutlinerLeafDescendants) {

333 for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();

334 ++I)

335 RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());

336 } else {

337 for (auto &ChildPair : Curr->Children)

339 RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());

340 }

341

342

343 if (RepeatedSubstringStarts.size() < 2)

344 continue;

345

346

347 N = Curr;

350 break;

351 }

352

353

354

355}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file defines the BumpPtrAllocator interface.

static size_t numElementsInSubstring(const SuffixTreeNode *N)

Definition SuffixTree.cpp:21

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

void push_back(const T &Elt)

LLVM_ABI SuffixTree(const ArrayRef< unsigned > &Str, bool OutlinerLeafDescendants=false)

Construct a suffix tree from a sequence of unsigned integers.

Definition SuffixTree.cpp:29

ArrayRef< unsigned > Str

Each element is an integer representing an instruction in the module.

bool OutlinerLeafDescendants

Whether to consider leaf descendants or only leaf children.

This is an optimization pass for GlobalISel generic memory operations.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

DenseMap< unsigned, SuffixTreeNode * > Children

The children of this node.

void setLink(SuffixTreeInternalNode *L)

Sets Link to L. Assumes L is not null.

A node in a suffix tree which represents a substring or suffix.

LLVM_ABI void incrementStartIdx(unsigned Inc)

Advance this node's StartIdx by Inc.

LLVM_ABI void setConcatLen(unsigned Len)

Set the length of the string from the root to this node to Len.

LLVM_ABI unsigned getLeftLeafIdx() const

LLVM_ABI unsigned getRightLeafIdx() const

LLVM_ABI void setRightLeafIdx(unsigned Idx)

Set the index of the right most leaf node of this node to Idx.

static const unsigned EmptyIdx

Represents an undefined index in the suffix tree.

LLVM_ABI unsigned getStartIdx() const

LLVM_ABI void setLeftLeafIdx(unsigned Idx)

Set the index of the left most leaf node of this node to Idx.