LLVM: lib/Support/SuffixTree.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

17

18using namespace llvm;

19

20

22 assert(N && "Got a null node?");

23 if (auto *Internal = dyn_cast(N))

24 if (Internal->isRoot())

25 return 0;

26 return N->getEndIdx() - N->getStartIdx() + 1;

27}

28

32 Root = insertRoot();

33 Active.Node = Root;

34

35

36

37 unsigned SuffixesToAdd = 0;

38

39

40

41

42 for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {

43 SuffixesToAdd++;

44 LeafEndIdx = PfxEndIdx;

45 SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);

46 }

47

48

49 assert(Root && "Root node can't be nullptr!");

50 setSuffixIndices();

51

52

53

55 setLeafNodes();

56}

57

59 unsigned StartIdx, unsigned Edge) {

60 assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");

61 auto *N = new (LeafNodeAllocator.Allocate())

64 return N;

65}

66

69 unsigned StartIdx, unsigned EndIdx,

70 unsigned Edge) {

71 assert(StartIdx <= EndIdx && "String can't start after it ends!");

73 "Non-root internal nodes must have parents!");

74 auto *N = new (InternalNodeAllocator.Allocate())

76 if (Parent)

78 return N;

79}

80

84}

85

86void SuffixTree::setSuffixIndices() {

87

88

90

91

93

94

95 unsigned CurrNodeLen = 0;

96 ToVisit.push_back({CurrNode, CurrNodeLen});

97 while (!ToVisit.empty()) {

98 std::tie(CurrNode, CurrNodeLen) = ToVisit.back();

100

102 if (auto *InternalNode = dyn_cast(CurrNode))

103 for (auto &ChildPair : InternalNode->Children) {

104 assert(ChildPair.second && "Node had a null child!");

106 {ChildPair.second,

108 }

109

110 if (auto *LeafNode = dyn_cast(CurrNode))

111 LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);

112 }

113}

114

115void SuffixTree::setLeafNodes() {

116

119

120

121

122 unsigned LeafCounter = 0;

123

124

125

127 std::pair<SuffixTreeNode *, SuffixTreeNode *>>

128 ChildrenMap;

129

130

131 while (!ToVisit.empty()) {

133 if (auto *CurrInternalNode = dyn_cast(CurrNode)) {

134

135 auto I = ChildrenMap.find(CurrInternalNode);

136 if (I == ChildrenMap.end()) {

137

138

139

140

141 auto J = CurrInternalNode->Children.begin();

142 if (J != CurrInternalNode->Children.end()) {

146 for (; J != CurrInternalNode->Children.end(); ++J) {

147 LastChild = J->second;

149 }

150 ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};

151 }

152 } else {

153

154

155

156 auto [FirstChild, LastChild] = I->second;

157

158

159

160

162

165 "LeftLeafIdx should not be larger than RightLeafIdx");

166 }

167 } else {

168

169

172 ++LeafCounter;

173 auto *CurrLeafNode = cast(CurrNode);

174 LeafNodes.push_back(CurrLeafNode);

175 }

176 }

177}

178

179unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {

181

182 while (SuffixesToAdd > 0) {

183

184

185 if (Active.Len == 0) {

186

187 Active.Idx = EndIdx;

188 }

189

190 assert(Active.Idx <= EndIdx && "Start index can't be after end index!");

191

192

193 unsigned FirstChar = Str[Active.Idx];

194

195

196 if (Active.Node->Children.count(FirstChar) == 0) {

197

198 insertLeaf(*Active.Node, EndIdx, FirstChar);

199

200

201

202 if (NeedsLink) {

203 NeedsLink->setLink(Active.Node);

204 NeedsLink = nullptr;

205 }

206 } else {

207

208

209 SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];

210

212

213

214

215 if (Active.Len >= SubstringLen) {

216

217

218 assert(isa(NextNode) &&

219 "Expected an internal node?");

220 Active.Idx += SubstringLen;

221 Active.Len -= SubstringLen;

222 Active.Node = cast(NextNode);

223 continue;

224 }

225

226

227

228 unsigned LastChar = Str[EndIdx];

229

230

231 if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {

232

233

234

235 if (NeedsLink && !Active.Node->isRoot()) {

236 NeedsLink->setLink(Active.Node);

237 NeedsLink = nullptr;

238 }

239

240 Active.Len++;

241 break;

242 }

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

260 NextNode->getStartIdx() + Active.Len - 1, FirstChar);

261

262

263

264 insertLeaf(*SplitNode, EndIdx, LastChar);

265

266

267

270

271

272 if (NeedsLink)

273 NeedsLink->setLink(SplitNode);

274

275 NeedsLink = SplitNode;

276 }

277

278

279

280 SuffixesToAdd--;

281

282 if (Active.Node->isRoot()) {

283 if (Active.Len > 0) {

284 Active.Len--;

285 Active.Idx = EndIdx - SuffixesToAdd + 1;

286 }

287 } else {

288

289 Active.Node = Active.Node->getLink();

290 }

291 }

292

293 return SuffixesToAdd;

294}

295

296void SuffixTree::RepeatedSubstringIterator::advance() {

297

298

299 RS = RepeatedSubstring();

300 N = nullptr;

301

302

304

305

306 while (!InternalNodesToVisit.empty()) {

307 RepeatedSubstringStarts.clear();

308 auto *Curr = InternalNodesToVisit.back();

309 InternalNodesToVisit.pop_back();

310

311

312

313 unsigned Length = Curr->getConcatLen();

314

315

316

317 for (auto &ChildPair : Curr->Children)

318

319 if (auto *InternalChild =

320 dyn_cast(ChildPair.second))

321 InternalNodesToVisit.push_back(InternalChild);

322

323

324 if (Length < MinLength)

325 continue;

326

327

328

329 if (Curr->isRoot())

330 continue;

331

332

333 if (OutlinerLeafDescendants) {

334 for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();

335 ++I)

336 RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());

337 } else {

338 for (auto &ChildPair : Curr->Children)

339 if (auto *Leaf = dyn_cast(ChildPair.second))

340 RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());

341 }

342

343

344 if (RepeatedSubstringStarts.size() < 2)

345 continue;

346

347

348 N = Curr;

350 for (unsigned StartIdx : RepeatedSubstringStarts)

352 break;

353 }

354

355

356

357}

This file defines the BumpPtrAllocator interface.

static cl::opt< bool > OutlinerLeafDescendants("outliner-leaf-descendants", cl::init(true), cl::Hidden, cl::desc("Consider all leaf descendants of internal nodes of the suffix " "tree as candidates for outlining (if false, only leaf children " "are considered)"))

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static size_t numElementsInSubstring(const SuffixTreeNode *N)

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

SuffixTree(const ArrayRef< unsigned > &Str, bool OutlinerLeafDescendants=false)

Construct a suffix tree from a sequence of unsigned integers.

ArrayRef< unsigned > Str

Each element is an integer representing an instruction in the module.

bool OutlinerLeafDescendants

Whether to consider leaf descendants or only leaf children.

This is an optimization pass for GlobalISel generic memory operations.

DenseMap< unsigned, SuffixTreeNode * > Children

The children of this node.

void setLink(SuffixTreeInternalNode *L)

Sets Link to L. Assumes L is not null.

A node in a suffix tree which represents a substring or suffix.

void incrementStartIdx(unsigned Inc)

Advance this node's StartIdx by Inc.

void setConcatLen(unsigned Len)

Set the length of the string from the root to this node to Len.

unsigned getLeftLeafIdx() const

unsigned getRightLeafIdx() const

void setRightLeafIdx(unsigned Idx)

Set the index of the right most leaf node of this node to Idx.

static const unsigned EmptyIdx

Represents an undefined index in the suffix tree.

unsigned getStartIdx() const

void setLeftLeafIdx(unsigned Idx)

Set the index of the left most leaf node of this node to Idx.

SmallVector< unsigned > StartIndices

The start indices of each occurrence.

unsigned Length

The length of the string.