LLVM: lib/Support/BalancedPartitioning.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

15#include "llvm/Config/llvm-config.h"

20

21using namespace llvm;

22#define DEBUG_TYPE "balanced-partitioning"

23

25 OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,

27}

28

29template

30void BalancedPartitioning::BPThreadPool::async(Func &&F) {

31#if LLVM_ENABLE_THREADS

32

33 ++NumActiveThreads;

34 TheThreadPool.async([this, F]() {

35

36 F();

37

38

39 if (--NumActiveThreads == 0) {

40

41 {

42 std::unique_lockstd::mutex lock(mtx);

43 assert(!IsFinishedSpawning);

44 IsFinishedSpawning = true;

45 }

46 cv.notify_one();

47 }

48 });

49#else

51#endif

52}

53

54void BalancedPartitioning::BPThreadPool::wait() {

55#if LLVM_ENABLE_THREADS

56

57

58 {

59 std::unique_lockstd::mutex lock(mtx);

60 cv.wait(lock, [&]() { return IsFinishedSpawning; });

61 assert(IsFinishedSpawning && NumActiveThreads == 0);

62 }

63

64 TheThreadPool.wait();

65#else

67#endif

68}

69

72 : Config(Config) {

73

74 Log2Cache[0] = 0.0;

75 for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)

76 Log2Cache[I] = std::log2(I);

77}

78

82 "Partitioning %d nodes using depth %d and %d iterations per split\n",

83 Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));

84 std::optional TP;

85#if LLVM_ENABLE_THREADS

87 if (Config.TaskSplitDepth > 1)

88 TP.emplace(TheThreadPool);

89#endif

90

91

92 for (unsigned I = 0; I < Nodes.size(); I++)

93 Nodes[I].InputOrderIndex = I;

94

95 auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());

96 auto BisectTask = [this, NodesRange, &TP]() {

97 bisect(NodesRange, 0, 1, 0, TP);

98 };

99 if (TP) {

100 TP->async(std::move(BisectTask));

101 TP->wait();

102 } else {

103 BisectTask();

104 }

105

107 return L.Bucket < R.Bucket;

108 });

109

110 LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");

111}

112

113void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,

114 unsigned RecDepth, unsigned RootBucket,

116 std::optional &TP) const {

117 unsigned NumNodes = llvm::size(Nodes);

118 if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {

119

120

121 llvm::sort(Nodes, [](const auto &L, const auto &R) {

122 return L.InputOrderIndex < R.InputOrderIndex;

123 });

124 for (auto &N : Nodes)

126 return;

127 }

128

130 NumNodes, RootBucket));

131

132 std::mt19937 RNG(RootBucket);

133

134 unsigned LeftBucket = 2 * RootBucket;

135 unsigned RightBucket = 2 * RootBucket + 1;

136

137

138 split(Nodes, LeftBucket);

139

140 runIterations(Nodes, LeftBucket, RightBucket, RNG);

141

142

143 auto NodesMid =

144 llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });

145 unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

146

149

150 auto LeftRecTask = [this, LeftNodes, RecDepth, LeftBucket, Offset, &TP]() {

151 bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);

152 };

153 auto RightRecTask = [this, RightNodes, RecDepth, RightBucket, MidOffset,

154 &TP]() {

155 bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);

156 };

157

158 if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {

159 TP->async(std::move(LeftRecTask));

160 TP->async(std::move(RightRecTask));

161 } else {

162 LeftRecTask();

163 RightRecTask();

164 }

165}

166

167void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,

168 unsigned LeftBucket,

169 unsigned RightBucket,

170 std::mt19937 &RNG) const {

171 unsigned NumNodes = llvm::size(Nodes);

172 DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;

173 for (auto &N : Nodes)

174 for (auto &UN : N.UtilityNodes)

175 ++UtilityNodeIndex[UN];

176

177

178 for (auto &N : Nodes)

180 unsigned UNI = UtilityNodeIndex[UN];

181 return UNI == 1 || UNI == NumNodes;

182 });

183

184

185 UtilityNodeIndex.clear();

186 for (auto &N : Nodes)

187 for (auto &UN : N.UtilityNodes)

188 UN = UtilityNodeIndex.insert({UN, UtilityNodeIndex.size()}).first->second;

189

190

191 SignaturesT Signatures(UtilityNodeIndex.size());

192 for (auto &N : Nodes) {

193 for (auto &UN : N.UtilityNodes) {

194 assert(UN < Signatures.size());

195 if (N.Bucket == LeftBucket) {

196 Signatures[UN].LeftCount++;

197 } else {

198 Signatures[UN].RightCount++;

199 }

200 }

201 }

202

203 for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {

204 unsigned NumMovedNodes =

205 runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);

206 if (NumMovedNodes == 0)

207 break;

208 }

209}

210

211unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,

212 unsigned LeftBucket,

213 unsigned RightBucket,

214 SignaturesT &Signatures,

215 std::mt19937 &RNG) const {

216

217 for (auto &Signature : Signatures) {

218 if (Signature.CachedGainIsValid)

219 continue;

220 unsigned L = Signature.LeftCount;

221 unsigned R = Signature.RightCount;

222 assert((L > 0 || R > 0) && "incorrect signature");

223 float Cost = logCost(L, R);

224 Signature.CachedGainLR = 0.f;

225 Signature.CachedGainRL = 0.f;

226 if (L > 0)

227 Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);

228 if (R > 0)

229 Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);

230 Signature.CachedGainIsValid = true;

231 }

232

233

234 using GainPair = std::pair<float, BPFunctionNode *>;

235 std::vector Gains;

236 for (auto &N : Nodes) {

237 bool FromLeftToRight = (N.Bucket == LeftBucket);

238 float Gain = moveGain(N, FromLeftToRight, Signatures);

239 Gains.push_back(std::make_pair(Gain, &N));

240 }

241

242

244 Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });

247

248

249 auto LargerGain = [](const auto &L, const auto &R) {

250 return L.first > R.first;

251 };

254

255 unsigned NumMovedDataVertices = 0;

256 for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {

257 auto &[LeftGain, LeftNode] = LeftPair;

258 auto &[RightGain, RightNode] = RightPair;

259

260 if (LeftGain + RightGain <= 0.f)

261 break;

262

263 if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))

264 ++NumMovedDataVertices;

265 if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))

266 ++NumMovedDataVertices;

267 }

268 return NumMovedDataVertices;

269}

270

271bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,

272 unsigned LeftBucket,

273 unsigned RightBucket,

274 SignaturesT &Signatures,

275 std::mt19937 &RNG) const {

276

277 if (std::uniform_real_distribution(0.f, 1.f)(RNG) <=

278 Config.SkipProbability)

279 return false;

280

281 bool FromLeftToRight = (N.Bucket == LeftBucket);

282

283 N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);

284

285

286 if (FromLeftToRight) {

287 for (auto &UN : N.UtilityNodes) {

288 auto &Signature = Signatures[UN];

289 Signature.LeftCount--;

290 Signature.RightCount++;

291 Signature.CachedGainIsValid = false;

292 }

293 } else {

294 for (auto &UN : N.UtilityNodes) {

295 auto &Signature = Signatures[UN];

296 Signature.LeftCount++;

297 Signature.RightCount--;

298 Signature.CachedGainIsValid = false;

299 }

300 }

301 return true;

302}

303

304void BalancedPartitioning::split(const FunctionNodeRange Nodes,

305 unsigned StartBucket) const {

306 unsigned NumNodes = llvm::size(Nodes);

307 auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;

308

309 llvm::sort(Nodes, [](auto &L, auto &R) {

310 return L.InputOrderIndex < R.InputOrderIndex;

311 });

312

314 N.Bucket = StartBucket;

316 N.Bucket = StartBucket + 1;

317}

318

320 bool FromLeftToRight,

321 const SignaturesT &Signatures) {

322 float Gain = 0.f;

323 for (auto &UN : N.UtilityNodes)

324 Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR

325 : Signatures[UN].CachedGainRL);

326 return Gain;

327}

328

329float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {

330 return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));

331}

332

333float BalancedPartitioning::log2Cached(unsigned i) const {

334 return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);

335}

assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

A function with a set of utility nodes where it is beneficial to order two functions close together i...

IDT Id

The ID of this node.

SmallVector< UtilityNodeT, 4 > UtilityNodes

The list of utility nodes associated with this node.

std::optional< unsigned > Bucket

The bucket assigned by balanced partitioning.

LLVM_ABI void dump(raw_ostream &OS) const

Definition BalancedPartitioning.cpp:24

static LLVM_ABI float moveGain(const BPFunctionNode &N, bool FromLeftToRight, const SignaturesT &Signatures)

Compute the move gain for uniform log-gap cost.

Definition BalancedPartitioning.cpp:319

LLVM_ABI void run(std::vector< BPFunctionNode > &Nodes) const

Run recursive graph partitioning that optimizes a given objective.

Definition BalancedPartitioning.cpp:79

LLVM_ABI BalancedPartitioning(const BalancedPartitioningConfig &Config)

Definition BalancedPartitioning.cpp:70

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

auto async(Function &&F, Args &&...ArgList)

Asynchronous submission of a task to the pool.

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

This is an optimization pass for GlobalISel generic memory operations.

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

void stable_sort(R &&Range)

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

SingleThreadExecutor DefaultThreadPool

auto partition(R &&Range, UnaryPredicate P)

Provide wrappers to std::partition which take ranges instead of having to pass begin/end explicitly.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

Algorithm parameters; default values are tuned on real-world binaries.

unsigned SplitDepth

The depth of the recursive bisection.