Adding IR2Vec as an analysis pass by svkeerthy · Pull Request #134004 · llvm/llvm-project (original) (raw)

@llvm/pr-subscribers-llvm-analysis

Author: S. VenkataKeerthy (svkeerthy)

Changes

This PR introduces IR2Vec as an analysis pass. The changes include:

(Planning to post an RFC; will update the PR with the RFC link.)

Acknowledgements: contributors - https://github.com/IITH-Compilers/IR2Vec/graphs/contributors


Patch is 88.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134004.diff

10 Files Affected:

diff --git a/llvm/include/llvm/Analysis/IR2VecAnalysis.h b/llvm/include/llvm/Analysis/IR2VecAnalysis.h new file mode 100644 index 0000000000000..dd5c00a1168b8 --- /dev/null +++ b/llvm/include/llvm/Analysis/IR2VecAnalysis.h @@ -0,0 +1,131 @@ +//===- IR2VecAnalysis.h - IR2Vec Analysis Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM +// Exceptions. See the LICENSE file for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of IR2VecAnalysis that computes +/// IR2Vec Embeddings of the program. +/// +/// Program embeddings are typically, or are derived from, a learned +/// representation of the program. Such embeddings are used to represent the +/// programs as input to machine learning algorithms. IR2Vec represents the +/// LLVM IR as embeddings. +/// +/// The IR2Vec algorithm is described in the following paper: +/// +/// IR2Vec: LLVM IR Based Scalable Program Embeddings, S. VenkataKeerthy, +/// Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna +/// Upadrasta, and Y. N. Srikant, ACM Transactions on Architecture and +/// Code Optimization (TACO), 2020. https://doi.org/10.1145/3418463. +/// https://arxiv.org/abs/1909.06228 +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IR2VECANALYSIS_H +#define LLVM_ANALYSIS_IR2VECANALYSIS_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/IR/PassManager.h" +#include <map> + +namespace llvm { + +class Module; +class BasicBlock; +class Instruction; +class Function; + +namespace ir2vec { +using Embedding = std::vector<double>; +// FIXME: Currently the keys are strings. This can be changed to +// use integers for cheaper lookups. 
+using Vocab = std::map<std::string, Embedding>; +} // namespace ir2vec + +class IR2VecVocabResult; +class IR2VecResult; + +/// This analysis provides the vocabulary for IR2Vec. The vocabulary provides a +/// mapping between an entity of the IR (like opcode, type, argument, etc.) and +/// its corresponding embedding. +class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {

+}; + +class IR2VecResult {

diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index fbf3b587d6bd2..8a6399f756f27 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -67,6 +67,7 @@ add_llvm_component_library(LLVMAnalysis GlobalsModRef.cpp GuardUtils.cpp HeatUtils.cpp

+#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace ir2vec; + +#define DEBUG_TYPE "ir2vec" + +STATISTIC(DataMissCounter, "Number of data misses in the vocabulary"); + +/// IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware. +/// Symbolic embeddings capture the "syntactic" and "statistical correlation" +/// of the IR entities. Flow-aware embeddings build on top of symbolic +/// embeddings and additionally capture the flow information in the IR. +/// IR2VecKind is used to specify the type of embeddings to generate. +// FIXME: Currently we support only Symbolic. Add support for +// Flow-aware in upcoming patches. +enum class IR2VecKind { Symbolic, Flowaware }; + +static cl::OptionCategory IR2VecAnalysisCategory("IR2Vec Analysis Options"); + +cl::opt

+} + +/// Adds two vectors: Vec += Vec2 +void addVectors(Embedding &Vec, const Embedding &Vec2) {

+} + +// FIXME: Currently lookups are string based. Use numeric Keys +// for efficiency. +Embedding Embeddings::lookupVocab(const std::string &Key) {

+} + +//===----------------------------------------------------------------------===// +// IR2VecPrinterPass +//===----------------------------------------------------------------------===// + +void IR2VecPrinterPass::printVector(const Embedding &Vec) const {

[truncated]