Function bitcode index in Value Symbol Table and lazy reading support · llvm/llvm-project@b77b1f8 (original) (raw)

`@@ -147,6 +147,7 @@ class BitcodeReader : public GVMaterializer {

`

147

147

` BitstreamCursor Stream;

`

148

148

`uint64_t NextUnreadBit = 0;

`

149

149

`bool SeenValueSymbolTable = false;

`

``

150

`+

unsigned VSTOffset = 0;

`

150

151

``

151

152

` std::vector<Type*> TypeList;

`

152

153

` BitcodeReaderValueList ValueList;

`

`@@ -370,7 +371,9 @@ class BitcodeReader : public GVMaterializer {

`

370

371

` std::error_code parseTypeTable();

`

371

372

` std::error_code parseTypeTableBody();

`

372

373

``

373

``

`-

std::error_code parseValueSymbolTable();

`

``

374

`+

ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,

`

``

375

`+

unsigned NameIndex, Triple &TT);

`

``

376

`+

std::error_code parseValueSymbolTable(unsigned Offset = 0);

`

374

377

` std::error_code parseConstants();

`

375

378

` std::error_code rememberAndSkipFunctionBody();

`

376

379

`/// Save the positions of the Metadata blocks and skip parsing the blocks.

`

`@@ -1583,7 +1586,61 @@ std::error_code BitcodeReader::parseTypeTableBody() {

`

1583

1586

` }

`

1584

1587

`}

`

1585

1588

``

1586

``

`-

std::error_code BitcodeReader::parseValueSymbolTable() {

`

``

1589

`+

/// Associate a value with its name from the given index in the provided record.

`

``

1590

`+

ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,

`

``

1591

`+

unsigned NameIndex, Triple &TT) {

`

``

1592

`+

SmallString<128> ValueName;

`

``

1593

`+

if (convertToString(Record, NameIndex, ValueName))

`

``

1594

`+

return error("Invalid record");

`

``

1595

`+

unsigned ValueID = Record[0];

`

``

1596

`+

if (ValueID >= ValueList.size() || !ValueList[ValueID])

`

``

1597

`+

return error("Invalid record");

`

``

1598

`+

Value *V = ValueList[ValueID];

`

``

1599

+

``

1600

`+

V->setName(StringRef(ValueName.data(), ValueName.size()));

`

``

1601

`+

auto *GO = dyn_cast<GlobalObject>(V);

`

``

1602

`+

if (GO) {

`

``

1603

`+

if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {

`

``

1604

`+

if (TT.isOSBinFormatMachO())

`

``

1605

`+

GO->setComdat(nullptr);

`

``

1606

`+

else

`

``

1607

`+

GO->setComdat(TheModule->getOrInsertComdat(V->getName()));

`

``

1608

`+

}

`

``

1609

`+

}

`

``

1610

`+

return V;

`

``

1611

`+

}

`

``

1612

+

``

1613

`+

/// Parse the value symbol table at either the current parsing location or

`

``

1614

`+

/// at the given bit offset if provided.

`

``

1615

`+

std::error_code BitcodeReader::parseValueSymbolTable(unsigned Offset) {

`

``

1616

`+

uint64_t CurrentBit;

`

``

1617

`+

// Pass in the Offset to distinguish between calling for the module-level

`

``

1618

`+

// VST (where we want to jump to the VST offset) and the function-level

`

``

1619

`+

// VST (where we don't).

`

``

1620

`+

if (Offset > 0) {

`

``

1621

`+

// Save the current parsing location so we can jump back at the end

`

``

1622

`+

// of the VST read.

`

``

1623

`+

CurrentBit = Stream.GetCurrentBitNo();

`

``

1624

`+

Stream.JumpToBit(Offset * 32);

`

``

1625

`+

BitstreamEntry Entry = Stream.advance();

`

``

1626

`+

assert(Entry.Kind == BitstreamEntry::SubBlock);

`

``

1627

`+

assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);

`

``

1628

`+

}

`

``

1629

+

``

1630

`+

// Compute the delta between the bitcode indices in the VST (the word offset

`

``

1631

`+

// to the word-aligned ENTER_SUBBLOCK for the function block), and that

`

``

1632

`+

// expected by the lazy reader. The reader's EnterSubBlock expects to have

`

``

1633

`+

// already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID

`

``

1634

`+

// (size BlockIDWidth). Note that we access the stream's AbbrevID width here

`

``

1635

`+

// just before entering the VST subblock because: 1) the EnterSubBlock

`

``

1636

`+

// changes the AbbrevID width; 2) the VST block is nested within the same

`

``

1637

`+

// outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same

`

``

1638

`+

// AbbrevID width before calling EnterSubBlock; and 3) when we want to

`

``

1639

`+

// jump to the FUNCTION_BLOCK using this offset later, we don't want

`

``

1640

`+

// to rely on the stream's AbbrevID width being that of the MODULE_BLOCK.

`

``

1641

`+

unsigned FuncBitcodeOffsetDelta =

`

``

1642

`+

Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;

`

``

1643

+

1587

1644

`if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))

`

1588

1645

`return error("Invalid record");

`

1589

1646

``

`@@ -1601,6 +1658,8 @@ std::error_code BitcodeReader::parseValueSymbolTable() {

`

1601

1658

`case BitstreamEntry::Error:

`

1602

1659

`return error("Malformed block");

`

1603

1660

`case BitstreamEntry::EndBlock:

`

``

1661

`+

if (Offset > 0)

`

``

1662

`+

Stream.JumpToBit(CurrentBit);

`

1604

1663

`return std::error_code();

`

1605

1664

`case BitstreamEntry::Record:

`

1606

1665

`// The interesting case.

`

`@@ -1613,23 +1672,39 @@ std::error_code BitcodeReader::parseValueSymbolTable() {

`

1613

1672

`default: // Default behavior: unknown type.

`

1614

1673

`break;

`

1615

1674

`case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]

`

1616

``

`-

if (convertToString(Record, 1, ValueName))

`

1617

``

`-

return error("Invalid record");

`

1618

``

`-

unsigned ValueID = Record[0];

`

1619

``

`-

if (ValueID >= ValueList.size() || !ValueList[ValueID])

`

1620

``

`-

return error("Invalid record");

`

1621

``

`-

Value *V = ValueList[ValueID];

`

1622

``

-

1623

``

`-

V->setName(StringRef(ValueName.data(), ValueName.size()));

`

1624

``

`-

if (auto *GO = dyn_cast<GlobalObject>(V)) {

`

1625

``

`-

if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {

`

1626

``

`-

if (TT.isOSBinFormatMachO())

`

1627

``

`-

GO->setComdat(nullptr);

`

1628

``

`-

else

`

1629

``

`-

GO->setComdat(TheModule->getOrInsertComdat(V->getName()));

`

1630

``

`-

}

`

``

1675

`+

ErrorOr<Value *> ValOrErr = recordValue(Record, 1, TT);

`

``

1676

`+

if (std::error_code EC = ValOrErr.getError())

`

``

1677

`+

return EC;

`

``

1678

`+

ValOrErr.get();

`

``

1679

`+

break;

`

``

1680

`+

}

`

``

1681

`+

case bitc::VST_CODE_FNENTRY: {

`

``

1682

`+

// VST_FNENTRY: [valueid, offset, namechar x N]

`

``

1683

`+

ErrorOr<Value *> ValOrErr = recordValue(Record, 2, TT);

`

``

1684

`+

if (std::error_code EC = ValOrErr.getError())

`

``

1685

`+

return EC;

`

``

1686

`+

Value *V = ValOrErr.get();

`

``

1687

+

``

1688

`+

auto *GO = dyn_cast<GlobalObject>(V);

`

``

1689

`+

if (!GO) {

`

``

1690

`+

// If this is an alias, need to get the actual Function object

`

``

1691

`+

// it aliases, in order to set up the DeferredFunctionInfo entry below.

`

``

1692

`+

auto *GA = dyn_cast<GlobalAlias>(V);

`

``

1693

`+

if (GA)

`

``

1694

`+

GO = GA->getBaseObject();

`

``

1695

`+

assert(GO);

`

1631

1696

` }

`

1632

``

`-

ValueName.clear();

`

``

1697

+

``

1698

`+

uint64_t FuncWordOffset = Record[1];

`

``

1699

`+

Function *F = dyn_cast<Function>(GO);

`

``

1700

`+

assert(F);

`

``

1701

`+

uint64_t FuncBitOffset = FuncWordOffset * 32;

`

``

1702

`+

DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;

`

``

1703

`+

// Set the NextUnreadBit to point to the last function block.

`

``

1704

`+

// Later when parsing is resumed after function materialization,

`

``

1705

`+

// we can simply skip that last function block.

`

``

1706

`+

if (FuncBitOffset > NextUnreadBit)

`

``

1707

`+

NextUnreadBit = FuncBitOffset;

`

1633

1708

`break;

`

1634

1709

` }

`

1635

1710

`case bitc::VST_CODE_BBENTRY: {

`

`@@ -2852,9 +2927,23 @@ std::error_code BitcodeReader::parseModule(bool Resume,

`

2852

2927

`return EC;

`

2853

2928

`break;

`

2854

2929

`case bitc::VALUE_SYMTAB_BLOCK_ID:

`

2855

``

`-

if (std::error_code EC = parseValueSymbolTable())

`

2856

``

`-

return EC;

`

2857

``

`-

SeenValueSymbolTable = true;

`

``

2930

`+

if (!SeenValueSymbolTable) {

`

``

2931

`+

// Either this is an old form VST without function index and an

`

``

2932

`+

// associated VST forward declaration record (which would have caused

`

``

2933

`+

// the VST to be jumped to and parsed before it was encountered

`

``

2934

`+

// normally in the stream), or there were no function blocks to

`

``

2935

`+

// trigger an earlier parsing of the VST.

`

``

2936

`+

assert(VSTOffset == 0 || FunctionsWithBodies.empty());

`

``

2937

`+

if (std::error_code EC = parseValueSymbolTable())

`

``

2938

`+

return EC;

`

``

2939

`+

SeenValueSymbolTable = true;

`

``

2940

`+

} else {

`

``

2941

`+

// We must have had a VST forward declaration record, which caused

`

``

2942

`+

// the parser to jump to and parse the VST earlier.

`

``

2943

`+

assert(VSTOffset > 0);

`

``

2944

`+

if (Stream.SkipBlock())

`

``

2945

`+

return error("Invalid record");

`

``

2946

`+

}

`

2858

2947

`break;

`

2859

2948

`case bitc::CONSTANTS_BLOCK_ID:

`

2860

2949

`if (std::error_code EC = parseConstants())

`

`@@ -2882,6 +2971,32 @@ std::error_code BitcodeReader::parseModule(bool Resume,

`

2882

2971

` SeenFirstFunctionBody = true;

`

2883

2972

` }

`

2884

2973

``

``

2974

`+

if (VSTOffset > 0) {

`

``

2975

`+

// If we have a VST forward declaration record, make sure we

`

``

2976

`+

// parse the VST now if we haven't already. It is needed to

`

``

2977

`+

// set up the DeferredFunctionInfo vector for lazy reading.

`

``

2978

`+

if (!SeenValueSymbolTable) {

`

``

2979

`+

if (std::error_code EC =

`

``

2980

`+

BitcodeReader::parseValueSymbolTable(VSTOffset))

`

``

2981

`+

return EC;

`

``

2982

`+

SeenValueSymbolTable = true;

`

``

2983

`+

return std::error_code();

`

``

2984

`+

} else {

`

``

2985

`+

// If we have a VST forward declaration record, but have already

`

``

2986

`+

// parsed the VST (just above, when the first function body was

`

``

2987

`+

// encountered here), then we are resuming the parse after

`

``

2988

`+

// materializing functions. The NextUnreadBit points to the start

`

``

2989

`+

// of the last function block recorded in the VST (set when

`

``

2990

`+

// parsing the VST function entries). Skip it.

`

``

2991

`+

if (Stream.SkipBlock())

`

``

2992

`+

return error("Invalid record");

`

``

2993

`+

continue;

`

``

2994

`+

}

`

``

2995

`+

}

`

``

2996

+

``

2997

`+

// Support older bitcode files that did not have the function

`

``

2998

`+

// index in the VST, nor a VST forward declaration record.

`

``

2999

`+

// Build the DeferredFunctionInfo vector on the fly.

`

2885

3000

`if (std::error_code EC = rememberAndSkipFunctionBody())

`

2886

3001

`return EC;

`

2887

3002

`// Suspend parsing when we reach the function bodies. Subsequent

`

`@@ -3185,6 +3300,12 @@ std::error_code BitcodeReader::parseModule(bool Resume,

`

3185

3300

`return error("Invalid record");

`

3186

3301

` ValueList.shrinkTo(Record[0]);

`

3187

3302

`break;

`

``

3303

`+

/// MODULE_CODE_VSTOFFSET: [offset]

`

``

3304

`+

case bitc::MODULE_CODE_VSTOFFSET:

`

``

3305

`+

if (Record.size() < 1)

`

``

3306

`+

return error("Invalid record");

`

``

3307

`+

VSTOffset = Record[0];

`

``

3308

`+

break;

`

3188

3309

` }

`

3189

3310

` Record.clear();

`

3190

3311

` }

`

`@@ -4642,6 +4763,11 @@ std::error_code BitcodeReader::findFunctionInStream(

`

4642

4763

` Function *F,

`

4643

4764

` DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {

`

4644

4765

`while (DeferredFunctionInfoIterator->second == 0) {

`

``

4766

`+

// This is the fallback handling for the old format bitcode that

`

``

4767

`+

// didn't contain the function index in the VST. Assert if we end up

`

``

4768

`+

// here for the new format (which is the only time the VSTOffset would

`

``

4769

`+

// be non-zero).

`

``

4770

`+

assert(VSTOffset == 0);

`

4645

4771

`if (Stream.AtEndOfStream())

`

4646

4772

`return error("Could not find function in stream");

`

4647

4773

`// ParseModule will parse the next body in the stream and set its

`