Function bitcode index in Value Symbol Table and lazy reading support · llvm/llvm-project@b77b1f8 (original) (raw)
`@@ -147,6 +147,7 @@ class BitcodeReader : public GVMaterializer {
`
147
147
` BitstreamCursor Stream;
`
148
148
`uint64_t NextUnreadBit = 0;
`
149
149
`bool SeenValueSymbolTable = false;
`
``
150
`+
unsigned VSTOffset = 0;
`
150
151
``
151
152
` std::vector<Type*> TypeList;
`
152
153
` BitcodeReaderValueList ValueList;
`
`@@ -370,7 +371,9 @@ class BitcodeReader : public GVMaterializer {
`
370
371
` std::error_code parseTypeTable();
`
371
372
` std::error_code parseTypeTableBody();
`
372
373
``
373
``
`-
std::error_code parseValueSymbolTable();
`
``
374
`+
ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
`
``
375
`+
unsigned NameIndex, Triple &TT);
`
``
376
`+
std::error_code parseValueSymbolTable(unsigned Offset = 0);
`
374
377
` std::error_code parseConstants();
`
375
378
` std::error_code rememberAndSkipFunctionBody();
`
376
379
`/// Save the positions of the Metadata blocks and skip parsing the blocks.
`
`@@ -1583,7 +1586,61 @@ std::error_code BitcodeReader::parseTypeTableBody() {
`
1583
1586
` }
`
1584
1587
`}
`
1585
1588
``
1586
``
`-
std::error_code BitcodeReader::parseValueSymbolTable() {
`
``
1589
`+
/// Associate a value with its name from the given index in the provided record.
`
``
1590
`+
ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
`
``
1591
`+
unsigned NameIndex, Triple &TT) {
`
``
1592
`+
SmallString<128> ValueName;
`
``
1593
`+
if (convertToString(Record, NameIndex, ValueName))
`
``
1594
`+
return error("Invalid record");
`
``
1595
`+
unsigned ValueID = Record[0];
`
``
1596
`+
if (ValueID >= ValueList.size() || !ValueList[ValueID])
`
``
1597
`+
return error("Invalid record");
`
``
1598
`+
Value *V = ValueList[ValueID];
`
``
1599
+
``
1600
`+
V->setName(StringRef(ValueName.data(), ValueName.size()));
`
``
1601
`+
auto *GO = dyn_cast<GlobalObject>(V);
`
``
1602
`+
if (GO) {
`
``
1603
`+
if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
`
``
1604
`+
if (TT.isOSBinFormatMachO())
`
``
1605
`+
GO->setComdat(nullptr);
`
``
1606
`+
else
`
``
1607
`+
GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
`
``
1608
`+
}
`
``
1609
`+
}
`
``
1610
`+
return V;
`
``
1611
`+
}
`
``
1612
+
``
1613
`+
/// Parse the value symbol table at either the current parsing location or
`
``
1614
`+
/// at the given bit offset if provided.
`
``
1615
`+
std::error_code BitcodeReader::parseValueSymbolTable(unsigned Offset) {
`
``
1616
`+
uint64_t CurrentBit;
`
``
1617
`+
// Pass in the Offset to distinguish between calling for the module-level
`
``
1618
`+
// VST (where we want to jump to the VST offset) and the function-level
`
``
1619
`+
// VST (where we don't).
`
``
1620
`+
if (Offset > 0) {
`
``
1621
`+
// Save the current parsing location so we can jump back at the end
`
``
1622
`+
// of the VST read.
`
``
1623
`+
CurrentBit = Stream.GetCurrentBitNo();
`
``
1624
`+
Stream.JumpToBit(Offset * 32);
`
``
1625
`+
BitstreamEntry Entry = Stream.advance();
`
``
1626
`+
assert(Entry.Kind == BitstreamEntry::SubBlock);
`
``
1627
`+
assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
`
``
1628
`+
}
`
``
1629
+
``
1630
`+
// Compute the delta between the bitcode indices in the VST (the word offset
`
``
1631
`+
// to the word-aligned ENTER_SUBBLOCK for the function block) and that
`
``
1632
`+
// expected by the lazy reader. The reader's EnterSubBlock expects to have
`
``
1633
`+
// already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID
`
``
1634
`+
// (size BlockIDWidth). Note that we access the stream's AbbrevID width here
`
``
1635
`+
// just before entering the VST subblock because: 1) the EnterSubBlock
`
``
1636
`+
// changes the AbbrevID width; 2) the VST block is nested within the same
`
``
1637
`+
// outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore has the same
`
``
1638
`+
// AbbrevID width before calling EnterSubBlock; and 3) when we want to
`
``
1639
`+
// jump to the FUNCTION_BLOCK using this offset later, we don't want
`
``
1640
`+
// to rely on the stream's AbbrevID width being that of the MODULE_BLOCK.
`
``
1641
`+
unsigned FuncBitcodeOffsetDelta =
`
``
1642
`+
Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
`
``
1643
+
1587
1644
`if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
`
1588
1645
`return error("Invalid record");
`
1589
1646
``
`@@ -1601,6 +1658,8 @@ std::error_code BitcodeReader::parseValueSymbolTable() {
`
1601
1658
`case BitstreamEntry::Error:
`
1602
1659
`return error("Malformed block");
`
1603
1660
`case BitstreamEntry::EndBlock:
`
``
1661
`+
if (Offset > 0)
`
``
1662
`+
Stream.JumpToBit(CurrentBit);
`
1604
1663
`return std::error_code();
`
1605
1664
`case BitstreamEntry::Record:
`
1606
1665
`// The interesting case.
`
`@@ -1613,23 +1672,39 @@ std::error_code BitcodeReader::parseValueSymbolTable() {
`
1613
1672
`default: // Default behavior: unknown type.
`
1614
1673
`break;
`
1615
1674
`case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
`
1616
``
`-
if (convertToString(Record, 1, ValueName))
`
1617
``
`-
return error("Invalid record");
`
1618
``
`-
unsigned ValueID = Record[0];
`
1619
``
`-
if (ValueID >= ValueList.size() || !ValueList[ValueID])
`
1620
``
`-
return error("Invalid record");
`
1621
``
`-
Value *V = ValueList[ValueID];
`
1622
``
-
1623
``
`-
V->setName(StringRef(ValueName.data(), ValueName.size()));
`
1624
``
`-
if (auto *GO = dyn_cast<GlobalObject>(V)) {
`
1625
``
`-
if (GO->getComdat() == reinterpret_cast<Comdat *>(1)) {
`
1626
``
`-
if (TT.isOSBinFormatMachO())
`
1627
``
`-
GO->setComdat(nullptr);
`
1628
``
`-
else
`
1629
``
`-
GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
`
1630
``
`-
}
`
``
1675
`+
ErrorOr<Value *> ValOrErr = recordValue(Record, 1, TT);
`
``
1676
`+
if (std::error_code EC = ValOrErr.getError())
`
``
1677
`+
return EC;
`
``
1678
`+
ValOrErr.get();
`
``
1679
`+
break;
`
``
1680
`+
}
`
``
1681
`+
case bitc::VST_CODE_FNENTRY: {
`
``
1682
`+
// VST_FNENTRY: [valueid, offset, namechar x N]
`
``
1683
`+
ErrorOr<Value *> ValOrErr = recordValue(Record, 2, TT);
`
``
1684
`+
if (std::error_code EC = ValOrErr.getError())
`
``
1685
`+
return EC;
`
``
1686
`+
Value *V = ValOrErr.get();
`
``
1687
+
``
1688
`+
auto *GO = dyn_cast<GlobalObject>(V);
`
``
1689
`+
if (!GO) {
`
``
1690
`+
// If this is an alias, we need to get the actual Function object
`
``
1691
`+
// it aliases, in order to set up the DeferredFunctionInfo entry below.
`
``
1692
`+
auto *GA = dyn_cast<GlobalAlias>(V);
`
``
1693
`+
if (GA)
`
``
1694
`+
GO = GA->getBaseObject();
`
``
1695
`+
assert(GO);
`
1631
1696
` }
`
1632
``
`-
ValueName.clear();
`
``
1697
+
``
1698
`+
uint64_t FuncWordOffset = Record[1];
`
``
1699
`+
Function *F = dyn_cast<Function>(GO);
`
``
1700
`+
assert(F);
`
``
1701
`+
uint64_t FuncBitOffset = FuncWordOffset * 32;
`
``
1702
`+
DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
`
``
1703
`+
// Set the NextUnreadBit to point to the last function block.
`
``
1704
`+
// Later when parsing is resumed after function materialization,
`
``
1705
`+
// we can simply skip that last function block.
`
``
1706
`+
if (FuncBitOffset > NextUnreadBit)
`
``
1707
`+
NextUnreadBit = FuncBitOffset;
`
1633
1708
`break;
`
1634
1709
` }
`
1635
1710
`case bitc::VST_CODE_BBENTRY: {
`
`@@ -2852,9 +2927,23 @@ std::error_code BitcodeReader::parseModule(bool Resume,
`
2852
2927
`return EC;
`
2853
2928
`break;
`
2854
2929
`case bitc::VALUE_SYMTAB_BLOCK_ID:
`
2855
``
`-
if (std::error_code EC = parseValueSymbolTable())
`
2856
``
`-
return EC;
`
2857
``
`-
SeenValueSymbolTable = true;
`
``
2930
`+
if (!SeenValueSymbolTable) {
`
``
2931
`+
// Either this is an old form VST without function index and an
`
``
2932
`+
// associated VST forward declaration record (which would have caused
`
``
2933
`+
// the VST to be jumped to and parsed before it was encountered
`
``
2934
`+
// normally in the stream), or there were no function blocks to
`
``
2935
`+
// trigger an earlier parsing of the VST.
`
``
2936
`+
assert(VSTOffset == 0 || FunctionsWithBodies.empty());
`
``
2937
`+
if (std::error_code EC = parseValueSymbolTable())
`
``
2938
`+
return EC;
`
``
2939
`+
SeenValueSymbolTable = true;
`
``
2940
`+
} else {
`
``
2941
`+
// We must have had a VST forward declaration record, which caused
`
``
2942
`+
// the parser to jump to and parse the VST earlier.
`
``
2943
`+
assert(VSTOffset > 0);
`
``
2944
`+
if (Stream.SkipBlock())
`
``
2945
`+
return error("Invalid record");
`
``
2946
`+
}
`
2858
2947
`break;
`
2859
2948
`case bitc::CONSTANTS_BLOCK_ID:
`
2860
2949
`if (std::error_code EC = parseConstants())
`
`@@ -2882,6 +2971,32 @@ std::error_code BitcodeReader::parseModule(bool Resume,
`
2882
2971
` SeenFirstFunctionBody = true;
`
2883
2972
` }
`
2884
2973
``
``
2974
`+
if (VSTOffset > 0) {
`
``
2975
`+
// If we have a VST forward declaration record, make sure we
`
``
2976
`+
// parse the VST now if we haven't already. It is needed to
`
``
2977
`+
// set up the DeferredFunctionInfo vector for lazy reading.
`
``
2978
`+
if (!SeenValueSymbolTable) {
`
``
2979
`+
if (std::error_code EC =
`
``
2980
`+
BitcodeReader::parseValueSymbolTable(VSTOffset))
`
``
2981
`+
return EC;
`
``
2982
`+
SeenValueSymbolTable = true;
`
``
2983
`+
return std::error_code();
`
``
2984
`+
} else {
`
``
2985
`+
// If we have a VST forward declaration record, but have already
`
``
2986
`+
// parsed the VST (just above, when the first function body was
`
``
2987
`+
// encountered here), then we are resuming the parse after
`
``
2988
`+
// materializing functions. The NextUnreadBit points to the start
`
``
2989
`+
// of the last function block recorded in the VST (set when
`
``
2990
`+
// parsing the VST function entries). Skip it.
`
``
2991
`+
if (Stream.SkipBlock())
`
``
2992
`+
return error("Invalid record");
`
``
2993
`+
continue;
`
``
2994
`+
}
`
``
2995
`+
}
`
``
2996
+
``
2997
`+
// Support older bitcode files that did not have the function
`
``
2998
`+
// index in the VST, nor a VST forward declaration record.
`
``
2999
`+
// Build the DeferredFunctionInfo vector on the fly.
`
2885
3000
`if (std::error_code EC = rememberAndSkipFunctionBody())
`
2886
3001
`return EC;
`
2887
3002
`// Suspend parsing when we reach the function bodies. Subsequent
`
`@@ -3185,6 +3300,12 @@ std::error_code BitcodeReader::parseModule(bool Resume,
`
3185
3300
`return error("Invalid record");
`
3186
3301
` ValueList.shrinkTo(Record[0]);
`
3187
3302
`break;
`
``
3303
`+
/// MODULE_CODE_VSTOFFSET: [offset]
`
``
3304
`+
case bitc::MODULE_CODE_VSTOFFSET:
`
``
3305
`+
if (Record.size() < 1)
`
``
3306
`+
return error("Invalid record");
`
``
3307
`+
VSTOffset = Record[0];
`
``
3308
`+
break;
`
3188
3309
` }
`
3189
3310
` Record.clear();
`
3190
3311
` }
`
`@@ -4642,6 +4763,11 @@ std::error_code BitcodeReader::findFunctionInStream(
`
4642
4763
` Function *F,
`
4643
4764
` DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
`
4644
4765
`while (DeferredFunctionInfoIterator->second == 0) {
`
``
4766
`+
// This is the fallback handling for the old format bitcode that
`
``
4767
`+
// didn't contain the function index in the VST. Assert if we end up
`
``
4768
`+
// here for the new format (which is the only time the VSTOffset would
`
``
4769
`+
// be non-zero).
`
``
4770
`+
assert(VSTOffset == 0);
`
4645
4771
`if (Stream.AtEndOfStream())
`
4646
4772
`return error("Could not find function in stream");
`
4647
4773
`// ParseModule will parse the next body in the stream and set its
`