[clang] Fix ASTWriter crash after merging named enums by michael-jabbour-sonarsource · Pull Request #114240 · llvm/llvm-project (original) (raw)

@llvm/pr-subscribers-clang-modules

@llvm/pr-subscribers-clang

Author: Michael Jabbour (michael-jabbour-sonarsource)

Changes

Clang already has a mechanism to cleanup enumerators for typedefs to anonymous enums. So the following example code used to be handled correctly while merging, and ASTWriter behaves as expected:

typedef enum { Val } AnonEnum;

However, the mentioned mechanism didn't handle named enums. This lead to stale declarations in IdResolver, causing an assertion violation in ASTWriter Assertion DeclIDs.contains(D) && "Declaration not emitted!"' failed.` when a module is being serialized with the following merged enums:

typedef enum Enum1 { Val_A } Enum1; enum Enum2 { Val_B };

The PR applies the same mechanism in the named enums case. Additionally, I dropped the call to getLexicalDeclContext()->removeDecl as it was causing a wrong odr-violation diagnostic with anonymous enums.

Might be easier to to review commit by commit. Any feedback is appreciated.

Context

This fixes frontend crashes that were encountered when certain Objective-C modules are included on Xcode 16. For example, by running CC=/path/to/clang-19 xcodebuild clean build on a project that contains the following Objective-C file:

#include <os/atomic.h>

int main() { return memory_order_relaxed; }

This crashes the parser in release, when ASTReader tried to load the enumerator declaration.


Full diff: https://github.com/llvm/llvm-project/pull/114240.diff

5 Files Affected:

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 93d98e1cbb9c81..d53bcf63dd1bd9 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3910,7 +3910,7 @@ class Sema final : public SemaBase { /// Perform ODR-like check for C/ObjC when merging tag types from modules. /// Differently from C++, actually parse the body and reject / error out /// in case of a structural mismatch.

@@ -4034,6 +4034,12 @@ class Sema final : public SemaBase { void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, LookupResult &OldDecls);

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f8e5f3c6d309d6..a38d2d00a5410d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2551,18 +2551,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, // Make the old tag definition visible. makeMergedDefinitionVisible(Hidden);

@@ -2639,6 +2628,19 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, notePreviousDefinition(Old, New->getLocation()); }

+void Sema::RetireNodesFromMergedDecl(Scope *S, Decl *New) {

-bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) { +bool Sema::ActOnDuplicateDefinition(Scope *S, Decl *Prev,

diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m new file mode 100644 index 00000000000000..4873c7e6b4708e --- /dev/null +++ b/clang/test/Modules/modules-merge-enum.m @@ -0,0 +1,94 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang -fmodules -fmodules-cache-path=%t/modcache -fsyntax-only %t/source.m -Xclang -ast-dump-all 2>&1 | FileCheck %s + + +//--- shared.h +// This header is shared between two modules, but it's not a module itself. +// The enums defined here are parsed in both modules, and merged while building ModB. + +typedef enum MyEnum1 { MyVal_A } MyEnum1; +// CHECK: |-EnumDecl 0x{{.}} imported in ModA.ModAFile1 MyEnum1 +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | -EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_A 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum1 'enum MyEnum1' +// CHECK-NEXT: | -ElaboratedType 0x{{.}} 'enum MyEnum1' sugar imported +// CHECK-NEXT: | -EnumType 0x{{.*}} 'enum MyEnum1' imported +// CHECK-NEXT: | -Enum 0x{{.}} 'MyEnum1' + + +enum MyEnum2 { MyVal_B }; +// CHECK: |-EnumDecl 0x{{.}} imported in ModA.ModAFile1 MyEnum2 +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | -EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_B 'int' + + +typedef enum { MyVal_C } MyEnum3; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | -EnumConstantDecl 0x{{.}} imported in ModA.ModAFile1 referenced MyVal_C 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.}} imported in ModA.ModAFile1 hidden MyEnum3 'enum MyEnum3':'MyEnum3' +// CHECK-NEXT: | -ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported +// CHECK-NEXT: | -EnumType 0x{{.}} 'MyEnum3' imported +// CHECK-NEXT: | -Enum 0x{{.*}} '' + + +// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB. +enum { MyVal_D }; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 hidden <undeserialized declarations> +// CHECK-NEXT: | -EnumConstantDecl 0x{{.}} imported in ModA.ModAFile1 hidden MyVal_D 'int' + + +// Redeclarations coming from ModB. +// CHECK: |-TypedefDecl 0x{{.}} prev 0x{{.}} imported in ModB MyEnum1 'enum MyEnum1' +// CHECK-NEXT: | -ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported +// CHECK-NEXT: | -EnumType 0x{{.}} 'enum MyEnum1' imported +// CHECK-NEXT: | -Enum 0x{{.*}} 'MyEnum1' + +// CHECK: |-EnumDecl 0x{{.*}} prev 0x{{.*}} imported in ModB <undeserialized declarations> +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | -EnumConstantDecl 0x{{.}} imported in ModB MyVal_C 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.}} prev 0x{{.}} imported in ModB MyEnum3 'enum MyEnum3':'MyEnum3' +// CHECK-NEXT: | -ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported +// CHECK-NEXT: | -EnumType 0x{{.}} 'MyEnum3' imported +// CHECK-NEXT: | -Enum 0x{{.*}} '' + +// CHECK: |-EnumDecl 0x{{.*}} imported in ModB <undeserialized declarations> +// CHECK-NEXT: | -EnumConstantDecl 0x{{.}} first 0x{{.*}} imported in ModB referenced MyVal_D 'int' + +//--- module.modulemap +module ModA {

+// Including shared.h here causes Sema to merge the AST nodes from shared.h with the hidden ones from ModA. +#include "shared.h" + + +//--- source.m +#include "ModBFile.h" + +int main() { return MyVal_A + MyVal_B + MyVal_C + MyVal_D; }