[Clang][Comments] Allow HTML tags across multiple lines by Nerixyz · Pull Request #120843 · llvm/llvm-project (original) (raw)

@llvm/pr-subscribers-clang

Author: nerix (Nerixyz)

Changes

HTML starting tags that span multiple lines were previously not allowed (or rather, only the starting line was lexed as HTML). Doxygen allows those tags.

This PR allows start tags to span multiple lines. A tag still cannot span multiple C-style (`/* ... */`) comments, since that is most likely a user error. Spanning several consecutive BCPL (`///`) comments is fine, because each of those comments is a single line (see the example below).

Example:

```cpp
/// <a
///   href="foo"
/// >Aaa</a>b
int Test;
```

Fixes #28321.


Full diff: https://github.com/llvm/llvm-project/pull/120843.diff

4 Files Affected:

diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp index ec9a5b480aa295..804be89a8d4ddc 100644 --- a/clang/lib/AST/CommentLexer.cpp +++ b/clang/lib/AST/CommentLexer.cpp @@ -196,6 +196,15 @@ const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) { return BufferEnd; }

+const char *skipHorizontalWhitespace(const char *BufferPtr,

void Lexer::lexHTMLStartTag(Token &T) { assert(State == LS_HTMLStartTag);

C = *BufferPtr;

@@ -829,6 +878,14 @@ void Lexer::lex(Token &T) { BufferPtr += 2; assert(BufferPtr <= BufferEnd);

diff --git a/clang/test/AST/ast-dump-comment.cpp b/clang/test/AST/ast-dump-comment.cpp index 9798295b420f9a..40c3edb62821bb 100644 --- a/clang/test/AST/ast-dump-comment.cpp +++ b/clang/test/AST/ast-dump-comment.cpp @@ -91,6 +91,19 @@ int Test_HTMLTagComment; // CHECK-NEXT: TextComment{{.}} Text=" " // CHECK-NEXT: HTMLStartTagComment{{.}} Name="br" SelfClosing

+/// <a
+///   href="foo"
+/// >Aaa</a>b
+int Test_HTMLTagMultilineBCPL;
+// CHECK: VarDecl{{.*}}Test_HTMLTagMultilineBCPL
+// CHECK-NEXT: FullComment
+// CHECK-NEXT: ParagraphComment
+// CHECK-NEXT: TextComment{{.*}} Text=" "
+// CHECK-NEXT: HTMLStartTagComment{{.*}} Name="a" Attrs: "href="foo"
+// CHECK-NEXT: TextComment{{.*}} Text="Aaa"
+// CHECK-NEXT: HTMLEndTagComment{{.*}} Name="a"
+// CHECK-NEXT: TextComment{{.*}} Text="b"
+
 /// \verbatim
 /// Aaa
 /// \endverbatim
diff --git a/clang/unittests/AST/CommentLexer.cpp b/clang/unittests/AST/CommentLexer.cpp
index 1e7bad89898f4c..2231a5d78af451 100644
--- a/clang/unittests/AST/CommentLexer.cpp
+++ b/clang/unittests/AST/CommentLexer.cpp
@@ -1453,6 +1453,129 @@ TEST_F(CommentLexerTest, HTML19) {
   ASSERT_EQ(tok::newline, Toks[2].getKind());
 }

+TEST_F(CommentLexerTest, HTML20) {

diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp index e0df182d430c36..aa08b6718e506f 100644 --- a/clang/unittests/AST/CommentParser.cpp +++ b/clang/unittests/AST/CommentParser.cpp @@ -1065,9 +1065,10 @@ TEST_F(CommentParserTest, InlineCommand5) {

TEST_F(CommentParserTest, HTML1) { const char *Sources[] = {

@@ -1088,8 +1089,9 @@ TEST_F(CommentParserTest, HTML1) {

TEST_F(CommentParserTest, HTML2) { const char *Sources[] = {

@@ -1110,10 +1112,8 @@ TEST_F(CommentParserTest, HTML2) {

TEST_F(CommentParserTest, HTML3) { const char *Sources[] = {

@@ -1134,8 +1134,9 @@ TEST_F(CommentParserTest, HTML3) {

TEST_F(CommentParserTest, HTML4) { const char *Sources[] = {