Skip to content

Commit f7e8be7

Browse files
authored Dec 5, 2024
Skip escaped newlines before checking for whitespace in Lexer::getRawToken. (#117548)
The Lexer used in getRawToken is not told to keep whitespace, so when it skips over escaped newlines, it also ignores whitespace, regardless of getRawToken's IgnoreWhiteSpace parameter. Instead of letting this case fall through to lexing, check for whitespace after skipping over any escaped newlines.
1 parent 3a4b9f3 commit f7e8be7

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed
 

‎clang/lib/Lex/Lexer.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -527,7 +527,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
527527

528528
const char *StrData = Buffer.data()+LocInfo.second;
529529

530-
if (!IgnoreWhiteSpace && isWhitespace(StrData[0]))
530+
if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0]))
531531
return true;
532532

533533
// Create a lexer starting at the beginning of this token.

‎clang/unittests/Lex/LexerTest.cpp

+32
Original file line number | Diff line number | Diff line change
@@ -652,6 +652,38 @@ TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
652652
EXPECT_TRUE(ToksView.empty());
653653
}
654654

655+
TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
656+
const llvm::StringLiteral Source = R"cc(
657+
#define ONE \
658+
1
659+
660+
int i = ONE;
661+
)cc";
662+
std::vector<Token> Toks =
663+
CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal,
664+
tok::numeric_constant, tok::semi});
665+
666+
// Set up by getting the raw token for the `1` in the macro definition.
667+
const Token &OneExpanded = Toks[3];
668+
Token Tok;
669+
ASSERT_FALSE(
670+
Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
671+
// The `ONE`.
672+
ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
673+
ASSERT_FALSE(
674+
Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
675+
Tok, SourceMgr, LangOpts));
676+
// The `1` in the macro definition.
677+
ASSERT_EQ(Tok.getKind(), tok::numeric_constant);
678+
679+
// Go back 4 characters: two spaces, one newline, and the backslash.
680+
SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4);
681+
// Expect true (=failure) because the whitespace immediately after the
682+
// escaped newline is not ignored.
683+
EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
684+
/*IgnoreWhiteSpace=*/false));
685+
}
686+
655687
TEST(LexerPreambleTest, PreambleBounds) {
656688
std::vector<std::string> Cases = {
657689
R"cc([[

0 commit comments

Comments
 (0)
Please sign in to comment.