Skip to content

Commit 708a9a0

Browse files
authoredJul 29, 2024··
[Clang][Parse] Fix ambiguity with nested-name-specifiers that may declarative (#96364)
Consider the following: ``` template<typename T> struct A { }; template<typename T> int A<T>::B::* f(); // error: no member named 'B' in 'A<T>' ``` Although this is clearly valid, clang rejects it because the _nested-name-specifier_ `A<T>::` is parsed as-if it was declarative, meaning, we parse it as-if it was the _nested-name-specifier_ in a redeclaration/specialization. However, we don't (and can't) know whether the _nested-name-specifier_ is declarative until we see the '`*`' token, but at that point we have already complained that `A` has no member named `B`! This patch addresses this bug by adding support for _fully_ unannotated _and_ unbounded tentative parsing, which allows for us to parse past tokens without having to cache them until we reach a point where we can guarantee to be past the construct we are disambiguating. I don't know where the approach taken here is ideal -- alternatives are welcome. However, the performance impact (as measured by llvm-compile-time-tracker (https://llvm-compile-time-tracker.com/?config=Overview&stat=instructions%3Au&remote=sdkrystian) is quite minimal (0.09%, which I plan to further improve).
1 parent c66d25d commit 708a9a0

File tree

10 files changed

+202
-106
lines changed

10 files changed

+202
-106
lines changed
 

‎clang/docs/ReleaseNotes.rst

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Bug Fixes to C++ Support
164164
- Fixed a crash when an expression with a dependent ``__typeof__`` type is used as the operand of a unary operator. (#GH97646)
165165
- Fixed a failed assertion when checking invalid delete operator declaration. (#GH96191)
166166
- Fix a crash when checking destructor reference with an invalid initializer. (#GH97230)
167+
- Clang now correctly parses potentially declarative nested-name-specifiers in pointer-to-member declarators.
167168

168169
Bug Fixes to AST Handling
169170
^^^^^^^^^^^^^^^^^^^^^^^^^

‎clang/include/clang/Lex/Preprocessor.h

+23-3
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,11 @@ class Preprocessor {
11601160
/// invoked (at which point the last position is popped).
11611161
std::vector<CachedTokensTy::size_type> BacktrackPositions;
11621162

1163+
/// Stack of cached tokens/initial number of cached tokens pairs, allowing
1164+
/// nested unannotated backtracks.
1165+
std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1166+
UnannotatedBacktrackTokens;
1167+
11631168
/// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
11641169
/// This is used to guard against calling this function recursively.
11651170
///
@@ -1722,8 +1727,16 @@ class Preprocessor {
17221727
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
17231728
/// tokens will continue indefinitely.
17241729
///
1725-
void EnableBacktrackAtThisPos();
1730+
/// \param Unannotated Whether token annotations are reverted upon calling
1731+
/// Backtrack().
1732+
void EnableBacktrackAtThisPos(bool Unannotated = false);
1733+
1734+
private:
1735+
std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1736+
1737+
CachedTokensTy PopUnannotatedBacktrackTokens();
17261738

1739+
public:
17271740
/// Disable the last EnableBacktrackAtThisPos call.
17281741
void CommitBacktrackedTokens();
17291742

@@ -1735,6 +1748,12 @@ class Preprocessor {
17351748
/// caching of tokens is on.
17361749
bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
17371750

1751+
/// True if EnableBacktrackAtThisPos() was called and
1752+
/// caching of unannotated tokens is on.
1753+
bool isUnannotatedBacktrackEnabled() const {
1754+
return !UnannotatedBacktrackTokens.empty();
1755+
}
1756+
17381757
/// Lex the next token for this preprocessor.
17391758
void Lex(Token &Result);
17401759

@@ -1841,8 +1860,9 @@ class Preprocessor {
18411860
void RevertCachedTokens(unsigned N) {
18421861
assert(isBacktrackEnabled() &&
18431862
"Should only be called when tokens are cached for backtracking");
1844-
assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1845-
&& "Should revert tokens up to the last backtrack position, not more");
1863+
assert(signed(CachedLexPos) - signed(N) >=
1864+
signed(LastBacktrackPos().first) &&
1865+
"Should revert tokens up to the last backtrack position, not more");
18461866
assert(signed(CachedLexPos) - signed(N) >= 0 &&
18471867
"Corrupted backtrack positions ?");
18481868
CachedLexPos -= N;

‎clang/include/clang/Parse/Parser.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,8 @@ class Parser : public CodeCompletionHandler {
10251025
/// ....
10261026
/// TPA.Revert();
10271027
///
1028+
/// If the Unannotated parameter is true, any token annotations created
1029+
/// during the tentative parse are reverted.
10281030
class TentativeParsingAction {
10291031
Parser &P;
10301032
PreferredTypeBuilder PrevPreferredType;
@@ -1034,15 +1036,15 @@ class Parser : public CodeCompletionHandler {
10341036
bool isActive;
10351037

10361038
public:
1037-
explicit TentativeParsingAction(Parser &p)
1039+
explicit TentativeParsingAction(Parser &p, bool Unannotated = false)
10381040
: P(p), PrevPreferredType(P.PreferredType) {
10391041
PrevTok = P.Tok;
10401042
PrevTentativelyDeclaredIdentifierCount =
10411043
P.TentativelyDeclaredIdentifiers.size();
10421044
PrevParenCount = P.ParenCount;
10431045
PrevBracketCount = P.BracketCount;
10441046
PrevBraceCount = P.BraceCount;
1045-
P.PP.EnableBacktrackAtThisPos();
1047+
P.PP.EnableBacktrackAtThisPos(Unannotated);
10461048
isActive = true;
10471049
}
10481050
void Commit() {
@@ -1073,13 +1075,11 @@ class Parser : public CodeCompletionHandler {
10731075
class RevertingTentativeParsingAction
10741076
: private Parser::TentativeParsingAction {
10751077
public:
1076-
RevertingTentativeParsingAction(Parser &P)
1077-
: Parser::TentativeParsingAction(P) {}
1078+
using TentativeParsingAction::TentativeParsingAction;
1079+
10781080
~RevertingTentativeParsingAction() { Revert(); }
10791081
};
10801082

1081-
class UnannotatedTentativeParsingAction;
1082-
10831083
/// ObjCDeclContextSwitch - An object used to switch context from
10841084
/// an objective-c decl context to its enclosing decl context and
10851085
/// back.
@@ -1984,7 +1984,8 @@ class Parser : public CodeCompletionHandler {
19841984
CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHasErrors,
19851985
bool EnteringContext, bool *MayBePseudoDestructor = nullptr,
19861986
bool IsTypename = false, const IdentifierInfo **LastII = nullptr,
1987-
bool OnlyNamespace = false, bool InUsingDeclaration = false);
1987+
bool OnlyNamespace = false, bool InUsingDeclaration = false,
1988+
bool Disambiguation = false);
19881989

19891990
//===--------------------------------------------------------------------===//
19901991
// C++11 5.1.2: Lambda expressions

‎clang/lib/Lex/PPCaching.cpp

+40-8
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@
1414
#include "clang/Lex/Preprocessor.h"
1515
using namespace clang;
1616

17+
std::pair<Preprocessor::CachedTokensTy::size_type, bool>
18+
Preprocessor::LastBacktrackPos() {
19+
assert(isBacktrackEnabled());
20+
auto BacktrackPos = BacktrackPositions.back();
21+
bool Unannotated =
22+
static_cast<CachedTokensTy::difference_type>(BacktrackPos) < 0;
23+
return {Unannotated ? ~BacktrackPos : BacktrackPos, Unannotated};
24+
}
25+
1726
// EnableBacktrackAtThisPos - From the point that this method is called, and
1827
// until CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1928
// keeps track of the lexed tokens so that a subsequent Backtrack() call will
@@ -22,26 +31,45 @@ using namespace clang;
2231
// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
2332
// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
2433
// be combined with the EnableBacktrackAtThisPos calls in reverse order.
25-
void Preprocessor::EnableBacktrackAtThisPos() {
34+
void Preprocessor::EnableBacktrackAtThisPos(bool Unannotated) {
2635
assert(LexLevel == 0 && "cannot use lookahead while lexing");
27-
BacktrackPositions.push_back(CachedLexPos);
36+
BacktrackPositions.push_back(Unannotated ? ~CachedLexPos : CachedLexPos);
37+
if (Unannotated)
38+
UnannotatedBacktrackTokens.emplace_back(CachedTokens, CachedTokens.size());
2839
EnterCachingLexMode();
2940
}
3041

42+
Preprocessor::CachedTokensTy Preprocessor::PopUnannotatedBacktrackTokens() {
43+
assert(isUnannotatedBacktrackEnabled() && "missing unannotated tokens?");
44+
auto [UnannotatedTokens, NumCachedToks] =
45+
std::move(UnannotatedBacktrackTokens.back());
46+
UnannotatedBacktrackTokens.pop_back();
47+
// If another unannotated backtrack is active, propagate any tokens that were
48+
// lexed (not cached) since EnableBacktrackAtThisPos was last called.
49+
if (isUnannotatedBacktrackEnabled())
50+
UnannotatedBacktrackTokens.back().first.append(
51+
UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end());
52+
return std::move(UnannotatedTokens);
53+
}
54+
3155
// Disable the last EnableBacktrackAtThisPos call.
3256
void Preprocessor::CommitBacktrackedTokens() {
33-
assert(!BacktrackPositions.empty()
34-
&& "EnableBacktrackAtThisPos was not called!");
57+
assert(isBacktrackEnabled() && "EnableBacktrackAtThisPos was not called!");
58+
auto [BacktrackPos, Unannotated] = LastBacktrackPos();
3559
BacktrackPositions.pop_back();
60+
if (Unannotated)
61+
PopUnannotatedBacktrackTokens();
3662
}
3763

3864
// Make Preprocessor re-lex the tokens that were lexed since
3965
// EnableBacktrackAtThisPos() was previously called.
4066
void Preprocessor::Backtrack() {
41-
assert(!BacktrackPositions.empty()
42-
&& "EnableBacktrackAtThisPos was not called!");
43-
CachedLexPos = BacktrackPositions.back();
67+
assert(isBacktrackEnabled() && "EnableBacktrackAtThisPos was not called!");
68+
auto [BacktrackPos, Unannotated] = LastBacktrackPos();
4469
BacktrackPositions.pop_back();
70+
CachedLexPos = BacktrackPos;
71+
if (Unannotated)
72+
CachedTokens = PopUnannotatedBacktrackTokens();
4573
recomputeCurLexerKind();
4674
}
4775

@@ -67,6 +95,8 @@ void Preprocessor::CachingLex(Token &Result) {
6795
EnterCachingLexModeUnchecked();
6896
CachedTokens.push_back(Result);
6997
++CachedLexPos;
98+
if (isUnannotatedBacktrackEnabled())
99+
UnannotatedBacktrackTokens.back().first.push_back(Result);
70100
return;
71101
}
72102

@@ -108,6 +138,8 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
108138
for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
109139
CachedTokens.push_back(Token());
110140
Lex(CachedTokens.back());
141+
if (isUnannotatedBacktrackEnabled())
142+
UnannotatedBacktrackTokens.back().first.push_back(CachedTokens.back());
111143
}
112144
EnterCachingLexMode();
113145
return CachedTokens.back();
@@ -124,7 +156,7 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
124156
for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
125157
CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
126158
if (AnnotBegin->getLocation() == Tok.getLocation()) {
127-
assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) &&
159+
assert((!isBacktrackEnabled() || LastBacktrackPos().first <= i) &&
128160
"The backtrack pos points inside the annotated tokens!");
129161
// Replace the cached tokens with the single annotation token.
130162
if (i < CachedLexPos)

‎clang/lib/Lex/Preprocessor.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
170170
}
171171

172172
Preprocessor::~Preprocessor() {
173-
assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
173+
assert(!isBacktrackEnabled() && "EnableBacktrack/Backtrack imbalance!");
174174

175175
IncludeMacroStack.clear();
176176

‎clang/lib/Parse/ParseCXXInlineMethods.cpp

+2-39
Original file line numberDiff line numberDiff line change
@@ -1205,41 +1205,6 @@ bool Parser::ConsumeAndStoreConditional(CachedTokens &Toks) {
12051205
return true;
12061206
}
12071207

1208-
/// A tentative parsing action that can also revert token annotations.
1209-
class Parser::UnannotatedTentativeParsingAction : public TentativeParsingAction {
1210-
public:
1211-
explicit UnannotatedTentativeParsingAction(Parser &Self,
1212-
tok::TokenKind EndKind)
1213-
: TentativeParsingAction(Self), Self(Self), EndKind(EndKind) {
1214-
// Stash away the old token stream, so we can restore it once the
1215-
// tentative parse is complete.
1216-
TentativeParsingAction Inner(Self);
1217-
Self.ConsumeAndStoreUntil(EndKind, Toks, true, /*ConsumeFinalToken*/false);
1218-
Inner.Revert();
1219-
}
1220-
1221-
void RevertAnnotations() {
1222-
Revert();
1223-
1224-
// Put back the original tokens.
1225-
Self.SkipUntil(EndKind, StopAtSemi | StopBeforeMatch);
1226-
if (Toks.size()) {
1227-
auto Buffer = std::make_unique<Token[]>(Toks.size());
1228-
std::copy(Toks.begin() + 1, Toks.end(), Buffer.get());
1229-
Buffer[Toks.size() - 1] = Self.Tok;
1230-
Self.PP.EnterTokenStream(std::move(Buffer), Toks.size(), true,
1231-
/*IsReinject*/ true);
1232-
1233-
Self.Tok = Toks.front();
1234-
}
1235-
}
1236-
1237-
private:
1238-
Parser &Self;
1239-
CachedTokens Toks;
1240-
tok::TokenKind EndKind;
1241-
};
1242-
12431208
/// ConsumeAndStoreInitializer - Consume and store the token at the passed token
12441209
/// container until the end of the current initializer expression (either a
12451210
/// default argument or an in-class initializer for a non-static data member).
@@ -1277,9 +1242,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
12771242
// syntactically-valid init-declarator-list, then this comma ends
12781243
// the default initializer.
12791244
{
1280-
UnannotatedTentativeParsingAction PA(*this,
1281-
CIK == CIK_DefaultInitializer
1282-
? tok::semi : tok::r_paren);
1245+
TentativeParsingAction TPA(*this, /*Unannotated=*/true);
12831246
Sema::TentativeAnalysisScope Scope(Actions);
12841247

12851248
TPResult Result = TPResult::Error;
@@ -1307,7 +1270,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
13071270
// Put the token stream back and undo any annotations we performed
13081271
// after the comma. They may reflect a different parse than the one
13091272
// we will actually perform at the end of the class.
1310-
PA.RevertAnnotations();
1273+
TPA.Revert();
13111274

13121275
// If what follows could be a declaration, it is a declaration.
13131276
if (Result != TPResult::False && Result != TPResult::Error)

‎clang/lib/Parse/ParseDecl.cpp

+50-32
Original file line numberDiff line numberDiff line change
@@ -6650,48 +6650,66 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
66506650
(Tok.is(tok::identifier) &&
66516651
(NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) ||
66526652
Tok.is(tok::annot_cxxscope))) {
6653+
TentativeParsingAction TPA(*this, /*Unannotated=*/true);
66536654
bool EnteringContext = D.getContext() == DeclaratorContext::File ||
66546655
D.getContext() == DeclaratorContext::Member;
66556656
CXXScopeSpec SS;
66566657
SS.setTemplateParamLists(D.getTemplateParameterLists());
6657-
ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
6658-
/*ObjectHasErrors=*/false, EnteringContext);
66596658

6660-
if (SS.isNotEmpty()) {
6661-
if (Tok.isNot(tok::star)) {
6662-
// The scope spec really belongs to the direct-declarator.
6663-
if (D.mayHaveIdentifier())
6664-
D.getCXXScopeSpec() = SS;
6665-
else
6666-
AnnotateScopeToken(SS, true);
6659+
if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
6660+
/*ObjectHasErrors=*/false,
6661+
/*EnteringContext=*/false,
6662+
/*MayBePseudoDestructor=*/nullptr,
6663+
/*IsTypename=*/false, /*LastII=*/nullptr,
6664+
/*OnlyNamespace=*/false,
6665+
/*InUsingDeclaration=*/false,
6666+
/*Disambiguation=*/EnteringContext) ||
6667+
6668+
SS.isEmpty() || SS.isInvalid() || !EnteringContext ||
6669+
Tok.is(tok::star)) {
6670+
TPA.Commit();
6671+
if (SS.isNotEmpty() && Tok.is(tok::star)) {
6672+
if (SS.isValid()) {
6673+
checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
6674+
CompoundToken::MemberPtr);
6675+
}
66676676

6668-
if (DirectDeclParser)
6669-
(this->*DirectDeclParser)(D);
6677+
SourceLocation StarLoc = ConsumeToken();
6678+
D.SetRangeEnd(StarLoc);
6679+
DeclSpec DS(AttrFactory);
6680+
ParseTypeQualifierListOpt(DS);
6681+
D.ExtendWithDeclSpec(DS);
6682+
6683+
// Recurse to parse whatever is left.
6684+
Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
6685+
ParseDeclaratorInternal(D, DirectDeclParser);
6686+
});
6687+
6688+
// Sema will have to catch (syntactically invalid) pointers into global
6689+
// scope. It has to catch pointers into namespace scope anyway.
6690+
D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
6691+
SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
6692+
std::move(DS.getAttributes()),
6693+
/*EndLoc=*/SourceLocation());
66706694
return;
66716695
}
6696+
} else {
6697+
TPA.Revert();
6698+
SS.clear();
6699+
ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
6700+
/*ObjectHasErrors=*/false,
6701+
/*EnteringContext=*/true);
6702+
}
66726703

6673-
if (SS.isValid()) {
6674-
checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
6675-
CompoundToken::MemberPtr);
6676-
}
6704+
if (SS.isNotEmpty()) {
6705+
// The scope spec really belongs to the direct-declarator.
6706+
if (D.mayHaveIdentifier())
6707+
D.getCXXScopeSpec() = SS;
6708+
else
6709+
AnnotateScopeToken(SS, true);
66776710

6678-
SourceLocation StarLoc = ConsumeToken();
6679-
D.SetRangeEnd(StarLoc);
6680-
DeclSpec DS(AttrFactory);
6681-
ParseTypeQualifierListOpt(DS);
6682-
D.ExtendWithDeclSpec(DS);
6683-
6684-
// Recurse to parse whatever is left.
6685-
Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
6686-
ParseDeclaratorInternal(D, DirectDeclParser);
6687-
});
6688-
6689-
// Sema will have to catch (syntactically invalid) pointers into global
6690-
// scope. It has to catch pointers into namespace scope anyway.
6691-
D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
6692-
SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
6693-
std::move(DS.getAttributes()),
6694-
/* Don't replace range end. */ SourceLocation());
6711+
if (DirectDeclParser)
6712+
(this->*DirectDeclParser)(D);
66956713
return;
66966714
}
66976715
}

0 commit comments

Comments
 (0)
Please sign in to comment.