-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][Parse] Fix ambiguity with nested-name-specifiers that may declarative #96364
Conversation
@llvm/pr-subscribers-clang Author: Krystian Stasiowski (sdkrystian) ChangesConsider the following: template<typename T>
struct A { };
template<typename T>
int A<T>::B::* f(); // error: no member named 'B' in 'A<T>' Although this is clearly valid, clang rejects it because the nested-name-specifier I don't know where the approach taken here is ideal -- alternatives are welcome. However, the performance impact (as measured by llvm-compile-time-tracker) is quite minimal (0.09%, which I plan to further improve). Patch is 20.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96364.diff 8 Files Affected:
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 9d8a1aae23df3..838857b6b8833 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1150,6 +1150,9 @@ class Preprocessor {
/// invoked (at which point the last position is popped).
std::vector<CachedTokensTy::size_type> BacktrackPositions;
+ std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
+ UnannotatedBacktrackPositions;
+
/// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
/// This is used to guard against calling this function recursively.
///
@@ -1712,7 +1715,7 @@ class Preprocessor {
/// at some point after EnableBacktrackAtThisPos. If you don't, caching of
/// tokens will continue indefinitely.
///
- void EnableBacktrackAtThisPos();
+ void EnableBacktrackAtThisPos(bool Unannotated = false);
/// Disable the last EnableBacktrackAtThisPos call.
void CommitBacktrackedTokens();
@@ -1723,7 +1726,11 @@ class Preprocessor {
/// True if EnableBacktrackAtThisPos() was called and
/// caching of tokens is on.
+
bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
+ bool isUnannotatedBacktrackEnabled() const {
+ return !UnannotatedBacktrackPositions.empty();
+ }
/// Lex the next token for this preprocessor.
void Lex(Token &Result);
@@ -1827,8 +1834,9 @@ class Preprocessor {
void RevertCachedTokens(unsigned N) {
assert(isBacktrackEnabled() &&
"Should only be called when tokens are cached for backtracking");
- assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
- && "Should revert tokens up to the last backtrack position, not more");
+ assert(signed(CachedLexPos) - signed(N) >=
+ signed(BacktrackPositions.back() >> 1) &&
+ "Should revert tokens up to the last backtrack position, not more");
assert(signed(CachedLexPos) - signed(N) >= 0 &&
"Corrupted backtrack positions ?");
CachedLexPos -= N;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index d054b8cf0d240..fc0bae6bdec2b 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -1033,7 +1033,7 @@ class Parser : public CodeCompletionHandler {
bool isActive;
public:
- explicit TentativeParsingAction(Parser &p)
+ explicit TentativeParsingAction(Parser &p, bool Unannotated = false)
: P(p), PrevPreferredType(P.PreferredType) {
PrevTok = P.Tok;
PrevTentativelyDeclaredIdentifierCount =
@@ -1041,7 +1041,7 @@ class Parser : public CodeCompletionHandler {
PrevParenCount = P.ParenCount;
PrevBracketCount = P.BracketCount;
PrevBraceCount = P.BraceCount;
- P.PP.EnableBacktrackAtThisPos();
+ P.PP.EnableBacktrackAtThisPos(Unannotated);
isActive = true;
}
void Commit() {
@@ -1072,13 +1072,11 @@ class Parser : public CodeCompletionHandler {
class RevertingTentativeParsingAction
: private Parser::TentativeParsingAction {
public:
- RevertingTentativeParsingAction(Parser &P)
- : Parser::TentativeParsingAction(P) {}
+ using TentativeParsingAction::TentativeParsingAction;
+
~RevertingTentativeParsingAction() { Revert(); }
};
- class UnannotatedTentativeParsingAction;
-
/// ObjCDeclContextSwitch - An object used to switch context from
/// an objective-c decl context to its enclosing decl context and
/// back.
@@ -1979,7 +1977,8 @@ class Parser : public CodeCompletionHandler {
CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHasErrors,
bool EnteringContext, bool *MayBePseudoDestructor = nullptr,
bool IsTypename = false, const IdentifierInfo **LastII = nullptr,
- bool OnlyNamespace = false, bool InUsingDeclaration = false);
+ bool OnlyNamespace = false, bool InUsingDeclaration = false,
+ bool Disambiguation = false);
//===--------------------------------------------------------------------===//
// C++11 5.1.2: Lambda expressions
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
index f38ff62ebf437..bc52bfb237e5c 100644
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -22,9 +22,12 @@ using namespace clang;
// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
// be combined with the EnableBacktrackAtThisPos calls in reverse order.
-void Preprocessor::EnableBacktrackAtThisPos() {
+void Preprocessor::EnableBacktrackAtThisPos(bool Unannotated) {
assert(LexLevel == 0 && "cannot use lookahead while lexing");
- BacktrackPositions.push_back(CachedLexPos);
+ BacktrackPositions.push_back((CachedLexPos << 1) | Unannotated);
+ if (Unannotated)
+ UnannotatedBacktrackPositions.emplace_back(CachedTokens,
+ CachedTokens.size());
EnterCachingLexMode();
}
@@ -32,7 +35,18 @@ void Preprocessor::EnableBacktrackAtThisPos() {
void Preprocessor::CommitBacktrackedTokens() {
assert(!BacktrackPositions.empty()
&& "EnableBacktrackAtThisPos was not called!");
+ auto BacktrackPos = BacktrackPositions.back();
BacktrackPositions.pop_back();
+ if (BacktrackPos & 1) {
+ assert(!UnannotatedBacktrackPositions.empty() &&
+ "missing unannotated tokens?");
+ auto [UnannotatedTokens, NumCachedToks] =
+ std::move(UnannotatedBacktrackPositions.back());
+ if (!UnannotatedBacktrackPositions.empty())
+ UnannotatedBacktrackPositions.back().first.append(
+ UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end());
+ UnannotatedBacktrackPositions.pop_back();
+ }
}
// Make Preprocessor re-lex the tokens that were lexed since
@@ -40,8 +54,20 @@ void Preprocessor::CommitBacktrackedTokens() {
void Preprocessor::Backtrack() {
assert(!BacktrackPositions.empty()
&& "EnableBacktrackAtThisPos was not called!");
- CachedLexPos = BacktrackPositions.back();
+ auto BacktrackPos = BacktrackPositions.back();
BacktrackPositions.pop_back();
+ CachedLexPos = BacktrackPos >> 1;
+ if (BacktrackPos & 1) {
+ assert(!UnannotatedBacktrackPositions.empty() &&
+ "missing unannotated tokens?");
+ auto [UnannotatedTokens, NumCachedToks] =
+ std::move(UnannotatedBacktrackPositions.back());
+ UnannotatedBacktrackPositions.pop_back();
+ if (!UnannotatedBacktrackPositions.empty())
+ UnannotatedBacktrackPositions.back().first.append(
+ UnannotatedTokens.begin() + NumCachedToks, UnannotatedTokens.end());
+ CachedTokens = std::move(UnannotatedTokens);
+ }
recomputeCurLexerKind();
}
@@ -67,6 +93,8 @@ void Preprocessor::CachingLex(Token &Result) {
EnterCachingLexModeUnchecked();
CachedTokens.push_back(Result);
++CachedLexPos;
+ if (isUnannotatedBacktrackEnabled())
+ UnannotatedBacktrackPositions.back().first.push_back(Result);
return;
}
@@ -108,6 +136,8 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
for (size_t C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
CachedTokens.push_back(Token());
Lex(CachedTokens.back());
+ if (isUnannotatedBacktrackEnabled())
+ UnannotatedBacktrackPositions.back().first.push_back(CachedTokens.back());
}
EnterCachingLexMode();
return CachedTokens.back();
@@ -124,7 +154,8 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
for (CachedTokensTy::size_type i = CachedLexPos; i != 0; --i) {
CachedTokensTy::iterator AnnotBegin = CachedTokens.begin() + i-1;
if (AnnotBegin->getLocation() == Tok.getLocation()) {
- assert((BacktrackPositions.empty() || BacktrackPositions.back() <= i) &&
+ assert((BacktrackPositions.empty() ||
+ (BacktrackPositions.back() >> 1) <= i) &&
"The backtrack pos points inside the annotated tokens!");
// Replace the cached tokens with the single annotation token.
if (i < CachedLexPos)
diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 943ce0fdde3a3..86a89650e2583 100644
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -1188,41 +1188,6 @@ bool Parser::ConsumeAndStoreConditional(CachedTokens &Toks) {
return true;
}
-/// A tentative parsing action that can also revert token annotations.
-class Parser::UnannotatedTentativeParsingAction : public TentativeParsingAction {
-public:
- explicit UnannotatedTentativeParsingAction(Parser &Self,
- tok::TokenKind EndKind)
- : TentativeParsingAction(Self), Self(Self), EndKind(EndKind) {
- // Stash away the old token stream, so we can restore it once the
- // tentative parse is complete.
- TentativeParsingAction Inner(Self);
- Self.ConsumeAndStoreUntil(EndKind, Toks, true, /*ConsumeFinalToken*/false);
- Inner.Revert();
- }
-
- void RevertAnnotations() {
- Revert();
-
- // Put back the original tokens.
- Self.SkipUntil(EndKind, StopAtSemi | StopBeforeMatch);
- if (Toks.size()) {
- auto Buffer = std::make_unique<Token[]>(Toks.size());
- std::copy(Toks.begin() + 1, Toks.end(), Buffer.get());
- Buffer[Toks.size() - 1] = Self.Tok;
- Self.PP.EnterTokenStream(std::move(Buffer), Toks.size(), true,
- /*IsReinject*/ true);
-
- Self.Tok = Toks.front();
- }
- }
-
-private:
- Parser &Self;
- CachedTokens Toks;
- tok::TokenKind EndKind;
-};
-
/// ConsumeAndStoreInitializer - Consume and store the token at the passed token
/// container until the end of the current initializer expression (either a
/// default argument or an in-class initializer for a non-static data member).
@@ -1260,9 +1225,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
// syntactically-valid init-declarator-list, then this comma ends
// the default initializer.
{
- UnannotatedTentativeParsingAction PA(*this,
- CIK == CIK_DefaultInitializer
- ? tok::semi : tok::r_paren);
+ TentativeParsingAction TPA(*this, /*Unannotated=*/true);
Sema::TentativeAnalysisScope Scope(Actions);
TPResult Result = TPResult::Error;
@@ -1290,7 +1253,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
// Put the token stream back and undo any annotations we performed
// after the comma. They may reflect a different parse than the one
// we will actually perform at the end of the class.
- PA.RevertAnnotations();
+ TPA.Revert();
// If what follows could be a declaration, it is a declaration.
if (Result != TPResult::False && Result != TPResult::Error)
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index c528917437332..cad40a120a7b4 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -6615,48 +6615,67 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
(Tok.is(tok::identifier) &&
(NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) ||
Tok.is(tok::annot_cxxscope))) {
+ TentativeParsingAction TPA(*this, /*Unannotated=*/true);
bool EnteringContext = D.getContext() == DeclaratorContext::File ||
D.getContext() == DeclaratorContext::Member;
+
CXXScopeSpec SS;
SS.setTemplateParamLists(D.getTemplateParameterLists());
- ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
- /*ObjectHasErrors=*/false, EnteringContext);
- if (SS.isNotEmpty()) {
- if (Tok.isNot(tok::star)) {
- // The scope spec really belongs to the direct-declarator.
- if (D.mayHaveIdentifier())
- D.getCXXScopeSpec() = SS;
- else
- AnnotateScopeToken(SS, true);
+ if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
+ /*ObjectHasErrors=*/false,
+ /*EnteringContext=*/false,
+ /*MayBePseudoDestructor=*/nullptr,
+ /*IsTypename=*/false, /*LastII=*/nullptr,
+ /*OnlyNamespace=*/false,
+ /*InUsingDeclaration=*/false,
+ /*Disambiguation=*/EnteringContext) ||
+
+ SS.isEmpty() || SS.isInvalid() || !EnteringContext ||
+ Tok.is(tok::star)) {
+ TPA.Commit();
+ if (SS.isNotEmpty() && Tok.is(tok::star)) {
+ if (SS.isValid()) {
+ checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
+ CompoundToken::MemberPtr);
+ }
- if (DirectDeclParser)
- (this->*DirectDeclParser)(D);
+ SourceLocation StarLoc = ConsumeToken();
+ D.SetRangeEnd(StarLoc);
+ DeclSpec DS(AttrFactory);
+ ParseTypeQualifierListOpt(DS);
+ D.ExtendWithDeclSpec(DS);
+
+ // Recurse to parse whatever is left.
+ Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
+ ParseDeclaratorInternal(D, DirectDeclParser);
+ });
+
+ // Sema will have to catch (syntactically invalid) pointers into global
+ // scope. It has to catch pointers into namespace scope anyway.
+ D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
+ SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
+ std::move(DS.getAttributes()),
+ /*EndLoc=*/SourceLocation());
return;
}
+ } else {
+ TPA.Revert();
+ SS.clear();
+ ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
+ /*ObjectHasErrors=*/false,
+ /*EnteringContext=*/true);
+ }
- if (SS.isValid()) {
- checkCompoundToken(SS.getEndLoc(), tok::coloncolon,
- CompoundToken::MemberPtr);
- }
+ if (SS.isNotEmpty()) {
+ // The scope spec really belongs to the direct-declarator.
+ if (D.mayHaveIdentifier())
+ D.getCXXScopeSpec() = SS;
+ else
+ AnnotateScopeToken(SS, true);
- SourceLocation StarLoc = ConsumeToken();
- D.SetRangeEnd(StarLoc);
- DeclSpec DS(AttrFactory);
- ParseTypeQualifierListOpt(DS);
- D.ExtendWithDeclSpec(DS);
-
- // Recurse to parse whatever is left.
- Actions.runWithSufficientStackSpace(D.getBeginLoc(), [&] {
- ParseDeclaratorInternal(D, DirectDeclParser);
- });
-
- // Sema will have to catch (syntactically invalid) pointers into global
- // scope. It has to catch pointers into namespace scope anyway.
- D.AddTypeInfo(DeclaratorChunk::getMemberPointer(
- SS, DS.getTypeQualifiers(), StarLoc, DS.getEndLoc()),
- std::move(DS.getAttributes()),
- /* Don't replace range end. */ SourceLocation());
+ if (DirectDeclParser)
+ (this->*DirectDeclParser)(D);
return;
}
}
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 1d364f77a8146..c0eae73927cdd 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -159,8 +159,8 @@ void Parser::CheckForTemplateAndDigraph(Token &Next, ParsedType ObjectType,
bool Parser::ParseOptionalCXXScopeSpecifier(
CXXScopeSpec &SS, ParsedType ObjectType, bool ObjectHadErrors,
bool EnteringContext, bool *MayBePseudoDestructor, bool IsTypename,
- const IdentifierInfo **LastII, bool OnlyNamespace,
- bool InUsingDeclaration) {
+ const IdentifierInfo **LastII, bool OnlyNamespace, bool InUsingDeclaration,
+ bool Disambiguation) {
assert(getLangOpts().CPlusPlus &&
"Call sites of this function should be guarded by checking for C++");
@@ -528,13 +528,11 @@ bool Parser::ParseOptionalCXXScopeSpecifier(
UnqualifiedId TemplateName;
TemplateName.setIdentifier(&II, Tok.getLocation());
bool MemberOfUnknownSpecialization;
- if (TemplateNameKind TNK = Actions.isTemplateName(getCurScope(), SS,
- /*hasTemplateKeyword=*/false,
- TemplateName,
- ObjectType,
- EnteringContext,
- Template,
- MemberOfUnknownSpecialization)) {
+ if (TemplateNameKind TNK = Actions.isTemplateName(
+ getCurScope(), SS,
+ /*hasTemplateKeyword=*/false, TemplateName, ObjectType,
+ EnteringContext, Template, MemberOfUnknownSpecialization,
+ Disambiguation)) {
// If lookup didn't find anything, we treat the name as a template-name
// anyway. C++20 requires this, and in prior language modes it improves
// error recovery. But before we commit to this, check that we actually
@@ -557,7 +555,8 @@ bool Parser::ParseOptionalCXXScopeSpecifier(
continue;
}
- if (MemberOfUnknownSpecialization && (ObjectType || SS.isSet()) &&
+ if (MemberOfUnknownSpecialization && !Disambiguation &&
+ (ObjectType || SS.isSet()) &&
(IsTypename || isTemplateArgumentList(1) == TPResult::True)) {
// If we had errors before, ObjectType can be dependent even without any
// templates. Do not report missing template keyword in that case.
diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp
new file mode 100644
index 0000000000000..a06b107755596
--- /dev/null
+++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.mptr/p2.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+template<typename T>
+struct A0 {
+ struct B0;
+
+ template<typename U>
+ struct C0 {
+ struct D0;
+
+ template<typename V>
+ struct E0;
+ };
+};
+
+template<typename T>
+int A0<T>::B0::* f0();
+
+template<typename T>
+int A0<T>::B1::* f1();
+
+template<typename T>
+int A0<T>::C0<int>::* f2(); // expected-error {{expected unqualified-id}}
+
+template<typename T>
+int A0<T>::C1<int>::* f3(); // expected-error {{no member named 'C1' in 'A0<T>'}}
+ // expected-error@-1 {{expected ';' after top level declarator}}
+
+template<typename T>
+int A0<T>::template C2<int>::* f4();
+
+template<typename T>
+int A0<T>::template C0<int>::D0::* f5();
+
+template<typename T>
+int A0<T>::template C2<int>::D1::* f6();
+
+template<typename T>
+int A0<T>::template C0<int>::E0<int>::* f7(); // expected-error {{use 'template' keyword to treat 'E0' as a dependent template name}}
+ // expected-error@-1 {{expected unqualified-id}}
+
+template<typename T>
+int A0<T>::template C2<int>::E1<int>::* f8(); // expected-error {{no member named 'C2' in 'A0<T>'}}
+
+template<typename T>
+int A0<T>::template C0<int>::template E0<int>::* f9();
+
+template<typename T>
+int A0<T>::template C2<int>::template E1<int>::* f10();
+
+namespace TypoCorrection {
+ template<typename T>
+ struct A {
+ template<typename U>
+ struct Typo; // expected-note {{'Typo' declared here}}
+ };
+
+ template<typename T>
+ int A<T>::template typo<int>::* f();
+
+ template<typename T>
+ int A<T>::typo<int>::* g(); // expected-error {{no template named 'typo' in 'A<T>'; did you mean 'Typo'?}}
+ // expected-error@-1 {{expected unqualified-id}}
+}
diff --git a/clang/test/CXX/temp/temp.res/p3.cpp b/clang/test/CXX/temp/temp.res/p3.cpp
index 37ab93559e369..1eda967523a59 100644
--- a/clang/test/CXX/temp/temp.res/p3.cpp
+++ b/clang/test/CXX/temp/temp.res/p3.cpp
@@ -2,7 +2,7 @@
template<typename T> struct A {
template<typename U> struct B;
- template<typename U> using C = U; // expected-note {{here}}
+ template<typename U> using C = U;
};
struct X {
@@ -20,12 +20,10 @@ template<typename T> A<T>::C<T> f2(); // expected-warning {{miss...
[truncated]
|
I don't have any major objections with the approach taken here, except performance concerns and exploration of further alternatives perhaps. Would it be much larger / more complex change to keep the current UnannotatedTentativeParsingAction, but trap and release the diagnostics instead, until after we have disambiguated? |
@mizvekov My initial implementation used the existing |
96d66dd
to
cb58e0b
Compare
f4f5d9a
to
89bd4c4
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
b07d3f1
to
5ef2c0b
Compare
5ef2c0b
to
2603c38
Compare
I think this is ready to merge. I have a branch which fully implements annotated/unannotated preprocessor backtrack using RAII objects, but I think this is beyond the scope of this PR. |
Consider the following:
Although this is clearly valid, clang rejects it because the nested-name-specifier
A<T>::
is parsed as-if it was declarative, meaning, we parse it as-if it was the nested-name-specifier in a redeclaration/specialization. However, we don't (and can't) know whether the nested-name-specifier is declarative until we see the '*
' token, but at that point we have already complained thatA
has no member namedB
! This patch addresses this bug by adding support for fully unannotated and unbounded tentative parsing, which allows for us to parse past tokens without having to cache them until we reach a point where we can guarantee to be past the construct we are disambiguating.I don't know where the approach taken here is ideal -- alternatives are welcome. However, the performance impact (as measured by llvm-compile-time-tracker) is quite minimal (0.09%, which I plan to further improve).