Skip to content

refactor/feat: refactor identifier parsing a bit #109203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 23, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
@@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re
parse_expected_identifier_found_doc_comment = expected identifier, found doc comment
parse_expected_identifier = expected identifier

parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier

parse_sugg_remove_comma = remove this comma

11 changes: 7 additions & 4 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
@@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem {

#[derive(Subdiagnostic)]
#[suggestion(
parse_sugg_escape_to_use_as_identifier,
parse_sugg_escape_identifier,
style = "verbose",
applicability = "maybe-incorrect",
code = "r#"
)]
pub(crate) struct SuggEscapeToUseAsIdentifier {
pub(crate) struct SuggEscapeIdentifier {
#[primary_span]
pub span: Span,
pub ident_name: String,
@@ -937,7 +937,7 @@ impl ExpectedIdentifierFound {
pub(crate) struct ExpectedIdentifier {
pub span: Span,
pub token: Token,
pub suggest_raw: Option<SuggEscapeToUseAsIdentifier>,
pub suggest_raw: Option<SuggEscapeIdentifier>,
pub suggest_remove_comma: Option<SuggRemoveComma>,
pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>,
}
@@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier {

#[derive(Subdiagnostic)]
#[help(parse_invalid_identifier_with_leading_number)]
pub(crate) struct HelpIdentifierStartsWithNumber;
pub(crate) struct HelpIdentifierStartsWithNumber {
#[primary_span]
pub num_span: Span,
}

pub(crate) struct ExpectedSemi {
pub span: Span,
123 changes: 89 additions & 34 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
@@ -6,14 +6,14 @@ use super::{
use crate::errors::{
AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub,
ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType,
DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything,
DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg,
HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon,
IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg,
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens,
StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma,
StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma,
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead,
};
@@ -38,7 +38,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{Span, SpanSnippetError, DUMMY_SP};
use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP};
use std::mem::take;
use std::ops::{Deref, DerefMut};
use thin_vec::{thin_vec, ThinVec};
@@ -268,7 +268,21 @@ impl<'a> Parser<'a> {
self.sess.source_map().span_to_snippet(span)
}

pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
/// Emits an error with suggestions if an identifier was expected but not found.
///
/// Returns a possibly recovered identifier.
pub(super) fn expected_ident_found(
&mut self,
recover: bool,
) -> PResult<'a, (Ident, /* is_raw */ bool)> {
if let TokenKind::DocComment(..) = self.prev_token.kind {
return Err(DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic));
}

let valid_follow = &[
TokenKind::Eq,
TokenKind::Colon,
@@ -281,31 +295,51 @@ impl<'a> Parser<'a> {
TokenKind::CloseDelim(Delimiter::Parenthesis),
];

let suggest_raw = match self.token.ident() {
Some((ident, false))
if ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind)) =>
{
Some(SuggEscapeToUseAsIdentifier {
span: ident.span.shrink_to_lo(),
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
ident_name: ident.name.to_string(),
})
}
_ => None,
};
let mut recovered_ident = None;
// we take this here so that the correct original token is retained in
// the diagnostic, regardless of eager recovery.
let bad_token = self.token.clone();

// suggest prepending a keyword in identifier position with `r#`
let suggest_raw = if let Some((ident, false)) = self.token.ident()
&& ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
{
recovered_ident = Some((ident, true));

// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
let ident_name = ident.name.to_string();

Some(SuggEscapeIdentifier {
span: ident.span.shrink_to_lo(),
ident_name
})
} else { None };

let suggest_remove_comma =
if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
if recover {
self.bump();
recovered_ident = self.ident_or_err(false).ok();
};

let suggest_remove_comma = (self.token == token::Comma
&& self.look_ahead(1, |t| t.is_ident()))
.then_some(SuggRemoveComma { span: self.token.span });
Some(SuggRemoveComma { span: bad_token.span })
} else {
None
};

let help_cannot_start_number =
self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber);
let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| {
let (invalid, valid) = self.token.span.split_at(len as u32);

recovered_ident = Some((Ident::new(valid_portion, valid), false));

HelpIdentifierStartsWithNumber { num_span: invalid }
});

let err = ExpectedIdentifier {
span: self.token.span,
token: self.token.clone(),
span: bad_token.span,
token: bad_token,
suggest_raw,
suggest_remove_comma,
help_cannot_start_number,
@@ -314,6 +348,7 @@ impl<'a> Parser<'a> {

// if the token we have is a `<`
// it *might* be a misplaced generic
// FIXME: could we recover with this?
if self.token == token::Lt {
// all keywords that could have generic applied
let valid_prev_keywords =
@@ -364,18 +399,38 @@ impl<'a> Parser<'a> {
}
}

err
if let Some(recovered_ident) = recovered_ident && recover {
err.emit();
Ok(recovered_ident)
} else {
Err(err)
}
}

pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
self.expected_ident_found(false).unwrap_err()
}

/// Checks if the current token is a integer or float literal and looks like
/// it could be a invalid identifier with digits at the start.
pub(super) fn is_lit_bad_ident(&mut self) -> bool {
matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. })
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if rustc_ast::MetaItemLit::from_token(&self.token).is_none()
)
///
/// Returns the number of characters (bytes) composing the invalid portion
/// of the identifier and the valid portion of the identifier.
pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> {
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if let token::Literal(Lit {
kind: token::LitKind::Integer | token::LitKind::Float,
symbol,
suffix,
}) = self.token.kind
&& rustc_ast::MetaItemLit::from_token(&self.token).is_none()
{
Some((symbol.as_str().len(), suffix.unwrap()))
Comment on lines +426 to +430
Copy link
Member

@compiler-errors compiler-errors Apr 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: #110014

-             suffix,
+             suffix: Some(suffix),
         }) = self.token.kind
             && rustc_ast::MetaItemLit::from_token(&self.token).is_none()
         {
-            Some((symbol.as_str().len(), suffix.unwrap()))
+            Some((symbol.as_str().len(), suffix))

Copy link
Contributor Author

@Ezrashaw Ezrashaw Apr 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you like me to PR this? Done.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice if you could put up the fix, yes :)

} else {
None
}
}

pub(super) fn expected_one_of_not_found(
10 changes: 5 additions & 5 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
@@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> {
defaultness: Defaultness,
) -> PResult<'a, ItemInfo> {
let impl_span = self.token.span;
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();

// Only try to recover if this is implementing a trait for a type
let mut impl_info = match self.parse_item_impl(attrs, defaultness) {
@@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> {
/// Parses a field identifier. Specialized version of `parse_ident_common`
/// for better diagnostics and suggestions.
fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(true)?;
if !is_raw && ident.is_reserved() {
let snapshot = self.create_snapshot_for_diagnostic();
let err = if self.check_fn_front_matter(false, Case::Sensitive) {
@@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else if self.eat_keyword(kw::Struct) {
@@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if self.eat_keyword_noexpect(kw::Let)
&& let removal_span = self.prev_token.span.until(self.token.span)
&& let Ok(ident) = self.parse_ident_common(false)
34 changes: 19 additions & 15 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -42,8 +42,7 @@ use thin_vec::ThinVec;
use tracing::debug;

use crate::errors::{
DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter,
NonStringAbiLiteral,
IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
};

bitflags::bitflags! {
@@ -552,21 +551,11 @@ impl<'a> Parser<'a> {
self.parse_ident_common(true)
}

fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> {
self.token.ident().ok_or_else(|| match self.prev_token.kind {
TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic),
_ => self.expected_ident_found(),
})
}

fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(recover)?;

if !is_raw && ident.is_reserved() {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if recover {
err.emit();
} else {
@@ -577,6 +566,21 @@ impl<'a> Parser<'a> {
Ok(ident)
}

fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> {
let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover));

let (ident, is_raw) = match result {
Ok(ident) => ident,
Err(err) => match err {
// we recovered!
Ok(ident) => ident,
Err(err) => return Err(err),
},
};

Ok((ident, is_raw))
}

/// Checks if the next token is `tok`, and returns `true` if so.
///
/// This method will automatically add `tok` to `expected_tokens` if `tok` is not
14 changes: 8 additions & 6 deletions compiler/rustc_parse/src/parser/pat.rs
Original file line number Diff line number Diff line change
@@ -348,10 +348,6 @@ impl<'a> Parser<'a> {
lo = self.token.span;
}

if self.is_lit_bad_ident() {
return Err(self.expected_ident_found());
}

let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd {
self.parse_pat_deref(expected)?
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
@@ -395,7 +391,13 @@ impl<'a> Parser<'a> {
} else {
PatKind::Lit(const_expr)
}
} else if self.can_be_ident_pat() {
// Don't eagerly error on semantically invalid tokens when matching
// declarative macros, as the input to those doesn't have to be
// semantically valid. For attribute/derive proc macros this is not the
// case, so doing the recovery for them is fine.
} else if self.can_be_ident_pat()
|| (self.is_lit_bad_ident().is_some() && self.may_recover())
{
// Parse `ident @ pat`
// This can give false positives and parse nullary enums,
// they are dealt with later in resolve.
@@ -594,7 +596,7 @@ impl<'a> Parser<'a> {
// Make sure we don't allow e.g. `let mut $p;` where `$p:pat`.
if let token::Interpolated(nt) = &self.token.kind {
if let token::NtPat(_) = **nt {
self.expected_ident_found().emit();
self.expected_ident_found_err().emit();
}
}

12 changes: 12 additions & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
@@ -795,6 +795,18 @@ impl Span {
})
}

/// Splits a span into two composite spans around a certain position.
pub fn split_at(self, pos: u32) -> (Span, Span) {
let len = self.hi().0 - self.lo().0;
debug_assert!(pos <= len);

let split_pos = BytePos(self.lo().0 + pos);
(
Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
)
}

/// Returns a `Span` that would enclose both `self` and `end`.
///
/// Note that this can also be used to extend the span "backwards":
16 changes: 16 additions & 0 deletions tests/ui/parser/ident-recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
fn ,comma() {
//~^ ERROR expected identifier, found `,`
struct Foo {
x: i32,,
//~^ ERROR expected identifier, found `,`
y: u32,
}
}

fn break() {
//~^ ERROR expected identifier, found keyword `break`
let continue = 5;
//~^ ERROR expected identifier, found keyword `continue`
}

fn main() {}
Loading