From 57106e4a46648421de7b20c50483b21b58f48809 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 10 Apr 2024 16:44:10 -0700 Subject: [PATCH 1/3] Rename proc_macro::Literal tests from parse.rs to literal.rs This module contains tests not just of parse (FromStr) but also to_string (Display) for literals. --- tests/ui/proc-macro/auxiliary/api/{parse.rs => literal.rs} | 0 tests/ui/proc-macro/auxiliary/api/mod.rs | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename tests/ui/proc-macro/auxiliary/api/{parse.rs => literal.rs} (100%) diff --git a/tests/ui/proc-macro/auxiliary/api/parse.rs b/tests/ui/proc-macro/auxiliary/api/literal.rs similarity index 100% rename from tests/ui/proc-macro/auxiliary/api/parse.rs rename to tests/ui/proc-macro/auxiliary/api/literal.rs diff --git a/tests/ui/proc-macro/auxiliary/api/mod.rs b/tests/ui/proc-macro/auxiliary/api/mod.rs index 45ef6922d2834..e0a381cb6c1a7 100644 --- a/tests/ui/proc-macro/auxiliary/api/mod.rs +++ b/tests/ui/proc-macro/auxiliary/api/mod.rs @@ -10,7 +10,7 @@ extern crate proc_macro; mod cmp; -mod parse; +mod literal; use proc_macro::TokenStream; @@ -19,7 +19,7 @@ pub fn run(input: TokenStream) -> TokenStream { assert!(input.is_empty()); cmp::test(); - parse::test(); + literal::test(); TokenStream::new() } From 2cc02849059a7efb7fd5f7a726a31a38ee732c1e Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 10 Apr 2024 17:52:30 -0700 Subject: [PATCH 2/3] Add more Literal::to_string tests --- tests/ui/proc-macro/auxiliary/api/literal.rs | 47 +++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/tests/ui/proc-macro/auxiliary/api/literal.rs b/tests/ui/proc-macro/auxiliary/api/literal.rs index 801c616c80404..a3519f5791a81 100644 --- a/tests/ui/proc-macro/auxiliary/api/literal.rs +++ b/tests/ui/proc-macro/auxiliary/api/literal.rs @@ -19,17 +19,42 @@ fn test_display_literal() { "10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0", ); 
- assert_eq!(Literal::string("a \t ❤ ' \" \u{1}").to_string(), "\"a \\t ❤ ' \\\" \\u{1}\"",); - assert_eq!(Literal::c_string(c"\'\"\x7f\u{7fff}").to_string(), r#"c"\'\"\x7f\xe7\xbf\xbf""#); - assert_eq!(Literal::character('a').to_string(), "'a'"); - assert_eq!(Literal::character('\t').to_string(), "'\\t'"); - assert_eq!(Literal::character('❤').to_string(), "'❤'"); - assert_eq!(Literal::character('\'').to_string(), "'\\''"); - assert_eq!(Literal::character('"').to_string(), "'\"'"); - assert_eq!(Literal::character('\u{1}').to_string(), "'\\u{1}'"); - - assert_eq!(Literal::byte_character(b'a').to_string(), "b'a'"); - assert_eq!(Literal::byte_character(0).to_string(), "b'\\x00'"); + assert_eq!(Literal::string("aA").to_string(), r#" "aA" "#.trim()); + assert_eq!(Literal::string("\t").to_string(), r#" "\t" "#.trim()); + assert_eq!(Literal::string("❤").to_string(), r#" "❤" "#.trim()); + assert_eq!(Literal::string("'").to_string(), r#" "'" "#.trim()); + assert_eq!(Literal::string("\"").to_string(), r#" "\"" "#.trim()); + assert_eq!(Literal::string("\0").to_string(), r#" "\0" "#.trim()); + assert_eq!(Literal::string("\u{1}").to_string(), r#" "\u{1}" "#.trim()); + + assert_eq!(Literal::byte_string(b"aA").to_string(), r#" b"aA" "#.trim()); + assert_eq!(Literal::byte_string(b"\t").to_string(), r#" b"\t" "#.trim()); + assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"\'" "#.trim()); + assert_eq!(Literal::byte_string(b"\"").to_string(), r#" b"\"" "#.trim()); + assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\x00" "#.trim()); + assert_eq!(Literal::byte_string(b"\x01").to_string(), r#" b"\x01" "#.trim()); + + assert_eq!(Literal::c_string(c"aA").to_string(), r#" c"aA" "#.trim()); + assert_eq!(Literal::c_string(c"\t").to_string(), r#" c"\t" "#.trim()); + assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"\xe2\x9d\xa4" "#.trim()); + assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"\'" "#.trim()); + assert_eq!(Literal::c_string(c"\"").to_string(), r#" c"\"" 
"#.trim()); + assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\x7f\xff\xfe\xcc\xb3" "#.trim()); + + assert_eq!(Literal::character('a').to_string(), r#" 'a' "#.trim()); + assert_eq!(Literal::character('\t').to_string(), r#" '\t' "#.trim()); + assert_eq!(Literal::character('❤').to_string(), r#" '❤' "#.trim()); + assert_eq!(Literal::character('\'').to_string(), r#" '\'' "#.trim()); + assert_eq!(Literal::character('"').to_string(), r#" '"' "#.trim()); + assert_eq!(Literal::character('\0').to_string(), r#" '\0' "#.trim()); + assert_eq!(Literal::character('\u{1}').to_string(), r#" '\u{1}' "#.trim()); + + assert_eq!(Literal::byte_character(b'a').to_string(), r#" b'a' "#.trim()); + assert_eq!(Literal::byte_character(b'\t').to_string(), r#" b'\t' "#.trim()); + assert_eq!(Literal::byte_character(b'\'').to_string(), r#" b'\'' "#.trim()); + assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'\"' "#.trim()); + assert_eq!(Literal::byte_character(0).to_string(), r#" b'\x00' "#.trim()); + assert_eq!(Literal::byte_character(1).to_string(), r#" b'\x01' "#.trim()); } fn test_parse_literal() { From 7ddc89e893ebb6c60af4fe92c439c4a60c9118dd Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 10 Apr 2024 20:55:59 -0700 Subject: [PATCH 3/3] Remove superfluous escaping from byte, byte str, and c str literals --- library/proc_macro/src/escape.rs | 57 ++++++++++++++++++++ library/proc_macro/src/lib.rs | 51 +++++++++++++----- tests/ui/proc-macro/auxiliary/api/literal.rs | 14 ++--- 3 files changed, 101 insertions(+), 21 deletions(-) create mode 100644 library/proc_macro/src/escape.rs diff --git a/library/proc_macro/src/escape.rs b/library/proc_macro/src/escape.rs new file mode 100644 index 0000000000000..87a4d1d50fd48 --- /dev/null +++ b/library/proc_macro/src/escape.rs @@ -0,0 +1,57 @@ +#[derive(Copy, Clone)] +pub(crate) struct EscapeOptions { + /// Produce \'. + pub escape_single_quote: bool, + /// Produce \". 
+ pub escape_double_quote: bool, + /// Produce \x escapes for non-ASCII, and use \x rather than \u for ASCII + /// control characters. + pub escape_nonascii: bool, +} + +pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String { + let mut repr = String::new(); + + if opt.escape_nonascii { + for &byte in bytes { + escape_single_byte(byte, opt, &mut repr); + } + } else { + let mut chunks = bytes.utf8_chunks(); + while let Some(chunk) = chunks.next() { + for ch in chunk.valid().chars() { + escape_single_char(ch, opt, &mut repr); + } + for &byte in chunk.invalid() { + escape_single_byte(byte, opt, &mut repr); + } + } + } + + repr +} + +fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) { + if byte == b'\0' { + repr.push_str("\\0"); + } else if (byte == b'\'' && !opt.escape_single_quote) + || (byte == b'"' && !opt.escape_double_quote) + { + repr.push(byte as char); + } else { + // Escapes \t, \r, \n, \\, \', \", and uses \x## for non-ASCII and + // for ASCII control characters. + repr.extend(byte.escape_ascii().map(char::from)); + } +} + +fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) { + if (ch == '\'' && !opt.escape_single_quote) || (ch == '"' && !opt.escape_double_quote) { + repr.push(ch); + } else { + // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for + // non-printable characters and for Grapheme_Extend characters, which + // includes things like U+0300 "Combining Grave Accent". 
+ repr.extend(ch.escape_debug()); + } +} diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 3d7d36b27e53b..581d7e3efe373 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -43,10 +43,12 @@ pub mod bridge; mod diagnostic; +mod escape; #[unstable(feature = "proc_macro_diagnostic", issue = "54140")] pub use diagnostic::{Diagnostic, Level, MultiSpan}; +use crate::escape::{escape_bytes, EscapeOptions}; use std::ffi::CStr; use std::ops::{Range, RangeBounds}; use std::path::PathBuf; @@ -1356,40 +1358,61 @@ impl Literal { /// String literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn string(string: &str) -> Literal { - let quoted = format!("{:?}", string); - assert!(quoted.starts_with('"') && quoted.ends_with('"')); - let symbol = &quoted[1..quoted.len() - 1]; - Literal::new(bridge::LitKind::Str, symbol, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: false, + }; + let repr = escape_bytes(string.as_bytes(), escape); + Literal::new(bridge::LitKind::Str, &repr, None) } /// Character literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn character(ch: char) -> Literal { - let quoted = format!("{:?}", ch); - assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); - let symbol = &quoted[1..quoted.len() - 1]; - Literal::new(bridge::LitKind::Char, symbol, None) + let escape = EscapeOptions { + escape_single_quote: true, + escape_double_quote: false, + escape_nonascii: false, + }; + let repr = escape_bytes(ch.encode_utf8(&mut [0u8; 4]).as_bytes(), escape); + Literal::new(bridge::LitKind::Char, &repr, None) } /// Byte character literal. 
#[stable(feature = "proc_macro_byte_character", since = "1.79.0")] pub fn byte_character(byte: u8) -> Literal { - let string = [byte].escape_ascii().to_string(); - Literal::new(bridge::LitKind::Byte, &string, None) + let escape = EscapeOptions { + escape_single_quote: true, + escape_double_quote: false, + escape_nonascii: true, + }; + let repr = escape_bytes(&[byte], escape); + Literal::new(bridge::LitKind::Byte, &repr, None) } /// Byte string literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn byte_string(bytes: &[u8]) -> Literal { - let string = bytes.escape_ascii().to_string(); - Literal::new(bridge::LitKind::ByteStr, &string, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: true, + }; + let repr = escape_bytes(bytes, escape); + Literal::new(bridge::LitKind::ByteStr, &repr, None) } /// C string literal. #[stable(feature = "proc_macro_c_str_literals", since = "1.79.0")] pub fn c_string(string: &CStr) -> Literal { - let string = string.to_bytes().escape_ascii().to_string(); - Literal::new(bridge::LitKind::CStr, &string, None) + let escape = EscapeOptions { + escape_single_quote: false, + escape_double_quote: true, + escape_nonascii: false, + }; + let repr = escape_bytes(string.to_bytes(), escape); + Literal::new(bridge::LitKind::CStr, &repr, None) } /// Returns the span encompassing this literal. 
diff --git a/tests/ui/proc-macro/auxiliary/api/literal.rs b/tests/ui/proc-macro/auxiliary/api/literal.rs index a3519f5791a81..7109340bb645b 100644 --- a/tests/ui/proc-macro/auxiliary/api/literal.rs +++ b/tests/ui/proc-macro/auxiliary/api/literal.rs @@ -29,17 +29,17 @@ fn test_display_literal() { assert_eq!(Literal::byte_string(b"aA").to_string(), r#" b"aA" "#.trim()); assert_eq!(Literal::byte_string(b"\t").to_string(), r#" b"\t" "#.trim()); - assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"\'" "#.trim()); + assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"'" "#.trim()); assert_eq!(Literal::byte_string(b"\"").to_string(), r#" b"\"" "#.trim()); - assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\x00" "#.trim()); + assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\0" "#.trim()); assert_eq!(Literal::byte_string(b"\x01").to_string(), r#" b"\x01" "#.trim()); assert_eq!(Literal::c_string(c"aA").to_string(), r#" c"aA" "#.trim()); assert_eq!(Literal::c_string(c"\t").to_string(), r#" c"\t" "#.trim()); - assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"\xe2\x9d\xa4" "#.trim()); - assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"\'" "#.trim()); + assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"❤" "#.trim()); + assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"'" "#.trim()); assert_eq!(Literal::c_string(c"\"").to_string(), r#" c"\"" "#.trim()); - assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\x7f\xff\xfe\xcc\xb3" "#.trim()); + assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\u{7f}\xff\xfe\u{333}" "#.trim()); assert_eq!(Literal::character('a').to_string(), r#" 'a' "#.trim()); assert_eq!(Literal::character('\t').to_string(), r#" '\t' "#.trim()); @@ -52,8 +52,8 @@ fn test_display_literal() { assert_eq!(Literal::byte_character(b'a').to_string(), r#" b'a' "#.trim()); assert_eq!(Literal::byte_character(b'\t').to_string(), r#" b'\t' "#.trim()); 
assert_eq!(Literal::byte_character(b'\'').to_string(), r#" b'\'' "#.trim()); - assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'\"' "#.trim()); - assert_eq!(Literal::byte_character(0).to_string(), r#" b'\x00' "#.trim()); + assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'"' "#.trim()); + assert_eq!(Literal::byte_character(0).to_string(), r#" b'\0' "#.trim()); assert_eq!(Literal::byte_character(1).to_string(), r#" b'\x01' "#.trim()); }