reimplement C string literals

This commit is contained in:
Deadbeef 2023-07-16 18:59:05 +00:00
parent cec34a43b1
commit df9bd80d74
10 changed files with 48 additions and 89 deletions

View file

@ -24,6 +24,10 @@ pub fn new(input: &'a str) -> Cursor<'a> {
}
}
/// Returns the `&'a str` produced by calling `as_str()` on the `chars` field.
///
/// The returned slice carries the cursor's `'a` lifetime rather than the
/// lifetime of the `&self` borrow, so a caller can keep it after the cursor
/// has been advanced or replaced.
///
/// NOTE(review): presumably this is the not-yet-consumed remainder of the
/// input — confirm against the type of `chars`.
pub fn as_str(&self) -> &'a str {
self.chars.as_str()
}
/// Returns the last eaten symbol (or `'\0'` in release builds).
/// (For debug assertions only.)
pub(crate) fn prev(&self) -> char {

View file

@ -367,6 +367,13 @@ pub fn advance_token(&mut self) -> Token {
Some(|terminated| Byte { terminated }),
),
// c-string literal, raw c-string literal or identifier.
'c' => self.c_or_byte_string(
|terminated| CStr { terminated },
|n_hashes| RawCStr { n_hashes },
None,
),
// Identifier (this should be checked after the other variants that can
// start as an identifier).
c if is_id_start(c) => self.ident_or_unknown_prefix(),

View file

@ -9,8 +9,8 @@
use rustc_ast::util::unicode::contains_text_flow_control_chars;
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
use rustc_lexer::unescape::{self, EscapeError, Mode};
use rustc_lexer::Cursor;
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_lexer::{Cursor, LiteralKind};
use rustc_session::lint::builtin::{
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
};
@ -118,6 +118,7 @@ fn next_token(&mut self) -> (Token, bool) {
let mut swallow_next_invalid = 0;
// Skip trivial (whitespace & comments) tokens
loop {
let str_before = self.cursor.as_str();
let token = self.cursor.advance_token();
let start = self.pos;
self.pos = self.pos + BytePos(token.len);
@ -203,6 +204,29 @@ fn next_token(&mut self) -> (Token, bool) {
.push(span);
token::Ident(sym, false)
}
// Split up (raw) C string literals into an ident and a string literal when edition < 2021.
rustc_lexer::TokenKind::Literal {
kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
suffix_start: _,
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
let prefix_len = match kind {
LiteralKind::CStr { .. } => 1,
LiteralKind::RawCStr { .. } => 2,
_ => unreachable!(),
};
// reset the state so that only the prefix ("c" or "cr")
// was consumed.
let lit_start = start + BytePos(prefix_len);
self.pos = lit_start;
self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
self.report_unknown_prefix(start);
let sym = nfc_normalize(self.str_from(start));
let prefix_span = self.mk_sp(start, lit_start);
self.sess.symbol_gallery.insert(sym, prefix_span);
return (Token::new(token::Ident(sym, false), prefix_span), preceded_by_whitespace);
}
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
let suffix_start = start + BytePos(suffix_start);
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);

View file

@ -1,5 +1,4 @@
// FIXME(c_str_literals): This should be `run-pass`
// known-bug: #113333
// run-pass
// edition: 2021
#![feature(c_str_literals)]

View file

@ -1,25 +0,0 @@
error: prefix `c` is unknown
--> $DIR/basic.rs:8:27
|
LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul());
| ^ unknown prefix
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
|
LL | assert_eq!(b"test\0", c "test".to_bytes_with_nul());
| +
error: no rules expected the token `"test"`
--> $DIR/basic.rs:8:28
|
LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul());
| -^^^^^
| |
| no rules expected this token in macro call
| help: missing comma here
|
= note: while trying to match sequence start
error: aborting due to 2 previous errors

View file

@ -1,32 +1,21 @@
error: prefix `c` is unknown
error[E0658]: `c".."` literals are experimental
--> $DIR/gate.rs:10:5
|
LL | c"foo";
| ^ unknown prefix
| ^^^^^^
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
|
LL | c "foo";
| +
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
error: prefix `c` is unknown
error[E0658]: `c".."` literals are experimental
--> $DIR/gate.rs:13:8
|
LL | m!(c"test");
| ^ unknown prefix
| ^^^^^^^
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
|
LL | m!(c "test");
| +
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"`
--> $DIR/gate.rs:10:6
|
LL | c"foo";
| ^^^^^ expected one of 8 possible tokens
error: aborting due to 3 previous errors
error: aborting due to 2 previous errors
For more information about this error, try `rustc --explain E0658`.

View file

@ -1,5 +1,4 @@
// FIXME(c_str_literals): This should be `run-pass`
// known-bug: #113333
// run-pass
// edition: 2021
#![feature(c_str_literals)]

View file

@ -1,38 +0,0 @@
error: prefix `c` is unknown
--> $DIR/non-ascii.rs:9:9
|
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
| ^ unknown prefix
|
= note: prefixed identifiers and literals are reserved since Rust 2021
help: consider inserting whitespace here
|
LL | c "\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
| +
error: out of range hex escape
--> $DIR/non-ascii.rs:9:11
|
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
| ^^^^ must be a character in the range [\x00-\x7f]
error: out of range hex escape
--> $DIR/non-ascii.rs:9:15
|
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
| ^^^^ must be a character in the range [\x00-\x7f]
error: no rules expected the token `"\xEF\x80🦀\u{1F980}"`
--> $DIR/non-ascii.rs:9:10
|
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
| -^^^^^^^^^^^^^^^^^^^^
| |
| no rules expected this token in macro call
| help: missing comma here
|
note: while trying to match `,`
--> $SRC_DIR/core/src/macros/mod.rs:LL:COL
error: aborting due to 4 previous errors