mirror of
https://github.com/rust-lang/rust
synced 2024-11-05 20:45:15 +00:00
Add support in lexer for utf8 identifiers. No NFKC logic in char yet.
This commit is contained in:
parent
5fd0a3be0c
commit
36c55b20a8
4 changed files with 48 additions and 5 deletions
|
@ -595,9 +595,10 @@ otherwise defined as keywords or reserved
|
|||
tokens. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}.
|
||||
|
||||
That is: an identifier starts with any character having derived property
|
||||
@code{XID_Start} and continues with zero or more characters having derived
|
||||
property @code{XID_Continue}; and such an identifier is NFKC-normalized during
|
||||
lexing, such that all subsequent comparison of identifiers is performed on the
|
||||
@code{XID_Start}, or the character U+005F (underscore, @code{_}), and
|
||||
continues with zero or more characters having derived property
|
||||
@code{XID_Continue}. An identifier is NFKC-normalized during lexing, such
|
||||
that all subsequent comparison of identifiers is performed on the
|
||||
NFKC-normalized forms.
|
||||
|
||||
@emph{TODO: define relationship between Unicode and Rust versions}.
|
||||
|
|
|
@ -309,14 +309,16 @@ fn next_token(rdr: reader) -> {tok: token::token, chpos: uint, bpos: uint} {
|
|||
fn next_token_inner(rdr: reader) -> token::token {
|
||||
let accum_str = "";
|
||||
let c = rdr.curr();
|
||||
if is_alpha(c) || c == '_' {
|
||||
while is_alnum(c) || c == '_' {
|
||||
if char::is_XID_start(c) || c == '_' {
|
||||
while char::is_XID_continue(c) {
|
||||
str::push_char(accum_str, c);
|
||||
rdr.bump();
|
||||
c = rdr.curr();
|
||||
}
|
||||
if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
|
||||
let is_mod_name = c == ':' && rdr.next() == ':';
|
||||
|
||||
// FIXME: perform NFKC normalization here.
|
||||
ret token::IDENT(interner::intern::<str>(*rdr.get_interner(),
|
||||
accum_str), is_mod_name);
|
||||
}
|
||||
|
|
|
@ -37,6 +37,12 @@
|
|||
Cn Unassigned a reserved unassigned code point or a noncharacter
|
||||
*/
|
||||
|
||||
export is_alphabetic,
|
||||
is_XID_start, is_XID_continue,
|
||||
is_lowercase, is_uppercase,
|
||||
is_whitespace, is_alphanumeric,
|
||||
to_digit, maybe_digit, cmp;
|
||||
|
||||
import is_alphabetic = unicode::derived_property::Alphabetic;
|
||||
import is_XID_start = unicode::derived_property::XID_Start;
|
||||
import is_XID_continue = unicode::derived_property::XID_Continue;
|
||||
|
|
34
src/test/run-pass/utf8_idents.rs
Normal file
34
src/test/run-pass/utf8_idents.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Entry point of the run-pass test for non-ASCII identifiers: binds locals
// whose names are written in Greek and Devanagari script, does arithmetic on
// them, and calls a function whose name is written in Georgian script.
fn main() {
|
||||
let Π = 3.14;
|
||||
let लंच = Π * Π + 1.54;
|
||||
// NOTE(review): exact floating-point equality; this relies on adding and then
// subtracting 1.54 round-tripping to the same value -- confirm on all targets.
assert लंच - 1.54 == Π * Π;
|
||||
// The callee returns (10 + 10) >> 10, which is 0.
assert საჭმელად_გემრიელი_სადილი() == 0;
|
||||
}
|
||||
|
||||
// Declares locals named in a range of scripts, each bound to 10, checks a
// few arithmetic identities over them, and returns (10 + 10) >> 10, i.e. 0.
// The identifier spellings are the point of the test: each one must lex as a
// single identifier token, so they must not be renamed or transliterated.
fn საჭმელად_გემრიელი_სადილი() -> int {
|
||||

||||
// Lunch in several languages.
|
||||

||||
// Katakana and Han identifiers.
let ランチ = 10;
|
||||
let 午餐 = 10;
|
||||

||||
// Hebrew and Arabic (right-to-left scripts), then Armenian, Cyrillic, Greek
// and accented Latin; two of the names also contain an ASCII underscore.
let ארוחת_צהריי = 10;
|
||||
let غداء = 10;
|
||||
let լանչ = 10;
|
||||
let обед = 10;
|
||||
let абед = 10;
|
||||
let μεσημεριανό = 10;
|
||||
let hádegismatur = 10;
|
||||
let ручек = 10;
|
||||

||||
// Latin with stacked diacritics, and Thai.
let ăn_trưa = 10;
|
||||
let อาหารกลางวัน = 10;
|
||||

||||
// Lunchy arithmetic, mm.
|
||||

||||
// Every local is 10, so the expected values are 10*10*10, 10, 10+10+10 and
// 10+10 respectively.
assert hádegismatur * ручек * обед == 1000;
|
||||
assert 10 == ארוחת_צהריי;
|
||||
assert ランチ + 午餐 + μεσημεριανό == 30;
|
||||
assert ăn_trưa + อาหารกลางวัน == 20;
|
||||
// 20 >> 10 is 0, which is the value main asserts against.
ret (абед + լանչ) >> غداء;
|
||||
}
|
Loading…
Reference in a new issue