Add support in lexer for utf8 identifiers. No NFKC logic in char yet.

This commit is contained in:
Graydon Hoare 2011-12-29 14:45:18 -08:00
parent 5fd0a3be0c
commit 36c55b20a8
4 changed files with 48 additions and 5 deletions

View file

@ -595,9 +595,10 @@ otherwise defined as keywords or reserved
tokens. @xref{Ref.Lex.Key}. @xref{Ref.Lex.Res}.
That is: an identifier starts with any character having derived property
@code{XID_Start} and continues with zero or more characters having derived
property @code{XID_Continue}; and such an identifier is NFKC-normalized during
lexing, such that all subsequent comparison of identifiers is performed on the
@code{XID_Start}, or the character U+005F (underscore, @code{_}), and
continues with zero or more characters having derived property
@code{XID_Continue}. An identifier is NFKC-normalized during lexing, such
that all subsequent comparison of identifiers is performed on the
NFKC-normalized forms.
@emph{TODO: define relationship between Unicode and Rust versions}.

View file

@ -309,14 +309,16 @@ fn next_token(rdr: reader) -> {tok: token::token, chpos: uint, bpos: uint} {
fn next_token_inner(rdr: reader) -> token::token {
let accum_str = "";
let c = rdr.curr();
if is_alpha(c) || c == '_' {
while is_alnum(c) || c == '_' {
if char::is_XID_start(c) || c == '_' {
while char::is_XID_continue(c) {
str::push_char(accum_str, c);
rdr.bump();
c = rdr.curr();
}
if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
let is_mod_name = c == ':' && rdr.next() == ':';
// FIXME: perform NFKC normalization here.
ret token::IDENT(interner::intern::<str>(*rdr.get_interner(),
accum_str), is_mod_name);
}

View file

@ -37,6 +37,12 @@
Cn Unassigned a reserved unassigned code point or a noncharacter
*/
export is_alphabetic,
is_XID_start, is_XID_continue,
is_lowercase, is_uppercase,
is_whitespace, is_alphanumeric,
to_digit, maybe_digit, cmp;
import is_alphabetic = unicode::derived_property::Alphabetic;
import is_XID_start = unicode::derived_property::XID_Start;
import is_XID_continue = unicode::derived_property::XID_Continue;

View file

@ -0,0 +1,34 @@
fn main() {
let Π = 3.14;
let = Π * Π + 1.54;
assert - 1.54 == Π * Π;
assert _გემრიელი_სადილი() == 0;
}
fn _გემრიელი_სადილი() -> int {
// Lunch in several languages.
let = 10;
let = 10;
let ארוחת_צהריי = 10;
let غداء = 10;
let լանչ = 10;
let обед = 10;
let абед = 10;
let μεσημεριανό = 10;
let hádegismatur = 10;
let ручек = 10;
let ăn_trưa = 10;
let = 10;
// Lunchy arithmetic, mm.
assert hádegismatur * ручек * обед == 1000;
assert 10 == ארוחת_צהריי;
assert + + μεσημεριανό == 30;
assert ăn_trưa + == 20;
ret (абед + լանչ) >> غداء;
}