// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

library peg_tests;

import 'dart:core' hide Symbol;
import '../../peg/pegparser.dart';

/// Checks a self-referential rule that matches nested parentheses and
/// builds a nested list mirroring the nesting of the input.
testParens() {
  Grammar g = new Grammar();
  Symbol a = g['A'];

  a.def = ['(', MANY(a, min: 0), ')', (a) => a];

  check(g, a, "", null);
  check(g, a, "()", '[]');
  check(g, a, "(()())", '[[],[]]');
  check(g, a, "(()((()))())", '[[],[[[]]],[]]');
}

/// Checks nested block comments installed as part of the grammar's
/// whitespace, including the error raised for an unterminated comment.
testBlockComment() {
  // Block comment in whitespace.

  Grammar g = new Grammar();

  Symbol blockComment = g['blockComment'];

  // The rule refers to itself so that comments may nest. Reaching END while
  // still inside a comment raises the 'EOF in block comment' error.
  blockComment.def = [
    '/*',
    MANY(
        OR([
          blockComment,
          [NOT('*/'), CHAR()],
          [END, ERROR('EOF in block comment')]
        ]),
        min: 0),
    '*/'
  ];
  print(blockComment);

  var a = MANY(TEXT('x'));

  g.whitespace = OR([g.whitespace, blockComment]);

  check(g, a, "x /**/ x", '[x,x]');
  check(g, a, "x /*/**/*/ x", '[x,x]');
  check(g, a, "x /*/***/ x", 'EOF in block comment');
  check(g, a, "x /*/*/x**/**/ x", '[x,x]');

  check(
      g,
      a,
      r""" /* Comment */ /* Following comment with /* nested comment*/ */ x /* x in comment */ x /* outside comment */ """,
      '[x,x]');
}

/// Checks TEXT, with and without a LEX context, and with a custom
/// conversion function applied to the matched substring.
testTEXT() {
  Grammar g = new Grammar();

  // TEXT grabs the parsed text,
  check(g, TEXT(LEX(MANY(OR(['1', 'a'])))), ' 1a1 ', '1a1');

  // Without the lexical context, TEXT will grab intervening whitespace.
  check(g, TEXT(MANY(OR(['1', 'a']))), ' 1a1 ', '1a1');
  check(g, TEXT(MANY(OR(['1', 'a']))), ' 1 a 1 ', '1 a 1');

  // Custom processing of the TEXT substring.
  var binaryNumber = TEXT(LEX(MANY(OR(['0', '1']))), (str, start, end) {
    var r = 0;
    var zero = '0'.codeUnitAt(0);
    for (int i = start; i < end; i++) r = r * 2 + (str.codeUnitAt(i) - zero);
    return r;
  });

  check(g, binaryNumber, ' 10101 ', 21);
  check(g, binaryNumber, '1010111', 87);
  check(g, binaryNumber, '1010 111', null);
}

/// Checks that OR yields the result of the first alternative that matches.
testOR() {
  // OR matches the first match.
  Grammar g = new Grammar();
  check(
      g,
      OR([
        ['a', NOT(END), () => 1],
        ['a', () => 2],
        ['a', () => 3]
      ]),
      'a',
      2);
}

/// Checks CHAR restricted to an explicit set of characters.
testCODE() {
  Grammar g = new Grammar();
  var a = TEXT(LEX('thing', MANY(CHAR('bcd'))));

  check(g, a, 'bbb', 'bbb');
  check(g, a, 'ccc', 'ccc');
  check(g, a, 'ddd', 'ddd');
  check(g, a, 'bad', null); // a is outside range.
  check(g, a, 'bed', null); // e is outside range.
}

/// Builds a grammar for a subset of C expressions and checks the trees it
/// produces for a range of inputs.
///
/// Each precedence level is written as `[operand, MANY(ops, min: 0), reform]`:
/// every matched operator yields a curried builder that is later applied to
/// the tree accumulated so far, which makes binary operators left-associative.
testC() {
  // Curried tree builders.
  binary(operation) => (second) => (first) => [operation, first, second];
  unary(operation) => () => (first) => [operation, first];
  // Folds the list of builders [fns] over the initial operand [a].
  reform(a, fns) {
    var r = a;
    for (var fn in fns) r = fn(r);
    return r;
  }

  Grammar g = new Grammar();

  Symbol expression = g['expression'];
  Symbol postfix_e = g['postfix_e'];
  Symbol unary_e = g['unary_e'];
  Symbol cast_e = g['cast_e'];
  Symbol mult_e = g['mult_e'];
  Symbol add_e = g['add_e'];
  Symbol shift_e = g['shift_e'];
  Symbol relational_e = g['relational_e'];
  Symbol equality_e = g['equality_e'];
  Symbol cond_e = g['cond_e'];
  Symbol assignment_e = g['assignment_e'];

  // Lexical elements.
  var idStartChar =
      CHAR(r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
  var idNextChar = CHAR(
      r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$_");

  var id = TEXT(LEX('identifier', [idStartChar, MANY(idNextChar, min: 0)]));

  var lit = TEXT(LEX('literal', MANY(CHAR('0123456789'))));

  var type_name = id;

  // Expression grammar.
  var primary_e = OR([
    id,
    lit,
    ['(', expression, ')', (e) => e]
  ]);

  var postfixes = OR([
    ['(', MANY(assignment_e, separator: ',', min: 0), ')', binary('apply')],
    ['++', unary('postinc')],
    ['--', unary('postdec')],
    ['.', id, binary('field')],
    ['->', id, binary('ptr')],
  ]);

  postfix_e.def = [primary_e, MANY(postfixes, min: 0), reform];

  // NOTE: '!' and '~' both produce the tag 'not'; the expectations below
  // rely on that, so the two operators are indistinguishable in the output.
  var unary_op = OR([
    ['&', () => 'address'],
    ['*', () => 'indir'],
    ['!', () => 'not'],
    ['~', () => 'not'],
    ['-', () => 'negate'],
    ['+', () => 'uplus'],
  ]);
  // NOT(idNextChar) keeps 'sizeof' from matching a prefix of a longer
  // identifier such as 'sizeofa'.
  var sizeof = LEX('sizeof', ['sizeof', NOT(idNextChar)]);

  Symbol unary_e_plain = g['unary_e_plain'];
  unary_e_plain.def = OR([
    ['++', unary_e, (e) => ['preinc', e]],
    ['--', unary_e, (e) => ['predec', e]],
    [unary_op, cast_e, (o, e) => [o, e]],
    [sizeof, unary_e, (e) => ['sizeof-expr', e]],
    [sizeof, '(', type_name, ')', (t) => ['sizeof-type', t]],
    postfix_e
  ]);

  unary_e.def = MEMO(unary_e_plain);
  //unary_e.def = unary_e_plain;

  cast_e.def = OR([
    ['(', type_name, ')', cast_e, (t, e) => ['cast', t, e]],
    unary_e,
  ]);

  var mult_ops = OR([
    ['*', cast_e, binary('mult')],
    ['/', cast_e, binary('div')],
    ['%', cast_e, binary('rem')],
  ]);
  mult_e.def = [cast_e, MANY(mult_ops, min: 0), reform];

  var add_ops = OR([
    ['+', mult_e, binary('add')],
    ['-', mult_e, binary('sub')],
  ]);
  add_e.def = [mult_e, MANY(add_ops, min: 0), reform];

  // '<<' is the left shift and '>>' the right shift.
  var shift_ops = OR([
    ['>>', add_e, binary('shr')],
    ['<<', add_e, binary('shl')],
  ]);
  shift_e.def = [add_e, MANY(shift_ops, min: 0), reform];

  // The two-character operators are listed first so that '<' and '>' do not
  // match a prefix of '<=' and '>='.
  var relational_ops = OR([
    ['<=', shift_e, binary('le')],
    ['>=', shift_e, binary('ge')],
    ['<', shift_e, binary('lt')],
    ['>', shift_e, binary('gt')],
  ]);
  relational_e.def = [shift_e, MANY(relational_ops, min: 0), reform];

  // Both operands of an equality operator are relational expressions, so
  // that 'a == b < c' groups as 'a == (b < c)'.
  var equality_ops = OR([
    ['==', relational_e, binary('eq')],
    ['!=', relational_e, binary('ne')],
  ]);
  equality_e.def = [relational_e, MANY(equality_ops, min: 0), reform];

  // Do not treat '&&' as '&' followed by '&' (and likewise for '||').
  var bit_and_op = LEX('&', ['&', NOT('&')]);
  var bit_or_op = LEX('|', ['|', NOT('|')]);

  var and_e = [
    equality_e,
    MANY([bit_and_op, equality_e, binary('bitand')], min: 0),
    reform
  ];
  var xor_e = [
    and_e,
    MANY(['^', and_e, binary('bitxor')], min: 0),
    reform
  ];
  var or_e = [
    xor_e,
    MANY([bit_or_op, xor_e, binary('bitor')], min: 0),
    reform
  ];

  var log_and_e = [
    or_e,
    MANY(['&&', or_e, binary('and')], min: 0),
    reform
  ];

  var log_or_e = [
    log_and_e,
    MANY(['||', log_and_e, binary('or')], min: 0),
    reform
  ];

  //cond_e.def = OR([ [log_or_e, '?', expression, ':', cond_e,
  //                   (p,a,b) => ['cond', p, a, b]],
  //                  log_or_e]);
  // Alternate version avoids reparsing log_or_e.
  cond_e.def = [
    log_or_e,
    MAYBE(['?', expression, ':', cond_e]),
    (p, r) => r == null || r == false ? p : ['cond', p, r[0], r[1]]
  ];

  var assign_op = OR([
    ['*=', () => 'mulassign'],
    ['=', () => 'assign']
  ]);

  // TODO: Figure out how not to re-parse a unary_e.
  // Order matters - cond_e can't go first since cond_e will succeed on, e.g. 'a'.
  assignment_e.def = OR([
    [unary_e, assign_op, assignment_e, (u, op, a) => [op, u, a]],
    cond_e
  ]);

  expression.def = [
    assignment_e,
    MANY([',', assignment_e, binary('comma')], min: 0),
    reform
  ];

  show(g, expression, 'a');
  check(g, expression, 'a', 'a');
  check(g, expression, '(a)', 'a');
  check(g, expression, ' ( ( a ) ) ', 'a');

  check(g, expression, 'a(~1,2)', '[apply,a,[[not,1],2]]');
  check(g, expression, 'a(1)(x,2)', '[apply,[apply,a,[1]],[x,2]]');
  check(g, expression, 'a(1,2())', '[apply,a,[1,[apply,2,[]]]]');

  check(g, expression, '++a++', '[preinc,[postinc,a]]');
  check(g, expression, 'a++++b', null);
  check(g, expression, 'a++ ++b', null);
  check(g, expression, 'a+ +++b', '[add,a,[preinc,[uplus,b]]]');
  check(g, expression, 'a+ + ++b', '[add,a,[uplus,[preinc,b]]]');
  check(g, expression, 'a+ + + +b', '[add,a,[uplus,[uplus,[uplus,b]]]]');
  check(g, expression, 'a+ ++ +b', '[add,a,[preinc,[uplus,b]]]');
  check(g, expression, 'a++ + +b', '[add,[postinc,a],[uplus,b]]');
  check(g, expression, 'a+++ +b', '[add,[postinc,a],[uplus,b]]');

  check(g, expression, '((T)f)(x)', '[apply,[cast,T,f],[x]]');
  check(g, expression, '(T)f(x)', '[cast,T,[apply,f,[x]]]');

  check(g, expression, 'a++*++b', '[mult,[postinc,a],[preinc,b]]');

  check(g, expression, 'a<<1>>++b', '[shr,[shl,a,1],[preinc,b]]');

  check(g, expression, 'a<1&&b', '[and,[lt,a,1],b]');

  check(g, expression, 'a<1 & &b', '[bitand,[lt,a,1],[address,b]]');
  // A relational expression on the right of '==' binds tighter than '=='.
  check(g, expression, 'a==b<c', '[eq,a,[lt,b,c]]');
  check(g, expression, 'a ? b ? c : d : e ? f : g',
      '[cond,a,[cond,b,c,d],[cond,e,f,g]]');

  check(g, expression, 'a,b,c', '[comma,[comma,a,b],c]');
  check(g, expression, 'a=1,b,c', '[comma,[comma,[assign,a,1],b],c]');

  check(g, expression, '((((((((((((a))))))))))))=1,b,c',
      '[comma,[comma,[assign,a,1],b],c]');

  check(g, expression, 'sizeof a', '[sizeof-expr,a]');
  check(g, expression, 'sizeofa', 'sizeofa');
  check(g, expression, 'sizeof (a)', '[sizeof-expr,a]');
}

/// Parses [input] with [rule] of [grammar] and prints the result.
///
/// A [ParseError] thrown by the parse is printed in place of the result; any
/// other exception is rethrown.
show(grammar, rule, input) {
  print('show: "$input"');
  var ast;
  try {
    ast = grammar.parse(rule, input);
  } catch (exception) {
    if (exception is ParseError)
      ast = exception;
    else
      rethrow;
  }
  print('${printList(ast)}');
}

/// Parses [input] with [rule] of [grammar] and compares the result with
/// [expected], throwing an [ArgumentError] when they differ.
///
/// Any exception thrown by the parse is captured and treated as the result.
void check(grammar, rule, input, expected) {
  // If [expected] is String then the result is coerced to string.
  // If [expected] is !String, the result is compared directly.
  print('check: "$input"');
  var ast;
  try {
    ast = grammar.parse(rule, input);
  } catch (exception) {
    ast = exception;
  }

  var formatted = ast;
  if (expected is String) formatted = printList(ast);

  //Expect.equals(expected, formatted, "parse: $input");
  if (expected != formatted) {
    throw new ArgumentError("parse: $input"
        "\n expected: $expected"
        "\n found: $formatted");
  }
}

/// Formats [item] as a string.
///
/// Prints the list in [1,2,3] notation, including nested lists. A null item
/// becomes 'null'; anything else is formatted with its toString().
printList(item) {
  if (item is List) {
    StringBuffer sb = new StringBuffer();
    sb.write('[');
    var sep = '';
    for (var x in item) {
      sb.write(sep);
      sb.write(printList(x));
      sep = ',';
    }
    sb.write(']');
    return sb.toString();
  }
  if (item == null) return 'null';
  return item.toString();
}

/// Runs every test in this file.
main() {
  testCODE();
  testParens();
  testOR();
  testTEXT();
  testBlockComment();
  testC();
}