Add ast.Constant

Issue #26146: Add a new kind of AST node: ast.Constant. It can be used by
external AST optimizers, but the compiler does not emit such nodes directly.

An optimizer can replace the following AST nodes with ast.Constant:

* ast.NameConstant: None, False, True
* ast.Num: int, float, complex
* ast.Str: str
* ast.Bytes: bytes
* ast.Tuple if items are constants too: tuple
* frozenset

Update code to accept ast.Constant in addition to ast.Num and/or ast.Str:

* compiler
* docstrings
* ast.literal_eval()
* Tools/parser/unparse.py
This commit is contained in:
Victor Stinner 2016-01-26 00:40:57 +01:00
parent 0dceb91866
commit f2c1aa1661
14 changed files with 401 additions and 44 deletions

View file

@ -202,9 +202,9 @@ enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
Await_kind=12, Yield_kind=13, YieldFrom_kind=14,
Compare_kind=15, Call_kind=16, Num_kind=17, Str_kind=18,
FormattedValue_kind=19, JoinedStr_kind=20, Bytes_kind=21,
NameConstant_kind=22, Ellipsis_kind=23, Attribute_kind=24,
Subscript_kind=25, Starred_kind=26, Name_kind=27,
List_kind=28, Tuple_kind=29};
NameConstant_kind=22, Ellipsis_kind=23, Constant_kind=24,
Attribute_kind=25, Subscript_kind=26, Starred_kind=27,
Name_kind=28, List_kind=29, Tuple_kind=30};
struct _expr {
enum _expr_kind kind;
union {
@ -315,6 +315,10 @@ struct _expr {
singleton value;
} NameConstant;
struct {
constant value;
} Constant;
struct {
expr_ty value;
identifier attr;
@ -567,6 +571,9 @@ expr_ty _Py_NameConstant(singleton value, int lineno, int col_offset, PyArena
*arena);
#define Ellipsis(a0, a1, a2) _Py_Ellipsis(a0, a1, a2)
expr_ty _Py_Ellipsis(int lineno, int col_offset, PyArena *arena);
#define Constant(a0, a1, a2, a3) _Py_Constant(a0, a1, a2, a3)
expr_ty _Py_Constant(constant value, int lineno, int col_offset, PyArena
*arena);
#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5)
expr_ty _Py_Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int
lineno, int col_offset, PyArena *arena);

View file

@ -6,6 +6,7 @@ typedef PyObject * string;
typedef PyObject * bytes;
typedef PyObject * object;
typedef PyObject * singleton;
typedef PyObject * constant;
/* It would be nice if the code generated by asdl_c.py was completely
independent of Python, but it is a goal that requires too much work

View file

@ -35,6 +35,8 @@ def parse(source, filename='<unknown>', mode='exec'):
return compile(source, filename, mode, PyCF_ONLY_AST)
_NUM_TYPES = (int, float, complex)
def literal_eval(node_or_string):
"""
Safely evaluate an expression node or a string containing a Python
@ -47,7 +49,9 @@ def literal_eval(node_or_string):
if isinstance(node_or_string, Expression):
node_or_string = node_or_string.body
def _convert(node):
if isinstance(node, (Str, Bytes)):
if isinstance(node, Constant):
return node.value
elif isinstance(node, (Str, Bytes)):
return node.s
elif isinstance(node, Num):
return node.n
@ -62,24 +66,21 @@ def _convert(node):
in zip(node.keys, node.values))
elif isinstance(node, NameConstant):
return node.value
elif isinstance(node, UnaryOp) and \
isinstance(node.op, (UAdd, USub)) and \
isinstance(node.operand, (Num, UnaryOp, BinOp)):
elif isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)):
operand = _convert(node.operand)
if isinstance(node.op, UAdd):
return + operand
else:
return - operand
elif isinstance(node, BinOp) and \
isinstance(node.op, (Add, Sub)) and \
isinstance(node.right, (Num, UnaryOp, BinOp)) and \
isinstance(node.left, (Num, UnaryOp, BinOp)):
if isinstance(operand, _NUM_TYPES):
if isinstance(node.op, UAdd):
return + operand
else:
return - operand
elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
left = _convert(node.left)
right = _convert(node.right)
if isinstance(node.op, Add):
return left + right
else:
return left - right
if isinstance(left, _NUM_TYPES) and isinstance(right, _NUM_TYPES):
if isinstance(node.op, Add):
return left + right
else:
return left - right
raise ValueError('malformed node or string: ' + repr(node))
return _convert(node_or_string)
@ -196,12 +197,19 @@ def get_docstring(node, clean=True):
"""
if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
raise TypeError("%r can't have docstrings" % node.__class__.__name__)
if node.body and isinstance(node.body[0], Expr) and \
isinstance(node.body[0].value, Str):
if clean:
import inspect
return inspect.cleandoc(node.body[0].value.s)
return node.body[0].value.s
if not(node.body and isinstance(node.body[0], Expr)):
return
node = node.body[0].value
if isinstance(node, Str):
text = node.s
elif isinstance(node, Constant) and isinstance(node.value, str):
text = node.value
else:
return
if clean:
import inspect
text = inspect.cleandoc(text)
return text
def walk(node):

View file

@ -1,7 +1,8 @@
import ast
import dis
import os
import sys
import unittest
import ast
import weakref
from test import support
@ -933,6 +934,123 @@ def test_stdlib_validates(self):
compile(mod, fn, "exec")
class ConstantTests(unittest.TestCase):
"""Tests on the ast.Constant node type."""
def compile_constant(self, value):
tree = ast.parse("x = 123")
node = tree.body[0].value
new_node = ast.Constant(value=value)
ast.copy_location(new_node, node)
tree.body[0].value = new_node
code = compile(tree, "<string>", "exec")
ns = {}
exec(code, ns)
return ns['x']
def test_singletons(self):
for const in (None, False, True, Ellipsis, b'', frozenset()):
with self.subTest(const=const):
value = self.compile_constant(const)
self.assertIs(value, const)
def test_values(self):
nested_tuple = (1,)
nested_frozenset = frozenset({1})
for level in range(3):
nested_tuple = (nested_tuple, 2)
nested_frozenset = frozenset({nested_frozenset, 2})
values = (123, 123.0, 123j,
"unicode", b'bytes',
tuple("tuple"), frozenset("frozenset"),
nested_tuple, nested_frozenset)
for value in values:
with self.subTest(value=value):
result = self.compile_constant(value)
self.assertEqual(result, value)
def test_assign_to_constant(self):
tree = ast.parse("x = 1")
target = tree.body[0].targets[0]
new_target = ast.Constant(value=1)
ast.copy_location(new_target, target)
tree.body[0].targets[0] = new_target
with self.assertRaises(ValueError) as cm:
compile(tree, "string", "exec")
self.assertEqual(str(cm.exception),
"expression which can't be assigned "
"to in Store context")
def test_get_docstring(self):
tree = ast.parse("'docstring'\nx = 1")
self.assertEqual(ast.get_docstring(tree), 'docstring')
tree.body[0].value = ast.Constant(value='constant docstring')
self.assertEqual(ast.get_docstring(tree), 'constant docstring')
def get_load_const(self, tree):
# Compile to bytecode, disassemble and get parameter of LOAD_CONST
# instructions
co = compile(tree, '<string>', 'exec')
consts = []
for instr in dis.get_instructions(co):
if instr.opname == 'LOAD_CONST':
consts.append(instr.argval)
return consts
@support.cpython_only
def test_load_const(self):
consts = [None,
True, False,
124,
2.0,
3j,
"unicode",
b'bytes',
(1, 2, 3)]
code = '\n'.join(map(repr, consts))
code += '\n...'
code_consts = [const for const in consts
if (not isinstance(const, (str, int, float, complex))
or isinstance(const, bool))]
code_consts.append(Ellipsis)
# the compiler adds a final "LOAD_CONST None"
code_consts.append(None)
tree = ast.parse(code)
self.assertEqual(self.get_load_const(tree), code_consts)
# Replace expression nodes with constants
for expr_node, const in zip(tree.body, consts):
assert isinstance(expr_node, ast.Expr)
new_node = ast.Constant(value=const)
ast.copy_location(new_node, expr_node.value)
expr_node.value = new_node
self.assertEqual(self.get_load_const(tree), code_consts)
def test_literal_eval(self):
tree = ast.parse("1 + 2")
binop = tree.body[0].value
new_left = ast.Constant(value=10)
ast.copy_location(new_left, binop.left)
binop.left = new_left
new_right = ast.Constant(value=20)
ast.copy_location(new_right, binop.right)
binop.right = new_right
self.assertEqual(ast.literal_eval(binop), 30)
def main():
if __name__ != '__main__':
return

View file

@ -10,6 +10,10 @@ Release date: tba
Core and Builtins
-----------------
- Issue #26146: Add a new kind of AST node: ``ast.Constant``. It can be used
by external AST optimizers, but the compiler does not emit such nodes
directly.
- Issue #18018: Import raises ImportError instead of SystemError if a relative
import is attempted without a known parent package.

View file

@ -1,4 +1,8 @@
-- ASDL's six builtin types are identifier, int, string, bytes, object, singleton
-- ASDL's 7 builtin types are:
-- identifier, int, string, bytes, object, singleton, constant
--
-- singleton: None, True or False
-- constant can be None, whereas None means "no value" for object.
module Python
{
@ -76,6 +80,7 @@ module Python
| Bytes(bytes s)
| NameConstant(singleton value)
| Ellipsis
| Constant(constant value)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)

View file

@ -33,7 +33,8 @@
# See the EBNF at the top of the file to understand the logical connection
# between the various node types.
builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton'}
builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
'constant'}
class AST:
def __repr__(self):

View file

@ -834,6 +834,7 @@ def visitModule(self, mod):
return (PyObject*)o;
}
#define ast2obj_singleton ast2obj_object
#define ast2obj_constant ast2obj_object
#define ast2obj_identifier ast2obj_object
#define ast2obj_string ast2obj_object
#define ast2obj_bytes ast2obj_object
@ -871,6 +872,26 @@ def visitModule(self, mod):
return 0;
}
/* Convert the Python object `obj` into the value stored in a Constant AST
 * node and write it to `*out`.
 *
 * Returns 0 on success.  Returns -1, with `*out` set to NULL, if `obj` could
 * not be registered with `arena`.
 */
static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (obj == Py_None || obj == Py_True || obj == Py_False) {
        /* don't increment the reference counter, Constant uses a borrowed
         * reference, not a strong reference */
        *out = obj;
        return 0;
    }

    if (obj) {
        if (PyArena_AddPyObject(arena, obj) < 0) {
            *out = NULL;
            return -1;
        }
        /* NOTE(review): presumably this reference is the one the arena
         * releases when it is freed -- confirm against the
         * PyArena_AddPyObject() contract. */
        Py_INCREF(obj);
    }

    /* A NULL obj is passed through unchanged. */
    *out = obj;
    return 0;
}
static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
{
if (!PyUnicode_CheckExact(obj) && obj != Py_None) {

View file

@ -306,6 +306,10 @@ static char *NameConstant_fields[]={
"value",
};
static PyTypeObject *Ellipsis_type;
static PyTypeObject *Constant_type;
static char *Constant_fields[]={
"value",
};
static PyTypeObject *Attribute_type;
_Py_IDENTIFIER(attr);
_Py_IDENTIFIER(ctx);
@ -709,6 +713,7 @@ static PyObject* ast2obj_object(void *o)
return (PyObject*)o;
}
#define ast2obj_singleton ast2obj_object
#define ast2obj_constant ast2obj_object
#define ast2obj_identifier ast2obj_object
#define ast2obj_string ast2obj_object
#define ast2obj_bytes ast2obj_object
@ -746,6 +751,26 @@ static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
return 0;
}
/* Convert the Python object `obj` into the value stored in a Constant AST
 * node and write it to `*out`.
 *
 * Returns 0 on success.  Returns -1, with `*out` set to NULL, if `obj` could
 * not be registered with `arena`.
 */
static int obj2ast_constant(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (obj == Py_None || obj == Py_True || obj == Py_False) {
        /* don't increment the reference counter, Constant uses a borrowed
         * reference, not a strong reference */
        *out = obj;
        return 0;
    }

    if (obj) {
        if (PyArena_AddPyObject(arena, obj) < 0) {
            *out = NULL;
            return -1;
        }
        /* NOTE(review): presumably this reference is the one the arena
         * releases when it is freed -- confirm against the
         * PyArena_AddPyObject() contract. */
        Py_INCREF(obj);
    }

    /* A NULL obj is passed through unchanged. */
    *out = obj;
    return 0;
}
static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
{
if (!PyUnicode_CheckExact(obj) && obj != Py_None) {
@ -941,6 +966,8 @@ static int init_types(void)
if (!NameConstant_type) return 0;
Ellipsis_type = make_type("Ellipsis", expr_type, NULL, 0);
if (!Ellipsis_type) return 0;
Constant_type = make_type("Constant", expr_type, Constant_fields, 1);
if (!Constant_type) return 0;
Attribute_type = make_type("Attribute", expr_type, Attribute_fields, 3);
if (!Attribute_type) return 0;
Subscript_type = make_type("Subscript", expr_type, Subscript_fields, 3);
@ -2166,6 +2193,25 @@ Ellipsis(int lineno, int col_offset, PyArena *arena)
return p;
}
/* Allocate a Constant expression node in `arena`.
 *
 * `value` is required: a NULL value sets ValueError and returns NULL.
 * NULL is also returned when PyArena_Malloc() fails.  On success the new
 * node has kind Constant_kind and carries `value`, `lineno` and
 * `col_offset`.
 */
expr_ty
Constant(constant value, int lineno, int col_offset, PyArena *arena)
{
    expr_ty p;
    if (!value) {
        PyErr_SetString(PyExc_ValueError,
                        "field value is required for Constant");
        return NULL;
    }
    p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));
    if (!p)
        return NULL;
    p->kind = Constant_kind;
    p->v.Constant.value = value;
    p->lineno = lineno;
    p->col_offset = col_offset;
    return p;
}
expr_ty
Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int lineno, int
col_offset, PyArena *arena)
@ -3267,6 +3313,15 @@ ast2obj_expr(void* _o)
result = PyType_GenericNew(Ellipsis_type, NULL, NULL);
if (!result) goto failed;
break;
case Constant_kind:
result = PyType_GenericNew(Constant_type, NULL, NULL);
if (!result) goto failed;
value = ast2obj_constant(o->v.Constant.value);
if (!value) goto failed;
if (_PyObject_SetAttrId(result, &PyId_value, value) == -1)
goto failed;
Py_DECREF(value);
break;
case Attribute_kind:
result = PyType_GenericNew(Attribute_type, NULL, NULL);
if (!result) goto failed;
@ -6240,6 +6295,28 @@ obj2ast_expr(PyObject* obj, expr_ty* out, PyArena* arena)
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Constant_type);
if (isinstance == -1) {
return 1;
}
if (isinstance) {
constant value;
if (_PyObject_HasAttrId(obj, &PyId_value)) {
int res;
tmp = _PyObject_GetAttrId(obj, &PyId_value);
if (tmp == NULL) goto failed;
res = obj2ast_constant(tmp, &value, arena);
if (res != 0) goto failed;
Py_CLEAR(tmp);
} else {
PyErr_SetString(PyExc_TypeError, "required field \"value\" missing from Constant");
return 1;
}
*out = Constant(value, lineno, col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
isinstance = PyObject_IsInstance(obj, (PyObject*)Attribute_type);
if (isinstance == -1) {
return 1;
@ -7517,6 +7594,8 @@ PyInit__ast(void)
0) return NULL;
if (PyDict_SetItemString(d, "Ellipsis", (PyObject*)Ellipsis_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Constant", (PyObject*)Constant_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Attribute", (PyObject*)Attribute_type) < 0)
return NULL;
if (PyDict_SetItemString(d, "Subscript", (PyObject*)Subscript_type) < 0)

View file

@ -131,6 +131,50 @@ validate_arguments(arguments_ty args)
return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
}
/* Check that `value` is an object allowed inside an ast.Constant node.
 *
 * Accepted values are None, Ellipsis, exact int, float, complex, str and
 * bytes instances, bool, and exact tuples or frozensets whose items are
 * themselves accepted (checked recursively).
 *
 * Returns 1 if `value` is valid and 0 otherwise.  A 0 result does not
 * always leave an exception set: a failure while iterating sets one, a
 * plain type mismatch does not.  The caller is expected to raise its own
 * error.
 */
static int
validate_constant(PyObject *value)
{
    if (value == Py_None || value == Py_Ellipsis)
        return 1;

    if (PyLong_CheckExact(value)
            || PyFloat_CheckExact(value)
            || PyComplex_CheckExact(value)
            || PyBool_Check(value)
            || PyUnicode_CheckExact(value)
            || PyBytes_CheckExact(value))
        return 1;

    if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
        PyObject *it;

        it = PyObject_GetIter(value);
        if (it == NULL)
            return 0;

        while (1) {
            int valid;
            PyObject *item = PyIter_Next(it);
            if (item == NULL) {
                /* NULL means either exhaustion or an iteration error. */
                if (PyErr_Occurred()) {
                    Py_DECREF(it);
                    return 0;
                }
                break;
            }

            valid = validate_constant(item);
            /* PyIter_Next() returns a new reference: release it on every
             * path, otherwise each inspected item is leaked. */
            Py_DECREF(item);
            if (!valid) {
                Py_DECREF(it);
                return 0;
            }
        }

        Py_DECREF(it);
        return 1;
    }

    return 0;
}
static int
validate_expr(expr_ty exp, expr_context_ty ctx)
{
@ -240,6 +284,12 @@ validate_expr(expr_ty exp, expr_context_ty ctx)
return validate_expr(exp->v.Call.func, Load) &&
validate_exprs(exp->v.Call.args, Load, 0) &&
validate_keywords(exp->v.Call.keywords);
case Constant_kind:
if (!validate_constant(exp->v.Constant.value)) {
PyErr_SetString(PyExc_TypeError, "invalid type in Constant");
return 0;
}
return 1;
case Num_kind: {
PyObject *n = exp->v.Num.n;
if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) &&

View file

@ -1314,7 +1314,11 @@ compiler_isdocstring(stmt_ty s)
{
if (s->kind != Expr_kind)
return 0;
return s->v.Expr.value->kind == Str_kind;
if (s->v.Expr.value->kind == Str_kind)
return 1;
if (s->v.Expr.value->kind == Constant_kind)
return PyUnicode_CheckExact(s->v.Expr.value->v.Constant.value);
return 0;
}
/* Compile a sequence of statements, checking for a docstring. */
@ -1688,8 +1692,12 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async)
st = (stmt_ty)asdl_seq_GET(body, 0);
docstring = compiler_isdocstring(st);
if (docstring && c->c_optimize < 2)
first_const = st->v.Expr.value->v.Str.s;
if (docstring && c->c_optimize < 2) {
if (st->v.Expr.value->kind == Constant_kind)
first_const = st->v.Expr.value->v.Constant.value;
else
first_const = st->v.Expr.value->v.Str.s;
}
if (compiler_add_o(c, c->u->u_consts, first_const) < 0) {
compiler_exit_scope(c);
return 0;
@ -2599,6 +2607,36 @@ compiler_assert(struct compiler *c, stmt_ty s)
return 1;
}
/* Compile an expression statement whose expression is `value`.
 *
 * In interactive mode at the top nesting level the expression is evaluated
 * and printed with PRINT_EXPR.  Otherwise a bare string or number (either
 * as Str/Num nodes or as a Constant holding an exact str, int, float or
 * complex) generates no code at all; any other expression is evaluated and
 * its result discarded with POP_TOP.
 *
 * Returns 1 on success.  The VISIT and ADDOP macros return 0 from this
 * function on failure.
 */
static int
compiler_visit_stmt_expr(struct compiler *c, expr_ty value)
{
    if (c->c_interactive && c->c_nestlevel <= 1) {
        VISIT(c, expr, value);
        ADDOP(c, PRINT_EXPR);
        return 1;
    }

    switch (value->kind) {
    case Str_kind:
    case Num_kind:
        /* ignore strings and numbers */
        return 1;
    case Constant_kind: {
        PyObject *obj = value->v.Constant.value;
        int is_str_or_number = (PyUnicode_CheckExact(obj)
                                || PyLong_CheckExact(obj)
                                || PyFloat_CheckExact(obj)
                                || PyComplex_CheckExact(obj));
        if (is_str_or_number) {
            /* ignore strings and numbers */
            return 1;
        }
        break;
    }
    default:
        break;
    }

    VISIT(c, expr, value);
    ADDOP(c, POP_TOP);
    return 1;
}
static int
compiler_visit_stmt(struct compiler *c, stmt_ty s)
{
@ -2669,16 +2707,7 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s)
case Nonlocal_kind:
break;
case Expr_kind:
if (c->c_interactive && c->c_nestlevel <= 1) {
VISIT(c, expr, s->v.Expr.value);
ADDOP(c, PRINT_EXPR);
}
else if (s->v.Expr.value->kind != Str_kind &&
s->v.Expr.value->kind != Num_kind) {
VISIT(c, expr, s->v.Expr.value);
ADDOP(c, POP_TOP);
}
break;
return compiler_visit_stmt_expr(c, s->v.Expr.value);
case Pass_kind:
break;
case Break_kind:
@ -3625,6 +3654,8 @@ expr_constant(struct compiler *c, expr_ty e)
switch (e->kind) {
case Ellipsis_kind:
return 1;
case Constant_kind:
return PyObject_IsTrue(e->v.Constant.value);
case Num_kind:
return PyObject_IsTrue(e->v.Num.n);
case Str_kind:
@ -3912,6 +3943,9 @@ compiler_visit_expr(struct compiler *c, expr_ty e)
return compiler_compare(c, e);
case Call_kind:
return compiler_call(c, e);
case Constant_kind:
ADDOP_O(c, LOAD_CONST, e->v.Constant.value, consts);
break;
case Num_kind:
ADDOP_O(c, LOAD_CONST, e->v.Num.n, consts);
break;

View file

@ -79,7 +79,10 @@ future_parse(PyFutureFeatures *ff, mod_ty mod, PyObject *filename)
i = 0;
first = (stmt_ty)asdl_seq_GET(mod->v.Module.body, i);
if (first->kind == Expr_kind && first->v.Expr.value->kind == Str_kind)
if (first->kind == Expr_kind
&& (first->v.Expr.value->kind == Str_kind
|| (first->v.Expr.value->kind == Constant_kind
&& PyUnicode_CheckExact(first->v.Expr.value->v.Constant.value))))
i++;

View file

@ -1455,6 +1455,7 @@ symtable_visit_expr(struct symtable *st, expr_ty e)
case JoinedStr_kind:
VISIT_SEQ(st, expr, e->v.JoinedStr.values);
break;
case Constant_kind:
case Num_kind:
case Str_kind:
case Bytes_kind:

View file

@ -343,6 +343,11 @@ def _fstring_Str(self, t, write):
value = t.s.replace("{", "{{").replace("}", "}}")
write(value)
def _fstring_Constant(self, t, write):
    """Emit the str held by Constant node *t* as literal f-string text.

    Braces are doubled so that they are not read as replacement fields.
    The result is passed to the *write* callable.
    """
    assert isinstance(t.value, str)
    escaped = t.value.replace("{", "{{")
    escaped = escaped.replace("}", "}}")
    write(escaped)
def _fstring_FormattedValue(self, t, write):
write("{")
expr = io.StringIO()
@ -364,6 +369,25 @@ def _fstring_FormattedValue(self, t, write):
def _Name(self, t):
self.write(t.id)
def _write_constant(self, value):
    """Write the source form of the constant *value*.

    Tuples are written item by item, recursively, so that a float or
    complex nested at any depth is handled by the branch below and not by
    repr() of the enclosing tuple.  Every other value is written with
    repr().
    """
    if isinstance(value, tuple):
        self.write("(")
        if len(value) == 1:
            # A one-item tuple needs the trailing comma.
            self._write_constant(value[0])
            self.write(",")
        else:
            interleave(lambda: self.write(", "), self._write_constant, value)
        self.write(")")
    elif isinstance(value, (float, complex)):
        # repr() of an infinite value contains "inf", which is not a valid
        # literal; substitute INFSTR (presumably a literal that evaluates
        # to infinity -- defined elsewhere in this module).
        self.write(repr(value).replace("inf", INFSTR))
    else:
        self.write(repr(value))

def _Constant(self, t):
    """Write the value held by the ast.Constant node *t*."""
    self._write_constant(t.value)
def _NameConstant(self, t):
self.write(repr(t.value))
@ -443,7 +467,7 @@ def write_pair(pair):
def _Tuple(self, t):
self.write("(")
if len(t.elts) == 1:
(elt,) = t.elts
elt = t.elts[0]
self.dispatch(elt)
self.write(",")
else:
@ -490,7 +514,8 @@ def _Attribute(self,t):
# Special case: 3.__abs__() is a syntax error, so if t.value
# is an integer literal then we need to either parenthesize
# it or add an extra space to get 3 .__abs__().
if isinstance(t.value, ast.Num) and isinstance(t.value.n, int):
if ((isinstance(t.value, ast.Num) and isinstance(t.value.n, int))
or (isinstance(t.value, ast.Constant) and isinstance(t.value.value, int))):
self.write(" ")
self.write(".")
self.write(t.attr)