#! /usr/bin/env python """Generate C code from an ASDL description.""" # TO DO # handle fields that have a type but no name import os, sys, traceback import asdl TABSIZE = 8 MAX_COL = 80 def get_c_type(name): """Return a string for the C name of the type. This function special cases the default types provided by asdl: identifier, string, int, bool. """ # XXX ack! need to figure out where Id is useful and where string if isinstance(name, asdl.Id): name = name.value if name in asdl.builtin_types: return name else: return "%s_ty" % name def reflow_lines(s, depth): """Reflow the line s indented depth tabs. Return a sequence of lines where no line extends beyond MAX_COL when properly indented. The first line is properly indented based exclusively on depth * TABSIZE. All following lines -- these are the reflowed lines generated by this function -- start at the same column as the first character beyond the opening { in the first line. """ size = MAX_COL - depth * TABSIZE if len(s) < size: return [s] lines = [] cur = s padding = "" while len(cur) > size: i = cur.rfind(' ', 0, size) # XXX this should be fixed for real if i == -1 and 'GeneratorExp' in cur: i = size + 3 assert i != -1, "Impossible line %d to reflow: %s" % (size, `s`) lines.append(padding + cur[:i]) if len(lines) == 1: # find new size based on brace j = cur.find('{', 0, i) if j >= 0: j += 2 # account for the brace and the space after it size -= j padding = " " * j else: j = cur.find('(', 0, i) if j >= 0: j += 1 # account for the paren (no space after it) size -= j padding = " " * j cur = cur[i+1:] else: lines.append(padding + cur) return lines def is_simple(sum): """Return True if a sum is a simple. A sum is simple if its types have no fields, e.g. unaryop = Invert | Not | UAdd | USub """ for t in sum.types: if t.fields: return False return True class EmitVisitor(asdl.VisitorBase): """Visit that emits lines""" def __init__(self, file): self.file = file super(EmitVisitor, self).__init__() def emit(self, s, depth, reflow=1): # XXX reflow long lines? if reflow: lines = reflow_lines(s, depth) else: lines = [s] for line in lines: line = (" " * TABSIZE * depth) + line + "\n" self.file.write(line) class TypeDefVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): if is_simple(sum): self.simple_sum(sum, name, depth) else: self.sum_with_constructors(sum, name, depth) def simple_sum(self, sum, name, depth): enum = [] for i in range(len(sum.types)): type = sum.types[i] enum.append("%s=%d" % (type.name, i + 1)) enums = ", ".join(enum) ctype = get_c_type(name) s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) self.emit(s, depth) self.emit("", depth) def sum_with_constructors(self, sum, name, depth): ctype = get_c_type(name) s = "typedef struct _%(name)s *%(ctype)s;" % locals() self.emit(s, depth) self.emit("", depth) def visitProduct(self, product, name, depth): ctype = get_c_type(name) s = "typedef struct _%(name)s *%(ctype)s;" % locals() self.emit(s, depth) self.emit("", depth) class StructVisitor(EmitVisitor): """Visitor to generate typdefs for AST.""" def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): if not is_simple(sum): self.sum_with_constructors(sum, name, depth) def sum_with_constructors(self, sum, name, depth): def emit(s, depth=depth): self.emit(s % sys._getframe(1).f_locals, depth) enum = [] for i in range(len(sum.types)): type = sum.types[i] enum.append("%s_kind=%d" % (type.name, i + 1)) emit("struct _%(name)s {") emit("enum { " + ", ".join(enum) + " } kind;", depth + 1) emit("union {", depth + 1) for t in sum.types: self.visit(t, depth + 2) emit("} v;", depth + 1) for field in sum.attributes: # rudimentary attribute handling type = str(field.type) assert type in asdl.builtin_types, type emit("%s %s;" % (type, field.name), depth + 1); emit("};") emit("") def visitConstructor(self, cons, depth): if cons.fields: self.emit("struct {", depth) for f in cons.fields: self.visit(f, depth + 1) self.emit("} %s;" % cons.name, depth) self.emit("", depth) else: # XXX not sure what I want here, nothing is probably fine pass def visitField(self, field, depth): # XXX need to lookup field.type, because it might be something # like a builtin... ctype = get_c_type(field.type) name = field.name if field.seq: self.emit("asdl_seq *%(name)s;" % locals(), depth) else: self.emit("%(ctype)s %(name)s;" % locals(), depth) def visitProduct(self, product, name, depth): self.emit("struct _%(name)s {" % locals(), depth) for f in product.fields: self.visit(f, depth + 1) self.emit("};", depth) self.emit("", depth) class PrototypeVisitor(EmitVisitor): """Generate function prototypes for the .h file""" def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitSum(self, sum, name): if is_simple(sum): pass # XXX else: for t in sum.types: self.visit(t, name, sum.attributes) def get_args(self, fields): """Return list of C argument into, one for each field. Argument info is 3-tuple of a C type, variable name, and flag that is true if type can be NULL. """ args = [] unnamed = {} for f in fields: if f.name is None: name = f.type c = unnamed[name] = unnamed.get(name, 0) + 1 if c > 1: name = "name%d" % (c - 1) else: name = f.name # XXX should extend get_c_type() to handle this if f.seq: ctype = "asdl_seq *" else: ctype = get_c_type(f.type) args.append((ctype, name, f.opt or f.seq)) return args def visitConstructor(self, cons, type, attrs): args = self.get_args(cons.fields) attrs = self.get_args(attrs) ctype = get_c_type(type) self.emit_function(cons.name, ctype, args, attrs) def emit_function(self, name, ctype, args, attrs, union=1): args = args + attrs if args: argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args]) argstr += ", PyArena *arena" else: argstr = "PyArena *arena" self.emit("%s %s(%s);" % (ctype, name, argstr), 0) def visitProduct(self, prod, name): self.emit_function(name, get_c_type(name), self.get_args(prod.fields), [], union=0) class FunctionVisitor(PrototypeVisitor): """Visitor to generate constructor functions for AST.""" def emit_function(self, name, ctype, args, attrs, union=1): def emit(s, depth=0, reflow=1): self.emit(s, depth, reflow) argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args + attrs]) if argstr: argstr += ", PyArena *arena" else: argstr = "PyArena *arena" self.emit("%s" % ctype, 0) emit("%s(%s)" % (name, argstr)) emit("{") emit("%s p;" % ctype, 1) for argtype, argname, opt in args: # XXX hack alert: false is allowed for a bool if not opt and not argtype == "bool": emit("if (!%s) {" % argname, 1) emit("PyErr_SetString(PyExc_ValueError,", 2) msg = "field %s is required for %s" % (argname, name) emit(' "%s");' % msg, 2, reflow=0) emit('return NULL;', 2) emit('}', 1) emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); emit("if (!p) {", 1) emit("PyErr_NoMemory();", 2) emit("return NULL;", 2) emit("}", 1) if union: self.emit_body_union(name, args, attrs) else: self.emit_body_struct(name, args, attrs) emit("return p;", 1) emit("}") emit("") def emit_body_union(self, name, args, attrs): def emit(s, depth=0, reflow=1): self.emit(s, depth, reflow) emit("p->kind = %s_kind;" % name, 1) for argtype, argname, opt in args: emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) for argtype, argname, opt in attrs: emit("p->%s = %s;" % (argname, argname), 1) def emit_body_struct(self, name, args, attrs): def emit(s, depth=0, reflow=1): self.emit(s, depth, reflow) for argtype, argname, opt in args: emit("p->%s = %s;" % (argname, argname), 1) assert not attrs class PickleVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitSum(self, sum, name): pass def visitProduct(self, sum, name): pass def visitConstructor(self, cons, name): pass def visitField(self, sum): pass class MarshalPrototypeVisitor(PickleVisitor): def prototype(self, sum, name): ctype = get_c_type(name) self.emit("static int marshal_write_%s(PyObject **, int *, %s);" % (name, ctype), 0) visitProduct = visitSum = prototype class PyTypesDeclareVisitor(PickleVisitor): def prototype(self, sum, name): ctype = get_c_type(name) self.emit("void free_%s(%s);" % (name, ctype), 0) def visitProduct(self, prod, name): self.emit("PyTypeObject *%s_type;" % name, 0) self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) if prod.fields: self.emit("char *%s_fields[]={" % name,0) for f in prod.fields: self.emit('"%s",' % f.name, 1) self.emit("};", 0) def visitSum(self, sum, name): self.emit("PyTypeObject *%s_type;" % name, 0) ptype = "void*" if is_simple(sum): ptype = get_c_type(name) tnames = [] for t in sum.types: tnames.append(str(t.name)+"_singleton") tnames = ", *".join(tnames) self.emit("static PyObject *%s;" % tnames, 0) self.emit("static PyObject* ast2obj_%s(%s);" % (name, ptype), 0) for t in sum.types: self.visitConstructor(t, name) def visitConstructor(self, cons, name): self.emit("PyTypeObject *%s_type;" % cons.name, 0) if cons.fields: self.emit("char *%s_fields[]={" % cons.name, 0) for t in cons.fields: self.emit('"%s",' % t.name, 1) self.emit("};",0) class PyTypesVisitor(PickleVisitor): def visitModule(self, mod): self.emit(""" static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields) { PyObject *fnames, *result; int i; if (num_fields) { fnames = PyTuple_New(num_fields); if (!fnames) return NULL; } else { fnames = Py_None; Py_INCREF(Py_None); } for(i=0; i < num_fields; i++) { PyObject *field = PyString_FromString(fields[i]); if (!field) { Py_DECREF(fnames); return NULL; } PyTuple_SET_ITEM(fnames, i, field); } result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sO}", type, base, "_fields", fnames); Py_DECREF(fnames); return (PyTypeObject*)result; } static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*)) { int i, n = asdl_seq_LEN(seq); PyObject *result = PyList_New(n); PyObject *value; if (!result) return NULL; for (i = 0; i < n; i++) { value = func(asdl_seq_GET(seq, i)); if (!value) { Py_DECREF(result); return NULL; } PyList_SET_ITEM(result, i, value); } return result; } static PyObject* ast2obj_object(void *o) { if (!o) o = Py_None; Py_INCREF((PyObject*)o); return (PyObject*)o; } #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object static PyObject* ast2obj_bool(bool b) { return PyBool_FromLong(b); } """, 0, reflow=False) self.emit("static int initialized;", 0) self.emit("static int init_types(void)",0) self.emit("{", 0) self.emit("if (initialized) return 1;", 1) for dfn in mod.dfns: self.visit(dfn) self.emit("return 1;", 0); self.emit("}", 0) def visitProduct(self, prod, name): if prod.fields: fields = name.value+"_fields" else: fields = "NULL" self.emit('%s_type = make_type("%s", &PyBaseObject_Type, %s, %d);' % (name, name, fields, len(prod.fields)), 1) def visitSum(self, sum, name): self.emit('%s_type = make_type("%s", &PyBaseObject_Type, NULL, 0);' % (name, name), 1) simple = is_simple(sum) for t in sum.types: self.visitConstructor(t, name, simple) def visitConstructor(self, cons, name, simple): if cons.fields: fields = cons.name.value+"_fields" else: fields = "NULL" self.emit('%s_type = make_type("%s", %s_type, %s, %d);' % (cons.name, cons.name, name, fields, len(cons.fields)), 1) if simple: self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" % (cons.name, cons.name), 1) _SPECIALIZED_SEQUENCES = ('stmt', 'expr') def find_sequence(fields, doing_specialization): """Return True if any field uses a sequence.""" for f in fields: if f.seq: if not doing_specialization: return True if str(f.type) not in _SPECIALIZED_SEQUENCES: return True return False def has_sequence(types, doing_specialization): for t in types: if find_sequence(t.fields, doing_specialization): return True return False class StaticVisitor(PickleVisitor): CODE = '''Very simple, always emit this static code. Overide CODE''' def visit(self, object): self.emit(self.CODE, 0, reflow=False) class FreeUtilVisitor(StaticVisitor): CODE = '''static void free_seq_exprs(asdl_seq *seq) { int i, n; n = asdl_seq_LEN(seq); for (i = 0; i < n; i++) free_expr((expr_ty)asdl_seq_GET(seq, i)); asdl_seq_free(seq); } static void free_seq_stmts(asdl_seq *seq) { int i, n; n = asdl_seq_LEN(seq); for (i = 0; i < n; i++) free_stmt((stmt_ty)asdl_seq_GET(seq, i)); asdl_seq_free(seq); } ''' class ObjVisitor(PickleVisitor): def func_begin(self, name, has_seq): ctype = get_c_type(name) self.emit("PyObject*", 0) self.emit("ast2obj_%s(void* _o)" % (name), 0) self.emit("{", 0) self.emit("%s o = (%s)_o;" % (ctype, ctype), 1) self.emit("PyObject *result = NULL, *value = NULL;", 1) self.emit('if (!o) {', 1) self.emit("Py_INCREF(Py_None);", 2) self.emit('return Py_None;', 2) self.emit("}", 1) self.emit('', 0) def func_end(self, has_seq): self.emit("return result;", 1) self.emit("failed:", 0) self.emit("Py_XDECREF(value);", 1) self.emit("Py_XDECREF(result);", 1) self.emit("return NULL;", 1) self.emit("}", 0) self.emit("", 0) def visitSum(self, sum, name): if is_simple(sum): self.simpleSum(sum, name) return has_seq = has_sequence(sum.types, False) self.func_begin(name, has_seq) self.emit("switch (o->kind) {", 1) for i in range(len(sum.types)): t = sum.types[i] self.visitConstructor(t, i + 1, name) self.emit("}", 1) self.emit("", 0) self.func_end(has_seq) def simpleSum(self, sum, name): self.emit("PyObject* ast2obj_%s(%s_ty o)" % (name, name), 0) self.emit("{", 0) self.emit("switch(o) {", 1) for t in sum.types: self.emit("case %s:" % t.name, 2) self.emit("Py_INCREF(%s_singleton);" % t.name, 3) self.emit("return %s_singleton;" % t.name, 3) self.emit("}", 1) self.emit("return NULL; /* cannot happen */", 1) self.emit("}", 0) def visitProduct(self, prod, name): has_seq = find_sequence(prod.fields, False) self.func_begin(name, has_seq) self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1); self.emit("if (!result) return NULL;", 1) for field in prod.fields: self.visitField(field, name, 1, True) self.emit("", 0) self.func_end(has_seq) def visitConstructor(self, cons, enum, name): self.emit("case %s_kind:" % cons.name, 1) self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2); self.emit("if (!result) goto failed;", 2) for f in cons.fields: self.visitField(f, cons.name, 2, False) self.emit("break;", 2) def visitField(self, field, name, depth, product): def emit(s, d): self.emit(s, depth + d) if product: value = "o->%s" % field.name else: value = "o->v.%s.%s" % (name, field.name) self.set(field, value, depth) emit("if (!value) goto failed;", 0) emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) emit("goto failed;", 1) emit("Py_DECREF(value);", 0) def emitSeq(self, field, value, depth, emit): emit("seq = %s;" % value, 0) emit("n = asdl_seq_LEN(seq);", 0) emit("value = PyList_New(n);", 0) emit("if (!value) goto failed;", 0) emit("for (i = 0; i < n; i++) {", 0) self.set("value", field, "asdl_seq_GET(seq, i)", depth + 1) emit("if (!value1) goto failed;", 1) emit("PyList_SET_ITEM(value, i, value1);", 1) emit("value1 = NULL;", 1) emit("}", 0) def set(self, field, value, depth): if field.seq: # XXX should really check for is_simple, but that requires a symbol table if field.type.value == "cmpop": # While the sequence elements are stored as void*, # ast2obj_cmpop expects an enum self.emit("{", depth) self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1) self.emit("value = PyList_New(n);", depth+1) self.emit("if (!value) goto failed;", depth+1) self.emit("for(i = 0; i < n; i++)", depth+1) # This cannot fail, so no need for error handling self.emit("PyList_SET_ITEM(value, i, ast2obj_%s((%s_ty)asdl_seq_GET(%s, i)));" % (field.type, field.type, value), depth+2, reflow=False) self.emit("}", depth) else: self.emit("value = ast2obj_list(%s, ast2obj_%s);" % (value, field.type), depth) else: ctype = get_c_type(field.type) self.emit("value = ast2obj_%s(%s);" % (field.type, value), depth, reflow=False) class MarshalUtilVisitor(StaticVisitor): CODE = ''' #define CHECKSIZE(BUF, OFF, MIN) { \\ int need = *(OFF) + MIN; \\ if (need >= PyString_GET_SIZE(*(BUF))) { \\ int newsize = PyString_GET_SIZE(*(BUF)) * 2; \\ if (newsize < need) \\ newsize = need; \\ if (_PyString_Resize((BUF), newsize) < 0) \\ return 0; \\ } \\ } static int marshal_write_int(PyObject **buf, int *offset, int x) { char *s; CHECKSIZE(buf, offset, 4) s = PyString_AS_STRING(*buf) + (*offset); s[0] = (x & 0xff); s[1] = (x >> 8) & 0xff; s[2] = (x >> 16) & 0xff; s[3] = (x >> 24) & 0xff; *offset += 4; return 1; } static int marshal_write_bool(PyObject **buf, int *offset, bool b) { if (b) marshal_write_int(buf, offset, 1); else marshal_write_int(buf, offset, 0); return 1; } static int marshal_write_identifier(PyObject **buf, int *offset, identifier id) { int l = PyString_GET_SIZE(id); marshal_write_int(buf, offset, l); CHECKSIZE(buf, offset, l); memcpy(PyString_AS_STRING(*buf) + *offset, PyString_AS_STRING(id), l); *offset += l; return 1; } static int marshal_write_string(PyObject **buf, int *offset, string s) { int len = PyString_GET_SIZE(s); marshal_write_int(buf, offset, len); CHECKSIZE(buf, offset, len); memcpy(PyString_AS_STRING(*buf) + *offset, PyString_AS_STRING(s), len); *offset += len; return 1; } static int marshal_write_object(PyObject **buf, int *offset, object s) { /* XXX */ return 0; } ''' class MarshalFunctionVisitor(PickleVisitor): def func_begin(self, name, has_seq): ctype = get_c_type(name) self.emit("static int", 0) self.emit("marshal_write_%s(PyObject **buf, int *off, %s o)" % (name, ctype), 0) self.emit("{", 0) if has_seq: self.emit("int i;", 1) def func_end(self): self.emit("return 1;", 1) self.emit("}", 0) self.emit("", 0) def visitSum(self, sum, name): self.func_begin(name, has_sequence(sum.types, False)) simple = is_simple(sum) if simple: self.emit("switch (o) {", 1) else: self.emit("switch (o->kind) {", 1) for i in range(len(sum.types)): t = sum.types[i] self.visitConstructor(t, i + 1, name, simple) self.emit("}", 1) self.func_end() def visitProduct(self, prod, name): self.func_begin(name, find_sequence(prod.fields, False)) for field in prod.fields: self.visitField(field, name, 1, 1) self.func_end() def visitConstructor(self, cons, enum, name, simple): if simple: self.emit("case %s:" % cons.name, 1) self.emit("marshal_write_int(buf, off, %d);" % enum, 2); self.emit("break;", 2) else: self.emit("case %s_kind:" % cons.name, 1) self.emit("marshal_write_int(buf, off, %d);" % enum, 2) for f in cons.fields: self.visitField(f, cons.name, 2, 0) self.emit("break;", 2) def visitField(self, field, name, depth, product): def emit(s, d): self.emit(s, depth + d) if product: value = "o->%s" % field.name else: value = "o->v.%s.%s" % (name, field.name) if field.seq: emit("marshal_write_int(buf, off, asdl_seq_LEN(%s));" % value, 0) emit("for (i = 0; i < asdl_seq_LEN(%s); i++) {" % value, 0) emit("void *elt = asdl_seq_GET(%s, i);" % value, 1); ctype = get_c_type(field.type); emit("marshal_write_%s(buf, off, (%s)elt);" % (field.type, ctype), 1) emit("}", 0) elif field.opt: emit("if (%s) {" % value, 0) emit("marshal_write_int(buf, off, 1);", 1) emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 1) emit("}", 0) emit("else {", 0) emit("marshal_write_int(buf, off, 0);", 1) emit("}", 0) else: emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 0) class PartingShots(StaticVisitor): CODE = """ PyObject* PyAST_mod2obj(mod_ty t) { init_types(); return ast2obj_mod(t); } """ class ChainOfVisitors: def __init__(self, *visitors): self.visitors = visitors def visit(self, object): for v in self.visitors: v.visit(object) v.emit("", 0) def main(srcfile): argv0 = sys.argv[0] components = argv0.split(os.sep) argv0 = os.sep.join(components[-2:]) auto_gen_msg = '/* File automatically generated by %s */\n' % argv0 mod = asdl.parse(srcfile) if not asdl.check(mod): sys.exit(1) if INC_DIR: p = "%s/%s-ast.h" % (INC_DIR, mod.name) else: p = "%s-ast.h" % mod.name f = open(p, "wb") print >> f, auto_gen_msg print >> f, '#include "asdl.h"\n' c = ChainOfVisitors(TypeDefVisitor(f), StructVisitor(f), PrototypeVisitor(f), ## FreePrototypeVisitor(f), ) c.visit(mod) print >>f, "PyObject* PyAST_mod2obj(mod_ty t);" f.close() if SRC_DIR: p = "%s/%s-ast.c" % (SRC_DIR, mod.name) else: p = "%s-ast.c" % mod.name f = open(p, "wb") print >> f, auto_gen_msg print >> f, '#include "Python.h"' print >> f, '#include "%s-ast.h"' % mod.name print >> f v = ChainOfVisitors( # MarshalPrototypeVisitor(f), PyTypesDeclareVisitor(f), PyTypesVisitor(f), FunctionVisitor(f), ## FreeUtilVisitor(f), ObjVisitor(f), #MarshalUtilVisitor(f), #MarshalFunctionVisitor(f), PartingShots(f), ) v.visit(mod) f.close() if __name__ == "__main__": import sys import getopt INC_DIR = '' SRC_DIR = '' opts, args = getopt.getopt(sys.argv[1:], "h:c:") for o, v in opts: if o == '-h': INC_DIR = v if o == '-c': SRC_DIR = v if len(args) != 1: print "Must specify single input file" main(args[0])