Issue #10785: Store the filename as Unicode in the Python parser.

This commit is contained in:
Victor Stinner 2011-04-05 00:39:01 +02:00
parent 9bdb43e43f
commit 7f2fee3640
9 changed files with 94 additions and 42 deletions

View file

@ -9,7 +9,10 @@ extern "C" {
typedef struct {
int error;
const char *filename; /* decoded from the filesystem encoding */
#ifndef PGEN
/* The filename is useless for pgen, see comment in tok_state structure */
PyObject *filename;
#endif
int lineno;
int offset;
char *text; /* UTF-8-encoded string */
@ -66,8 +69,10 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(
perrdetail *err_ret,
int *flags);
/* Note that the following function is defined in pythonrun.c not parsetok.c. */
/* Note that the following functions are defined in pythonrun.c,
not in parsetok.c */
PyAPI_FUNC(void) PyParser_SetError(perrdetail *);
PyAPI_FUNC(void) PyParser_ClearError(perrdetail *);
#ifdef __cplusplus
}

View file

@ -238,14 +238,13 @@ POBJS= \
Parser/listnode.o \
Parser/node.o \
Parser/parser.o \
Parser/parsetok.o \
Parser/bitset.o \
Parser/metagrammar.o \
Parser/firstsets.o \
Parser/grammar.o \
Parser/pgen.o
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/tokenizer.o
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
PGOBJS= \
Objects/obmalloc.o \
@ -254,10 +253,12 @@ PGOBJS= \
Python/pyctype.o \
Parser/tokenizer_pgen.o \
Parser/printgrammar.o \
Parser/parsetok_pgen.o \
Parser/pgenmain.o
PARSER_HEADERS= \
Parser/parser.h \
Include/parsetok.h \
Parser/tokenizer.h
PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS)
@ -593,6 +594,7 @@ Parser/grammar.o: $(srcdir)/Parser/grammar.c \
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
@ -700,7 +702,6 @@ PYTHON_HEADERS= \
Include/objimpl.h \
Include/opcode.h \
Include/osdefs.h \
Include/parsetok.h \
Include/patchlevel.h \
Include/pgen.h \
Include/pgenheaders.h \

View file

@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins
-----------------
- Issue #10785: Store the filename as Unicode in the Python parser.
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
on Windows.

View file

@ -584,6 +584,7 @@ parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
else
PyParser_SetError(&err);
}
PyParser_ClearError(&err);
return (res);
}

View file

@ -13,7 +13,7 @@
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename);
static int initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
struct tok_state *tok;
int exec_input = start == file_input;
initerr(err_ret, filename);
if (initerr(err_ret, filename) < 0)
return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
return NULL;
}
tok->filename = filename ? filename : "<string>";
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
{
struct tok_state *tok;
initerr(err_ret, filename);
if (initerr(err_ret, filename) < 0)
return NULL;
if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
tok->filename = filename;
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
@ -267,14 +275,24 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
return n;
}
/* Reset *err_ret to the "no error" state and record the file name.

   error is set to E_OK, lineno and offset to 0, text to NULL, and token and
   expected to -1.  Unless PGEN is defined, the byte-string filename is
   decoded with PyUnicode_DecodeFSDefault() and stored as an object in
   err_ret->filename; a NULL filename is stored as "<string>".

   Returns 0 on success.  If the filename object cannot be created,
   err_ret->error is set to E_ERROR and -1 is returned.

   NOTE(review): this listing appears to interleave the pre- and post-change
   lines of the commit (two return types, and a plain
   `err_ret->filename = filename;` assignment next to the decoding code) --
   confirm against the real parsetok.c before relying on it. */
static void
static int
initerr(perrdetail *err_ret, const char *filename)
{
err_ret->error = E_OK;
err_ret->filename = filename;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
#ifndef PGEN
/* A NULL filename is reported as "<string>". */
if (filename)
err_ret->filename = PyUnicode_DecodeFSDefault(filename);
else
err_ret->filename = PyUnicode_FromString("<string>");
/* Both constructors return NULL on failure; report it to the caller. */
if (err_ret->filename == NULL) {
err_ret->error = E_ERROR;
return -1;
}
#endif
return 0;
}

2
Parser/parsetok_pgen.c Normal file
View file

@ -0,0 +1,2 @@
/* Compile parsetok.c a second time with PGEN defined, so that the code
   guarded by `#ifndef PGEN` in parsetok.c is left out of this object. */
#define PGEN
#include "parsetok.c"

View file

@ -128,7 +128,6 @@ tok_new(void)
tok->prompt = tok->nextprompt = NULL;
tok->lineno = 0;
tok->level = 0;
tok->filename = NULL;
tok->altwarning = 1;
tok->alterror = 1;
tok->alttabsize = 1;
@ -140,6 +139,7 @@ tok_new(void)
tok->encoding = NULL;
tok->cont_line = 0;
#ifndef PGEN
tok->filename = NULL;
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
#endif
@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{
char *line = NULL;
int badchar = 0;
PyObject *filename;
for (;;) {
if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with
@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
if (badchar) {
/* Need to add 1 to the line number, since this line
has not been counted, yet. */
filename = PyUnicode_DecodeFSDefault(tok->filename);
if (filename != NULL) {
PyErr_Format(PyExc_SyntaxError,
"Non-UTF-8 code starting with '\\x%.2x' "
"in file %U on line %i, "
"but no encoding declared; "
"see http://python.org/dev/peps/pep-0263/ for details",
badchar, filename, tok->lineno + 1);
Py_DECREF(filename);
}
PyErr_Format(PyExc_SyntaxError,
"Non-UTF-8 code starting with '\\x%.2x' "
"in file %U on line %i, "
"but no encoding declared; "
"see http://python.org/dev/peps/pep-0263/ for details",
badchar, tok->filename, tok->lineno + 1);
return error_ret(tok);
}
#endif
@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok)
#ifndef PGEN
Py_XDECREF(tok->decoding_readline);
Py_XDECREF(tok->decoding_buffer);
Py_XDECREF(tok->filename);
#endif
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok)
return 1;
}
if (tok->altwarning) {
PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
#ifdef PGEN
PySys_WriteStderr("inconsistent use of tabs and spaces "
"in indentation\n");
#else
PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
"in indentation\n", tok->filename);
#endif
tok->altwarning = 0;
}
return 0;
@ -1718,6 +1719,11 @@ PyTokenizer_FindEncoding(int fd)
fclose(fp);
return NULL;
}
#ifndef PGEN
tok->filename = PyUnicode_FromString("<string>");
if (tok->filename == NULL)
goto error;
#endif
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
}
@ -1727,6 +1733,9 @@ PyTokenizer_FindEncoding(int fd)
if (encoding)
strcpy(encoding, tok->encoding);
}
#ifndef PGEN
error:
#endif
PyTokenizer_Free(tok);
return encoding;
}

View file

@ -40,7 +40,13 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
const char *filename; /* encoded to the filesystem encoding */
#ifndef PGEN
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject *filename;
#endif
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */

View file

@ -62,6 +62,7 @@ static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
PyCompilerFlags *);
static void err_input(perrdetail *);
static void err_free(perrdetail *);
static void initsigs(void);
static void call_py_exitfuncs(void);
static void wait_for_thread_shutdown(void);
@ -1887,12 +1888,13 @@ PyParser_ASTFromString(const char *s, const char *filename, int start,
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n);
return mod;
}
else {
err_input(&err);
return NULL;
mod = NULL;
}
err_free(&err);
return mod;
}
mod_ty
@ -1917,14 +1919,15 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc,
flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n);
return mod;
}
else {
err_input(&err);
if (errcode)
*errcode = err.error;
return NULL;
mod = NULL;
}
err_free(&err);
return mod;
}
/* Simplified interface to parsefile -- return node or set exception */
@ -1938,6 +1941,7 @@ PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int fla
start, NULL, NULL, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
@ -1952,6 +1956,7 @@ PyParser_SimpleParseStringFlags(const char *str, int start, int flags)
start, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
@ -1964,6 +1969,7 @@ PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
&_PyParser_Grammar, start, &err, flags);
if (n == NULL)
err_input(&err);
err_free(&err);
return n;
}
@ -1976,12 +1982,24 @@ PyParser_SimpleParseStringFilename(const char *str, const char *filename, int st
/* May want to move a more generalized form of this to parsetok.c or
even parser modules. */
/* Public wrapper around err_free(): clears err->filename. */
void
PyParser_ClearError(perrdetail *err)
{
err_free(err);
}
/* Public wrapper around err_input(): sets the Python exception that
   corresponds to the error recorded in *err.  It does not call err_free(),
   so err->filename is left for the caller to clear. */
void
PyParser_SetError(perrdetail *err)
{
err_input(err);
}
/* Drop the reference stored in err->filename and reset the field to NULL.
   Py_CLEAR() accepts a NULL field, so calling this on an already cleared
   perrdetail does nothing. */
static void
err_free(perrdetail *err)
{
Py_CLEAR(err->filename);
}
/* Set the error appropriate to the given input error code (see errcode.h) */
static void
@ -1989,7 +2007,6 @@ err_input(perrdetail *err)
{
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
PyObject *filename;
char *msg = NULL;
errtype = PyExc_SyntaxError;
@ -2075,17 +2092,8 @@ err_input(perrdetail *err)
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace");
}
if (err->filename != NULL)
filename = PyUnicode_DecodeFSDefault(err->filename);
else {
Py_INCREF(Py_None);
filename = Py_None;
}
if (filename != NULL)
v = Py_BuildValue("(NiiN)", filename,
err->lineno, err->offset, errtext);
else
v = NULL;
v = Py_BuildValue("(OiiN)", err->filename,
err->lineno, err->offset, errtext);
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);