diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst index 6cd4be49ce7..e48056c8e15 100644 --- a/Doc/c-api/concrete.rst +++ b/Doc/c-api/concrete.rst @@ -2425,6 +2425,12 @@ change in future releases of Python. pointer, *fp*. The function *close* will be called when the file should be closed. Return *NULL* on failure. +.. cfunction:: PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), int buffering, char *encoding, char *newline) + + Create a new :ctype:`PyFileObject` from the already-open standard C file + pointer, *fp*. The function works like *PyFile_FromFile* but takes + optional arguments for *buffering*, *encoding* and *newline*. Pass -1 for + *buffering* or *NULL* for *encoding* and *newline* to use the default values. .. cfunction:: FILE* PyFile_AsFile(PyObject *p) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 4555301f112..5c3fc990d60 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -338,6 +338,15 @@ PyFile_FromFile:char*:name:: PyFile_FromFile:char*:mode:: PyFile_FromFile:int(*:close):: +PyFile_FromFileEx:PyObject*::+1: +PyFile_FromFileEx:FILE*:fp:: +PyFile_FromFileEx:char*:name:: +PyFile_FromFileEx:char*:mode:: +PyFile_FromFileEx:int(*:close):: +PyFile_FromFileEx:int:buffering:: +PyFile_FromFileEx:char*:encoding:: +PyFile_FromFileEx:char*:newline:: + PyFile_FromString:PyObject*::+1: PyFile_FromString:char*:name:: PyFile_FromString:char*:mode:: diff --git a/Include/fileobject.h b/Include/fileobject.h index 2d8c397989a..b65d9847d45 100644 --- a/Include/fileobject.h +++ b/Include/fileobject.h @@ -9,6 +9,9 @@ extern "C" { #define PY_STDIOTEXTMODE "b" PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE*)); +PyAPI_FUNC(PyObject *) PyFile_FromFileEx(FILE *, char *, char *, + int (*)(FILE *), int, char *, + char *); PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); diff --git 
a/Lib/io.py b/Lib/io.py index e7533b5cfcd..07846bfb83f 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -178,6 +178,18 @@ def open(file, mode="r", buffering=None, encoding=None, newline=None): return text +class OpenWrapper: + """Wrapper for __builtin__.open + + Trick so that open won't become a bound method when stored + as a class variable (as dumbdbm does). + + See initstdio() in Python/pythonrun.c. + """ + def __new__(cls, *args, **kwargs): + return open(*args, **kwargs) + + class UnsupportedOperation(ValueError, IOError): pass diff --git a/Lib/site.py b/Lib/site.py index 53e859ea857..f1db89c9f0b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -402,23 +402,6 @@ def execsitecustomize(): (err.__class__.__name__, err)) -def installnewio(): - """Install new I/O library as default.""" - import io - # Hack to avoid a nasty recursion issue when Python is invoked - # in verbose mode: pre-import the Latin-1 and UTF-8 codecs - from encodings import latin_1, utf_8 - # Trick so that open won't become a bound method when stored - # as a class variable (as dumbdbm does) - class open: - def __new__(cls, *args, **kwds): - return io.open(*args, **kwds) - __builtin__.open = open - sys.__stdin__ = sys.stdin = io.open(0, "r", newline='\n') - sys.__stdout__ = sys.stdout = io.open(1, "w", newline='\n') - sys.__stderr__ = sys.stderr = io.open(2, "w", newline='\n') - - def main(): abs__file__() paths_in_sys = removeduppaths() @@ -433,7 +416,6 @@ def main(): sethelper() aliasmbcs() setencoding() - installnewio() execsitecustomize() # Remove sys.setdefaultencoding() so that users cannot change the # encoding after initialization. 
The test for presence is needed when diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 62b14e071bd..87efc33415e 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -38,9 +38,16 @@ def testLock(self): self.fail("release_lock() without lock should raise " "RuntimeError") +class ImportTests(unittest.TestCase): + + def test_find_module_encoding(self): + fd = imp.find_module("heapq")[0] + self.assertEqual(fd.encoding, "iso-8859-1") + def test_main(): test_support.run_unittest( LockTests, + ImportTests, ) if __name__ == "__main__": diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 02675f5f3ef..b6d200d0c21 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -28,22 +28,32 @@ extern "C" { PyObject * PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *)) { - PyObject *io, *stream, *nameobj; + return PyFile_FromFileEx(fp, name, mode, close, -1, NULL, NULL); +} + +PyObject * +PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), + int buffering, char *encoding, char *newline) +{ + PyObject *io, *stream, *nameobj=NULL; io = PyImport_ImportModule("io"); if (io == NULL) return NULL; - stream = PyObject_CallMethod(io, "open", "is", fileno(fp), mode); - Py_DECREF(io); + stream = PyObject_CallMethod(io, "open", "isiss", fileno(fp), mode, + buffering, encoding, newline); + Py_DECREF(io); if (stream == NULL) return NULL; - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) - PyErr_Clear(); - else { - if (PyObject_SetAttrString(stream, "name", nameobj) < 0) + if (name != NULL) { + nameobj = PyUnicode_FromString(name); + if (nameobj == NULL) PyErr_Clear(); - Py_DECREF(nameobj); + else { + if (PyObject_SetAttrString(stream, "name", nameobj) < 0) + PyErr_Clear(); + Py_DECREF(nameobj); + } } return stream; } diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8f67e0e62ab..0ccd02b58d1 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1601,7 +1601,28 @@ 
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) } #endif - +/* Get -*- encoding -*- from a Python file + + PyTokenizer_FindEncoding returns NULL when it can't find the encoding in + the first or second line of the file. In this case the encoding is + PyUnicode_GetDefaultEncoding(). +*/ +char * +PyTokenizer_FindEncoding(FILE *fp) { + struct tok_state *tok; + char *p_start=NULL, *p_end=NULL; + + if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) { + rewind(fp); + return NULL; + } + while(((tok->lineno <= 2) && (tok->done == E_OK))) { + PyTokenizer_Get(tok, &p_start, &p_end); + } + + rewind(fp); + return tok->encoding; +} #ifdef Py_DEBUG diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 72982bde47e..a66d78e7d97 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -67,6 +67,7 @@ extern void PyTokenizer_Free(struct tok_state *); extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset); +extern char * PyTokenizer_FindEncoding(FILE *fp); #ifdef __cplusplus } diff --git a/Python/import.c b/Python/import.c index 21dcbd45d7a..323b55a540e 100644 --- a/Python/import.c +++ b/Python/import.c @@ -91,6 +91,9 @@ static PyObject *extensions = NULL; /* This table is defined in config.c: */ extern struct _inittab _PyImport_Inittab[]; +/* Method from Parser/tokenizer.c */ +extern char * PyTokenizer_FindEncoding(FILE *fp); + struct _inittab *PyImport_Inittab = _PyImport_Inittab; /* these tables define the module suffixes that Python recognizes */ @@ -2558,6 +2561,7 @@ call_find_module(char *name, PyObject *path) struct filedescr *fdp; char pathname[MAXPATHLEN+1]; FILE *fp = NULL; + char *encoding = NULL; pathname[0] = '\0'; if (path == Py_None) @@ -2566,7 +2570,14 @@ call_find_module(char *name, PyObject *path) if (fdp == NULL) return NULL; if (fp != NULL) { - fob = PyFile_FromFile(fp, pathname, fdp->mode, fclose); + if (strchr(fdp->mode, 
'b') == NULL) { + /* Python text file, get encoding from tokenizer */ + encoding = PyTokenizer_FindEncoding(fp); + encoding = (encoding != NULL) ? encoding : + (char*)PyUnicode_GetDefaultEncoding(); + } + fob = PyFile_FromFileEx(fp, pathname, fdp->mode, fclose, -1, + (char*)encoding, NULL); if (fob == NULL) { fclose(fp); return NULL; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 4e239c94acc..f641547afd5 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -51,6 +51,7 @@ extern grammar _PyParser_Grammar; /* From graminit.c */ /* Forward */ static void initmain(void); static void initsite(void); +static int initstdio(void); static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *, PyCompilerFlags *, PyArena *); static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, @@ -241,6 +242,9 @@ Py_InitializeEx(int install_sigs) initsigs(); /* Signal handling stuff, including initintr() */ initmain(); /* Module __main__ */ + if (initstdio() < 0) + Py_FatalError( + "Py_Initialize: can't initialize sys standard streams"); if (!Py_NoSiteFlag) initsite(); /* Module site */ @@ -676,6 +680,81 @@ initsite(void) } } +/* Initialize sys.stdin, stdout, stderr and __builtin__.open */ +static int +initstdio(void) +{ + PyObject *iomod = NULL, *wrapper; + PyObject *bimod = NULL; + PyObject *m; + PyObject *std = NULL; + int status = 0; + + /* Hack to avoid a nasty recursion issue when Python is invoked + in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ + if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) { + goto error; + } + Py_DECREF(m); + + if (!(m = PyImport_ImportModule("encodings.latin_1"))) { + goto error; + } + Py_DECREF(m); + + if (!(bimod = PyImport_ImportModule("__builtin__"))) { + goto error; + } + + if (!(iomod = PyImport_ImportModule("io"))) { + goto error; + } + if (!(wrapper = PyObject_GetAttrString(iomod, "OpenWrapper"))) { + goto error; + } + + /* Set __builtin__.open */ + if 
(PyObject_SetAttrString(bimod, "open", wrapper) == -1) { + goto error; + } + + /* Set sys.stdin */ + if (!(std = PyFile_FromFileEx(stdin, "", "r", fclose, -1, + NULL, "\n"))) { + goto error; + } + PySys_SetObject("__stdin__", std); + PySys_SetObject("stdin", std); + Py_DECREF(std); + + /* Set sys.stdout */ + if (!(std = PyFile_FromFileEx(stdout, "", "w", fclose, -1, + NULL, "\n"))) { + goto error; + } + PySys_SetObject("__stdout__", std); + PySys_SetObject("stdout", std); + Py_DECREF(std); + + /* Set sys.stderr */ + if (!(std = PyFile_FromFileEx(stderr, "", "w", fclose, -1, + NULL, "\n"))) { + goto error; + } + PySys_SetObject("__stderr__", std); + PySys_SetObject("stderr", std); + Py_DECREF(std); + + if (0) { + error: + status = -1; + } + + Py_XDECREF(bimod); + Py_XDECREF(iomod); + return status; +} + /* Parse input from a file and execute it */ int @@ -1146,10 +1225,10 @@ PyErr_Display(PyObject *exception, PyObject *value, PyObject *tb) int err = 0; PyObject *f = PySys_GetObject("stderr"); Py_INCREF(value); - if (f == NULL) + if (f == NULL) { _PyObject_Dump(value); - if (f == NULL) fprintf(stderr, "lost sys.stderr\n"); + } else { fflush(stdout); if (tb && tb != Py_None) @@ -1589,6 +1668,9 @@ void Py_FatalError(const char *msg) { fprintf(stderr, "Fatal Python error: %s\n", msg); + if (PyErr_Occurred()) { + PyErr_Print(); + } #ifdef MS_WINDOWS OutputDebugString("Fatal Python error: "); OutputDebugString(msg);