From ef82d2fdfe1aba18e29abbd59b22d19d490e9fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= Date: Sun, 27 Jun 2004 16:51:46 +0000 Subject: [PATCH] Patch #923098: Share interned strings in marshal. --- Doc/api/utilities.tex | 17 ++++++-- Doc/lib/libmarshal.tex | 18 ++++++++- Include/marshal.h | 8 ++-- Misc/NEWS | 3 ++ Python/import.c | 18 ++++----- Python/marshal.c | 89 +++++++++++++++++++++++++++++++++++------- 6 files changed, 122 insertions(+), 31 deletions(-) diff --git a/Doc/api/utilities.tex b/Doc/api/utilities.tex index f4fa899ec07..bfcfe2759a2 100644 --- a/Doc/api/utilities.tex +++ b/Doc/api/utilities.tex @@ -283,20 +283,31 @@ data must be opened in binary mode. Numeric values are stored with the least significant byte first. -\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file} +The module supports two versions of the data format: version 0 is the +historical version, version 1 (new in Python 2.4) shares interned +strings in the file, and upon unmarshalling. \var{Py_MARSHAL_VERSION} +indicates the current file format (currently 1). + +\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file, int version} Marshal a \ctype{long} integer, \var{value}, to \var{file}. This will only write the least-significant 32 bits of \var{value}; regardless of the size of the native \ctype{long} type. + + \versionchanged[\var{version} indicates the file format]{2.4} \end{cfuncdesc} \begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value, - FILE *file} + FILE *file, int version} Marshal a Python object, \var{value}, to \var{file}. + + \versionchanged[\var{version} indicates the file format]{2.4} \end{cfuncdesc} -\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value} +\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value, int version} Return a string object containing the marshalled representation of \var{value}. 
+ + \versionchanged[\var{version} indicates the file format]{2.4} \end{cfuncdesc} The following functions allow marshalled values to be read back in. diff --git a/Doc/lib/libmarshal.tex b/Doc/lib/libmarshal.tex index f597e84aceb..53ca6688562 100644 --- a/Doc/lib/libmarshal.tex +++ b/Doc/lib/libmarshal.tex @@ -73,6 +73,9 @@ The module defines these functions: a \exception{ValueError} exception is raised --- but garbage data will also be written to the file. The object will not be properly read back by \function{load()}. + + \versionadded[The \var{version} argument indicates the data + format that \code{dump} should use.]{2.4} \end{funcdesc} \begin{funcdesc}{load}{file} @@ -86,11 +89,14 @@ The module defines these functions: \code{None} for the unmarshallable type.} \end{funcdesc} -\begin{funcdesc}{dumps}{value} +\begin{funcdesc}{dumps}{value\optional{, version}} Return the string that would be written to a file by \code{dump(\var{value}, \var{file})}. The value must be a supported type. Raise a \exception{ValueError} exception if value has (or contains an object that has) an unsupported type. + + \versionadded[The \var{version} argument indicates the data + format that \code{dumps} should use.]{2.4} \end{funcdesc} \begin{funcdesc}{loads}{string} @@ -98,3 +104,13 @@ The module defines these functions: \exception{EOFError}, \exception{ValueError} or \exception{TypeError}. Extra characters in the string are ignored. \end{funcdesc} + +In addition, the following constants are defined: + +\begin{datadesc}{version} + Indicates the format that the module uses. Version 0 is the + historical format, version 1 (added in Python 2.4) shares + interned strings. The current version is 1.
+ + \versionadded{2.4} +\end{datadesc} \ No newline at end of file diff --git a/Include/marshal.h b/Include/marshal.h index f12309371b0..fc491dda9aa 100644 --- a/Include/marshal.h +++ b/Include/marshal.h @@ -7,9 +7,11 @@ extern "C" { #endif -PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *); -PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *); -PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *); +#define Py_MARSHAL_VERSION 1 + +PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); +PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); +PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int); PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); diff --git a/Misc/NEWS b/Misc/NEWS index f24adc55a98..41a79c87511 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1? Core and builtins ----------------- +- marshal now shares interned strings. This change introduces + a new .pyc magic. + - Bug #966623. classes created with type() in an exec(, {}) don't have a __module__, but code in typeobject assumed it would always be there. diff --git a/Python/import.c b/Python/import.c index d4c9e2e40ac..e76ada97838 100644 --- a/Python/import.c +++ b/Python/import.c @@ -26,9 +26,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); a .pyc file in text mode the magic number will be wrong; also, the Apple MPW compiler swaps their values, botching string constants. - Apparently, there was a distinction made between even and odd - bytecodes that is related to Unicode. The details aren't clear, - but the magic number has been odd for a long time. + The magic numbers must be spaced at least 2 values apart, as the + -U interpreter flag will cause MAGIC+1 to be used. They have been + odd numbers for some time now. There were a variety of old schemes for setting the magic number.
The current working scheme is to increment the previous value by @@ -47,9 +47,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); Python 2.3a0: 62011 Python 2.3a0: 62021 Python 2.3a0: 62011 (!) - Python 2.4a0: 62031 + Python 2.4a0: 62041 */ -#define MAGIC (62031 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62041 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the @@ -797,10 +797,10 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime) "# can't create %s\n", cpathname); return; } - PyMarshal_WriteLongToFile(pyc_magic, fp); + PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); /* First write a 0 for mtime */ - PyMarshal_WriteLongToFile(0L, fp); - PyMarshal_WriteObjectToFile((PyObject *)co, fp); + PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); + PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); if (fflush(fp) != 0 || ferror(fp)) { if (Py_VerboseFlag) PySys_WriteStderr("# can't write %s\n", cpathname); @@ -811,7 +811,7 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime) } /* Now write the true mtime */ fseek(fp, 4L, 0); - PyMarshal_WriteLongToFile(mtime, fp); + PyMarshal_WriteLongToFile(mtime, fp, Py_MARSHAL_VERSION); fflush(fp); fclose(fp); if (Py_VerboseFlag) diff --git a/Python/marshal.c b/Python/marshal.c index 901b9c69742..590e1ca3911 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -27,6 +27,8 @@ #define TYPE_COMPLEX 'x' #define TYPE_LONG 'l' #define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_STRINGREF 'R' #define TYPE_TUPLE '(' #define TYPE_LIST '[' #define TYPE_DICT '{' @@ -42,6 +44,7 @@ typedef struct { PyObject *str; char *ptr; char *end; + PyObject *strings; /* dict on marshal, list on unmarshal */ } WFILE; #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ @@ -189,7 +192,24 @@ w_object(PyObject *v, WFILE *p) } #endif 
else if (PyString_Check(v)) { - w_byte(TYPE_STRING, p); + if (p->strings && PyString_CHECK_INTERNED(v)) { + PyObject *o = PyDict_GetItem(p->strings, v); + if (o) { + long w = PyInt_AsLong(o); + w_byte(TYPE_STRINGREF, p); + w_long(w, p); + goto exit; + } + else { + o = PyInt_FromLong(PyDict_Size(p->strings)); + PyDict_SetItem(p->strings, v, o); + Py_DECREF(o); + w_byte(TYPE_INTERNED, p); + } + } + else { + w_byte(TYPE_STRING, p); + } n = PyString_GET_SIZE(v); w_long((long)n, p); w_string(PyString_AS_STRING(v), n, p); @@ -269,28 +289,32 @@ w_object(PyObject *v, WFILE *p) w_byte(TYPE_UNKNOWN, p); p->error = 1; } - + exit: p->depth--; } +/* version currently has no effect for writing longs. */ void -PyMarshal_WriteLongToFile(long x, FILE *fp) +PyMarshal_WriteLongToFile(long x, FILE *fp, int version) { WFILE wf; wf.fp = fp; wf.error = 0; wf.depth = 0; + wf.strings = NULL; w_long(x, &wf); } void -PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp) +PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) { WFILE wf; wf.fp = fp; wf.error = 0; wf.depth = 0; + wf.strings = (version > 0) ? 
PyDict_New() : NULL; w_object(x, &wf); + Py_XDECREF(wf.strings); } typedef WFILE RFILE; /* Same struct with different invariants */ @@ -491,6 +515,7 @@ r_object(RFILE *p) } #endif + case TYPE_INTERNED: case TYPE_STRING: n = r_long(p); if (n < 0) { @@ -506,6 +531,16 @@ r_object(RFILE *p) "EOF read where object expected"); } } + if (type == TYPE_INTERNED) { + PyString_InternInPlace(&v); + PyList_Append(p->strings, v); + } + return v; + + case TYPE_STRINGREF: + n = r_long(p); + v = PyList_GET_ITEM(p->strings, n); + Py_INCREF(v); return v; #ifdef Py_USING_UNICODE @@ -673,6 +708,7 @@ PyMarshal_ReadShortFromFile(FILE *fp) { RFILE rf; rf.fp = fp; + rf.strings = NULL; return r_short(&rf); } @@ -681,6 +717,7 @@ PyMarshal_ReadLongFromFile(FILE *fp) { RFILE rf; rf.fp = fp; + rf.strings = NULL; return r_long(&rf); } @@ -747,22 +784,30 @@ PyObject * PyMarshal_ReadObjectFromFile(FILE *fp) { RFILE rf; + PyObject *result; rf.fp = fp; - return read_object(&rf); + rf.strings = PyList_New(0); + result = r_object(&rf); + Py_DECREF(rf.strings); + return result; } PyObject * PyMarshal_ReadObjectFromString(char *str, int len) { RFILE rf; + PyObject *result; rf.fp = NULL; rf.ptr = str; rf.end = str + len; - return read_object(&rf); + rf.strings = PyList_New(0); + result = r_object(&rf); + Py_DECREF(rf.strings); + return result; } PyObject * -PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */ +PyMarshal_WriteObjectToString(PyObject *x, int version) { WFILE wf; wf.fp = NULL; @@ -773,7 +818,9 @@ PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */ wf.end = wf.ptr + PyString_Size(wf.str); wf.error = 0; wf.depth = 0; + wf.strings = (version > 0) ? 
PyDict_New() : NULL; w_object(x, &wf); + Py_XDECREF(wf.strings); if (wf.str != NULL) _PyString_Resize(&wf.str, (int) (wf.ptr - @@ -796,7 +843,8 @@ marshal_dump(PyObject *self, PyObject *args) WFILE wf; PyObject *x; PyObject *f; - if (!PyArg_ParseTuple(args, "OO:dump", &x, &f)) + int version = Py_MARSHAL_VERSION; + if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) return NULL; if (!PyFile_Check(f)) { PyErr_SetString(PyExc_TypeError, @@ -808,7 +856,9 @@ marshal_dump(PyObject *self, PyObject *args) wf.ptr = wf.end = NULL; wf.error = 0; wf.depth = 0; + wf.strings = (version > 0) ? PyDict_New() : NULL; w_object(x, &wf); + Py_XDECREF(wf.strings); if (wf.error) { PyErr_SetString(PyExc_ValueError, (wf.error==1)?"unmarshallable object" @@ -823,7 +873,7 @@ static PyObject * marshal_load(PyObject *self, PyObject *args) { RFILE rf; - PyObject *f; + PyObject *f, *result; if (!PyArg_ParseTuple(args, "O:load", &f)) return NULL; if (!PyFile_Check(f)) { @@ -832,16 +882,20 @@ marshal_load(PyObject *self, PyObject *args) return NULL; } rf.fp = PyFile_AsFile(f); - return read_object(&rf); + rf.strings = PyList_New(0); + result = read_object(&rf); + Py_DECREF(rf.strings); + return result; } static PyObject * marshal_dumps(PyObject *self, PyObject *args) { PyObject *x; - if (!PyArg_ParseTuple(args, "O:dumps", &x)) + int version = Py_MARSHAL_VERSION; + if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) /* "i" stores through an int *, so pass the address */ return NULL; - return PyMarshal_WriteObjectToString(x); + return PyMarshal_WriteObjectToString(x, version); } static PyObject * @@ -850,12 +904,16 @@ marshal_loads(PyObject *self, PyObject *args) RFILE rf; char *s; int n; - if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) + PyObject* result; + if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) /* loads() takes no version argument; an "|i" unit would have no int * to store into */ return NULL; rf.fp = NULL; rf.ptr = s; rf.end = s + n; - return read_object(&rf); + rf.strings = PyList_New(0); + result = read_object(&rf); + Py_DECREF(rf.strings); + return result; } static PyMethodDef marshal_methods[] = { @@ -869,5
+927,6 @@ static PyMethodDef marshal_methods[] = { PyMODINIT_FUNC PyMarshal_Init(void) { - (void) Py_InitModule("marshal", marshal_methods); + PyObject *mod = Py_InitModule("marshal", marshal_methods); + PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); }