Patch #923098: Share interned strings in marshal.

This commit is contained in:
Martin v. Löwis 2004-06-27 16:51:46 +00:00
parent 8d97e33bb7
commit ef82d2fdfe
6 changed files with 122 additions and 31 deletions

View file

@ -283,20 +283,31 @@ data must be opened in binary mode.
Numeric values are stored with the least significant byte first. Numeric values are stored with the least significant byte first.
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file} The module supports two versions of the data format: version 0 is the
historical version, version 1 (new in Python 2.4) shares interned
strings in the file, and upon unmarshalling. \var{Py_MARSHAL_VERSION}
indicates the current file format (currently 1).
\begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file, int version}
Marshal a \ctype{long} integer, \var{value}, to \var{file}. This Marshal a \ctype{long} integer, \var{value}, to \var{file}. This
will only write the least-significant 32 bits of \var{value}; will only write the least-significant 32 bits of \var{value};
regardless of the size of the native \ctype{long} type. regardless of the size of the native \ctype{long} type.
\versionchanged[\var{version} indicates the file format]{2.4}
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value, \begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value,
FILE *file} FILE *file, int version}
Marshal a Python object, \var{value}, to \var{file}. Marshal a Python object, \var{value}, to \var{file}.
\versionchanged[\var{version} indicates the file format]{2.4}
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value} \begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value, int version}
Return a string object containing the marshalled representation of Return a string object containing the marshalled representation of
\var{value}. \var{value}.
\versionchanged[\var{version} indicates the file format]{2.4}
\end{cfuncdesc} \end{cfuncdesc}
The following functions allow marshalled values to be read back in. The following functions allow marshalled values to be read back in.

View file

@ -73,6 +73,9 @@ The module defines these functions:
a \exception{ValueError} exception is raised --- but garbage data a \exception{ValueError} exception is raised --- but garbage data
will also be written to the file. The object will not be properly will also be written to the file. The object will not be properly
read back by \function{load()}. read back by \function{load()}.
\versionadded[The \var{version} argument indicates the data
format that \code{dump} should use.]{2.4}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{load}{file} \begin{funcdesc}{load}{file}
@ -86,11 +89,14 @@ The module defines these functions:
\code{None} for the unmarshallable type.} \code{None} for the unmarshallable type.}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{dumps}{value} \begin{funcdesc}{dumps}{value\optional{, version}}
Return the string that would be written to a file by Return the string that would be written to a file by
\code{dump(\var{value}, \var{file})}. The value must be a supported \code{dump(\var{value}, \var{file})}. The value must be a supported
type. Raise a \exception{ValueError} exception if value has (or type. Raise a \exception{ValueError} exception if value has (or
contains an object that has) an unsupported type. contains an object that has) an unsupported type.
\versionadded[The \var{version} argument indicates the data
format that \code{dumps} should use.]{2.4}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{loads}{string} \begin{funcdesc}{loads}{string}
@ -98,3 +104,13 @@ The module defines these functions:
\exception{EOFError}, \exception{ValueError} or \exception{EOFError}, \exception{ValueError} or
\exception{TypeError}. Extra characters in the string are ignored. \exception{TypeError}. Extra characters in the string are ignored.
\end{funcdesc} \end{funcdesc}
In addition, the following constants are defined:
\begin{datadesc}{version}
Indicates the format that the module uses. Version 0 is the
historical format, version 1 (added in Python 2.4) shares
interned strings. The current version is 1.
\versionadded{2.4}
\end{datadesc}

View file

@ -7,9 +7,11 @@
extern "C" { extern "C" {
#endif #endif
PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *); #define Py_MARSHAL_VERSION 1
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *);
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *); PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int);
PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int);
PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int);
PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *);
PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *);

View file

@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
Core and builtins Core and builtins
----------------- -----------------
- marshal now shares interned strings. This change introduces
a new .pyc magic.
- Bug #966623. classes created with type() in an exec(, {}) don't - Bug #966623. classes created with type() in an exec(, {}) don't
have a __module__, but code in typeobject assumed it would always have a __module__, but code in typeobject assumed it would always
be there. be there.

View file

@ -26,9 +26,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
a .pyc file in text mode the magic number will be wrong; also, the a .pyc file in text mode the magic number will be wrong; also, the
Apple MPW compiler swaps their values, botching string constants. Apple MPW compiler swaps their values, botching string constants.
Apparently, there was a distinction made between even and odd The magic numbers must be spaced apart at least 2 values, as the
bytecodes that is related to Unicode. The details aren't clear, -U interpreter flag will cause MAGIC+1 to be used. They have been
but the magic number has been odd for a long time. odd numbers for some time now.
There were a variety of old schemes for setting the magic number. There were a variety of old schemes for setting the magic number.
The current working scheme is to increment the previous value by The current working scheme is to increment the previous value by
@ -47,9 +47,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
Python 2.3a0: 62011 Python 2.3a0: 62011
Python 2.3a0: 62021 Python 2.3a0: 62021
Python 2.3a0: 62011 (!) Python 2.3a0: 62011 (!)
Python 2.4a0: 62031 Python 2.4a0: 62041
*/ */
#define MAGIC (62031 | ((long)'\r'<<16) | ((long)'\n'<<24)) #define MAGIC (62041 | ((long)'\r'<<16) | ((long)'\n'<<24))
/* Magic word as global; note that _PyImport_Init() can change the /* Magic word as global; note that _PyImport_Init() can change the
value of this global to accommodate for alterations of how the value of this global to accommodate for alterations of how the
@ -797,10 +797,10 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
"# can't create %s\n", cpathname); "# can't create %s\n", cpathname);
return; return;
} }
PyMarshal_WriteLongToFile(pyc_magic, fp); PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION);
/* First write a 0 for mtime */ /* First write a 0 for mtime */
PyMarshal_WriteLongToFile(0L, fp); PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION);
PyMarshal_WriteObjectToFile((PyObject *)co, fp); PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION);
if (fflush(fp) != 0 || ferror(fp)) { if (fflush(fp) != 0 || ferror(fp)) {
if (Py_VerboseFlag) if (Py_VerboseFlag)
PySys_WriteStderr("# can't write %s\n", cpathname); PySys_WriteStderr("# can't write %s\n", cpathname);
@ -811,7 +811,7 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime)
} }
/* Now write the true mtime */ /* Now write the true mtime */
fseek(fp, 4L, 0); fseek(fp, 4L, 0);
PyMarshal_WriteLongToFile(mtime, fp); PyMarshal_WriteLongToFile(mtime, fp, Py_MARSHAL_VERSION);
fflush(fp); fflush(fp);
fclose(fp); fclose(fp);
if (Py_VerboseFlag) if (Py_VerboseFlag)

View file

@ -27,6 +27,8 @@
#define TYPE_COMPLEX 'x' #define TYPE_COMPLEX 'x'
#define TYPE_LONG 'l' #define TYPE_LONG 'l'
#define TYPE_STRING 's' #define TYPE_STRING 's'
#define TYPE_INTERNED 't'
#define TYPE_STRINGREF 'R'
#define TYPE_TUPLE '(' #define TYPE_TUPLE '('
#define TYPE_LIST '[' #define TYPE_LIST '['
#define TYPE_DICT '{' #define TYPE_DICT '{'
@ -42,6 +44,7 @@ typedef struct {
PyObject *str; PyObject *str;
char *ptr; char *ptr;
char *end; char *end;
PyObject *strings; /* dict on marshal, list on unmarshal */
} WFILE; } WFILE;
#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
@ -189,7 +192,24 @@ w_object(PyObject *v, WFILE *p)
} }
#endif #endif
else if (PyString_Check(v)) { else if (PyString_Check(v)) {
if (p->strings && PyString_CHECK_INTERNED(v)) {
PyObject *o = PyDict_GetItem(p->strings, v);
if (o) {
long w = PyInt_AsLong(o);
w_byte(TYPE_STRINGREF, p);
w_long(w, p);
goto exit;
}
else {
o = PyInt_FromLong(PyDict_Size(p->strings));
PyDict_SetItem(p->strings, v, o);
Py_DECREF(o);
w_byte(TYPE_INTERNED, p);
}
}
else {
w_byte(TYPE_STRING, p); w_byte(TYPE_STRING, p);
}
n = PyString_GET_SIZE(v); n = PyString_GET_SIZE(v);
w_long((long)n, p); w_long((long)n, p);
w_string(PyString_AS_STRING(v), n, p); w_string(PyString_AS_STRING(v), n, p);
@ -269,28 +289,32 @@ w_object(PyObject *v, WFILE *p)
w_byte(TYPE_UNKNOWN, p); w_byte(TYPE_UNKNOWN, p);
p->error = 1; p->error = 1;
} }
exit:
p->depth--; p->depth--;
} }
/* version currently has no effect for writing longs. */
void void
PyMarshal_WriteLongToFile(long x, FILE *fp) PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
{ {
WFILE wf; WFILE wf;
wf.fp = fp; wf.fp = fp;
wf.error = 0; wf.error = 0;
wf.depth = 0; wf.depth = 0;
wf.strings = NULL;
w_long(x, &wf); w_long(x, &wf);
} }
void void
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp) PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
{ {
WFILE wf; WFILE wf;
wf.fp = fp; wf.fp = fp;
wf.error = 0; wf.error = 0;
wf.depth = 0; wf.depth = 0;
wf.strings = (version > 0) ? PyDict_New() : NULL;
w_object(x, &wf); w_object(x, &wf);
Py_XDECREF(wf.strings);
} }
typedef WFILE RFILE; /* Same struct with different invariants */ typedef WFILE RFILE; /* Same struct with different invariants */
@ -491,6 +515,7 @@ r_object(RFILE *p)
} }
#endif #endif
case TYPE_INTERNED:
case TYPE_STRING: case TYPE_STRING:
n = r_long(p); n = r_long(p);
if (n < 0) { if (n < 0) {
@ -506,6 +531,16 @@ r_object(RFILE *p)
"EOF read where object expected"); "EOF read where object expected");
} }
} }
if (type == TYPE_INTERNED) {
PyString_InternInPlace(&v);
PyList_Append(p->strings, v);
}
return v;
case TYPE_STRINGREF:
n = r_long(p);
v = PyList_GET_ITEM(p->strings, n);
Py_INCREF(v);
return v; return v;
#ifdef Py_USING_UNICODE #ifdef Py_USING_UNICODE
@ -673,6 +708,7 @@ PyMarshal_ReadShortFromFile(FILE *fp)
{ {
RFILE rf; RFILE rf;
rf.fp = fp; rf.fp = fp;
rf.strings = NULL;
return r_short(&rf); return r_short(&rf);
} }
@ -681,6 +717,7 @@ PyMarshal_ReadLongFromFile(FILE *fp)
{ {
RFILE rf; RFILE rf;
rf.fp = fp; rf.fp = fp;
rf.strings = NULL;
return r_long(&rf); return r_long(&rf);
} }
@ -747,22 +784,30 @@ PyObject *
PyMarshal_ReadObjectFromFile(FILE *fp) PyMarshal_ReadObjectFromFile(FILE *fp)
{ {
RFILE rf; RFILE rf;
PyObject *result;
rf.fp = fp; rf.fp = fp;
return read_object(&rf); rf.strings = PyList_New(0);
result = r_object(&rf);
Py_DECREF(rf.strings);
return result;
} }
PyObject * PyObject *
PyMarshal_ReadObjectFromString(char *str, int len) PyMarshal_ReadObjectFromString(char *str, int len)
{ {
RFILE rf; RFILE rf;
PyObject *result;
rf.fp = NULL; rf.fp = NULL;
rf.ptr = str; rf.ptr = str;
rf.end = str + len; rf.end = str + len;
return read_object(&rf); rf.strings = PyList_New(0);
result = r_object(&rf);
Py_DECREF(rf.strings);
return result;
} }
PyObject * PyObject *
PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */ PyMarshal_WriteObjectToString(PyObject *x, int version)
{ {
WFILE wf; WFILE wf;
wf.fp = NULL; wf.fp = NULL;
@ -773,7 +818,9 @@ PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */
wf.end = wf.ptr + PyString_Size(wf.str); wf.end = wf.ptr + PyString_Size(wf.str);
wf.error = 0; wf.error = 0;
wf.depth = 0; wf.depth = 0;
wf.strings = (version > 0) ? PyDict_New() : NULL;
w_object(x, &wf); w_object(x, &wf);
Py_XDECREF(wf.strings);
if (wf.str != NULL) if (wf.str != NULL)
_PyString_Resize(&wf.str, _PyString_Resize(&wf.str,
(int) (wf.ptr - (int) (wf.ptr -
@ -796,7 +843,8 @@ marshal_dump(PyObject *self, PyObject *args)
WFILE wf; WFILE wf;
PyObject *x; PyObject *x;
PyObject *f; PyObject *f;
if (!PyArg_ParseTuple(args, "OO:dump", &x, &f)) int version = Py_MARSHAL_VERSION;
if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
return NULL; return NULL;
if (!PyFile_Check(f)) { if (!PyFile_Check(f)) {
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
@ -808,7 +856,9 @@ marshal_dump(PyObject *self, PyObject *args)
wf.ptr = wf.end = NULL; wf.ptr = wf.end = NULL;
wf.error = 0; wf.error = 0;
wf.depth = 0; wf.depth = 0;
wf.strings = (version > 0) ? PyDict_New() : 0;
w_object(x, &wf); w_object(x, &wf);
Py_XDECREF(wf.strings);
if (wf.error) { if (wf.error) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
(wf.error==1)?"unmarshallable object" (wf.error==1)?"unmarshallable object"
@ -823,7 +873,7 @@ static PyObject *
marshal_load(PyObject *self, PyObject *args) marshal_load(PyObject *self, PyObject *args)
{ {
RFILE rf; RFILE rf;
PyObject *f; PyObject *f, *result;
if (!PyArg_ParseTuple(args, "O:load", &f)) if (!PyArg_ParseTuple(args, "O:load", &f))
return NULL; return NULL;
if (!PyFile_Check(f)) { if (!PyFile_Check(f)) {
@ -832,16 +882,20 @@ marshal_load(PyObject *self, PyObject *args)
return NULL; return NULL;
} }
rf.fp = PyFile_AsFile(f); rf.fp = PyFile_AsFile(f);
return read_object(&rf); rf.strings = PyList_New(0);
result = read_object(&rf);
Py_DECREF(rf.strings);
return result;
} }
static PyObject * static PyObject *
marshal_dumps(PyObject *self, PyObject *args) marshal_dumps(PyObject *self, PyObject *args)
{ {
PyObject *x; PyObject *x;
if (!PyArg_ParseTuple(args, "O:dumps", &x)) int version = Py_MARSHAL_VERSION;
if (!PyArg_ParseTuple(args, "O|i:dumps", &x, version))
return NULL; return NULL;
return PyMarshal_WriteObjectToString(x); return PyMarshal_WriteObjectToString(x, version);
} }
static PyObject * static PyObject *
@ -850,12 +904,16 @@ marshal_loads(PyObject *self, PyObject *args)
RFILE rf; RFILE rf;
char *s; char *s;
int n; int n;
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) PyObject* result;
if (!PyArg_ParseTuple(args, "s#|i:loads", &s, &n))
return NULL; return NULL;
rf.fp = NULL; rf.fp = NULL;
rf.ptr = s; rf.ptr = s;
rf.end = s + n; rf.end = s + n;
return read_object(&rf); rf.strings = PyList_New(0);
result = read_object(&rf);
Py_DECREF(rf.strings);
return result;
} }
static PyMethodDef marshal_methods[] = { static PyMethodDef marshal_methods[] = {
@ -869,5 +927,6 @@ static PyMethodDef marshal_methods[] = {
PyMODINIT_FUNC PyMODINIT_FUNC
PyMarshal_Init(void) PyMarshal_Init(void)
{ {
(void) Py_InitModule("marshal", marshal_methods); PyObject *mod = Py_InitModule("marshal", marshal_methods);
PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
} }