Apply patch diff.txt from SF feature request

http://www.python.org/sf/444708

This adds the optional argument for str.strip
to unicode.strip too and makes it possible
to call str.strip with a unicode argument
and unicode.strip with a str argument.
This commit is contained in:
Walter Dörwald 2002-04-22 17:42:37 +00:00
parent a7cc43b9e8
commit de02bcb265
6 changed files with 243 additions and 80 deletions

View file

@ -235,17 +235,28 @@ The functions defined in this module are:
\function{joinfields()} was only used with two arguments.)
\end{funcdesc}
\begin{funcdesc}{lstrip}{s}
Return a copy of \var{s} but without leading whitespace characters.
\begin{funcdesc}{lstrip}{s\optional{, chars}}
Return a copy of the string with leading characters removed. If
\var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the beginning of
the string this method is called on.
\end{funcdesc}
\begin{funcdesc}{rstrip}{s}
Return a copy of \var{s} but without trailing whitespace
characters.
\begin{funcdesc}{rstrip}{s\optional{, chars}}
Return a copy of the string with trailing characters removed. If
\var{chars} is omitted or \code{None}, whitespace characters are
removed. If given and not \code{None}, \var{chars} must be a string;
the characters in the string will be stripped from the end of the
string this method is called on.
\end{funcdesc}
\begin{funcdesc}{strip}{s}
Return a copy of \var{s} without leading or trailing whitespace.
\begin{funcdesc}{strip}{s\optional{, chars}}
Return a copy of the string with leading and trailing characters
removed. If \var{chars} is omitted or \code{None}, whitespace
characters are removed. If given and not \code{None}, \var{chars}
must be a string; the characters in the string will be stripped from
both ends of the string this method is called on.
\end{funcdesc}
\begin{funcdesc}{swapcase}{s}

View file

@ -1040,6 +1040,13 @@ extern DL_IMPORT(int) PyUnicode_Contains(
PyObject *element /* Element string */
);
/* Return self with the characters contained in sepobj stripped from the
   end(s) selected by striptype.  Externally visible so that str.strip()
   can delegate to it when it is called with a unicode argument.
   sepobj is read as a unicode object without a type check, so the
   caller must pass a unicode object. */
extern DL_IMPORT(PyObject *) _PyUnicode_XStrip(
PyUnicodeObject *self, /* Unicode object to strip */
int striptype, /* 0 = left end, 1 = right end, 2 = both ends */
PyObject *sepobj /* Unicode object holding the characters to remove */
);
/* === Characters Type APIs =============================================== */
/* These should not be used directly. Use the Py_UNICODE_IS* and

View file

@ -169,12 +169,18 @@ def run_method_tests(test):
test('rstrip', ' hello ', ' hello', None)
test('strip', 'hello', 'hello', None)
# strip/lstrip/rstrip with real arg
# strip/lstrip/rstrip with str arg
test('strip', 'xyzzyhelloxyzzy', 'hello', 'xyz')
test('lstrip', 'xyzzyhelloxyzzy', 'helloxyzzy', 'xyz')
test('rstrip', 'xyzzyhelloxyzzy', 'xyzzyhello', 'xyz')
test('strip', 'hello', 'hello', 'xyz')
# strip/lstrip/rstrip with unicode arg
test('strip', 'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', 'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', 'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', 'hello', u'hello', u'xyz')
test('swapcase', 'HeLLo cOmpUteRs', 'hEllO CoMPuTErS')
test('translate', 'xyzabcdef', 'xyzxyz', transtable, 'def')

View file

@ -169,6 +169,24 @@ def __getitem__(self, i): return self.seq[i]
test('rstrip', u' hello ', u' hello')
test('strip', u'hello', u'hello')
# strip/lstrip/rstrip with None arg
test('strip', u' hello ', u'hello', None)
test('lstrip', u' hello ', u'hello ', None)
test('rstrip', u' hello ', u' hello', None)
test('strip', u'hello', u'hello', None)
# strip/lstrip/rstrip with unicode arg
test('strip', u'xyzzyhelloxyzzy', u'hello', u'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', u'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', u'xyz')
test('strip', u'hello', u'hello', u'xyz')
# strip/lstrip/rstrip with str arg
test('strip', u'xyzzyhelloxyzzy', u'hello', 'xyz')
test('lstrip', u'xyzzyhelloxyzzy', u'helloxyzzy', 'xyz')
test('rstrip', u'xyzzyhelloxyzzy', u'xyzzyhello', 'xyz')
test('strip', u'hello', u'hello', 'xyz')
test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS')
if 0:

View file

@ -1005,7 +1005,9 @@ static PyBufferProcs string_as_buffer = {
#define BOTHSTRIP 2
/* Arrays indexed by above */
static const char *stripname[] = {"lstrip", "rstrip", "strip"};
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
static PyObject *
@ -1449,15 +1451,26 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{
PyObject *sep = NULL;
if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep))
if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
return NULL;
if (sep != NULL && sep != Py_None) {
/* XXX What about Unicode? */
if (!PyString_Check(sep)) {
if (PyString_Check(sep))
return do_xstrip(self, striptype, sep);
else if (PyUnicode_Check(sep)) {
PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
PyObject *res;
if (uniself==NULL)
return NULL;
res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
striptype, sep);
Py_DECREF(uniself);
return res;
}
else {
PyErr_Format(PyExc_TypeError,
"%s arg must be None or string",
stripname[striptype]);
"%s arg must be None, str or unicode",
STRIPNAME(striptype));
return NULL;
}
return do_xstrip(self, striptype, sep);
@ -1468,11 +1481,12 @@ do_argstrip(PyStringObject *self, int striptype, PyObject *args)
static char strip__doc__[] =
"S.strip([sep]) -> string\n\
"S.strip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.";
If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_strip(PyStringObject *self, PyObject *args)
@ -1485,10 +1499,11 @@ string_strip(PyStringObject *self, PyObject *args)
static char lstrip__doc__[] =
"S.lstrip([sep]) -> string\n\
"S.lstrip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.";
If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_lstrip(PyStringObject *self, PyObject *args)
@ -1501,10 +1516,11 @@ string_lstrip(PyStringObject *self, PyObject *args)
static char rstrip__doc__[] =
"S.rstrip([sep]) -> string\n\
"S.rstrip([sep]) -> string or unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.";
If sep is given and not None, remove characters in sep instead.\n\
If sep is unicode, S will be converted to unicode before stripping";
static PyObject *
string_rstrip(PyStringObject *self, PyObject *args)

View file

@ -3503,35 +3503,6 @@ PyObject *split(PyUnicodeObject *self,
return split_substring(self,list,substring,maxcount);
}
static
PyObject *strip(PyUnicodeObject *self,
int left,
int right)
{
Py_UNICODE *p = self->str;
int start = 0;
int end = self->length;
if (left)
while (start < end && Py_UNICODE_ISSPACE(p[start]))
start++;
if (right)
while (end > start && Py_UNICODE_ISSPACE(p[end-1]))
end--;
if (start == 0 && end == self->length && PyUnicode_CheckExact(self)) {
/* couldn't strip anything off, return original string */
Py_INCREF(self);
return (PyObject*) self;
}
return (PyObject*) PyUnicode_FromUnicode(
self->str + start,
end - start
);
}
static
PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1,
@ -4464,17 +4435,173 @@ unicode_lower(PyUnicodeObject *self)
return fixup(self, fixlower);
}
static char lstrip__doc__[] =
"S.lstrip() -> unicode\n\
\n\
Return a copy of the string S with leading whitespace removed.";
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
/* Return a pointer to the first occurrence of c within the first n
   elements of s, or NULL if c does not occur there.
   The index has the same type as n so that the loop bound comparison is
   not a mixed signed/unsigned one and the index cannot overflow. */
static const Py_UNICODE *
unicode_memchr(const Py_UNICODE *s, Py_UNICODE c, size_t n)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        if (s[i] == c)
            return s + i;
    }
    return NULL;
}
/* Strip from self every leading and/or trailing character that occurs in
   sepobj.  striptype selects the end(s): the left end is processed unless
   it is RIGHTSTRIP, the right end unless it is LEFTSTRIP.
   sepobj is accessed as a unicode object without checking its type.
   Returns self (with a new reference) when nothing was removed and self
   is an exact unicode object; otherwise the result of
   PyUnicode_FromUnicode() on the remaining slice.
   Externally visible for str.strip(unicode). */
PyObject *
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
{
    Py_UNICODE *text = PyUnicode_AS_UNICODE(self);
    Py_UNICODE *chars = PyUnicode_AS_UNICODE(sepobj);
    int length = PyUnicode_GET_SIZE(self);
    int nchars = PyUnicode_GET_SIZE(sepobj);
    int start = 0;
    int stop = length;

    if (striptype != RIGHTSTRIP) {
        /* advance past leading characters found in chars */
        for (; start < length; start++) {
            if (unicode_memchr(chars, text[start], nchars) == NULL)
                break;
        }
    }

    if (striptype != LEFTSTRIP) {
        /* stop is one past the last character to keep */
        while (stop > start &&
               unicode_memchr(chars, text[stop-1], nchars) != NULL)
            stop--;
    }

    if (start != 0 || stop != length || !PyUnicode_CheckExact(self))
        return PyUnicode_FromUnicode(text + start, stop - start);

    /* nothing to strip: hand back the original object */
    Py_INCREF(self);
    return (PyObject*)self;
}
/* Whitespace-stripping worker: removes characters for which
   Py_UNICODE_ISSPACE() is true from the left end (unless striptype is
   RIGHTSTRIP) and from the right end (unless striptype is LEFTSTRIP).
   Returns self with a new reference when nothing was stripped and self
   is an exact unicode object; otherwise the result of
   PyUnicode_FromUnicode() on the remaining slice. */
static PyObject *
unicode_lstrip(PyUnicodeObject *self)
do_strip(PyUnicodeObject *self, int striptype)
{
return strip(self, 1, 0);
Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
int len = PyUnicode_GET_SIZE(self), i, j;
/* i: index of the first character to keep */
i = 0;
if (striptype != RIGHTSTRIP) {
while (i < len && Py_UNICODE_ISSPACE(s[i])) {
i++;
}
}
/* j: one past the last character to keep */
j = len;
if (striptype != LEFTSTRIP) {
/* scan backwards from the last character; the loop leaves j on the
   last kept index (or i-1), so the j++ below restores "one past" */
do {
j--;
} while (j >= i && Py_UNICODE_ISSPACE(s[j]));
j++;
}
/* nothing stripped from an exact unicode object: reuse self */
if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
}
else
return PyUnicode_FromUnicode(s+i, j-i);
}
/* Argument-handling front end for the unicode strip methods.
   Parses at most one optional argument from args:
     - absent or None: strip whitespace via do_strip();
     - unicode:        strip its characters via _PyUnicode_XStrip();
     - str:            convert it with PyUnicode_FromObject() first, then
                       strip as for unicode;
     - anything else:  set TypeError and return NULL.
   Returns NULL with an exception set on parse or conversion failure. */
static PyObject *
do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
{
    PyObject *chars = NULL;
    PyObject *converted;
    PyObject *result;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &chars))
        return NULL;

    /* no explicit character set given: plain whitespace strip */
    if (chars == NULL || chars == Py_None)
        return do_strip(self, striptype);

    if (PyUnicode_Check(chars))
        return _PyUnicode_XStrip(self, striptype, chars);

    if (!PyString_Check(chars)) {
        PyErr_Format(PyExc_TypeError,
                     "%s arg must be None, unicode or str",
                     STRIPNAME(striptype));
        return NULL;
    }

    /* str argument: work on a temporary unicode copy and release it after */
    converted = PyUnicode_FromObject(chars);
    if (converted == NULL)
        return NULL;
    result = _PyUnicode_XStrip(self, striptype, converted);
    Py_DECREF(converted);
    return result;
}
static char strip__doc__[] =
"S.strip([sep]) -> unicode\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";

/* unicode.strip([sep]): strip both ends.  An empty argument tuple goes
   straight to the whitespace worker; otherwise the argument is parsed
   and dispatched by do_argstrip(). */
static PyObject *
unicode_strip(PyUnicodeObject *self, PyObject *args)
{
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, BOTHSTRIP, args);
    return do_strip(self, BOTHSTRIP); /* Common case */
}
static char lstrip__doc__[] =
"S.lstrip([sep]) -> unicode\n\
\n\
Return a copy of the string S with leading whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";

/* unicode.lstrip([sep]): strip the left end only.  An empty argument
   tuple goes straight to the whitespace worker; otherwise the argument
   is parsed and dispatched by do_argstrip(). */
static PyObject *
unicode_lstrip(PyUnicodeObject *self, PyObject *args)
{
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, LEFTSTRIP, args);
    return do_strip(self, LEFTSTRIP); /* Common case */
}
static char rstrip__doc__[] =
"S.rstrip([sep]) -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.\n\
If sep is given and not None, remove characters in sep instead.\n\
If sep is a str, it will be converted to unicode before stripping";

/* unicode.rstrip([sep]): strip the right end only.  An empty argument
   tuple goes straight to the whitespace worker; otherwise the argument
   is parsed and dispatched by do_argstrip(). */
static PyObject *
unicode_rstrip(PyUnicodeObject *self, PyObject *args)
{
    if (PyTuple_GET_SIZE(args) != 0)
        return do_argstrip(self, RIGHTSTRIP, args);
    return do_strip(self, RIGHTSTRIP); /* Common case */
}
static PyObject*
unicode_repeat(PyUnicodeObject *str, int len)
{
@ -4677,17 +4804,6 @@ unicode_rjust(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, width - self->length, 0, ' ');
}
static char rstrip__doc__[] =
"S.rstrip() -> unicode\n\
\n\
Return a copy of the string S with trailing whitespace removed.";
static PyObject *
unicode_rstrip(PyUnicodeObject *self)
{
return strip(self, 0, 1);
}
static PyObject*
unicode_slice(PyUnicodeObject *self, int start, int end)
{
@ -4783,17 +4899,6 @@ PyObject *unicode_str(PyUnicodeObject *self)
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
}
static char strip__doc__[] =
"S.strip() -> unicode\n\
\n\
Return a copy of S with leading and trailing whitespace removed.";
static PyObject *
unicode_strip(PyUnicodeObject *self)
{
return strip(self, 1, 1);
}
static char swapcase__doc__[] =
"S.swapcase() -> unicode\n\
\n\
@ -4966,14 +5071,14 @@ static PyMethodDef unicode_methods[] = {
{"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_NOARGS, lstrip__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
{"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
{"rstrip", (PyCFunction) unicode_rstrip, METH_NOARGS, rstrip__doc__},
{"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
{"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
{"strip", (PyCFunction) unicode_strip, METH_NOARGS, strip__doc__},
{"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
{"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
{"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
{"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},