gh-98836: Extend PyUnicode_FromFormat() (GH-98838)

* Support for conversion specifiers o (octal) and X (uppercase hexadecimal).
* Support for length modifiers j (intmax_t) and t (ptrdiff_t).
* Length modifiers are now applied to all integer conversions.
* Support for wchar_t C strings (%ls and %lV).
* Support for variable width and precision (*).
* Support for flag - (left alignment).
This commit is contained in:
Serhiy Storchaka 2023-05-22 00:32:39 +03:00 committed by GitHub
parent 6ba8406cb6
commit f3466bc040
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 584 additions and 287 deletions

View file

@ -394,98 +394,149 @@ APIs:
arguments, calculate the size of the resulting Python Unicode string and return
a string with the values formatted into it. The variable arguments must be C
types and must correspond exactly to the format characters in the *format*
ASCII-encoded string. The following format characters are allowed:
ASCII-encoded string.
.. % This should be exactly the same as the table in PyErr_Format.
A conversion specifier contains two or more characters and has the following
components, which must occur in this order:
.. tabularcolumns:: |l|l|L|
#. The ``'%'`` character, which marks the start of the specifier.
+-------------------+---------------------+----------------------------------+
| Format Characters | Type | Comment |
+===================+=====================+==================================+
| :attr:`%%` | *n/a* | The literal % character. |
+-------------------+---------------------+----------------------------------+
| :attr:`%c` | int | A single character, |
| | | represented as a C int. |
+-------------------+---------------------+----------------------------------+
| :attr:`%d` | int | Equivalent to |
| | | ``printf("%d")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%u` | unsigned int | Equivalent to |
| | | ``printf("%u")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%ld` | long | Equivalent to |
| | | ``printf("%ld")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%li` | long | Equivalent to |
| | | ``printf("%li")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%lu` | unsigned long | Equivalent to |
| | | ``printf("%lu")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%lld` | long long | Equivalent to |
| | | ``printf("%lld")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%lli` | long long | Equivalent to |
| | | ``printf("%lli")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%llu` | unsigned long long | Equivalent to |
| | | ``printf("%llu")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%zd` | :c:type:`\ | Equivalent to |
| | Py_ssize_t` | ``printf("%zd")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%zi` | :c:type:`\ | Equivalent to |
| | Py_ssize_t` | ``printf("%zi")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%zu` | size_t | Equivalent to |
| | | ``printf("%zu")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%i` | int | Equivalent to |
| | | ``printf("%i")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%x` | int | Equivalent to |
| | | ``printf("%x")``. [1]_ |
+-------------------+---------------------+----------------------------------+
| :attr:`%s` | const char\* | A null-terminated C character |
| | | array. |
+-------------------+---------------------+----------------------------------+
| :attr:`%p` | const void\* | The hex representation of a C |
| | | pointer. Mostly equivalent to |
| | | ``printf("%p")`` except that |
| | | it is guaranteed to start with |
| | | the literal ``0x`` regardless |
| | | of what the platform's |
| | | ``printf`` yields. |
+-------------------+---------------------+----------------------------------+
| :attr:`%A` | PyObject\* | The result of calling |
| | | :func:`ascii`. |
+-------------------+---------------------+----------------------------------+
| :attr:`%U` | PyObject\* | A Unicode object. |
+-------------------+---------------------+----------------------------------+
| :attr:`%V` | PyObject\*, | A Unicode object (which may be |
| | const char\* | ``NULL``) and a null-terminated |
| | | C character array as a second |
| | | parameter (which will be used, |
| | | if the first parameter is |
| | | ``NULL``). |
+-------------------+---------------------+----------------------------------+
| :attr:`%S` | PyObject\* | The result of calling |
| | | :c:func:`PyObject_Str`. |
+-------------------+---------------------+----------------------------------+
| :attr:`%R` | PyObject\* | The result of calling |
| | | :c:func:`PyObject_Repr`. |
+-------------------+---------------------+----------------------------------+
#. Conversion flags (optional), which affect the result of some conversion
types.
#. Minimum field width (optional).
If specified as ``'*'`` (an asterisk), the actual width is given in the
next argument, which must be of type :c:expr:`int`, and the object to
convert comes after the minimum field width and optional precision.
#. Precision (optional), given as a ``'.'`` (dot) followed by the precision.
If specified as ``'*'`` (an asterisk), the actual precision is given in
the next argument, which must be of type :c:expr:`int`, and the value to
convert comes after the precision.
#. Length modifier (optional).
#. Conversion type.
The conversion flag characters are:
.. tabularcolumns:: |l|L|
+-------+-------------------------------------------------------------+
| Flag | Meaning |
+=======+=============================================================+
| ``0`` | The conversion will be zero padded for numeric values. |
+-------+-------------------------------------------------------------+
| ``-`` | The converted value is left adjusted (overrides the ``0`` |
| | flag if both are given). |
+-------+-------------------------------------------------------------+
The length modifiers for the following integer conversions (``d``, ``i``,
``o``, ``u``, ``x``, or ``X``) specify the type of the argument
(:c:expr:`int` by default):
.. tabularcolumns:: |l|L|
+----------+-----------------------------------------------------+
| Modifier | Types |
+==========+=====================================================+
| ``l`` | :c:expr:`long` or :c:expr:`unsigned long` |
+----------+-----------------------------------------------------+
| ``ll`` | :c:expr:`long long` or :c:expr:`unsigned long long` |
+----------+-----------------------------------------------------+
| ``j`` | :c:expr:`intmax_t` or :c:expr:`uintmax_t` |
+----------+-----------------------------------------------------+
| ``z`` | :c:expr:`size_t` or :c:expr:`ssize_t` |
+----------+-----------------------------------------------------+
| ``t`` | :c:expr:`ptrdiff_t` |
+----------+-----------------------------------------------------+
The length modifier ``l`` for the following conversions ``s`` or ``V`` specifies
that the type of the argument is :c:expr:`const wchar_t*`.
The conversion specifiers are:
.. list-table::
:widths: auto
:header-rows: 1
* - Conversion Specifier
- Type
- Comment
* - ``%``
- *n/a*
- The literal ``%`` character.
* - ``d``, ``i``
- Specified by the length modifier
- The decimal representation of a signed C integer.
* - ``u``
- Specified by the length modifier
- The decimal representation of an unsigned C integer.
* - ``o``
- Specified by the length modifier
- The octal representation of an unsigned C integer.
* - ``x``
- Specified by the length modifier
- The hexadecimal representation of an unsigned C integer (lowercase).
* - ``X``
- Specified by the length modifier
- The hexadecimal representation of an unsigned C integer (uppercase).
* - ``c``
- :c:expr:`int`
- A single character.
* - ``s``
- :c:expr:`const char*` or :c:expr:`const wchar_t*`
- A null-terminated C character array.
* - ``p``
- :c:expr:`const void*`
- The hex representation of a C pointer.
Mostly equivalent to ``printf("%p")`` except that it is guaranteed to
start with the literal ``0x`` regardless of what the platform's
``printf`` yields.
* - ``A``
- :c:expr:`PyObject*`
- The result of calling :func:`ascii`.
* - ``U``
- :c:expr:`PyObject*`
- A Unicode object.
* - ``V``
- :c:expr:`PyObject*`, :c:expr:`const char*` or :c:expr:`const wchar_t*`
- A Unicode object (which may be ``NULL``) and a null-terminated
C character array as a second parameter (which will be used,
if the first parameter is ``NULL``).
* - ``S``
- :c:expr:`PyObject*`
- The result of calling :c:func:`PyObject_Str`.
* - ``R``
- :c:expr:`PyObject*`
- The result of calling :c:func:`PyObject_Repr`.
.. note::
The width formatter unit is number of characters rather than bytes.
The precision formatter unit is number of bytes for ``"%s"`` and
The precision formatter unit is number of bytes or :c:expr:`wchar_t`
items (if the length modifier ``l`` is used) for ``"%s"`` and
``"%V"`` (if the ``PyObject*`` argument is ``NULL``), and a number of
characters for ``"%A"``, ``"%U"``, ``"%S"``, ``"%R"`` and ``"%V"``
(if the ``PyObject*`` argument is not ``NULL``).
.. [1] For integer specifiers (d, u, ld, li, lu, lld, lli, llu, zd, zi,
zu, i, x): the 0-conversion flag has effect even when a precision is given.
.. note::
Unlike C :c:func:`printf`, the ``0`` flag has an effect even when
a precision is given for integer conversions (``d``, ``i``, ``u``, ``o``,
``x``, or ``X``).
.. versionchanged:: 3.2
Support for ``"%lld"`` and ``"%llu"`` added.
@ -498,6 +549,13 @@ APIs:
``"%V"``, ``"%S"``, ``"%R"`` added.
.. versionchanged:: 3.12
Support for conversion specifiers ``o`` and ``X``.
Support for length modifiers ``j`` and ``t``.
Length modifiers are now applied to all integer conversions.
Length modifier ``l`` is now applied to conversion specifiers ``s`` and ``V``.
Support for variable width and precision ``*``.
Support for flag ``-``.
An unrecognized format character now sets a :exc:`SystemError`.
In previous versions it caused all the rest of the format string to be
copied as-is to the result string, and any extra arguments discarded.

View file

@ -1402,6 +1402,12 @@ Porting to Python 3.12
:py:meth:`~class.__subclasses__` (using :c:func:`PyObject_CallMethod`,
for example).
* Add support for more formatting options (left aligning, octals, uppercase
hexadecimals, ``intmax_t``, ``ptrdiff_t``, ``wchar_t`` C
strings, variable width and precision) in :c:func:`PyUnicode_FromFormat` and
:c:func:`PyUnicode_FromFormatV`.
(Contributed by Serhiy Storchaka in :gh:`98836`.)
* An unrecognized format character in :c:func:`PyUnicode_FromFormat` and
:c:func:`PyUnicode_FromFormatV` now sets a :exc:`SystemError`.
In previous versions it caused all the rest of the format string to be

View file

@ -319,12 +319,17 @@ def test_fromobject(self):
def test_from_format(self):
"""Test PyUnicode_FromFormat()"""
# Length modifiers "j" and "t" are not tested here because ctypes does
# not expose types for intmax_t and ptrdiff_t.
# _testcapi.test_string_from_format() has a wider coverage of all
# formats.
import_helper.import_module('ctypes')
from ctypes import (
c_char_p,
pythonapi, py_object, sizeof,
c_int, c_long, c_longlong, c_ssize_t,
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p,
sizeof, c_wchar, c_wchar_p)
name = "PyUnicode_FromFormat"
_PyUnicode_FromFormat = getattr(pythonapi, name)
_PyUnicode_FromFormat.argtypes = (c_char_p,)
@ -449,37 +454,28 @@ def check_format(expected, format, *args):
check_format("repr= 12",
b'repr=%5.2V', None, b'123')
# test integer formats (%i, %d, %u)
# test integer formats (%i, %d, %u, %o, %x, %X)
check_format('010',
b'%03i', c_int(10))
check_format('0010',
b'%0.4i', c_int(10))
check_format('-123',
b'%i', c_int(-123))
check_format('-123',
b'%li', c_long(-123))
check_format('-123',
b'%lli', c_longlong(-123))
check_format('-123',
b'%zi', c_ssize_t(-123))
check_format('-123',
b'%d', c_int(-123))
check_format('-123',
b'%ld', c_long(-123))
check_format('-123',
b'%lld', c_longlong(-123))
check_format('-123',
b'%zd', c_ssize_t(-123))
check_format('123',
b'%u', c_uint(123))
check_format('123',
b'%lu', c_ulong(123))
check_format('123',
b'%llu', c_ulonglong(123))
check_format('123',
b'%zu', c_size_t(123))
for conv, signed, value, expected in [
(b'i', True, -123, '-123'),
(b'd', True, -123, '-123'),
(b'u', False, 123, '123'),
(b'o', False, 0o123, '123'),
(b'x', False, 0xabc, 'abc'),
(b'X', False, 0xabc, 'ABC'),
]:
for mod, ctype in [
(b'', c_int if signed else c_uint),
(b'l', c_long if signed else c_ulong),
(b'll', c_longlong if signed else c_ulonglong),
(b'z', c_ssize_t if signed else c_size_t),
]:
with self.subTest(format=b'%' + mod + conv):
check_format(expected,
b'%' + mod + conv, ctype(value))
# test long output
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
@ -494,40 +490,144 @@ def check_format(expected, format, *args):
PyUnicode_FromFormat(b'%p', c_void_p(-1))
# test padding (width and/or precision)
check_format('123'.rjust(10, '0'),
b'%010i', c_int(123))
check_format('123'.rjust(100),
b'%100i', c_int(123))
check_format('123'.rjust(100, '0'),
b'%.100i', c_int(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80i', c_int(123))
check_format('123', b'%2i', c_int(123))
check_format(' 123', b'%10i', c_int(123))
check_format('0000000123', b'%010i', c_int(123))
check_format('123 ', b'%-10i', c_int(123))
check_format('123 ', b'%-010i', c_int(123))
check_format('123', b'%.2i', c_int(123))
check_format('0000123', b'%.7i', c_int(123))
check_format(' 123', b'%10.2i', c_int(123))
check_format(' 0000123', b'%10.7i', c_int(123))
check_format('0000000123', b'%010.7i', c_int(123))
check_format('0000123 ', b'%-10.7i', c_int(123))
check_format('0000123 ', b'%-010.7i', c_int(123))
check_format('123'.rjust(10, '0'),
b'%010u', c_uint(123))
check_format('123'.rjust(100),
b'%100u', c_uint(123))
check_format('123'.rjust(100, '0'),
b'%.100u', c_uint(123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80u', c_uint(123))
check_format('-123', b'%2i', c_int(-123))
check_format(' -123', b'%10i', c_int(-123))
check_format('-000000123', b'%010i', c_int(-123))
check_format('-123 ', b'%-10i', c_int(-123))
check_format('-123 ', b'%-010i', c_int(-123))
check_format('-123', b'%.2i', c_int(-123))
check_format('-0000123', b'%.7i', c_int(-123))
check_format(' -123', b'%10.2i', c_int(-123))
check_format(' -0000123', b'%10.7i', c_int(-123))
check_format('-000000123', b'%010.7i', c_int(-123))
check_format('-0000123 ', b'%-10.7i', c_int(-123))
check_format('-0000123 ', b'%-010.7i', c_int(-123))
check_format('123'.rjust(10, '0'),
b'%010x', c_int(0x123))
check_format('123'.rjust(100),
b'%100x', c_int(0x123))
check_format('123'.rjust(100, '0'),
b'%.100x', c_int(0x123))
check_format('123'.rjust(80, '0').rjust(100),
b'%100.80x', c_int(0x123))
check_format('123', b'%2u', c_uint(123))
check_format(' 123', b'%10u', c_uint(123))
check_format('0000000123', b'%010u', c_uint(123))
check_format('123 ', b'%-10u', c_uint(123))
check_format('123 ', b'%-010u', c_uint(123))
check_format('123', b'%.2u', c_uint(123))
check_format('0000123', b'%.7u', c_uint(123))
check_format(' 123', b'%10.2u', c_uint(123))
check_format(' 0000123', b'%10.7u', c_uint(123))
check_format('0000000123', b'%010.7u', c_uint(123))
check_format('0000123 ', b'%-10.7u', c_uint(123))
check_format('0000123 ', b'%-010.7u', c_uint(123))
check_format('123', b'%2o', c_uint(0o123))
check_format(' 123', b'%10o', c_uint(0o123))
check_format('0000000123', b'%010o', c_uint(0o123))
check_format('123 ', b'%-10o', c_uint(0o123))
check_format('123 ', b'%-010o', c_uint(0o123))
check_format('123', b'%.2o', c_uint(0o123))
check_format('0000123', b'%.7o', c_uint(0o123))
check_format(' 123', b'%10.2o', c_uint(0o123))
check_format(' 0000123', b'%10.7o', c_uint(0o123))
check_format('0000000123', b'%010.7o', c_uint(0o123))
check_format('0000123 ', b'%-10.7o', c_uint(0o123))
check_format('0000123 ', b'%-010.7o', c_uint(0o123))
check_format('abc', b'%2x', c_uint(0xabc))
check_format(' abc', b'%10x', c_uint(0xabc))
check_format('0000000abc', b'%010x', c_uint(0xabc))
check_format('abc ', b'%-10x', c_uint(0xabc))
check_format('abc ', b'%-010x', c_uint(0xabc))
check_format('abc', b'%.2x', c_uint(0xabc))
check_format('0000abc', b'%.7x', c_uint(0xabc))
check_format(' abc', b'%10.2x', c_uint(0xabc))
check_format(' 0000abc', b'%10.7x', c_uint(0xabc))
check_format('0000000abc', b'%010.7x', c_uint(0xabc))
check_format('0000abc ', b'%-10.7x', c_uint(0xabc))
check_format('0000abc ', b'%-010.7x', c_uint(0xabc))
check_format('ABC', b'%2X', c_uint(0xabc))
check_format(' ABC', b'%10X', c_uint(0xabc))
check_format('0000000ABC', b'%010X', c_uint(0xabc))
check_format('ABC ', b'%-10X', c_uint(0xabc))
check_format('ABC ', b'%-010X', c_uint(0xabc))
check_format('ABC', b'%.2X', c_uint(0xabc))
check_format('0000ABC', b'%.7X', c_uint(0xabc))
check_format(' ABC', b'%10.2X', c_uint(0xabc))
check_format(' 0000ABC', b'%10.7X', c_uint(0xabc))
check_format('0000000ABC', b'%010.7X', c_uint(0xabc))
check_format('0000ABC ', b'%-10.7X', c_uint(0xabc))
check_format('0000ABC ', b'%-010.7X', c_uint(0xabc))
# test %A
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
# test %V
check_format('repr=abc',
b'repr=%V', 'abc', b'xyz')
check_format('abc',
b'%V', 'abc', b'xyz')
check_format('xyz',
b'%V', None, b'xyz')
# test %ls
check_format('abc', b'%ls', c_wchar_p('abc'))
check_format('\u4eba\u6c11', b'%ls', c_wchar_p('\u4eba\u6c11'))
check_format('\U0001f4bb+\U0001f40d',
b'%ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format(' ab', b'%5.2ls', c_wchar_p('abc'))
check_format(' \u4eba\u6c11', b'%5ls', c_wchar_p('\u4eba\u6c11'))
check_format(' \U0001f4bb+\U0001f40d',
b'%5ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format('\u4eba', b'%.1ls', c_wchar_p('\u4eba\u6c11'))
check_format('\U0001f4bb' if sizeof(c_wchar) > 2 else '\ud83d',
b'%.1ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format('\U0001f4bb+' if sizeof(c_wchar) > 2 else '\U0001f4bb',
b'%.2ls', c_wchar_p('\U0001f4bb+\U0001f40d'))
# test %lV
check_format('abc',
b'%lV', 'abc', c_wchar_p('xyz'))
check_format('xyz',
b'%lV', None, c_wchar_p('xyz'))
check_format('\u4eba\u6c11',
b'%lV', None, c_wchar_p('\u4eba\u6c11'))
check_format('\U0001f4bb+\U0001f40d',
b'%lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format(' ab',
b'%5.2lV', None, c_wchar_p('abc'))
check_format(' \u4eba\u6c11',
b'%5lV', None, c_wchar_p('\u4eba\u6c11'))
check_format(' \U0001f4bb+\U0001f40d',
b'%5lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format('\u4eba',
b'%.1lV', None, c_wchar_p('\u4eba\u6c11'))
check_format('\U0001f4bb' if sizeof(c_wchar) > 2 else '\ud83d',
b'%.1lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
check_format('\U0001f4bb+' if sizeof(c_wchar) > 2 else '\U0001f4bb',
b'%.2lV', None, c_wchar_p('\U0001f4bb+\U0001f40d'))
# test variable width and precision
check_format(' abc', b'%*s', c_int(5), b'abc')
check_format('ab', b'%.*s', c_int(2), b'abc')
check_format(' ab', b'%*.*s', c_int(5), c_int(2), b'abc')
check_format(' abc', b'%*U', c_int(5), 'abc')
check_format('ab', b'%.*U', c_int(2), 'abc')
check_format(' ab', b'%*.*U', c_int(5), c_int(2), 'abc')
check_format(' ab', b'%*.*V', c_int(5), c_int(2), None, b'abc')
check_format(' ab', b'%*.*lV', c_int(5), c_int(2),
None, c_wchar_p('abc'))
check_format(' 123', b'%*i', c_int(8), c_int(123))
check_format('00123', b'%.*i', c_int(5), c_int(123))
check_format(' 00123', b'%*.*i', c_int(8), c_int(5), c_int(123))
# test %p
# We cannot test the exact result,
@ -564,10 +664,11 @@ def check_format(expected, format, *args):
check_format('',
b'%s', b'')
# check for crashes
# test invalid format strings. these tests are just here
# to check for crashes and should not be considered as specifications
for fmt in (b'%', b'%0', b'%01', b'%.', b'%.1',
b'%0%s', b'%1%s', b'%.%s', b'%.1%s', b'%1abc',
b'%l', b'%ll', b'%z', b'%ls', b'%lls', b'%zs'):
b'%l', b'%ll', b'%z', b'%lls', b'%zs'):
with self.subTest(fmt=fmt):
self.assertRaisesRegex(SystemError, 'invalid format string',
PyUnicode_FromFormat, fmt, b'abc')

View file

@ -0,0 +1,4 @@
Add support for more formatting options (left aligning, octals, uppercase
hexadecimals, :c:expr:`intmax_t`, :c:expr:`ptrdiff_t`, :c:expr:`wchar_t` C
strings, variable width and precision) in :c:func:`PyUnicode_FromFormat` and
:c:func:`PyUnicode_FromFormatV`.

View file

@ -1330,10 +1330,8 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) {
p[0], p[1], p[2], p[3]
);
} else if (name->d.ip->length == 16) {
/* PyUnicode_FromFormat() does not support %X */
unsigned char *p = name->d.ip->data;
len = sprintf(
buf,
v = PyUnicode_FromFormat(
"%X:%X:%X:%X:%X:%X:%X:%X",
p[0] << 8 | p[1],
p[2] << 8 | p[3],
@ -1344,7 +1342,6 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) {
p[12] << 8 | p[13],
p[14] << 8 | p[15]
);
v = PyUnicode_FromStringAndSize(buf, len);
} else {
v = PyUnicode_FromString("<invalid>");
}

View file

@ -1,3 +1,5 @@
#include <stddef.h> // ptrdiff_t
#define PY_SSIZE_T_CLEAN
#include "parts.h"
@ -1130,25 +1132,48 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1( "%c", "c", 'c');
CHECK_FORMAT_1( "%0c", "c", 'c');
CHECK_FORMAT_1("%00c", "c", 'c');
CHECK_FORMAT_1( "%2c", "c", 'c');
CHECK_FORMAT_1("%02c", "c", 'c');
CHECK_FORMAT_1("%.0c", "c", 'c');
CHECK_FORMAT_1("%.2c", "c", 'c');
CHECK_FORMAT_1( "%2c", NULL, 'c');
CHECK_FORMAT_1("%02c", NULL, 'c');
CHECK_FORMAT_1("%.0c", NULL, 'c');
CHECK_FORMAT_1("%.2c", NULL, 'c');
// Integers
CHECK_FORMAT_1("%d", "123", (int)123);
CHECK_FORMAT_1("%i", "123", (int)123);
CHECK_FORMAT_1("%u", "123", (unsigned int)123);
CHECK_FORMAT_1("%x", "7b", (unsigned int)123);
CHECK_FORMAT_1("%X", "7B", (unsigned int)123);
CHECK_FORMAT_1("%o", "173", (unsigned int)123);
CHECK_FORMAT_1("%ld", "123", (long)123);
CHECK_FORMAT_1("%li", "123", (long)123);
CHECK_FORMAT_1("%lu", "123", (unsigned long)123);
CHECK_FORMAT_1("%lx", "7b", (unsigned long)123);
CHECK_FORMAT_1("%lX", "7B", (unsigned long)123);
CHECK_FORMAT_1("%lo", "173", (unsigned long)123);
CHECK_FORMAT_1("%lld", "123", (long long)123);
CHECK_FORMAT_1("%lli", "123", (long long)123);
CHECK_FORMAT_1("%llu", "123", (unsigned long long)123);
CHECK_FORMAT_1("%llx", "7b", (unsigned long long)123);
CHECK_FORMAT_1("%llX", "7B", (unsigned long long)123);
CHECK_FORMAT_1("%llo", "173", (unsigned long long)123);
CHECK_FORMAT_1("%zd", "123", (Py_ssize_t)123);
CHECK_FORMAT_1("%zi", "123", (Py_ssize_t)123);
CHECK_FORMAT_1("%zu", "123", (size_t)123);
CHECK_FORMAT_1("%x", "7b", (int)123);
CHECK_FORMAT_1("%zx", "7b", (size_t)123);
CHECK_FORMAT_1("%zX", "7B", (size_t)123);
CHECK_FORMAT_1("%zo", "173", (size_t)123);
CHECK_FORMAT_1("%td", "123", (ptrdiff_t)123);
CHECK_FORMAT_1("%ti", "123", (ptrdiff_t)123);
CHECK_FORMAT_1("%tu", "123", (ptrdiff_t)123);
CHECK_FORMAT_1("%tx", "7b", (ptrdiff_t)123);
CHECK_FORMAT_1("%tX", "7B", (ptrdiff_t)123);
CHECK_FORMAT_1("%to", "173", (ptrdiff_t)123);
CHECK_FORMAT_1("%jd", "123", (intmax_t)123);
CHECK_FORMAT_1("%ji", "123", (intmax_t)123);
CHECK_FORMAT_1("%ju", "123", (uintmax_t)123);
CHECK_FORMAT_1("%jx", "7b", (uintmax_t)123);
CHECK_FORMAT_1("%jX", "7B", (uintmax_t)123);
CHECK_FORMAT_1("%jo", "173", (uintmax_t)123);
CHECK_FORMAT_1("%d", "-123", (int)-123);
CHECK_FORMAT_1("%i", "-123", (int)-123);
@ -1158,7 +1183,10 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%lli", "-123", (long long)-123);
CHECK_FORMAT_1("%zd", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%zi", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%x", "ffffff85", (int)-123);
CHECK_FORMAT_1("%td", "-123", (ptrdiff_t)-123);
CHECK_FORMAT_1("%ti", "-123", (ptrdiff_t)-123);
CHECK_FORMAT_1("%jd", "-123", (intmax_t)-123);
CHECK_FORMAT_1("%ji", "-123", (intmax_t)-123);
// Integers: width < length
CHECK_FORMAT_1("%1d", "123", (int)123);
@ -1183,7 +1211,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%1lli", "-123", (long long)-123);
CHECK_FORMAT_1("%1zd", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%1zi", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%1x", "ffffff85", (int)-123);
// Integers: width > length
CHECK_FORMAT_1("%5d", " 123", (int)123);
@ -1208,7 +1235,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%5lli", " -123", (long long)-123);
CHECK_FORMAT_1("%5zd", " -123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%5zi", " -123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%9x", " ffffff85", (int)-123);
// Integers: width > length, 0-flag
CHECK_FORMAT_1("%05d", "00123", (int)123);
@ -1233,7 +1259,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%05lli", "-0123", (long long)-123);
CHECK_FORMAT_1("%05zd", "-0123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%05zi", "-0123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%09x", "0ffffff85", (int)-123);
// Integers: precision < length
CHECK_FORMAT_1("%.1d", "123", (int)123);
@ -1258,7 +1283,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%.1lli", "-123", (long long)-123);
CHECK_FORMAT_1("%.1zd", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%.1zi", "-123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%.1x", "ffffff85", (int)-123);
// Integers: precision > length
CHECK_FORMAT_1("%.5d", "00123", (int)123);
@ -1283,7 +1307,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%.5lli", "-00123", (long long)-123);
CHECK_FORMAT_1("%.5zd", "-00123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%.5zi", "-00123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%.9x", "0ffffff85", (int)-123);
// Integers: width > precision > length
CHECK_FORMAT_1("%7.5d", " 00123", (int)123);
@ -1308,7 +1331,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%7.5lli", " -00123", (long long)-123);
CHECK_FORMAT_1("%7.5zd", " -00123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%7.5zi", " -00123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%10.9x", " 0ffffff85", (int)-123);
// Integers: width > precision > length, 0-flag
CHECK_FORMAT_1("%07.5d", "0000123", (int)123);
@ -1333,7 +1355,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%07.5lli", "-000123", (long long)-123);
CHECK_FORMAT_1("%07.5zd", "-000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%07.5zi", "-000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%010.9x", "00ffffff85", (int)-123);
// Integers: precision > width > length
CHECK_FORMAT_1("%5.7d", "0000123", (int)123);
@ -1358,7 +1379,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%5.7lli", "-0000123", (long long)-123);
CHECK_FORMAT_1("%5.7zd", "-0000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%5.7zi", "-0000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%9.10x", "00ffffff85", (int)-123);
// Integers: precision > width > length, 0-flag
CHECK_FORMAT_1("%05.7d", "0000123", (int)123);
@ -1383,7 +1403,6 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
CHECK_FORMAT_1("%05.7lli", "-0000123", (long long)-123);
CHECK_FORMAT_1("%05.7zd", "-0000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%05.7zi", "-0000123", (Py_ssize_t)-123);
CHECK_FORMAT_1("%09.10x", "00ffffff85", (int)-123);
// Integers: precision = 0, arg = 0 (empty string in C)
CHECK_FORMAT_1("%.0d", "0", (int)0);
@ -1402,66 +1421,80 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
// Strings
CHECK_FORMAT_1("%s", "None", "None");
CHECK_FORMAT_1("%ls", "None", L"None");
CHECK_FORMAT_1("%U", "None", unicode);
CHECK_FORMAT_1("%A", "None", Py_None);
CHECK_FORMAT_1("%S", "None", Py_None);
CHECK_FORMAT_1("%R", "None", Py_None);
CHECK_FORMAT_2("%V", "None", unicode, "ignored");
CHECK_FORMAT_2("%V", "None", NULL, "None");
CHECK_FORMAT_2("%lV", "None", NULL, L"None");
// Strings: width < length
CHECK_FORMAT_1("%1s", "None", "None");
CHECK_FORMAT_1("%1ls", "None", L"None");
CHECK_FORMAT_1("%1U", "None", unicode);
CHECK_FORMAT_1("%1A", "None", Py_None);
CHECK_FORMAT_1("%1S", "None", Py_None);
CHECK_FORMAT_1("%1R", "None", Py_None);
CHECK_FORMAT_2("%1V", "None", unicode, "ignored");
CHECK_FORMAT_2("%1V", "None", NULL, "None");
CHECK_FORMAT_2("%1lV", "None", NULL, L"None");
// Strings: width > length
CHECK_FORMAT_1("%5s", " None", "None");
CHECK_FORMAT_1("%5ls", " None", L"None");
CHECK_FORMAT_1("%5U", " None", unicode);
CHECK_FORMAT_1("%5A", " None", Py_None);
CHECK_FORMAT_1("%5S", " None", Py_None);
CHECK_FORMAT_1("%5R", " None", Py_None);
CHECK_FORMAT_2("%5V", " None", unicode, "ignored");
CHECK_FORMAT_2("%5V", " None", NULL, "None");
CHECK_FORMAT_2("%5lV", " None", NULL, L"None");
// Strings: precision < length
CHECK_FORMAT_1("%.1s", "N", "None");
CHECK_FORMAT_1("%.1ls", "N", L"None");
CHECK_FORMAT_1("%.1U", "N", unicode);
CHECK_FORMAT_1("%.1A", "N", Py_None);
CHECK_FORMAT_1("%.1S", "N", Py_None);
CHECK_FORMAT_1("%.1R", "N", Py_None);
CHECK_FORMAT_2("%.1V", "N", unicode, "ignored");
CHECK_FORMAT_2("%.1V", "N", NULL, "None");
CHECK_FORMAT_2("%.1lV", "N", NULL, L"None");
// Strings: precision > length
CHECK_FORMAT_1("%.5s", "None", "None");
CHECK_FORMAT_1("%.5ls", "None", L"None");
CHECK_FORMAT_1("%.5U", "None", unicode);
CHECK_FORMAT_1("%.5A", "None", Py_None);
CHECK_FORMAT_1("%.5S", "None", Py_None);
CHECK_FORMAT_1("%.5R", "None", Py_None);
CHECK_FORMAT_2("%.5V", "None", unicode, "ignored");
CHECK_FORMAT_2("%.5V", "None", NULL, "None");
CHECK_FORMAT_2("%.5lV", "None", NULL, L"None");
// Strings: precision < length, width > length
CHECK_FORMAT_1("%5.1s", " N", "None");
CHECK_FORMAT_1("%5.1ls"," N", L"None");
CHECK_FORMAT_1("%5.1U", " N", unicode);
CHECK_FORMAT_1("%5.1A", " N", Py_None);
CHECK_FORMAT_1("%5.1S", " N", Py_None);
CHECK_FORMAT_1("%5.1R", " N", Py_None);
CHECK_FORMAT_2("%5.1V", " N", unicode, "ignored");
CHECK_FORMAT_2("%5.1V", " N", NULL, "None");
CHECK_FORMAT_2("%5.1lV"," N", NULL, L"None");
// Strings: width < length, precision > length
CHECK_FORMAT_1("%1.5s", "None", "None");
CHECK_FORMAT_1("%1.5ls", "None", L"None");
CHECK_FORMAT_1("%1.5U", "None", unicode);
CHECK_FORMAT_1("%1.5A", "None", Py_None);
CHECK_FORMAT_1("%1.5S", "None", Py_None);
CHECK_FORMAT_1("%1.5R", "None", Py_None);
CHECK_FORMAT_2("%1.5V", "None", unicode, "ignored");
CHECK_FORMAT_2("%1.5V", "None", NULL, "None");
CHECK_FORMAT_2("%1.5lV", "None", NULL, L"None");
Py_XDECREF(unicode);
Py_RETURN_NONE;

View file

@ -1849,14 +1849,11 @@ static PyObject *
kqueue_event_repr(kqueue_event_Object *s)
{
char buf[1024];
PyOS_snprintf(
buf, sizeof(buf),
return PyUnicode_FromFormat(
"<select.kevent ident=%zu filter=%d flags=0x%x fflags=0x%x "
"data=0x%llx udata=%p>",
(size_t)(s->e.ident), (int)s->e.filter, (unsigned int)s->e.flags,
(unsigned int)s->e.fflags, (long long)(s->e.data), (void *)s->e.udata);
return PyUnicode_FromString(buf);
}
static int

View file

@ -1339,8 +1339,6 @@ setbdaddr(const char *name, bdaddr_t *bdaddr)
static PyObject *
makebdaddr(bdaddr_t *bdaddr)
{
char buf[(6 * 2) + 5 + 1];
#ifdef MS_WINDOWS
int i;
unsigned int octets[6];
@ -1349,16 +1347,14 @@ makebdaddr(bdaddr_t *bdaddr)
octets[i] = ((*bdaddr) >> (8 * i)) & 0xFF;
}
sprintf(buf, "%02X:%02X:%02X:%02X:%02X:%02X",
return PyUnicode_FromFormat("%02X:%02X:%02X:%02X:%02X:%02X",
octets[5], octets[4], octets[3],
octets[2], octets[1], octets[0]);
#else
sprintf(buf, "%02X:%02X:%02X:%02X:%02X:%02X",
return PyUnicode_FromFormat("%02X:%02X:%02X:%02X:%02X:%02X",
bdaddr->b[5], bdaddr->b[4], bdaddr->b[3],
bdaddr->b[2], bdaddr->b[1], bdaddr->b[0]);
#endif
return PyUnicode_FromString(buf);
}
#endif

View file

@ -56,6 +56,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
#include "pycore_unicodeobject_generated.h" // _PyUnicode_InitStaticStrings()
#include "stringlib/eq.h" // unicode_eq()
#include <stddef.h> // ptrdiff_t
#ifdef MS_WINDOWS
#include <windows.h>
@ -2285,14 +2286,15 @@ PyUnicode_AsUCS4Copy(PyObject *string)
return as_ucs4(string, NULL, 0, 1);
}
/* maximum number of characters required for output of %lld or %p.
We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
plus 1 for the sign. 53/22 is an upper bound for log10(256). */
#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
/* maximum number of characters required for output of %jo or %jd or %p.
We need at most ceil(log8(256)*sizeof(intmax_t)) digits,
plus 1 for the sign, plus 2 for the 0x prefix (for %p),
plus 1 for the terminal NUL. */
#define MAX_INTMAX_CHARS (5 + (sizeof(intmax_t)*8-1) / 3)
static int
unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
Py_ssize_t width, Py_ssize_t precision)
Py_ssize_t width, Py_ssize_t precision, int flags)
{
Py_ssize_t length, fill, arglen;
Py_UCS4 maxchar;
@ -2314,8 +2316,8 @@ unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
if (_PyUnicodeWriter_Prepare(writer, arglen, maxchar) == -1)
return -1;
if (width > length) {
fill = width - length;
fill = Py_MAX(width - length, 0);
if (fill && !(flags & F_LJUST)) {
if (PyUnicode_Fill(writer->buffer, writer->pos, fill, ' ') == -1)
return -1;
writer->pos += fill;
@ -2324,12 +2326,19 @@ unicode_fromformat_write_str(_PyUnicodeWriter *writer, PyObject *str,
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
str, 0, length);
writer->pos += length;
if (fill && (flags & F_LJUST)) {
if (PyUnicode_Fill(writer->buffer, writer->pos, fill, ' ') == -1)
return -1;
writer->pos += fill;
}
return 0;
}
static int
unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
Py_ssize_t width, Py_ssize_t precision)
Py_ssize_t width, Py_ssize_t precision, int flags)
{
/* UTF-8 */
Py_ssize_t length;
@ -2349,24 +2358,58 @@ unicode_fromformat_write_cstr(_PyUnicodeWriter *writer, const char *str,
if (unicode == NULL)
return -1;
res = unicode_fromformat_write_str(writer, unicode, width, -1);
res = unicode_fromformat_write_str(writer, unicode, width, -1, flags);
Py_DECREF(unicode);
return res;
}
/* Write a wchar_t C string into the writer (helper for "%ls" and "%lV").
 *
 * At most `precision` wide characters are taken from `str`, stopping early
 * at the terminating null character; a precision of -1 means "everything up
 * to the terminator".  The selected characters are converted with
 * PyUnicode_FromWideChar() and handed to unicode_fromformat_write_str()
 * with precision -1, so only `width` and `flags` influence the padding
 * applied there.
 *
 * Returns -1 if the conversion fails, otherwise whatever
 * unicode_fromformat_write_str() returns.
 */
static int
unicode_fromformat_write_wcstr(_PyUnicodeWriter *writer, const wchar_t *str,
                               Py_ssize_t width, Py_ssize_t precision, int flags)
{
    Py_ssize_t nchars = 0;

    if (precision != -1) {
        /* Bounded scan: the length test comes first, so no character at or
           beyond index `precision` is ever read. */
        for (; nchars < precision; nchars++) {
            if (str[nchars] == L'\0') {
                break;
            }
        }
    }
    else {
        nchars = wcslen(str);
    }

    PyObject *text = PyUnicode_FromWideChar(str, nchars);
    if (text == NULL) {
        return -1;
    }

    /* Truncation already happened above, hence precision -1 here. */
    int status = unicode_fromformat_write_str(writer, text, width, -1, flags);
    Py_DECREF(text);
    return status;
}
/* Length-modifier codes kept in `sizemod` by unicode_fromformat_arg():
   "l" -> F_LONG, "ll" -> F_LONGLONG, "z" -> F_SIZE, "t" -> F_PTRDIFF,
   "j" -> F_INTMAX.  The value 0 means "no length modifier".
   Each code doubles as the index into the format tables below. */
#define F_LONG 1
#define F_LONGLONG 2
#define F_SIZE 3
#define F_PTRDIFF 4
#define F_INTMAX 5
/* printf() format strings passed to sprintf(), indexed by the length-modifier
   code (index 0 = no modifier).  There is one table per conversion:
   `formats` for d/i, then o, u, x and X.  The element order must stay in
   sync with the F_* values above. */
static const char * const formats[] = {"%d", "%ld", "%lld", "%zd", "%td", "%jd"};
static const char * const formats_o[] = {"%o", "%lo", "%llo", "%zo", "%to", "%jo"};
static const char * const formats_u[] = {"%u", "%lu", "%llu", "%zu", "%tu", "%ju"};
static const char * const formats_x[] = {"%x", "%lx", "%llx", "%zx", "%tx", "%jx"};
static const char * const formats_X[] = {"%X", "%lX", "%llX", "%zX", "%tX", "%jX"};
static const char*
unicode_fromformat_arg(_PyUnicodeWriter *writer,
const char *f, va_list *vargs)
{
const char *p;
Py_ssize_t len;
int zeropad;
int flags = 0;
Py_ssize_t width;
Py_ssize_t precision;
int longflag;
int longlongflag;
int size_tflag;
Py_ssize_t fill;
p = f;
f++;
@ -2377,15 +2420,31 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
return f;
}
zeropad = 0;
if (*f == '0') {
zeropad = 1;
f++;
/* Parse flags. Example: "%-i" => flags=F_LJUST. */
/* Flags '+', ' ' and '#' are not particularly useful.
* They are not worth the implementation and maintenance costs.
* In addition, '#' should add "0" for "o" conversions for compatibility
* with printf, but it would confuse Python users. */
while (1) {
switch (*f++) {
case '-': flags |= F_LJUST; continue;
case '0': flags |= F_ZERO; continue;
}
f--;
break;
}
/* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */
width = -1;
if (Py_ISDIGIT((unsigned)*f)) {
if (*f == '*') {
width = va_arg(*vargs, int);
if (width < 0) {
flags |= F_LJUST;
width = -width;
}
f++;
}
else if (Py_ISDIGIT((unsigned)*f)) {
width = *f - '0';
f++;
while (Py_ISDIGIT((unsigned)*f)) {
@ -2401,7 +2460,14 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
precision = -1;
if (*f == '.') {
f++;
if (Py_ISDIGIT((unsigned)*f)) {
if (*f == '*') {
precision = va_arg(*vargs, int);
if (precision < 0) {
precision = -2;
}
f++;
}
else if (Py_ISDIGIT((unsigned)*f)) {
precision = (*f - '0');
f++;
while (Py_ISDIGIT((unsigned)*f)) {
@ -2416,30 +2482,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
}
}
/* Parse the optional length modifier: l, ll, z, t or j. */
longflag = 0;
longlongflag = 0;
size_tflag = 0;
int sizemod = 0;
if (*f == 'l') {
if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') {
longflag = 1;
++f;
}
else if (f[1] == 'l' &&
(f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) {
longlongflag = 1;
if (f[1] == 'l') {
sizemod = F_LONGLONG;
f += 2;
}
else {
sizemod = F_LONG;
++f;
}
}
/* handle the size_t flag. */
else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) {
size_tflag = 1;
else if (*f == 'z') {
sizemod = F_SIZE;
++f;
}
else if (*f == 't') {
sizemod = F_PTRDIFF;
++f;
}
else if (*f == 'j') {
sizemod = F_INTMAX;
++f;
}
if (f[0] != '\0' && f[1] == '\0')
writer->overallocate = 0;
switch (*f) {
case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
break;
case 'c': case 'p':
if (sizemod || width >= 0 || precision >= 0) goto invalid_format;
break;
case 's':
case 'V':
if (sizemod && sizemod != F_LONG) goto invalid_format;
break;
default:
if (sizemod) goto invalid_format;
break;
}
switch (*f) {
case 'c':
{
@ -2454,91 +2537,98 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
break;
}
case 'i':
case 'd':
case 'u':
case 'x':
case 'd': case 'i':
case 'o': case 'u': case 'x': case 'X':
{
/* used by sprintf */
char buffer[MAX_LONG_LONG_CHARS];
Py_ssize_t arglen;
if (*f == 'u') {
if (longflag) {
len = sprintf(buffer, "%lu", va_arg(*vargs, unsigned long));
}
else if (longlongflag) {
len = sprintf(buffer, "%llu", va_arg(*vargs, unsigned long long));
}
else if (size_tflag) {
len = sprintf(buffer, "%zu", va_arg(*vargs, size_t));
}
else {
len = sprintf(buffer, "%u", va_arg(*vargs, unsigned int));
}
char buffer[MAX_INTMAX_CHARS];
const char *fmt = NULL;
switch (*f) {
case 'o': fmt = formats_o[sizemod]; break;
case 'u': fmt = formats_u[sizemod]; break;
case 'x': fmt = formats_x[sizemod]; break;
case 'X': fmt = formats_X[sizemod]; break;
default: fmt = formats[sizemod]; break;
}
else if (*f == 'x') {
len = sprintf(buffer, "%x", va_arg(*vargs, int));
}
else {
if (longflag) {
len = sprintf(buffer, "%li", va_arg(*vargs, long));
}
else if (longlongflag) {
len = sprintf(buffer, "%lli", va_arg(*vargs, long long));
}
else if (size_tflag) {
len = sprintf(buffer, "%zi", va_arg(*vargs, Py_ssize_t));
}
else {
len = sprintf(buffer, "%i", va_arg(*vargs, int));
}
int issigned = (*f == 'd' || *f == 'i');
switch (sizemod) {
case F_LONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long));
break;
case F_LONGLONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long long));
break;
case F_SIZE:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, Py_ssize_t)) :
sprintf(buffer, fmt, va_arg(*vargs, size_t));
break;
case F_PTRDIFF:
len = sprintf(buffer, fmt, va_arg(*vargs, ptrdiff_t));
break;
case F_INTMAX:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, intmax_t)) :
sprintf(buffer, fmt, va_arg(*vargs, uintmax_t));
break;
default:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, int)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned int));
break;
}
assert(len >= 0);
int negative = (buffer[0] == '-');
len -= negative;
int sign = (buffer[0] == '-');
len -= sign;
precision = Py_MAX(precision, len);
width = Py_MAX(width, precision + negative);
arglen = Py_MAX(precision, width);
if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
return NULL;
if (width > precision) {
if (negative && zeropad) {
if (_PyUnicodeWriter_WriteChar(writer, '-') == -1)
return NULL;
}
Py_UCS4 fillchar = zeropad?'0':' ';
fill = width - precision - negative;
if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
return NULL;
writer->pos += fill;
if (negative && !zeropad) {
if (_PyUnicodeWriter_WriteChar(writer, '-') == -1)
return NULL;
}
width = Py_MAX(width, precision + sign);
if ((flags & F_ZERO) && !(flags & F_LJUST)) {
precision = width - sign;
}
if (precision > len) {
fill = precision - len;
if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
Py_ssize_t spacepad = Py_MAX(width - precision - sign, 0);
Py_ssize_t zeropad = Py_MAX(precision - len, 0);
if (_PyUnicodeWriter_Prepare(writer, width, 127) == -1)
return NULL;
if (spacepad && !(flags & F_LJUST)) {
if (PyUnicode_Fill(writer->buffer, writer->pos, spacepad, ' ') == -1)
return NULL;
writer->pos += fill;
writer->pos += spacepad;
}
if (_PyUnicodeWriter_WriteASCIIString(writer, &buffer[negative], len) < 0)
if (sign) {
if (_PyUnicodeWriter_WriteChar(writer, '-') == -1)
return NULL;
}
if (zeropad) {
if (PyUnicode_Fill(writer->buffer, writer->pos, zeropad, '0') == -1)
return NULL;
writer->pos += zeropad;
}
if (_PyUnicodeWriter_WriteASCIIString(writer, &buffer[sign], len) < 0)
return NULL;
if (spacepad && (flags & F_LJUST)) {
if (PyUnicode_Fill(writer->buffer, writer->pos, spacepad, ' ') == -1)
return NULL;
writer->pos += spacepad;
}
break;
}
case 'p':
{
char number[MAX_LONG_LONG_CHARS];
char number[MAX_INTMAX_CHARS];
len = sprintf(number, "%p", va_arg(*vargs, void*));
assert(len >= 0);
@ -2561,10 +2651,17 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
case 's':
{
/* UTF-8 */
const char *s = va_arg(*vargs, const char*);
if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
return NULL;
if (sizemod) {
const wchar_t *s = va_arg(*vargs, const wchar_t*);
if (unicode_fromformat_write_wcstr(writer, s, width, precision, flags) < 0)
return NULL;
}
else {
/* UTF-8 */
const char *s = va_arg(*vargs, const char*);
if (unicode_fromformat_write_cstr(writer, s, width, precision, flags) < 0)
return NULL;
}
break;
}
@ -2573,7 +2670,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
PyObject *obj = va_arg(*vargs, PyObject *);
assert(obj && _PyUnicode_CHECK(obj));
if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
if (unicode_fromformat_write_str(writer, obj, width, precision, flags) == -1)
return NULL;
break;
}
@ -2581,15 +2678,27 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
case 'V':
{
PyObject *obj = va_arg(*vargs, PyObject *);
const char *str = va_arg(*vargs, const char *);
const char *str;
const wchar_t *wstr;
if (sizemod) {
wstr = va_arg(*vargs, const wchar_t*);
}
else {
str = va_arg(*vargs, const char *);
}
if (obj) {
assert(_PyUnicode_CHECK(obj));
if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
if (unicode_fromformat_write_str(writer, obj, width, precision, flags) == -1)
return NULL;
}
else if (sizemod) {
assert(wstr != NULL);
if (unicode_fromformat_write_wcstr(writer, wstr, width, precision, flags) < 0)
return NULL;
}
else {
assert(str != NULL);
if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
if (unicode_fromformat_write_cstr(writer, str, width, precision, flags) < 0)
return NULL;
}
break;
@ -2603,7 +2712,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
str = PyObject_Str(obj);
if (!str)
return NULL;
if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
if (unicode_fromformat_write_str(writer, str, width, precision, flags) == -1) {
Py_DECREF(str);
return NULL;
}
@ -2619,7 +2728,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
repr = PyObject_Repr(obj);
if (!repr)
return NULL;
if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
if (unicode_fromformat_write_str(writer, repr, width, precision, flags) == -1) {
Py_DECREF(repr);
return NULL;
}
@ -2635,7 +2744,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
ascii = PyObject_ASCII(obj);
if (!ascii)
return NULL;
if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
if (unicode_fromformat_write_str(writer, ascii, width, precision, flags) == -1) {
Py_DECREF(ascii);
return NULL;
}
@ -2644,6 +2753,7 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
}
default:
invalid_format:
PyErr_Format(PyExc_SystemError, "invalid format string: %s", p);
return NULL;
}

View file

@ -1556,14 +1556,11 @@ verify_identifier(struct tok_state *tok)
tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
}
Py_DECREF(s);
// PyUnicode_FromFormatV() does not support %X
char hex[9];
(void)PyOS_snprintf(hex, sizeof(hex), "%04X", ch);
if (Py_UNICODE_ISPRINTABLE(ch)) {
syntaxerror(tok, "invalid character '%c' (U+%s)", ch, hex);
syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
}
else {
syntaxerror(tok, "invalid non-printable character U+%s", hex);
syntaxerror(tok, "invalid non-printable character U+%04X", ch);
}
return 0;
}
@ -2541,9 +2538,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
if (!Py_UNICODE_ISPRINTABLE(c)) {
char hex[9];
(void)PyOS_snprintf(hex, sizeof(hex), "%04X", c);
return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%s", hex));
return MAKE_TOKEN(syntaxerror(tok, "invalid non-printable character U+%04X", c));
}
if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {