bpo-41625: Expose the splice() system call in the os module (GH-21947)

This commit is contained in:
Pablo Galindo 2020-11-17 00:00:38 +00:00 committed by GitHub
parent cce3f0b0c8
commit a57b3d30f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 349 additions and 78 deletions

View file

@ -1419,6 +1419,38 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
.. versionadded:: 3.3
.. function:: splice(src, dst, count, offset_src=None, offset_dst=None, flags=0)
Transfer *count* bytes from file descriptor *src*, starting from offset
*offset_src*, to file descriptor *dst*, starting from offset *offset_dst*.
At least one of the file descriptors must refer to a pipe. If *offset_src*
is ``None``, then *src* is read from the current position; respectively for
*offset_dst*. The offset associated to the file descriptor that refers to a
pipe must be ``None``. The files pointed to by *src* and *dst* must reside in
the same filesystem, otherwise an :exc:`OSError` is raised with
:attr:`~OSError.errno` set to :data:`errno.EXDEV`.
This copy is done without the additional cost of transferring data
from the kernel to user space and then back into the kernel. Additionally,
some filesystems could implement extra optimizations. The copy is done as if
both files are opened as binary.
Upon successful completion, returns the number of bytes spliced to or from
the pipe. A return value of 0 means end of input. If *src* refers to a
pipe, then this means that there was no data to transfer, and it would not
make sense to block because there are no writers connected to the write end
of the pipe.
.. availability:: Linux kernel >= 2.6.17 and glibc >= 2.5
.. versionadded:: 3.10
.. data:: SPLICE_F_MOVE
SPLICE_F_NONBLOCK
SPLICE_F_MORE
.. function:: readv(fd, buffers)
Read from a file descriptor *fd* into a number of mutable :term:`bytes-like

View file

@ -233,6 +233,11 @@ Added a new function :func:`os.eventfd` and related helpers to wrap the
``eventfd2`` syscall on Linux.
(Contributed by Christian Heimes in :issue:`41001`.)
Added :func:`os.splice()` that allows moving data between two file
descriptors without copying between kernel address space and user
address space, where one of the file descriptors must refer to a
pipe. (Contributed by Pablo Galindo in :issue:`41625`.)
py_compile
----------

View file

@ -381,6 +381,123 @@ def test_copy_file_range_offset(self):
self.assertEqual(read[out_seek:],
data[in_skip:in_skip+i])
@unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()')
def test_splice_invalid_values(self):
with self.assertRaises(ValueError):
os.splice(0, 1, -10)
@unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()')
def test_splice(self):
    # Splice the first bytes of a regular file into a pipe and check that
    # what is read back from the pipe matches what was transferred.
    data = b'0123456789'

    create_file(os_helper.TESTFN, data)
    self.addCleanup(os_helper.unlink, os_helper.TESTFN)

    in_file = open(os_helper.TESTFN, 'rb')
    self.addCleanup(in_file.close)
    in_fd = in_file.fileno()

    read_fd, write_fd = os.pipe()
    self.addCleanup(os.close, read_fd)
    self.addCleanup(os.close, write_fd)

    try:
        i = os.splice(in_fd, write_fd, 5)
    except OSError as e:
        # Handle the case in which Python was compiled
        # in a system with the syscall but without support
        # in the kernel.
        if e.errno != errno.ENOSYS:
            raise
        self.skipTest(e)
    else:
        # The number of copied bytes can be less than
        # the number of bytes originally requested.
        self.assertIn(i, range(0, 6))

        self.assertEqual(os.read(read_fd, 100), data[:i])
@unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()')
def test_splice_offset_in(self):
    # Splice from a regular file into a pipe, starting at an explicit
    # source offset, and check the bytes that come out of the pipe.
    data = b'0123456789'
    bytes_to_copy = 6
    in_skip = 3

    create_file(os_helper.TESTFN, data)
    self.addCleanup(os_helper.unlink, os_helper.TESTFN)

    in_file = open(os_helper.TESTFN, 'rb')
    self.addCleanup(in_file.close)
    in_fd = in_file.fileno()

    read_fd, write_fd = os.pipe()
    self.addCleanup(os.close, read_fd)
    self.addCleanup(os.close, write_fd)

    try:
        i = os.splice(in_fd, write_fd, bytes_to_copy, offset_src=in_skip)
    except OSError as e:
        # Handle the case in which Python was compiled
        # in a system with the syscall but without support
        # in the kernel.
        if e.errno != errno.ENOSYS:
            raise
        self.skipTest(e)
    else:
        # The number of copied bytes can be less than
        # the number of bytes originally requested.
        self.assertIn(i, range(0, bytes_to_copy+1))

        read = os.read(read_fd, 100)
        # 012 are skipped (in_skip)
        # up to 345678 are copied into the pipe (bytes_to_copy bytes
        # starting at in_skip)
        self.assertEqual(read, data[in_skip:in_skip+i])
@unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()')
def test_splice_offset_out(self):
    # Splice from a pipe into a regular file at an explicit destination
    # offset, then check the resulting file contents.
    TESTFN4 = os_helper.TESTFN + ".4"
    data = b'0123456789'
    bytes_to_copy = 6
    out_seek = 3

    # The pipe is the data source here, so no input file is needed.
    read_fd, write_fd = os.pipe()
    self.addCleanup(os.close, read_fd)
    self.addCleanup(os.close, write_fd)
    os.write(write_fd, data)

    out_file = open(TESTFN4, 'w+b')
    # Cleanups run last-in first-out: the file is closed, then unlinked.
    self.addCleanup(os_helper.unlink, TESTFN4)
    self.addCleanup(out_file.close)
    out_fd = out_file.fileno()

    try:
        i = os.splice(read_fd, out_fd, bytes_to_copy, offset_dst=out_seek)
    except OSError as e:
        # Handle the case in which Python was compiled
        # in a system with the syscall but without support
        # in the kernel.
        if e.errno != errno.ENOSYS:
            raise
        self.skipTest(e)
    else:
        # The number of copied bytes can be less than
        # the number of bytes originally requested.
        self.assertIn(i, range(0, bytes_to_copy+1))

        with open(TESTFN4, 'rb') as in_file:
            read = in_file.read()
        # the out_seek (3) bytes before the write position are zero'ed
        self.assertEqual(read[:out_seek], b'\x00'*out_seek)
        # the first i bytes taken from the pipe (up to 012345) follow,
        # starting at offset out_seek in the file
        self.assertEqual(read[out_seek:], data[:i])
# Test attributes on return values from os.*stat* family.
class StatAttributeTests(unittest.TestCase):
def setUp(self):

View file

@ -0,0 +1,2 @@
Expose the :c:func:`splice` system call as :func:`os.splice` in the :mod:`os` module.
Patch by Pablo Galindo.

View file

@ -5674,6 +5674,106 @@ exit:
#endif /* defined(HAVE_COPY_FILE_RANGE) */
#if defined(HAVE_SPLICE)
PyDoc_STRVAR(os_splice__doc__,
"splice($module, /, src, dst, count, offset_src=None, offset_dst=None,\n"
" flags=0)\n"
"--\n"
"\n"
"Transfer count bytes from one pipe to a descriptor or vice versa.\n"
"\n"
" src\n"
" Source file descriptor.\n"
" dst\n"
" Destination file descriptor.\n"
" count\n"
" Number of bytes to copy.\n"
" offset_src\n"
" Starting offset in src.\n"
" offset_dst\n"
" Starting offset in dst.\n"
" flags\n"
" Flags to modify the semantics of the call.\n"
"\n"
"If offset_src is None, then src is read from the current position;\n"
"respectively for offset_dst. The offset associated to the file\n"
"descriptor that refers to a pipe must be None.");
#define OS_SPLICE_METHODDEF \
{"splice", (PyCFunction)(void(*)(void))os_splice, METH_FASTCALL|METH_KEYWORDS, os_splice__doc__},
static PyObject *
os_splice_impl(PyObject *module, int src, int dst, Py_ssize_t count,
PyObject *offset_src, PyObject *offset_dst,
unsigned int flags);
/* Argument-parsing wrapper for os.splice(), registered through
 * OS_SPLICE_METHODDEF with METH_FASTCALL|METH_KEYWORDS.  This sits in the
 * Argument Clinic generated section, so it should be regenerated from the
 * clinic input rather than edited by hand.
 *
 * Unpacks the three required arguments (src, dst, count) and the three
 * optional ones (offset_src, offset_dst, flags), then forwards them to
 * os_splice_impl().  Returns whatever os_splice_impl() returns, or NULL
 * (return_value's initial value) when unpacking or conversion fails.
 */
static PyObject *
os_splice(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"src", "dst", "count", "offset_src", "offset_dst", "flags", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "splice", 0};
PyObject *argsbuf[6];
/* Arguments supplied (positional + keyword) beyond the 3 required ones. */
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3;
int src;
int dst;
Py_ssize_t count;
/* Defaults used when the corresponding optional argument is not passed. */
PyObject *offset_src = Py_None;
PyObject *offset_dst = Py_None;
unsigned int flags = 0;
/* NOTE(review): the literals 3 and 6 presumably are the minimum and maximum
 * positional counts and argsbuf the scratch array for the unpacked
 * arguments -- confirm against _PyArg_UnpackKeywords. */
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 3, 6, 0, argsbuf);
if (!args) {
goto exit;
}
/* -1 is ambiguous on its own, so an error is only assumed when an
 * exception is also set. */
src = _PyLong_AsInt(args[0]);
if (src == -1 && PyErr_Occurred()) {
goto exit;
}
dst = _PyLong_AsInt(args[1]);
if (dst == -1 && PyErr_Occurred()) {
goto exit;
}
{
/* count: converted with _PyNumber_Index() and then PyLong_AsSsize_t().
 * ival stays -1 if _PyNumber_Index() returned NULL, so the check below
 * covers both failure points. */
Py_ssize_t ival = -1;
PyObject *iobj = _PyNumber_Index(args[2]);
if (iobj != NULL) {
ival = PyLong_AsSsize_t(iobj);
Py_DECREF(iobj);
}
if (ival == -1 && PyErr_Occurred()) {
goto exit;
}
count = ival;
}
/* Nothing optional was passed: keep the defaults. */
if (!noptargs) {
goto skip_optional_pos;
}
/* Consume each optional argument that is present, stopping as soon as
 * all supplied optionals have been accounted for. */
if (args[3]) {
offset_src = args[3];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (args[4]) {
offset_dst = args[4];
if (!--noptargs) {
goto skip_optional_pos;
}
}
/* Reaching this point means noptargs is still non-zero, so the one
 * remaining optional argument (flags) is taken without a NULL check. */
if (!_PyLong_UnsignedInt_Converter(args[5], &flags)) {
goto exit;
}
skip_optional_pos:
return_value = os_splice_impl(module, src, dst, count, offset_src, offset_dst, flags);
exit:
return return_value;
}
#endif /* defined(HAVE_SPLICE) */
#if defined(HAVE_MKFIFO)
PyDoc_STRVAR(os_mkfifo__doc__,
@ -8864,6 +8964,10 @@ exit:
#define OS_COPY_FILE_RANGE_METHODDEF
#endif /* !defined(OS_COPY_FILE_RANGE_METHODDEF) */
#ifndef OS_SPLICE_METHODDEF
#define OS_SPLICE_METHODDEF
#endif /* !defined(OS_SPLICE_METHODDEF) */
#ifndef OS_MKFIFO_METHODDEF
#define OS_MKFIFO_METHODDEF
#endif /* !defined(OS_MKFIFO_METHODDEF) */
@ -9059,4 +9163,4 @@ exit:
#ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF
#define OS_WAITSTATUS_TO_EXITCODE_METHODDEF
#endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */
/*[clinic end generated code: output=49b7ed768242ef7c input=a9049054013a1b77]*/
/*[clinic end generated code: output=8a59e91178897267 input=a9049054013a1b77]*/

View file

@ -6521,7 +6521,6 @@ os_spawnve_impl(PyObject *module, int mode, path_t *path, PyObject *argv,
#endif /* HAVE_SPAWNV */
#ifdef HAVE_FORK
/* Helper function to validate arguments.
@ -10370,6 +10369,75 @@ os_copy_file_range_impl(PyObject *module, int src, int dst, Py_ssize_t count,
}
#endif /* HAVE_COPY_FILE_RANGE*/
#ifdef HAVE_SPLICE
/*[clinic input]
os.splice
src: int
Source file descriptor.
dst: int
Destination file descriptor.
count: Py_ssize_t
Number of bytes to copy.
offset_src: object = None
Starting offset in src.
offset_dst: object = None
Starting offset in dst.
flags: unsigned_int = 0
Flags to modify the semantics of the call.
Transfer count bytes from one pipe to a descriptor or vice versa.
If offset_src is None, then src is read from the current position;
respectively for offset_dst. The offset associated to the file
descriptor that refers to a pipe must be None.
[clinic start generated code]*/
static PyObject *
os_splice_impl(PyObject *module, int src, int dst, Py_ssize_t count,
               PyObject *offset_src, PyObject *offset_dst,
               unsigned int flags)
/*[clinic end generated code: output=d0386f25a8519dc5 input=047527c66c6d2e0a]*/
{
    /* Implementation of os.splice(): validate count, convert the optional
     * offsets, then call splice(2) with the GIL released, retrying when the
     * call is interrupted by a signal.
     *
     * Returns a Python int holding the value splice() returned, or NULL
     * with an exception set.
     */
    off_t src_off, dst_off;
    off_t *src_off_ptr = NULL;
    off_t *dst_off_ptr = NULL;
    Py_ssize_t nspliced;

    if (count < 0) {
        PyErr_SetString(PyExc_ValueError, "negative value for 'count' not allowed");
        return NULL;
    }

    /* An offset of None leaves the matching pointer NULL; otherwise the
     * converted value is passed to splice() by address. */
    if (offset_src != Py_None) {
        if (!Py_off_t_converter(offset_src, &src_off)) {
            return NULL;
        }
        src_off_ptr = &src_off;
    }
    if (offset_dst != Py_None) {
        if (!Py_off_t_converter(offset_dst, &dst_off)) {
            return NULL;
        }
        dst_off_ptr = &dst_off;
    }

    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        nspliced = splice(src, src_off_ptr, dst, dst_off_ptr, count, flags);
        Py_END_ALLOW_THREADS

        if (nspliced >= 0) {
            return PyLong_FromSsize_t(nspliced);
        }
        if (errno != EINTR) {
            /* Genuine failure: report it through posix_error(). */
            return posix_error();
        }
        if (PyErr_CheckSignals()) {
            /* PyErr_CheckSignals() reported a failure: give up with NULL
             * instead of retrying. */
            return NULL;
        }
        /* Interrupted and the signal check passed: retry the call. */
    }
}
#endif /* HAVE_SPLICE*/
#ifdef HAVE_MKFIFO
/*[clinic input]
os.mkfifo
@ -14550,6 +14618,7 @@ static PyMethodDef posix_methods[] = {
OS_POSIX_SPAWNP_METHODDEF
OS_READLINK_METHODDEF
OS_COPY_FILE_RANGE_METHODDEF
OS_SPLICE_METHODDEF
OS_RENAME_METHODDEF
OS_REPLACE_METHODDEF
OS_RMDIR_METHODDEF
@ -15072,6 +15141,13 @@ all_ins(PyObject *m)
if (PyModule_AddIntConstant(m, "RWF_APPEND", RWF_APPEND)) return -1;
#endif
/* constants for splice */
#ifdef HAVE_SPLICE
if (PyModule_AddIntConstant(m, "SPLICE_F_MOVE", SPLICE_F_MOVE)) return -1;
if (PyModule_AddIntConstant(m, "SPLICE_F_NONBLOCK", SPLICE_F_NONBLOCK)) return -1;
if (PyModule_AddIntConstant(m, "SPLICE_F_MORE", SPLICE_F_MORE)) return -1;
#endif
/* constants for posix_spawn */
#ifdef HAVE_POSIX_SPAWN
if (PyModule_AddIntConstant(m, "POSIX_SPAWN_OPEN", POSIX_SPAWN_OPEN)) return -1;

80
aclocal.m4 vendored
View file

@ -67,7 +67,7 @@ AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAGS
# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
# serial 11 (pkg-config-0.29.1)
# serial 12 (pkg-config-0.29.2)
dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
@ -109,7 +109,7 @@ dnl
dnl See the "Since" comment for each macro you use to see what version
dnl of the macros you require.
m4_defun([PKG_PREREQ],
[m4_define([PKG_MACROS_VERSION], [0.29.1])
[m4_define([PKG_MACROS_VERSION], [0.29.2])
m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
[m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
])dnl PKG_PREREQ
@ -210,7 +210,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
pkg_failed=no
AC_MSG_CHECKING([for $1])
AC_MSG_CHECKING([for $2])
_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])
@ -220,11 +220,11 @@ and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])
if test $pkg_failed = yes; then
AC_MSG_RESULT([no])
AC_MSG_RESULT([no])
_PKG_SHORT_ERRORS_SUPPORTED
if test $_pkg_short_errors_supported = yes; then
$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
else
else
$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
@ -241,7 +241,7 @@ installed software in a non-standard prefix.
_PKG_TEXT])[]dnl
])
elif test $pkg_failed = untried; then
AC_MSG_RESULT([no])
AC_MSG_RESULT([no])
m4_default([$4], [AC_MSG_FAILURE(
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
@ -342,73 +342,5 @@ AS_VAR_COPY([$1], [pkg_cv_][$1])
AS_VAR_IF([$1], [""], [$5], [$4])dnl
])dnl PKG_CHECK_VAR
dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND],
dnl [DESCRIPTION], [DEFAULT])
dnl ------------------------------------------
dnl
dnl Prepare a "--with-" configure option using the lowercase
dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and
dnl PKG_CHECK_MODULES in a single macro.
AC_DEFUN([PKG_WITH_MODULES],
[
m4_pushdef([with_arg], m4_tolower([$1]))
m4_pushdef([description],
[m4_default([$5], [build with ]with_arg[ support])])
m4_pushdef([def_arg], [m4_default([$6], [auto])])
m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes])
m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no])
m4_case(def_arg,
[yes],[m4_pushdef([with_without], [--without-]with_arg)],
[m4_pushdef([with_without],[--with-]with_arg)])
AC_ARG_WITH(with_arg,
AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),,
[AS_TR_SH([with_]with_arg)=def_arg])
AS_CASE([$AS_TR_SH([with_]with_arg)],
[yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)],
[auto],[PKG_CHECK_MODULES([$1],[$2],
[m4_n([def_action_if_found]) $3],
[m4_n([def_action_if_not_found]) $4])])
m4_popdef([with_arg])
m4_popdef([description])
m4_popdef([def_arg])
])dnl PKG_WITH_MODULES
dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [DESCRIPTION], [DEFAULT])
dnl -----------------------------------------------
dnl
dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES
dnl check._[VARIABLE-PREFIX] is exported as make variable.
AC_DEFUN([PKG_HAVE_WITH_MODULES],
[
PKG_WITH_MODULES([$1],[$2],,,[$3],[$4])
AM_CONDITIONAL([HAVE_][$1],
[test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"])
])dnl PKG_HAVE_WITH_MODULES
dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
dnl [DESCRIPTION], [DEFAULT])
dnl ------------------------------------------------------
dnl
dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after
dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make
dnl and preprocessor variable.
AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES],
[
PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4])
AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
[AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
])dnl PKG_HAVE_DEFINE_WITH_MODULES
m4_include([m4/ax_c_float_words_bigendian.m4])
m4_include([m4/ax_check_openssl.m4])

2
configure vendored
View file

@ -11726,7 +11726,7 @@ for ac_func in alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sched_get_priority_max sched_setaffinity sched_setscheduler sched_setparam \
sched_rr_get_interval \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sigtimedwait sigwait sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn

View file

@ -3684,7 +3684,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \
sched_get_priority_max sched_setaffinity sched_setscheduler sched_setparam \
sched_rr_get_interval \
sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \
sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \
sigtimedwait sigwait sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \
sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \
wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn)

View file

@ -1018,6 +1018,9 @@
/* Define to 1 if you have the <spawn.h> header file. */
#undef HAVE_SPAWN_H
/* Define to 1 if you have the `splice' function. */
#undef HAVE_SPLICE
/* Define if your compiler provides ssize_t */
#undef HAVE_SSIZE_T