From fac99b8b0df209ca6546545193b1873672d536ca Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Wed, 21 Feb 2024 19:27:16 -0800
Subject: [PATCH] gh-111140: Improve PyLong_AsNativeBytes API doc example &
 improve the test (#115380)

This expands the examples to cover both realistic use cases for the API.

I noticed thing in the test that could be done better so I added those as well: We need to guarantee that all bytes of the result are overwritten and that too many are not written.  Tests now pre-fills the result with data in order to ensure that.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
---
 Doc/c-api/long.rst              | 74 +++++++++++++++++++++++++--------
 Lib/test/test_capi/test_long.py | 30 ++++++++++---
 2 files changed, 82 insertions(+), 22 deletions(-)

diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst
index f24282e76a3..582f5c7bf05 100644
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -358,46 +358,86 @@ distinguished from a number.  Use :c:func:`PyErr_Occurred` to disambiguate.
 
    Copy the Python integer value to a native *buffer* of size *n_bytes*::
 
-      int value;
-      Py_ssize_t bytes = PyLong_AsNativeBytes(v, &value, sizeof(value), -1);
+      int32_t value;
+      Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1);
       if (bytes < 0) {
-          // Error occurred
+          // A Python exception was set with the reason.
           return NULL;
       }
       else if (bytes <= (Py_ssize_t)sizeof(value)) {
           // Success!
       }
       else {
-          // Overflow occurred, but 'value' contains truncated value
+          // Overflow occurred, but 'value' contains the truncated
+          // lowest bits of pylong.
       }
 
+   The above example may look *similar* to
+   :c:func:`PyLong_As* <PyLong_AsSize_t>`
+   but instead fills in a specific caller defined type and never raises an
+   error about of the :class:`int` *pylong*'s value regardless of *n_bytes*
+   or the returned byte count.
+
+   To get at the entire potentially big Python value, this can be used to
+   reserve enough space and copy it::
+
+      // Ask how much space we need.
+      Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1);
+      if (expected < 0) {
+          // A Python exception was set with the reason.
+          return NULL;
+      }
+      assert(expected != 0);  // Impossible per the API definition.
+      uint8_t *bignum = malloc(expected);
+      if (!bignum) {
+          PyErr_SetString(PyExc_MemoryError, "bignum malloc failed.");
+          return NULL;
+      }
+      // Safely get the entire value.
+      Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1);
+      if (bytes < 0) {  // Exception set.
+          free(bignum);
+          return NULL;
+      }
+      else if (bytes > expected) {  // Be safe, should not be possible.
+          PyErr_SetString(PyExc_RuntimeError,
+              "Unexpected bignum truncation after a size check.");
+          free(bignum);
+          return NULL;
+      }
+      // The expected success given the above pre-check.
+      // ... use bignum ...
+      free(bignum);
+
    *endianness* may be passed ``-1`` for the native endian that CPython was
    compiled with, or ``0`` for big endian and ``1`` for little.
 
-   Return ``-1`` with an exception raised if *pylong* cannot be interpreted as
+   Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as
    an integer. Otherwise, return the size of the buffer required to store the
    value. If this is equal to or less than *n_bytes*, the entire value was
-   copied.
+   copied. ``0`` will never be returned.
 
-   Unless an exception is raised, all *n_bytes* of the buffer will be written
-   with as much of the value as can fit. This allows the caller to ignore all
-   non-negative results if the intent is to match the typical behavior of a
-   C-style downcast. No exception is set for this case.
+   Unless an exception is raised, all *n_bytes* of the buffer will always be
+   written. In the case of truncation, as many of the lowest bits of the value
+   as could fit are written. This allows the caller to ignore all non-negative
+   results if the intent is to match the typical behavior of a C-style
+   downcast. No exception is set on truncation.
 
-   Values are always copied as two's-complement, and sufficient buffer will be
+   Values are always copied as two's-complement and sufficient buffer will be
    requested to include a sign bit. For example, this may cause an value that
    fits into 8 bytes when treated as unsigned to request 9 bytes, even though
    all eight bytes were copied into the buffer. What has been omitted is the
-   zero sign bit, which is redundant when the intention is to treat the value as
-   unsigned.
+   zero sign bit -- redundant if the caller's intention is to treat the value
+   as unsigned.
 
-   Passing zero to *n_bytes* will return the requested buffer size.
+   Passing zero to *n_bytes* will return the size of a buffer that would
+   be large enough to hold the value. This may be larger than technically
+   necessary, but not unreasonably so.
 
    .. note::
 
-      When the value does not fit in the provided buffer, the requested size
-      returned from the function may be larger than necessary. Passing 0 to this
-      function is not an accurate way to determine the bit length of a value.
+      Passing *n_bytes=0* to this function is not an accurate way to determine
+      the bit length of a value.
 
    .. versionadded:: 3.13
 
diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py
index fc82cbfa66e..9f5ee507a8e 100644
--- a/Lib/test/test_capi/test_long.py
+++ b/Lib/test/test_capi/test_long.py
@@ -438,7 +438,12 @@ def test_long_asnativebytes(self):
         if support.verbose:
             print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}")
 
-        # These tests check that the requested buffer size is correct
+        # These tests check that the requested buffer size is correct.
+        # This matches our current implementation: We only specify that the
+        # return value is a size *sufficient* to hold the result when queried
+        # using n_bytes=0. If our implementation changes, feel free to update
+        # the expectations here -- or loosen them to be range checks.
+        # (i.e. 0 *could* be stored in 1 byte and 512 in 2)
         for v, expect in [
             (0, SZ),
             (512, SZ),
@@ -453,12 +458,25 @@ def test_long_asnativebytes(self):
             (-(2**256-1), 33),
         ]:
             with self.subTest(f"sizeof-{v:X}"):
-                buffer = bytearray(1)
+                buffer = bytearray(b"\x5a")
                 self.assertEqual(expect, asnativebytes(v, buffer, 0, -1),
-                    "PyLong_AsNativeBytes(v, NULL, 0, -1)")
+                    "PyLong_AsNativeBytes(v, <unknown>, 0, -1)")
+                self.assertEqual(buffer, b"\x5a",
+                    "buffer overwritten when it should not have been")
                 # Also check via the __index__ path
                 self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1),
-                    "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)")
+                    "PyLong_AsNativeBytes(Index(v), <unknown>, 0, -1)")
+                self.assertEqual(buffer, b"\x5a",
+                    "buffer overwritten when it should not have been")
+
+        # Test that we populate n=2 bytes but do not overwrite more.
+        buffer = bytearray(b"\x99"*3)
+        self.assertEqual(2, asnativebytes(4, buffer, 2, 0),  # BE
+            "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 0)  // BE")
+        self.assertEqual(buffer, b"\x00\x04\x99")
+        self.assertEqual(2, asnativebytes(4, buffer, 2, 1),  # LE
+            "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 1)  // LE")
+        self.assertEqual(buffer, b"\x04\x00\x99")
 
         # We request as many bytes as `expect_be` contains, and always check
         # the result (both big and little endian). We check the return value
@@ -510,7 +528,9 @@ def test_long_asnativebytes(self):
         ]:
             with self.subTest(f"{v:X}-{len(expect_be)}bytes"):
                 n = len(expect_be)
-                buffer = bytearray(n)
+                # Fill the buffer with dummy data to ensure all bytes
+                # are overwritten.
+                buffer = bytearray(b"\xa5"*n)
                 expect_le = expect_be[::-1]
 
                 self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0),