gh-95865: Further reduce quote_from_bytes memory consumption (#96860)

on large input values.  Based on Dennis Sweeney's chunking idea.
Author: Gregory P. Smith
Date: 2022-09-19 16:06:25 -07:00 (committed via GitHub)
parent 04f4977f50
commit e61ca22431
3 changed files with 16 additions and 1 deletion

Lib/test/test_urlparse.py

@@ -985,6 +985,10 @@ def test_quote_from_bytes(self):
         self.assertEqual(result, 'archaeological%20arcana')
         result = urllib.parse.quote_from_bytes(b'')
         self.assertEqual(result, '')
+        result = urllib.parse.quote_from_bytes(b'A'*10_000)
+        self.assertEqual(result, 'A'*10_000)
+        result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
+        self.assertEqual(result, 'z%01/%20'*253_183)
 
     def test_unquote_to_bytes(self):
         result = urllib.parse.unquote_to_bytes('abc%20def')
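For reference, a quick REPL sketch (not part of the commit) of the behavior the new assertions lock in: bytes in the safe set pass through unchanged and everything else is percent-escaped, regardless of input size:

    >>> from urllib.parse import quote_from_bytes
    >>> quote_from_bytes(b'z\x01/ ')   # '/' is safe by default; \x01 and ' ' are not
    'z%01/%20'
    >>> quote_from_bytes(b'A' * 10_000) == 'A' * 10_000
    True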

Lib/urllib/parse.py

@@ -29,6 +29,7 @@
 from collections import namedtuple
 import functools
+import math
 import re
 import types
 import warnings
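The new import exists only for math.isqrt, used in the next hunk to pick the chunk size. As a reminder (not part of the diff), math.isqrt returns the floor of the integer square root:

    >>> import math
    >>> math.isqrt(1_000_000)   # chunk size chosen for a ~1 MB input
    1000
    >>> math.isqrt(250_000)
    500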
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
     if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
         return bs.decode()
     quoter = _byte_quoter_factory(safe)
-    return ''.join(map(quoter, bs))
+    if (bs_len := len(bs)) < 200_000:
+        return ''.join(map(quoter, bs))
+    else:
+        # This saves memory - https://github.com/python/cpython/issues/95865
+        chunk_size = math.isqrt(bs_len)
+        chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
+                  for i in range(0, bs_len, chunk_size)]
+        return ''.join(chunks)
 
 def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
               quote_via=quote_plus):
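Why sqrt(n)-sized chunks help (a sketch of the idea, not the commit's code): str.join materializes its argument into a list before concatenating, so the single-join path keeps len(bs) tiny one-to-three-character strings alive at once. Quoting sqrt(n) bytes per chunk caps the simultaneously live intermediates at roughly 2*sqrt(n) objects: sqrt(n) per-byte strings inside the current inner join, plus at most sqrt(n) finished chunk strings. The helper below restates the pattern standalone; chunked_join and transform are illustrative names, not from the commit:

    import math

    def chunked_join(bs, transform):
        n = len(bs)
        chunk_size = math.isqrt(n) or 1   # "or 1" guards the empty-input case
        # Each inner ''.join holds at most chunk_size per-byte strings;
        # the outer list holds at most ~n/chunk_size == sqrt(n) chunk strings.
        chunks = [''.join(map(transform, bs[i:i + chunk_size]))
                  for i in range(0, n, chunk_size)]
        return ''.join(chunks)

For bs_len = 1_000_000 the chunk size is 1_000, so peak overhead is about a thousand per-byte strings plus a thousand chunk strings, rather than a million per-byte strings at once; the 200_000 cutoff keeps small inputs on the simpler single-join path, where the chunking bookkeeping isn't worth it.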

Misc/NEWS.d news entry (new file)

@@ -0,0 +1,3 @@
+Reduce :func:`urllib.parse.quote_from_bytes` memory use on large values.
+
+Contributed by Dennis Sweeney.