From 08916a49c777cb6e000eec092881eb93ec22076c Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 27 May 2023 19:06:13 +1200 Subject: [PATCH] [core] Improve HTTP redirect handling (#7094) Aligns HTTP redirect handling with what browsers commonly do and RFC standards. Fixes issues https://github.com/yt-dlp/yt-dlp/commit/afac4caa7db30804bebac33e53c3cb0237958224 missed. Authored by: coletdjnz --- test/test_YoutubeDL.py | 6 - test/test_http.py | 288 +++++++++++++++++++++++++++++++++++++---- yt_dlp/utils/_utils.py | 59 +++------ 3 files changed, 281 insertions(+), 72 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 477fd220e..ee6c52713 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,7 +10,6 @@ import copy import json -import urllib.error from test.helper import FakeYDL, assertRegexpMatches from yt_dlp import YoutubeDL @@ -1097,11 +1096,6 @@ def test_selection(params, expected_ids, evaluate_all=False): test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True) test_selection({'playlist_items': '-15::15'}, [], True) - def test_urlopen_no_file_protocol(self): - # see https://github.com/ytdl-org/youtube-dl/issues/8227 - ydl = YDL() - self.assertRaises(urllib.error.URLError, ydl.urlopen, 'file:///etc/passwd') - def test_do_not_override_ie_key_in_url_transparent(self): ydl = YDL() diff --git a/test/test_http.py b/test/test_http.py index 5ca0d7a47..d684905da 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -7,40 +7,163 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - +import gzip +import http.cookiejar import http.server +import io +import pathlib import ssl +import tempfile import threading +import urllib.error import urllib.request from test.helper import http_server_port from yt_dlp import YoutubeDL +from yt_dlp.utils import sanitized_Request, urlencode_postdata + +from .helper import FakeYDL TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + def log_message(self, format, *args): pass + def _headers(self): + payload = str(self.headers).encode('utf-8') + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = f'{status} NOT FOUND'.encode() + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def do_POST(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + def do_GET(self): if self.path == '/video.html': + payload = b'