]> jfr.im git - yt-dlp.git/commitdiff
[core] Prevent `Cookie` leaks on HTTP redirect
authorcoletdjnz <redacted>
Tue, 6 Jun 2023 08:44:51 +0000 (20:44 +1200)
committerpukkandan <redacted>
Thu, 6 Jul 2023 17:44:39 +0000 (23:14 +0530)
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

Authored by: coletdjnz

test/test_http.py
yt_dlp/utils/_utils.py

index 3941a6e7762e4f23c55f22793b7e0ec755d38ec0..e4e66dce18399c1b500046053eb585f6659ff796 100644 (file)
@@ -132,6 +132,11 @@ def do_GET(self):
             self._method('GET')
         elif self.path.startswith('/headers'):
             self._headers()
+        elif self.path.startswith('/308-to-headers'):
+            self.send_response(308)
+            self.send_header('Location', '/headers')
+            self.send_header('Content-Length', '0')
+            self.end_headers()
         elif self.path == '/trailing_garbage':
             payload = b'<html><video src="/vid.mp4" /></html>'
             self.send_response(200)
@@ -270,6 +275,7 @@ def do_req(redirect_status, method):
             self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
 
             # 301 and 302 turn POST only into a GET
+            # XXX: we should also test if the Content-Type and Content-Length headers are removed
             self.assertEqual(do_req(301, 'POST'), ('', 'GET'))
             self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
             self.assertEqual(do_req(302, 'POST'), ('', 'GET'))
@@ -313,6 +319,31 @@ def test_cookiejar(self):
             data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
             self.assertIn(b'Cookie: test=ytdlp', data)
 
+    def test_passed_cookie_header(self):
+        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+        with FakeYDL() as ydl:
+            # Specified Cookie header should be used
+            res = ydl.urlopen(
+                sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers',
+                                  headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertIn('Cookie: test=test', res)
+
+            # Specified Cookie header should be removed on any redirect
+            res = ydl.urlopen(
+                sanitized_Request(f'http://127.0.0.1:{self.http_port}/308-to-headers', headers={'Cookie': 'test=test'})).read().decode('utf-8')
+            self.assertNotIn('Cookie: test=test', res)
+
+            # Specified Cookie header should override global cookiejar for that request
+            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(
+                version=0, name='test', value='ytdlp', port=None, port_specified=False,
+                domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
+                path_specified=True, secure=False, expires=None, discard=False, comment=None,
+                comment_url=None, rest={}))
+
+            data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read()
+            self.assertNotIn(b'Cookie: test=ytdlp', data)
+            self.assertIn(b'Cookie: test=test', data)
+
     def test_no_compression_compat_header(self):
         with FakeYDL() as ydl:
             data = ydl.urlopen(
index f68cdb96868b45ae0b393a806094dd69778856a7..82d9ba4d57884dbf435da39964747deb5c511899 100644 (file)
@@ -1556,7 +1556,12 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
 
         new_method = req.get_method()
         new_data = req.data
-        remove_headers = []
+
+        # Technically the Cookie header should be in unredirected_hdrs,
+        # however in practice some may set it in normal headers anyway.
+        # We will remove it here to prevent any leaks.
+        remove_headers = ['Cookie']
+
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
         if code == 303 and req.get_method() != 'HEAD':
@@ -1573,7 +1578,7 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
             new_data = None
             remove_headers.extend(['Content-Length', 'Content-Type'])
 
-        new_headers = {k: v for k, v in req.headers.items() if k.lower() not in remove_headers}
+        new_headers = {k: v for k, v in req.headers.items() if k.title() not in remove_headers}
 
         return urllib.request.Request(
             newurl, headers=new_headers, origin_req_host=req.origin_req_host,