jfr.im git - yt-dlp.git/commitdiff
Add brotli content-encoding support (#2433)
author: coletdev <redacted>
Tue, 8 Mar 2022 16:44:05 +0000 (05:44 +1300)
committer: GitHub <redacted>
Tue, 8 Mar 2022 16:44:05 +0000 (08:44 -0800)
Authored by: coletdjnz

README.md
pyinst.py
requirements.txt
setup.py
yt_dlp/YoutubeDL.py
yt_dlp/compat.py
yt_dlp/utils.py

index ce5af129eae54a185c2165c478aa6efd1bc1005e..81b5d417d01d6a68028fe915a9ccc839f7b381b9 100644 (file)
--- a/README.md
+++ b/README.md
@@ -268,6 +268,7 @@ ## DEPENDENCIES
 * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
 * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
 * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
+* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
 * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
 * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
 * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
@@ -284,7 +285,7 @@ ## DEPENDENCIES
 ## COMPILE
 
 **For Windows**:
-To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
+To build the Windows executable, you must have pyinstaller (and any of yt-dlp's optional dependencies if needed). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
 
     py -m pip install -U pyinstaller -r requirements.txt
     py devscripts/make_lazy_extractors.py
index f135ec90d2b6d06eda5b59724c7b5bfe0481b1e1..ca115fd78fbab1ef3db9ab2e2f71ee4f1cb05e2a 100644 (file)
--- a/pyinst.py
+++ b/pyinst.py
@@ -74,7 +74,7 @@ def version_to_list(version):
 
 
 def dependency_options():
-    dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+    dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
     excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
 
     yield from (f'--hidden-import={module}' for module in dependencies)
index cecd08eae81b2f7482526fac814cd1b1b34da25b..cb0eece46429b14702ea57613df05fd70f619fe9 100644 (file)
@@ -1,3 +1,5 @@
 mutagen
 pycryptodomex
 websockets
+brotli; platform_python_implementation=='CPython'
+brotlicffi; platform_python_implementation!='CPython'
\ No newline at end of file
index f08ae2309d43c1faac51989ed96b8535f9a99692..3e599cd95cd6ea3781a126afe1ab09381ca1c752 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
     '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
     open('README.md', 'r', encoding='utf-8').read()))
 
-REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets']
+REQUIREMENTS = open('requirements.txt').read().splitlines()
 
 
 if sys.argv[1:2] == ['py2exe']:
index 57201b6dc1b24625597d0c880b8882eedebf3bc0..51a89bd2374aeba57dc9eca510b255d5f4f4cd43 100644 (file)
@@ -32,6 +32,7 @@
 
 from .compat import (
     compat_basestring,
+    compat_brotli,
     compat_get_terminal_size,
     compat_kwargs,
     compat_numeric_types,
@@ -3675,6 +3676,7 @@ def python_implementation():
         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
 
         lib_str = join_nonempty(
+            compat_brotli and compat_brotli.__name__,
             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
             SECRETSTORAGE_AVAILABLE and 'secretstorage',
             has_mutagen and 'mutagen',
index 2bc6a6b7fad04392a3f04246df5d95b9dd9e8d39..0a0d3b351abafe8552247015b09f4582393ecd35 100644 (file)
@@ -170,6 +170,13 @@ def compat_expanduser(path):
     except ImportError:
         compat_pycrypto_AES = None
 
+try:
+    import brotlicffi as compat_brotli
+except ImportError:
+    try:
+        import brotli as compat_brotli
+    except ImportError:
+        compat_brotli = None
 
 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
 
@@ -258,6 +265,7 @@ def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.pytho
     'compat_asyncio_run',
     'compat_b64decode',
     'compat_basestring',
+    'compat_brotli',
     'compat_chr',
     'compat_collections_abc',
     'compat_cookiejar',
index 9406eb834d42d263fdbaa06c3c6940f01ccb0d19..f6e41f8373d68921f97a713940c6edbc053ac8fb 100644 (file)
@@ -47,6 +47,7 @@
     compat_HTMLParser,
     compat_HTTPError,
     compat_basestring,
+    compat_brotli,
     compat_chr,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
@@ -143,10 +144,16 @@ def random_user_agent():
     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 
 
+SUPPORTED_ENCODINGS = [
+    'gzip', 'deflate'
+]
+if compat_brotli:
+    SUPPORTED_ENCODINGS.append('br')
+
 std_headers = {
     'User-Agent': random_user_agent(),
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-    'Accept-Encoding': 'gzip, deflate',
+    'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS),
     'Accept-Language': 'en-us,en;q=0.5',
     'Sec-Fetch-Mode': 'navigate',
 }
@@ -1357,6 +1364,12 @@ def deflate(data):
         except zlib.error:
             return zlib.decompress(data)
 
+    @staticmethod
+    def brotli(data):
+        if not data:
+            return data
+        return compat_brotli.decompress(data)
+
     def http_request(self, req):
         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
         # always respected by websites, some tend to give out URLs with non percent-encoded
@@ -1417,6 +1430,12 @@ def http_response(self, req, resp):
             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
             del resp.headers['Content-encoding']
+        # brotli
+        if resp.headers.get('Content-encoding', '') == 'br':
+            resp = compat_urllib_request.addinfourl(
+                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
+            resp.msg = old_resp.msg
+            del resp.headers['Content-encoding']
         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
         # https://github.com/ytdl-org/youtube-dl/issues/6457).
         if 300 <= resp.code < 400: