jfr.im git - yt-dlp.git/commitdiff
#86 [youtube_live_chat] Use POST API (Closes #82)
author siikamiika <redacted>
Mon, 15 Feb 2021 09:57:21 +0000 (11:57 +0200)
committer GitHub <redacted>
Mon, 15 Feb 2021 09:57:21 +0000 (15:27 +0530)
YouTube has removed support for the old GET-based live chat API, and it's now returning 404.

Authored by siikamiika

youtube_dlc/downloader/fragment.py
youtube_dlc/downloader/http.py
youtube_dlc/downloader/youtube_live_chat.py

index f4104c7132deb501316205bea54273dd7768d74d..5bc7f50f6818903b420ddd4f542170af9c583ed5 100644 (file)
@@ -95,11 +95,12 @@ def _write_ytdl_file(self, ctx):
         frag_index_stream.write(json.dumps({'downloader': downloader}))
         frag_index_stream.close()
 
-    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+    def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
         fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
         fragment_info_dict = {
             'url': frag_url,
             'http_headers': headers or info_dict.get('http_headers'),
+            'request_data': request_data,
         }
         success = ctx['dl'].download(fragment_filename, fragment_info_dict)
         if not success:
index d8ac41dcc13b80c98808c0d538dfa3b146640ced..bf77f4427086476e83cc9febf8e372c7fa12d906 100644 (file)
@@ -27,6 +27,7 @@
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
         url = info_dict['url']
+        request_data = info_dict.get('request_data', None)
 
         class DownloadContext(dict):
             __getattr__ = dict.get
@@ -101,7 +102,7 @@ def establish_connection():
                 range_end = ctx.data_len - 1
             has_range = range_start is not None
             ctx.has_range = has_range
-            request = sanitized_Request(url, None, headers)
+            request = sanitized_Request(url, request_data, headers)
             if has_range:
                 set_range(request, range_start, range_end)
             # Establish connection
@@ -152,7 +153,7 @@ def establish_connection():
                     try:
                         # Open the connection again without the range header
                         ctx.data = self.ydl.urlopen(
-                            sanitized_Request(url, None, headers))
+                            sanitized_Request(url, request_data, headers))
                         content_length = ctx.data.info()['Content-Length']
                     except (compat_urllib_error.HTTPError, ) as err:
                         if err.code < 500 or err.code >= 600:
index 5ac24c0201a617636ed40f7f102204f4df4200b2..8e173d8b58c44b0ee42b6d85470626f428d70743 100644 (file)
@@ -1,11 +1,13 @@
 from __future__ import division, unicode_literals
 
-import re
 import json
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
-from ..utils import try_get
+from ..utils import (
+    try_get,
+    RegexNotFoundError,
+)
 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
 
 
@@ -27,40 +29,28 @@ def real_download(self, filename, info_dict):
             'total_frags': None,
         }
 
-        def dl_fragment(url):
-            headers = info_dict.get('http_headers', {})
-            return self._download_fragment(ctx, url, info_dict, headers)
+        ie = YT_BaseIE(self.ydl)
 
-        def parse_yt_initial_data(data):
-            patterns = (
-                r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
-                r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
-            data = data.decode('utf-8', 'replace')
-            for patt in patterns:
-                try:
-                    raw_json = re.search(patt, data).group(1)
-                    return json.loads(raw_json)
-                except AttributeError:
-                    continue
+        def dl_fragment(url, data=None, headers=None):
+            http_headers = info_dict.get('http_headers', {})
+            if headers:
+                http_headers = http_headers.copy()
+                http_headers.update(headers)
+            return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
-        def download_and_parse_fragment(url, frag_index):
+        def download_and_parse_fragment(url, frag_index, request_data):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, raw_fragment = dl_fragment(url)
+                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
                     if not success:
                         return False, None, None
-                    data = parse_yt_initial_data(raw_fragment)
+                    try:
+                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+                    except RegexNotFoundError:
+                        data = None
                     if not data:
-                        raw_data = json.loads(raw_fragment)
-                        # sometimes youtube replies with a list
-                        if not isinstance(raw_data, list):
-                            raw_data = [raw_data]
-                        try:
-                            data = next(item['response'] for item in raw_data if 'response' in item)
-                        except StopIteration:
-                            data = {}
-
+                        data = json.loads(raw_fragment)
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@@ -93,22 +83,37 @@ def download_and_parse_fragment(url, frag_index):
             'https://www.youtube.com/watch?v={}'.format(video_id))
         if not success:
             return False
-        data = parse_yt_initial_data(raw_fragment)
+        try:
+            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+        except RegexNotFoundError:
+            return False
         continuation_id = try_get(
             data,
             lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
         # no data yet but required to call _append_fragment
         self._append_fragment(ctx, b'')
 
+        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+        if not ytcfg:
+            return False
+        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+        if not api_key or not innertube_context:
+            return False
+        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+
         frag_index = offset = 0
         while continuation_id is not None:
             frag_index += 1
-            url = ''.join((
-                'https://www.youtube.com/live_chat_replay',
-                '/get_live_chat_replay' if frag_index > 1 else '',
-                '?continuation=%s' % continuation_id,
-                '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else ''))
-            success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
+            request_data = {
+                'context': innertube_context,
+                'continuation': continuation_id,
+            }
+            if frag_index > 1:
+                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+            success, continuation_id, offset = download_and_parse_fragment(
+                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
             if not success:
                 return False
             if test: