jfr.im git - yt-dlp.git/blobdiff - yt_dlp/downloader/youtube_live_chat.py
[compat] Remove more functions
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
index f30dcb6bfb9085d1d73217f0d0a73dc35231fb71..5334c6c95692e1bff6deda406f695b68f370c268 100644 (file)
@@ -1,27 +1,20 @@
-from __future__ import division, unicode_literals
-
 import json
 import time
+import urllib.error
 
 from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
-    try_get,
-    dict_get,
-    int_or_none,
-    RegexNotFoundError,
-)
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
+from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
 
 
 class YoutubeLiveChatFD(FragmentFD):
     """ Downloads YouTube live chats fragment by fragment """
 
-    FD_NAME = 'youtube_live_chat'
-
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
         self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+            self.report_warning('Live chat download runs until the livestream ends. '
+                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')
 
         fragment_retries = self.params.get('fragment_retries', 0)
         test = self.params.get('test', False)
@@ -32,7 +25,9 @@ def real_download(self, filename, info_dict):
             'total_frags': None,
         }
 
-        ie = YT_BaseIE(self.ydl)
+        from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+        ie = YoutubeBaseInfoExtractor(self.ydl)
 
         start_time = int(time.time() * 1000)
 
@@ -44,26 +39,43 @@ def dl_fragment(url, data=None, headers=None):
             return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
         def parse_actions_replay(live_chat_continuation):
-            offset = continuation_id = None
+            offset = continuation_id = click_tracking_params = None
             processed_fragment = bytearray()
             for action in live_chat_continuation.get('actions', []):
                 if 'replayChatItemAction' in action:
                     replay_chat_item_action = action['replayChatItemAction']
                     offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                 processed_fragment.extend(
-                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
             if offset is not None:
-                continuation_id = try_get(
+                continuation = try_get(
                     live_chat_continuation,
-                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
+                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
+                if continuation:
+                    continuation_id = continuation.get('continuation')
+                    click_tracking_params = continuation.get('clickTrackingParams')
             self._append_fragment(ctx, processed_fragment)
-            return continuation_id, offset
+            return continuation_id, offset, click_tracking_params
+
+        def try_refresh_replay_beginning(live_chat_continuation):
+            # choose the second option that contains the unfiltered live chat replay
+            refresh_continuation = try_get(
+                live_chat_continuation,
+                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
+            if refresh_continuation:
+                # no data yet but required to call _append_fragment
+                self._append_fragment(ctx, b'')
+                refresh_continuation_id = refresh_continuation.get('continuation')
+                offset = 0
+                click_tracking_params = refresh_continuation.get('trackingParams')
+                return refresh_continuation_id, offset, click_tracking_params
+            return parse_actions_replay(live_chat_continuation)
 
         live_offset = 0
 
         def parse_actions_live(live_chat_continuation):
             nonlocal live_offset
-            continuation_id = None
+            continuation_id = click_tracking_params = None
             processed_fragment = bytearray()
             for action in live_chat_continuation.get('actions', []):
                 timestamp = self.parse_live_timestamp(action)
@@ -76,7 +88,7 @@ def parse_actions_live(live_chat_continuation):
                     'isLive': True,
                 }
                 processed_fragment.extend(
-                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
             continuation_data_getters = [
                 lambda x: x['continuations'][0]['invalidationContinuationData'],
                 lambda x: x['continuations'][0]['timedContinuationData'],
@@ -84,45 +96,54 @@ def parse_actions_live(live_chat_continuation):
             continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
             if continuation_data:
                 continuation_id = continuation_data.get('continuation')
+                click_tracking_params = continuation_data.get('clickTrackingParams')
                 timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                 if timeout_ms is not None:
                     time.sleep(timeout_ms / 1000)
             self._append_fragment(ctx, processed_fragment)
-            return continuation_id, live_offset
-
-        if info_dict['protocol'] == 'youtube_live_chat_replay':
-            parse_actions = parse_actions_replay
-        elif info_dict['protocol'] == 'youtube_live_chat':
-            parse_actions = parse_actions_live
+            return continuation_id, live_offset, click_tracking_params
 
-        def download_and_parse_fragment(url, frag_index, request_data, headers):
+        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, headers)
+                    success = dl_fragment(url, request_data, headers)
                     if not success:
-                        return False, None, None
-                    data = json.loads(raw_fragment)
+                        return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
+                    try:
+                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+                    except RegexNotFoundError:
+                        data = None
+                    if not data:
+                        data = json.loads(raw_fragment)
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    continuation_id, offset = parse_actions(live_chat_continuation)
-                    return True, continuation_id, offset
-                except compat_urllib_error.HTTPError as err:
+                    if info_dict['protocol'] == 'youtube_live_chat_replay':
+                        if frag_index == 1:
+                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
+                        else:
+                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
+                    elif info_dict['protocol'] == 'youtube_live_chat':
+                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
+                    return True, continuation_id, offset, click_tracking_params
+                except urllib.error.HTTPError as err:
                     count += 1
                     if count <= fragment_retries:
                         self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False, None, None
+                return False, None, None, None
 
-        self._prepare_and_start_frag_download(ctx)
+        self._prepare_and_start_frag_download(ctx, info_dict)
 
-        success, raw_fragment = dl_fragment(info_dict['url'])
+        success = dl_fragment(info_dict['url'])
         if not success:
             return False
+        raw_fragment = self._read_fragment(ctx)
         try:
-            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
         except RegexNotFoundError:
             return False
         continuation_id = try_get(
@@ -131,7 +152,7 @@ def download_and_parse_fragment(url, frag_index, request_data, headers):
         # no data yet but required to call _append_fragment
         self._append_fragment(ctx, b'')
 
-        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
 
         if not ytcfg:
             return False
@@ -142,10 +163,13 @@ def download_and_parse_fragment(url, frag_index, request_data, headers):
         visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
         if info_dict['protocol'] == 'youtube_live_chat_replay':
             url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
         elif info_dict['protocol'] == 'youtube_live_chat':
             url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
+            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
 
         frag_index = offset = 0
+        click_tracking_params = None
         while continuation_id is not None:
             frag_index += 1
             request_data = {
@@ -154,17 +178,22 @@ def download_and_parse_fragment(url, frag_index, request_data, headers):
             }
             if frag_index > 1:
                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
-            headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
-            headers.update({'content-type': 'application/json'})
-            fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
-            success, continuation_id, offset = download_and_parse_fragment(
-                url, frag_index, fragment_request_data, headers)
+                if click_tracking_params:
+                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
+                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+                headers.update({'content-type': 'application/json'})
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
+                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+                    url, frag_index, fragment_request_data, headers)
+            else:
+                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+                    chat_page_url, frag_index)
             if not success:
                 return False
             if test:
                 break
 
-        self._finish_frag_download(ctx)
+        self._finish_frag_download(ctx, info_dict)
         return True
 
     @staticmethod