jfr.im git - yt-dlp.git/blobdiff - yt_dlp/downloader/youtube_live_chat.py
[compat] Remove more functions
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
index 8e173d8b58c44b0ee42b6d85470626f428d70743..5334c6c95692e1bff6deda406f695b68f370c268 100644 (file)
@@ -1,24 +1,20 @@
-from __future__ import division, unicode_literals
-
 import json
+import time
+import urllib.error
 
 from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
-    try_get,
-    RegexNotFoundError,
-)
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
-
+from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
 
-class YoutubeLiveChatReplayFD(FragmentFD):
-    """ Downloads YouTube live chat replays fragment by fragment """
 
-    FD_NAME = 'youtube_live_chat_replay'
+class YoutubeLiveChatFD(FragmentFD):
+    """ Downloads YouTube live chats fragment by fragment """
 
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
         self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+            self.report_warning('Live chat download runs until the livestream ends. '
+                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')
 
         fragment_retries = self.params.get('fragment_retries', 0)
         test = self.params.get('test', False)
@@ -29,7 +25,11 @@ def real_download(self, filename, info_dict):
             'total_frags': None,
         }
 
-        ie = YT_BaseIE(self.ydl)
+        from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+        ie = YoutubeBaseInfoExtractor(self.ydl)
+
+        start_time = int(time.time() * 1000)
 
         def dl_fragment(url, data=None, headers=None):
             http_headers = info_dict.get('http_headers', {})
@@ -38,15 +38,81 @@ def dl_fragment(url, data=None, headers=None):
                 http_headers.update(headers)
             return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
-        def download_and_parse_fragment(url, frag_index, request_data):
+        def parse_actions_replay(live_chat_continuation):
+            offset = continuation_id = click_tracking_params = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                if 'replayChatItemAction' in action:
+                    replay_chat_item_action = action['replayChatItemAction']
+                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+                processed_fragment.extend(
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
+            if offset is not None:
+                continuation = try_get(
+                    live_chat_continuation,
+                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
+                if continuation:
+                    continuation_id = continuation.get('continuation')
+                    click_tracking_params = continuation.get('clickTrackingParams')
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, offset, click_tracking_params
+
+        def try_refresh_replay_beginning(live_chat_continuation):
+            # choose the second option that contains the unfiltered live chat replay
+            refresh_continuation = try_get(
+                live_chat_continuation,
+                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
+            if refresh_continuation:
+                # no data yet but required to call _append_fragment
+                self._append_fragment(ctx, b'')
+                refresh_continuation_id = refresh_continuation.get('continuation')
+                offset = 0
+                click_tracking_params = refresh_continuation.get('trackingParams')
+                return refresh_continuation_id, offset, click_tracking_params
+            return parse_actions_replay(live_chat_continuation)
+
+        live_offset = 0
+
+        def parse_actions_live(live_chat_continuation):
+            nonlocal live_offset
+            continuation_id = click_tracking_params = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                timestamp = self.parse_live_timestamp(action)
+                if timestamp is not None:
+                    live_offset = timestamp - start_time
+                # compatibility with replay format
+                pseudo_action = {
+                    'replayChatItemAction': {'actions': [action]},
+                    'videoOffsetTimeMsec': str(live_offset),
+                    'isLive': True,
+                }
+                processed_fragment.extend(
+                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
+            continuation_data_getters = [
+                lambda x: x['continuations'][0]['invalidationContinuationData'],
+                lambda x: x['continuations'][0]['timedContinuationData'],
+            ]
+            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+            if continuation_data:
+                continuation_id = continuation_data.get('continuation')
+                click_tracking_params = continuation_data.get('clickTrackingParams')
+                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+                if timeout_ms is not None:
+                    time.sleep(timeout_ms / 1000)
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, live_offset, click_tracking_params
+
+        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
+                    success = dl_fragment(url, request_data, headers)
                     if not success:
-                        return False, None, None
+                        return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
                     try:
-                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                     except RegexNotFoundError:
                         data = None
                     if not data:
@@ -54,37 +120,30 @@ def download_and_parse_fragment(url, frag_index, request_data):
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    offset = continuation_id = None
-                    processed_fragment = bytearray()
-                    for action in live_chat_continuation.get('actions', []):
-                        if 'replayChatItemAction' in action:
-                            replay_chat_item_action = action['replayChatItemAction']
-                            offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
-                        processed_fragment.extend(
-                            json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
-                    if offset is not None:
-                        continuation_id = try_get(
-                            live_chat_continuation,
-                            lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
-                    self._append_fragment(ctx, processed_fragment)
-
-                    return True, continuation_id, offset
-                except compat_urllib_error.HTTPError as err:
+                    if info_dict['protocol'] == 'youtube_live_chat_replay':
+                        if frag_index == 1:
+                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
+                        else:
+                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
+                    elif info_dict['protocol'] == 'youtube_live_chat':
+                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
+                    return True, continuation_id, offset, click_tracking_params
+                except urllib.error.HTTPError as err:
                     count += 1
                     if count <= fragment_retries:
                         self.report_retry_fragment(err, frag_index, count, fragment_retries)
             if count > fragment_retries:
                 self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False, None, None
+                return False, None, None, None
 
-        self._prepare_and_start_frag_download(ctx)
+        self._prepare_and_start_frag_download(ctx, info_dict)
 
-        success, raw_fragment = dl_fragment(
-            'https://www.youtube.com/watch?v={}'.format(video_id))
+        success = dl_fragment(info_dict['url'])
         if not success:
             return False
+        raw_fragment = self._read_fragment(ctx)
         try:
-            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
         except RegexNotFoundError:
             return False
         continuation_id = try_get(
@@ -93,7 +152,7 @@ def download_and_parse_fragment(url, frag_index, request_data):
         # no data yet but required to call _append_fragment
         self._append_fragment(ctx, b'')
 
-        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
 
         if not ytcfg:
             return False
@@ -101,9 +160,16 @@ def download_and_parse_fragment(url, frag_index, request_data):
         innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
         if not api_key or not innertube_context:
             return False
-        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+        if info_dict['protocol'] == 'youtube_live_chat_replay':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
+        elif info_dict['protocol'] == 'youtube_live_chat':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
+            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
 
         frag_index = offset = 0
+        click_tracking_params = None
         while continuation_id is not None:
             frag_index += 1
             request_data = {
@@ -112,12 +178,56 @@ def download_and_parse_fragment(url, frag_index, request_data):
             }
             if frag_index > 1:
                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
-            success, continuation_id, offset = download_and_parse_fragment(
-                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
+                if click_tracking_params:
+                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
+                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+                headers.update({'content-type': 'application/json'})
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
+                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+                    url, frag_index, fragment_request_data, headers)
+            else:
+                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+                    chat_page_url, frag_index)
             if not success:
                 return False
             if test:
                 break
 
-        self._finish_frag_download(ctx)
+        self._finish_frag_download(ctx, info_dict)
         return True
+
+    @staticmethod
+    def parse_live_timestamp(action):
+        action_content = dict_get(
+            action,
+            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+        if not isinstance(action_content, dict):
+            return None
+        item = dict_get(action_content, ['item', 'bannerRenderer'])
+        if not isinstance(item, dict):
+            return None
+        renderer = dict_get(item, [
+            # text
+            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            # ticker
+            'liveChatTickerPaidMessageItemRenderer',
+            'liveChatTickerSponsorItemRenderer',
+            # banner
+            'liveChatBannerRenderer',
+        ])
+        if not isinstance(renderer, dict):
+            return None
+        parent_item_getters = [
+            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+            lambda x: x['contents'],
+        ]
+        parent_item = try_get(renderer, parent_item_getters, dict)
+        if parent_item:
+            renderer = dict_get(parent_item, [
+                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            ])
+            if not isinstance(renderer, dict):
+                return None
+        return int_or_none(renderer.get('timestampUsec'), 1000)