jfr.im git - yt-dlp.git/commitdiff
[youtube_live_chat] Support ongoing live chat (#422)
author: siikamiika <redacted>
Wed, 23 Jun 2021 00:12:39 +0000 (03:12 +0300)
committer: GitHub <redacted>
Wed, 23 Jun 2021 00:12:39 +0000 (05:42 +0530)
Authored by: siikamiika

yt_dlp/downloader/__init__.py
yt_dlp/downloader/youtube_live_chat.py
yt_dlp/extractor/youtube.py

index e469b512d209d26e38e145445ce86bddf1ba0b1e..6769cf8e6413af75cf388dbcf2bbc5739f097e2f 100644 (file)
@@ -25,7 +25,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
 from .mhtml import MhtmlFD
 from .niconico import NiconicoDmcFD
 from .websocket import WebSocketFragmentFD
-from .youtube_live_chat import YoutubeLiveChatReplayFD
+from .youtube_live_chat import YoutubeLiveChatFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -44,7 +44,8 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
     'mhtml': MhtmlFD,
     'niconico_dmc': NiconicoDmcFD,
     'websocket_frag': WebSocketFragmentFD,
-    'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
+    'youtube_live_chat': YoutubeLiveChatFD,
+    'youtube_live_chat_replay': YoutubeLiveChatFD,
 }
 
 
index a6c13335e59acf3f0cd70ecc42cf885baf8f3241..f30dcb6bfb9085d1d73217f0d0a73dc35231fb71 100644 (file)
@@ -1,20 +1,23 @@
 from __future__ import division, unicode_literals
 
 import json
+import time
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
 from ..utils import (
     try_get,
+    dict_get,
+    int_or_none,
     RegexNotFoundError,
 )
 from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
 
 
-class YoutubeLiveChatReplayFD(FragmentFD):
-    """ Downloads YouTube live chat replays fragment by fragment """
+class YoutubeLiveChatFD(FragmentFD):
+    """ Downloads YouTube live chats fragment by fragment """
 
-    FD_NAME = 'youtube_live_chat_replay'
+    FD_NAME = 'youtube_live_chat'
 
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
@@ -31,6 +34,8 @@ def real_download(self, filename, info_dict):
 
         ie = YT_BaseIE(self.ydl)
 
+        start_time = int(time.time() * 1000)
+
         def dl_fragment(url, data=None, headers=None):
             http_headers = info_dict.get('http_headers', {})
             if headers:
@@ -38,36 +43,70 @@ def dl_fragment(url, data=None, headers=None):
                 http_headers.update(headers)
             return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
-        def download_and_parse_fragment(url, frag_index, request_data):
+        def parse_actions_replay(live_chat_continuation):
+            offset = continuation_id = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                if 'replayChatItemAction' in action:
+                    replay_chat_item_action = action['replayChatItemAction']
+                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+                processed_fragment.extend(
+                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+            if offset is not None:
+                continuation_id = try_get(
+                    live_chat_continuation,
+                    lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, offset
+
+        live_offset = 0
+
+        def parse_actions_live(live_chat_continuation):
+            nonlocal live_offset
+            continuation_id = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                timestamp = self.parse_live_timestamp(action)
+                if timestamp is not None:
+                    live_offset = timestamp - start_time
+                # compatibility with replay format
+                pseudo_action = {
+                    'replayChatItemAction': {'actions': [action]},
+                    'videoOffsetTimeMsec': str(live_offset),
+                    'isLive': True,
+                }
+                processed_fragment.extend(
+                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+            continuation_data_getters = [
+                lambda x: x['continuations'][0]['invalidationContinuationData'],
+                lambda x: x['continuations'][0]['timedContinuationData'],
+            ]
+            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+            if continuation_data:
+                continuation_id = continuation_data.get('continuation')
+                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+                if timeout_ms is not None:
+                    time.sleep(timeout_ms / 1000)
+            self._append_fragment(ctx, processed_fragment)
+            return continuation_id, live_offset
+
+        if info_dict['protocol'] == 'youtube_live_chat_replay':
+            parse_actions = parse_actions_replay
+        elif info_dict['protocol'] == 'youtube_live_chat':
+            parse_actions = parse_actions_live
+
+        def download_and_parse_fragment(url, frag_index, request_data, headers):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
+                    success, raw_fragment = dl_fragment(url, request_data, headers)
                     if not success:
                         return False, None, None
-                    try:
-                        data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
-                    except RegexNotFoundError:
-                        data = None
-                    if not data:
-                        data = json.loads(raw_fragment)
+                    data = json.loads(raw_fragment)
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    offset = continuation_id = None
-                    processed_fragment = bytearray()
-                    for action in live_chat_continuation.get('actions', []):
-                        if 'replayChatItemAction' in action:
-                            replay_chat_item_action = action['replayChatItemAction']
-                            offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
-                        processed_fragment.extend(
-                            json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
-                    if offset is not None:
-                        continuation_id = try_get(
-                            live_chat_continuation,
-                            lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
-                    self._append_fragment(ctx, processed_fragment)
-
+                    continuation_id, offset = parse_actions(live_chat_continuation)
                     return True, continuation_id, offset
                 except compat_urllib_error.HTTPError as err:
                     count += 1
@@ -100,7 +139,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
         innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
         if not api_key or not innertube_context:
             return False
-        url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+        if info_dict['protocol'] == 'youtube_live_chat_replay':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+        elif info_dict['protocol'] == 'youtube_live_chat':
+            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
 
         frag_index = offset = 0
         while continuation_id is not None:
@@ -111,8 +154,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
             }
             if frag_index > 1:
                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+            headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
+            headers.update({'content-type': 'application/json'})
+            fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
             success, continuation_id, offset = download_and_parse_fragment(
-                url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
+                url, frag_index, fragment_request_data, headers)
             if not success:
                 return False
             if test:
@@ -120,3 +166,39 @@ def download_and_parse_fragment(url, frag_index, request_data):
 
         self._finish_frag_download(ctx)
         return True
+
+    @staticmethod
+    def parse_live_timestamp(action):
+        action_content = dict_get(
+            action,
+            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+        if not isinstance(action_content, dict):
+            return None
+        item = dict_get(action_content, ['item', 'bannerRenderer'])
+        if not isinstance(item, dict):
+            return None
+        renderer = dict_get(item, [
+            # text
+            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            # ticker
+            'liveChatTickerPaidMessageItemRenderer',
+            'liveChatTickerSponsorItemRenderer',
+            # banner
+            'liveChatBannerRenderer',
+        ])
+        if not isinstance(renderer, dict):
+            return None
+        parent_item_getters = [
+            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+            lambda x: x['contents'],
+        ]
+        parent_item = try_get(renderer, parent_item_getters, dict)
+        if parent_item:
+            renderer = dict_get(parent_item, [
+                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+            ])
+            if not isinstance(renderer, dict):
+                return None
+        return int_or_none(renderer.get('timestampUsec'), 1000)
index 375fc19096715361b6610ddcaca0c5c12752d066..ad2cdb05246bea16d5ab60a54eb2894846c0787f 100644 (file)
@@ -2339,18 +2339,17 @@ def process_language(container, base_url, lang_code, sub_name, query):
             initial_data = self._call_api(
                 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
 
-        if not is_live:
-            try:
-                # This will error if there is no livechat
-                initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
-                info['subtitles']['live_chat'] = [{
-                    'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
-                    'video_id': video_id,
-                    'ext': 'json',
-                    'protocol': 'youtube_live_chat_replay',
-                }]
-            except (KeyError, IndexError, TypeError):
-                pass
+        try:
+            # This will error if there is no livechat
+            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+            info['subtitles']['live_chat'] = [{
+                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
+                'video_id': video_id,
+                'ext': 'json',
+                'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
+            }]
+        except (KeyError, IndexError, TypeError):
+            pass
 
         if initial_data:
             chapters = self._extract_chapters_from_json(