jfr.im git - yt-dlp.git/commitdiff
support youtube live chat replay
author siikamiika <redacted>
Tue, 4 Aug 2020 22:02:23 +0000 (01:02 +0300)
committer siikamiika <redacted>
Tue, 4 Aug 2020 22:02:23 +0000 (01:02 +0300)
youtube_dl/downloader/__init__.py
youtube_dl/downloader/youtube_live_chat.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py

index 2e485df9dac09e197af6183ded3570e57805fad2..4ae81f516e63958c30c798c917e8c5df44aa867f 100644 (file)
@@ -8,6 +8,7 @@
 from .dash import DashSegmentsFD
 from .rtsp import RtspFD
 from .ism import IsmFD
+from .youtube_live_chat import YoutubeLiveChatReplayFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -26,6 +27,7 @@
     'f4m': F4mFD,
     'http_dash_segments': DashSegmentsFD,
     'ism': IsmFD,
+    'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
 }
 
 
diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
new file mode 100644 (file)
index 0000000..64d1d20
--- /dev/null
@@ -0,0 +1,88 @@
+from __future__ import division, unicode_literals
+
+import re
+import json
+
+from .fragment import FragmentFD
+
+
+class YoutubeLiveChatReplayFD(FragmentFD):
+    """Download a YouTube live chat replay fragment by fragment.
+
+    Every chat action received is appended to the output as one UTF-8
+    encoded JSON document per line.
+    """
+
+    FD_NAME = 'youtube_live_chat_replay'
+
+    @staticmethod
+    def _traverse(obj, *path):
+        """Follow *path* (dict keys / list indices) into *obj*.
+
+        Returns the value found, or None as soon as any step is missing or
+        the intermediate value cannot be indexed.
+        """
+        for step in path:
+            try:
+                obj = obj[step]
+            except (KeyError, IndexError, TypeError):
+                return None
+        return obj
+
+    @staticmethod
+    def _parse_yt_initial_data(page):
+        """Extract the ``window["ytInitialData"]`` JSON object from *page*.
+
+        *page* is the raw (bytes) body of an HTML page.  Returns the decoded
+        object, or None when the assignment is not present or its value is
+        not valid UTF-8 encoded JSON.
+        """
+        mobj = re.search(br'window\["ytInitialData"\]\s*=\s*(.*);', page)
+        if mobj is None:
+            return None
+        try:
+            # Decode explicitly: json.loads() accepts bytes only since
+            # Python 3.6.  UnicodeDecodeError is a ValueError subclass, so a
+            # single handler covers both decoding and JSON syntax errors.
+            return json.loads(mobj.group(1).decode('utf-8'))
+        except ValueError:
+            return None
+
+    def real_download(self, filename, info_dict):
+        """Download the chat replay of ``info_dict['video_id']`` to *filename*.
+
+        Returns True when the replay was written completely (or, in test
+        mode, after the first chat fragment) and False when a fragment could
+        not be downloaded or a response did not contain the expected data.
+        """
+        video_id = info_dict['video_id']
+        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+        test = self.params.get('test', False)
+
+        ctx = {
+            'filename': filename,
+            'live': True,
+            'total_frags': None,
+        }
+
+        def dl_fragment(url):
+            headers = info_dict.get('http_headers', {})
+            return self._download_fragment(ctx, url, info_dict, headers)
+
+        self._prepare_and_start_frag_download(ctx)
+
+        # The watch page carries the continuation token that opens the replay.
+        success, raw_fragment = dl_fragment(
+            'https://www.youtube.com/watch?v=%s' % video_id)
+        if not success:
+            return False
+        continuation_id = self._traverse(
+            self._parse_yt_initial_data(raw_fragment),
+            'contents', 'twoColumnWatchNextResults', 'conversationBar',
+            'liveChatRenderer', 'continuations', 0,
+            'reloadContinuationData', 'continuation')
+        if continuation_id is None:
+            # Missing ytInitialData, or a video without a chat replay.
+            self.report_error(
+                '[%s] %s: unable to find a live chat replay'
+                % (self.FD_NAME, video_id))
+            return False
+        # no data yet but required to call _append_fragment
+        self._append_fragment(ctx, b'')
+
+        first = True
+        offset = None
+        while continuation_id is not None:
+            if first:
+                # The first chat fragment is an HTML page embedding the data.
+                url = ('https://www.youtube.com/live_chat_replay?continuation=%s'
+                       % continuation_id)
+                success, raw_fragment = dl_fragment(url)
+                if not success:
+                    return False
+                data = self._parse_yt_initial_data(raw_fragment)
+            else:
+                # Re-request starting 5 s before the last item seen, but
+                # never with a negative player offset.
+                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+                       '?continuation=%s&playerOffsetMs=%d&hidden=false&pbj=1'
+                       % (continuation_id, max(offset - 5000, 0)))
+                success, raw_fragment = dl_fragment(url)
+                if not success:
+                    return False
+                try:
+                    data = json.loads(raw_fragment.decode('utf-8'))
+                except ValueError:
+                    data = None
+                data = self._traverse(data, 'response')
+
+            first = False
+
+            live_chat_continuation = self._traverse(
+                data, 'continuationContents', 'liveChatContinuation')
+            if live_chat_continuation is None:
+                self.report_error(
+                    '[%s] %s: unexpected live chat replay response'
+                    % (self.FD_NAME, video_id))
+                return False
+
+            continuation_id = None
+            offset = None
+            processed_fragment = bytearray()
+            if 'actions' in live_chat_continuation:
+                for action in live_chat_continuation['actions']:
+                    if 'replayChatItemAction' in action:
+                        replay_chat_item_action = action['replayChatItemAction']
+                        offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+                    processed_fragment.extend(
+                        json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                # A continuation of any other kind (or none at all) yields
+                # None here, which ends the loop instead of raising KeyError.
+                continuation_id = self._traverse(
+                    live_chat_continuation,
+                    'continuations', 0,
+                    'liveChatReplayContinuationData', 'continuation')
+
+            self._append_fragment(ctx, processed_fragment)
+
+            # Without a replay offset there is nothing to resume from.
+            if test or offset is None:
+                break
+
+        self._finish_frag_download(ctx)
+
+        return True
index b35bf03aafc7c7c45b3c35735a68d00f86aed988..e554702e77c1d9eaa6706a28305c53af7fc29308 100644 (file)
@@ -1462,6 +1462,14 @@ def _get_subtitles(self, video_id, webpage):
                     'ext': ext,
                 })
             sub_lang_list[lang] = sub_formats
+        # TODO check that live chat replay actually exists
+        sub_lang_list['live_chat'] = [
+            {
+                'video_id': video_id,
+                'ext': 'json',
+                'protocol': 'youtube_live_chat_replay',
+            },
+        ]
         if not sub_lang_list:
             self._downloader.report_warning('video doesn\'t have subtitles')
             return {}