jfr.im git - yt-dlp.git/blobdiff - yt_dlp/downloader/youtube_live_chat.py
[ie/cloudflarestream] Fix `_VALID_URL` and embed extraction (#10215)
[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
index 5e05426e63d65f6d8a42960027adf63f91594d0d..961938d4491fd7ffff9c062fcbc1908617e94fb7 100644 (file)
@@ -1,29 +1,28 @@
-from __future__ import division, unicode_literals
-
 import json
 import time
 
 from .fragment import FragmentFD
-from ..compat import compat_urllib_error
+from ..networking.exceptions import HTTPError
 from ..utils import (
-    try_get,
+    RegexNotFoundError,
+    RetryManager,
     dict_get,
     int_or_none,
-    RegexNotFoundError,
+    try_get,
 )
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
+from ..utils.networking import HTTPHeaderDict
 
 
 class YoutubeLiveChatFD(FragmentFD):
     """ Downloads YouTube live chats fragment by fragment """
 
-    FD_NAME = 'youtube_live_chat'
-
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
-        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
+        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+            self.report_warning('Live chat download runs until the livestream ends. '
+                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')
 
-        fragment_retries = self.params.get('fragment_retries', 0)
         test = self.params.get('test', False)
 
         ctx = {
@@ -32,15 +31,14 @@ def real_download(self, filename, info_dict):
             'total_frags': None,
         }
 
-        ie = YT_BaseIE(self.ydl)
+        from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+        ie = YoutubeBaseInfoExtractor(self.ydl)
 
         start_time = int(time.time() * 1000)
 
         def dl_fragment(url, data=None, headers=None):
-            http_headers = info_dict.get('http_headers', {})
-            if headers:
-                http_headers = http_headers.copy()
-                http_headers.update(headers)
+            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
             return self._download_fragment(ctx, url, info_dict, http_headers, data)
 
         def parse_actions_replay(live_chat_continuation):
@@ -51,7 +49,7 @@ def parse_actions_replay(live_chat_continuation):
                     replay_chat_item_action = action['replayChatItemAction']
                     offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                 processed_fragment.extend(
-                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
             if offset is not None:
                 continuation = try_get(
                     live_chat_continuation,
@@ -93,7 +91,7 @@ def parse_actions_live(live_chat_continuation):
                     'isLive': True,
                 }
                 processed_fragment.extend(
-                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
             continuation_data_getters = [
                 lambda x: x['continuations'][0]['invalidationContinuationData'],
                 lambda x: x['continuations'][0]['timedContinuationData'],
@@ -109,12 +107,12 @@ def parse_actions_live(live_chat_continuation):
             return continuation_id, live_offset, click_tracking_params
 
         def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
-            count = 0
-            while count <= fragment_retries:
+            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                 try:
-                    success, raw_fragment = dl_fragment(url, request_data, headers)
+                    success = dl_fragment(url, request_data, headers)
                     if not success:
                         return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
                     try:
                         data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                     except RegexNotFoundError:
@@ -124,27 +122,22 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
                     live_chat_continuation = try_get(
                         data,
                         lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    if info_dict['protocol'] == 'youtube_live_chat_replay':
-                        if frag_index == 1:
-                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
-                        else:
-                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
-                    elif info_dict['protocol'] == 'youtube_live_chat':
-                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
-                    return True, continuation_id, offset, click_tracking_params
-                except compat_urllib_error.HTTPError as err:
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
-            if count > fragment_retries:
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False, None, None, None
-
-        self._prepare_and_start_frag_download(ctx)
-
-        success, raw_fragment = dl_fragment(info_dict['url'])
+
+                    func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+                            or frag_index == 1 and try_refresh_replay_beginning
+                            or parse_actions_replay)
+                    return (True, *func(live_chat_continuation))
+                except HTTPError as err:
+                    retry.error = err
+                    continue
+            return False, None, None, None
+
+        self._prepare_and_start_frag_download(ctx, info_dict)
+
+        success = dl_fragment(info_dict['url'])
         if not success:
             return False
+        raw_fragment = self._read_fragment(ctx)
         try:
             data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
         except RegexNotFoundError:
@@ -183,9 +176,9 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
                 request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                 if click_tracking_params:
                     request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
-                headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
+                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                 headers.update({'content-type': 'application/json'})
-                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                 success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                     url, frag_index, fragment_request_data, headers)
             else:
@@ -196,8 +189,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
             if test:
                 break
 
-        self._finish_frag_download(ctx)
-        return True
+        return self._finish_frag_download(ctx, info_dict)
 
     @staticmethod
     def parse_live_timestamp(action):