[ie/cloudflarestream] Fix `_VALID_URL` and embed extraction (#10215)

[yt-dlp.git] / yt_dlp / downloader / youtube_live_chat.py
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py

index 5e05426e63d65f6d8a42960027adf63f91594d0d..961938d4491fd7ffff9c062fcbc1908617e94fb7 100644 (file)
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -1,29 +1,28 @@
-from __future__ import division, unicode_literals
-
  import json
  import time
  
  from .fragment import FragmentFD
-from ..compat import compat_urllib_error
+from ..networking.exceptions import HTTPError
  from ..utils import (
-    try_get,
+    RegexNotFoundError,
+    RetryManager,
      dict_get,
      int_or_none,
-    RegexNotFoundError,
+    try_get,
  )
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
+from ..utils.networking import HTTPHeaderDict
  
  
  class YoutubeLiveChatFD(FragmentFD):
      """ Downloads YouTube live chats fragment by fragment """
  
-    FD_NAME = 'youtube_live_chat'
-
      def real_download(self, filename, info_dict):
          video_id = info_dict['video_id']
-        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
+        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+            self.report_warning('Live chat download runs until the livestream ends. '
+                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')
  
-        fragment_retries = self.params.get('fragment_retries', 0)
          test = self.params.get('test', False)
  
          ctx = {
@@ -32,15 +31,14 @@ def real_download(self, filename, info_dict):
              'total_frags': None,
          }
  
-        ie = YT_BaseIE(self.ydl)
+        from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+        ie = YoutubeBaseInfoExtractor(self.ydl)
  
          start_time = int(time.time() * 1000)
  
          def dl_fragment(url, data=None, headers=None):
-            http_headers = info_dict.get('http_headers', {})
-            if headers:
-                http_headers = http_headers.copy()
-                http_headers.update(headers)
+            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
              return self._download_fragment(ctx, url, info_dict, http_headers, data)
  
          def parse_actions_replay(live_chat_continuation):
@@ -51,7 +49,7 @@ def parse_actions_replay(live_chat_continuation):
                      replay_chat_item_action = action['replayChatItemAction']
                      offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                  processed_fragment.extend(
-                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
              if offset is not None:
                  continuation = try_get(
                      live_chat_continuation,
@@ -93,7 +91,7 @@ def parse_actions_live(live_chat_continuation):
                      'isLive': True,
                  }
                  processed_fragment.extend(
-                    json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
              continuation_data_getters = [
                  lambda x: x['continuations'][0]['invalidationContinuationData'],
                  lambda x: x['continuations'][0]['timedContinuationData'],
@@ -109,12 +107,12 @@ def parse_actions_live(live_chat_continuation):
              return continuation_id, live_offset, click_tracking_params
  
          def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
-            count = 0
-            while count <= fragment_retries:
+            for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
                  try:
-                    success, raw_fragment = dl_fragment(url, request_data, headers)
+                    success = dl_fragment(url, request_data, headers)
                      if not success:
                          return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
                      try:
                          data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                      except RegexNotFoundError:
@@ -124,27 +122,22 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
                      live_chat_continuation = try_get(
                          data,
                          lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
-                    if info_dict['protocol'] == 'youtube_live_chat_replay':
-                        if frag_index == 1:
-                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
-                        else:
-                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
-                    elif info_dict['protocol'] == 'youtube_live_chat':
-                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
-                    return True, continuation_id, offset, click_tracking_params
-                except compat_urllib_error.HTTPError as err:
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
-            if count > fragment_retries:
-                self.report_error('giving up after %s fragment retries' % fragment_retries)
-                return False, None, None, None
-
-        self._prepare_and_start_frag_download(ctx)
-
-        success, raw_fragment = dl_fragment(info_dict['url'])
+
+                    func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+                            or frag_index == 1 and try_refresh_replay_beginning
+                            or parse_actions_replay)
+                    return (True, *func(live_chat_continuation))
+                except HTTPError as err:
+                    retry.error = err
+                    continue
+            return False, None, None, None
+
+        self._prepare_and_start_frag_download(ctx, info_dict)
+
+        success = dl_fragment(info_dict['url'])
          if not success:
              return False
+        raw_fragment = self._read_fragment(ctx)
          try:
              data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
          except RegexNotFoundError:
@@ -183,9 +176,9 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
                  request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                  if click_tracking_params:
                      request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
-                headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
+                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                  headers.update({'content-type': 'application/json'})
-                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                  success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                      url, frag_index, fragment_request_data, headers)
              else:
@@ -196,8 +189,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
              if test:
                  break
  
-        self._finish_frag_download(ctx)
-        return True
+        return self._finish_frag_download(ctx, info_dict)
  
      @staticmethod
      def parse_live_timestamp(action):