[ie/patreon] Fix Vimeo embed extraction (#9712)

[yt-dlp.git] / yt_dlp / extractor / reddit.py
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py

index 3e458456c18106aceb8a1adf95617d01f9830631..62f669f35da0140ab6de78058a55828f40b1f352 100644 (file)
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -8,11 +8,13 @@
      traverse_obj,
      try_get,
      unescapeHTML,
+    urlencode_postdata,
      url_or_none,
  )
  
  
  class RedditIE(InfoExtractor):
+    _NETRC_MACHINE = 'reddit'
      _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
      _TESTS = [{
          'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
@@ -176,6 +178,25 @@ class RedditIE(InfoExtractor):
          'only_matching': True,
      }]
  
+    def _perform_login(self, username, password):
+        captcha = self._download_json(
+            'https://www.reddit.com/api/requires_captcha/login.json', None,
+            'Checking login requirement')['required']
+        if captcha:
+            raise ExtractorError('Reddit is requiring captcha before login', expected=True)
+        login = self._download_json(
+            f'https://www.reddit.com/api/login/{username}', None, data=urlencode_postdata({
+                'op': 'login-main',
+                'user': username,
+                'passwd': password,
+                'api_type': 'json',
+            }), note='Logging in', errnote='Login request failed')
+        errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1)))
+        if errors:
+            raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True)
+        elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
+            raise ExtractorError('Unable to login, no cookie was returned')
+
      def _real_extract(self, url):
          host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
  
@@ -219,6 +240,7 @@ def add_thumbnail(src):
                  'url': unescapeHTML(thumbnail_url),
                  'width': int_or_none(src.get('width')),
                  'height': int_or_none(src.get('height')),
+                'http_headers': {'Accept': '*/*'},
              })
  
          for image in try_get(data, lambda x: x['preview']['images']) or []:
@@ -297,16 +319,20 @@ def add_thumbnail(src):
                  'format_id': 'fallback',
                  'format_note': 'DASH video, mp4_dash',
              }]
-            formats.extend(self._extract_m3u8_formats(
-                hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
-            formats.extend(self._extract_mpd_formats(
-                dash_playlist_url, display_id, mpd_id='dash', fatal=False))
+            hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
+                hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(hls_fmts)
+            dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
+                dash_playlist_url, display_id, mpd_id='dash', fatal=False)
+            formats.extend(dash_fmts)
+            self._merge_subtitles(dash_subs, target=subtitles)
  
              return {
                  **info,
                  'id': video_id,
                  'display_id': display_id,
                  'formats': formats,
+                'subtitles': subtitles,
                  'duration': int_or_none(reddit_video.get('duration')),
              }