[ie/mlbtv] Fix extraction (#10296)

[yt-dlp.git] / yt_dlp / extractor / pluralsight.py
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py

index abd08bc285c3c107bd548ddf0ec1c86fe77ecd9a..d3f03f7eeca798b2172b9269bc23d7e47784c71f 100644 (file)
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -1,22 +1,18 @@
-from __future__ import unicode_literals
-
  import collections
  import json
  import os
  import random
  import re
+import urllib.parse
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
  from ..utils import (
-    dict_get,
      ExtractorError,
+    dict_get,
      float_or_none,
      int_or_none,
      parse_duration,
+    parse_qs,
      qualities,
      srt_subtitles_timecode,
      try_get,
@@ -28,7 +24,7 @@
  class PluralsightBaseIE(InfoExtractor):
      _API_BASE = 'https://app.pluralsight.com'
  
-    _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+    _GRAPHQL_EP = f'{_API_BASE}/player/api/graphql'
      _GRAPHQL_HEADERS = {
          'Content-Type': 'application/json;charset=UTF-8',
      }
@@ -96,8 +92,8 @@ def _download_course_rpc(self, course_id, url, display_id):
          response = self._download_json(
              self._GRAPHQL_EP, display_id, data=json.dumps({
                  'query': self._GRAPHQL_COURSE_TMPL % course_id,
-                'variables': {}
-            }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
+                'variables': {},
+            }).encode(), headers=self._GRAPHQL_HEADERS)
  
          course = try_get(
              response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
@@ -106,7 +102,7 @@ def _download_course_rpc(self, course_id, url, display_id):
              return course
  
          raise ExtractorError(
-            '%s said: %s' % (self.IE_NAME, response['error']['message']),
+            '{} said: {}'.format(self.IE_NAME, response['error']['message']),
              expected=True)
  
  
@@ -161,14 +157,7 @@ class PluralsightIE(PluralsightBaseIE):
    }
  }'''
  
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
          login_page = self._download_webpage(
              self._LOGIN_URL, None, 'Downloading login page')
  
@@ -184,7 +173,7 @@ def _login(self):
              'post url', default=self._LOGIN_URL, group='url')
  
          if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+            post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
  
          response = self._download_webpage(
              post_url, None, 'Logging in',
@@ -195,7 +184,7 @@ def _login(self):
              r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
              response, 'error message', default=None)
          if error:
-            raise ExtractorError('Unable to login: %s' % error, expected=True)
+            raise ExtractorError(f'Unable to login: {error}', expected=True)
  
          if all(not re.search(p, response) for p in (
                  r'__INITIAL_STATE__', r'["\']currentUser["\']',
@@ -204,13 +193,12 @@ def _login(self):
              BLOCKED = 'Your account has been blocked due to suspicious activity'
              if BLOCKED in response:
                  raise ExtractorError(
-                    'Unable to login: %s' % BLOCKED, expected=True)
+                    f'Unable to login: {BLOCKED}', expected=True)
              MUST_AGREE = 'To continue using Pluralsight, you must agree to'
              if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
                  raise ExtractorError(
-                    'Unable to login: %s some documents. Go to pluralsight.com, '
-                    'log in and agree with what Pluralsight requires.'
-                    % MUST_AGREE, expected=True)
+                    f'Unable to login: {MUST_AGREE} some documents. Go to pluralsight.com, '
+                    'log in and agree with what Pluralsight requires.', expected=True)
  
              raise ExtractorError('Unable to log in')
  
@@ -218,8 +206,7 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
          captions = None
          if clip_id:
              captions = self._download_json(
-                '%s/transcript/api/v1/caption/json/%s/%s'
-                % (self._API_BASE, clip_id, lang), video_id,
+                f'{self._API_BASE}/transcript/api/v1/caption/json/{clip_id}/{lang}', video_id,
                  'Downloading captions JSON', 'Unable to download captions JSON',
                  fatal=False)
          if not captions:
@@ -230,9 +217,9 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
                  'm': name,
              }
              captions = self._download_json(
-                '%s/player/retrieve-captions' % self._API_BASE, video_id,
+                f'{self._API_BASE}/player/retrieve-captions', video_id,
                  'Downloading captions JSON', 'Unable to download captions JSON',
-                fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+                fatal=False, data=json.dumps(captions_post).encode(),
                  headers={'Content-Type': 'application/json;charset=utf-8'})
          if captions:
              return {
@@ -242,7 +229,7 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
                  }, {
                      'ext': 'srt',
                      'data': self._convert_subtitles(duration, captions),
-                }]
+                }],
              }
  
      @staticmethod
@@ -263,27 +250,25 @@ def _convert_subtitles(duration, subs):
                  continue
              srt += os.linesep.join(
                  (
-                    '%d' % num,
-                    '%s --> %s' % (
-                        srt_subtitles_timecode(start),
-                        srt_subtitles_timecode(end)),
+                    f'{num}',
+                    f'{srt_subtitles_timecode(start)} --> {srt_subtitles_timecode(end)}',
                      text,
                      os.linesep,
                  ))
          return srt
  
      def _real_extract(self, url):
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
  
          author = qs.get('author', [None])[0]
          name = qs.get('name', [None])[0]
          clip_idx = qs.get('clip', [None])[0]
          course_name = qs.get('course', [None])[0]
  
-        if any(not f for f in (author, name, clip_idx, course_name,)):
+        if any(not f for f in (author, name, clip_idx, course_name)):
              raise ExtractorError('Invalid URL', expected=True)
  
-        display_id = '%s-%s' % (name, clip_idx)
+        display_id = f'{name}-{clip_idx}'
  
          course = self._download_course(course_name, url, display_id)
  
@@ -299,7 +284,7 @@ def _real_extract(self, url):
                          clip_index = clip_.get('index')
                      if clip_index is None:
                          continue
-                    if compat_str(clip_index) == clip_idx:
+                    if str(clip_index) == clip_idx:
                          clip = clip_
                          break
  
@@ -316,14 +301,14 @@ def _real_extract(self, url):
              'high-widescreen': {'width': 1280, 'height': 720},
          }
  
-        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen')
          quality_key = qualities(QUALITIES_PREFERENCE)
  
          AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
  
          ALLOWED_QUALITIES = (
-            AllowedQuality('webm', ['high', ]),
-            AllowedQuality('mp4', ['low', 'medium', 'high', ]),
+            AllowedQuality('webm', ['high']),
+            AllowedQuality('mp4', ['low', 'medium', 'high']),
          )
  
          # Some courses also offer widescreen resolution for high quality (see
@@ -337,11 +322,11 @@ def _real_extract(self, url):
          # In order to minimize the number of calls to ViewClip API and reduce
          # the probability of being throttled or banned by Pluralsight we will request
          # only single format until formats listing was explicitly requested.
-        if self._downloader.params.get('listformats', False):
+        if self.get_param('listformats', False):
              allowed_qualities = ALLOWED_QUALITIES
          else:
              def guess_allowed_qualities():
-                req_format = self._downloader.params.get('format') or 'best'
+                req_format = self.get_param('format') or 'best'
                  req_format_split = req_format.split('-', 1)
                  if len(req_format_split) > 1:
                      req_ext, req_quality = req_format_split
@@ -349,7 +334,7 @@ def guess_allowed_qualities():
                      for allowed_quality in ALLOWED_QUALITIES:
                          if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                              return (AllowedQuality(req_ext, (req_quality, )), )
-                req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
+                req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4'
                  return (AllowedQuality(req_ext, (best_quality, )), )
              allowed_qualities = guess_allowed_qualities()
  
@@ -367,23 +352,23 @@ def guess_allowed_qualities():
                      'mediaType': ext,
                      'quality': '%dx%d' % (f['width'], f['height']),
                  }
-                format_id = '%s-%s' % (ext, quality)
+                format_id = f'{ext}-{quality}'
  
                  try:
                      viewclip = self._download_json(
                          self._GRAPHQL_EP, display_id,
-                        'Downloading %s viewclip graphql' % format_id,
+                        f'Downloading {format_id} viewclip graphql',
                          data=json.dumps({
                              'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
-                            'variables': {}
-                        }).encode('utf-8'),
+                            'variables': {},
+                        }).encode(),
                          headers=self._GRAPHQL_HEADERS)['data']['viewClip']
                  except ExtractorError:
                      # Still works but most likely will go soon
                      viewclip = self._download_json(
-                        '%s/video/clips/viewclip' % self._API_BASE, display_id,
-                        'Downloading %s viewclip JSON' % format_id, fatal=False,
-                        data=json.dumps(clip_post).encode('utf-8'),
+                        f'{self._API_BASE}/video/clips/viewclip', display_id,
+                        f'Downloading {format_id} viewclip JSON', fatal=False,
+                        data=json.dumps(clip_post).encode(),
                          headers={'Content-Type': 'application/json;charset=utf-8'})
  
                  # Pluralsight tracks multiple sequential calls to ViewClip API and start
@@ -393,7 +378,7 @@ def guess_allowed_qualities():
                  # To somewhat reduce the probability of these consequences
                  # we will sleep random amount of time before each call to ViewClip.
                  self._sleep(
-                    random.randint(2, 5), display_id,
+                    random.randint(5, 10), display_id,
                      '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
  
                  if not viewclip:
@@ -412,14 +397,12 @@ def guess_allowed_qualities():
                      clip_f.update({
                          'url': clip_url,
                          'ext': ext,
-                        'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+                        'format_id': f'{format_id}-{cdn}' if cdn else format_id,
                          'quality': quality_key(quality),
                          'source_preference': int_or_none(clip_url_data.get('rank')),
                      })
                      formats.append(clip_f)
  
-        self._sort_formats(formats)
-
          duration = int_or_none(
              clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
  
@@ -482,7 +465,7 @@ def _real_extract(self, url):
                  if clip_index is None:
                      continue
                  clip_url = update_url_query(
-                    '%s/player' % self._API_BASE, query={
+                    f'{self._API_BASE}/player', query={
                          'mode': 'live',
                          'course': course_name,
                          'author': author,