]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/pluralsight.py
[ie/mlbtv] Fix extraction (#10296)
[yt-dlp.git] / yt_dlp / extractor / pluralsight.py
index abd08bc285c3c107bd548ddf0ec1c86fe77ecd9a..d3f03f7eeca798b2172b9269bc23d7e47784c71f 100644 (file)
@@ -1,22 +1,18 @@
-from __future__ import unicode_literals
-
 import collections
 import json
 import os
 import random
 import re
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
 from ..utils import (
-    dict_get,
     ExtractorError,
+    dict_get,
     float_or_none,
     int_or_none,
     parse_duration,
+    parse_qs,
     qualities,
     srt_subtitles_timecode,
     try_get,
@@ -28,7 +24,7 @@
 class PluralsightBaseIE(InfoExtractor):
     _API_BASE = 'https://app.pluralsight.com'
 
-    _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
+    _GRAPHQL_EP = f'{_API_BASE}/player/api/graphql'
     _GRAPHQL_HEADERS = {
         'Content-Type': 'application/json;charset=UTF-8',
     }
@@ -96,8 +92,8 @@ def _download_course_rpc(self, course_id, url, display_id):
         response = self._download_json(
             self._GRAPHQL_EP, display_id, data=json.dumps({
                 'query': self._GRAPHQL_COURSE_TMPL % course_id,
-                'variables': {}
-            }).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
+                'variables': {},
+            }).encode(), headers=self._GRAPHQL_HEADERS)
 
         course = try_get(
             response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
@@ -106,7 +102,7 @@ def _download_course_rpc(self, course_id, url, display_id):
             return course
 
         raise ExtractorError(
-            '%s said: %s' % (self.IE_NAME, response['error']['message']),
+            '{} said: {}'.format(self.IE_NAME, response['error']['message']),
             expected=True)
 
 
@@ -161,14 +157,7 @@ class PluralsightIE(PluralsightBaseIE):
   }
 }'''
 
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
@@ -184,7 +173,7 @@ def _login(self):
             'post url', default=self._LOGIN_URL, group='url')
 
         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+            post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
             post_url, None, 'Logging in',
@@ -195,7 +184,7 @@ def _login(self):
             r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
             response, 'error message', default=None)
         if error:
-            raise ExtractorError('Unable to login: %s' % error, expected=True)
+            raise ExtractorError(f'Unable to login: {error}', expected=True)
 
         if all(not re.search(p, response) for p in (
                 r'__INITIAL_STATE__', r'["\']currentUser["\']',
@@ -204,13 +193,12 @@ def _login(self):
             BLOCKED = 'Your account has been blocked due to suspicious activity'
             if BLOCKED in response:
                 raise ExtractorError(
-                    'Unable to login: %s' % BLOCKED, expected=True)
+                    f'Unable to login: {BLOCKED}', expected=True)
             MUST_AGREE = 'To continue using Pluralsight, you must agree to'
             if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')):
                 raise ExtractorError(
-                    'Unable to login: %s some documents. Go to pluralsight.com, '
-                    'log in and agree with what Pluralsight requires.'
-                    % MUST_AGREE, expected=True)
+                    f'Unable to login: {MUST_AGREE} some documents. Go to pluralsight.com, '
+                    'log in and agree with what Pluralsight requires.', expected=True)
 
             raise ExtractorError('Unable to log in')
 
@@ -218,8 +206,7 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
         captions = None
         if clip_id:
             captions = self._download_json(
-                '%s/transcript/api/v1/caption/json/%s/%s'
-                % (self._API_BASE, clip_id, lang), video_id,
+                f'{self._API_BASE}/transcript/api/v1/caption/json/{clip_id}/{lang}', video_id,
                 'Downloading captions JSON', 'Unable to download captions JSON',
                 fatal=False)
         if not captions:
@@ -230,9 +217,9 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
                 'm': name,
             }
             captions = self._download_json(
-                '%s/player/retrieve-captions' % self._API_BASE, video_id,
+                f'{self._API_BASE}/player/retrieve-captions', video_id,
                 'Downloading captions JSON', 'Unable to download captions JSON',
-                fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+                fatal=False, data=json.dumps(captions_post).encode(),
                 headers={'Content-Type': 'application/json;charset=utf-8'})
         if captions:
             return {
@@ -242,7 +229,7 @@ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_
                 }, {
                     'ext': 'srt',
                     'data': self._convert_subtitles(duration, captions),
-                }]
+                }],
             }
 
     @staticmethod
@@ -263,27 +250,25 @@ def _convert_subtitles(duration, subs):
                 continue
             srt += os.linesep.join(
                 (
-                    '%d' % num,
-                    '%s --> %s' % (
-                        srt_subtitles_timecode(start),
-                        srt_subtitles_timecode(end)),
+                    f'{num}',
+                    f'{srt_subtitles_timecode(start)} --> {srt_subtitles_timecode(end)}',
                     text,
                     os.linesep,
                 ))
         return srt
 
     def _real_extract(self, url):
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+        qs = parse_qs(url)
 
         author = qs.get('author', [None])[0]
         name = qs.get('name', [None])[0]
         clip_idx = qs.get('clip', [None])[0]
         course_name = qs.get('course', [None])[0]
 
-        if any(not f for f in (author, name, clip_idx, course_name,)):
+        if any(not f for f in (author, name, clip_idx, course_name)):
             raise ExtractorError('Invalid URL', expected=True)
 
-        display_id = '%s-%s' % (name, clip_idx)
+        display_id = f'{name}-{clip_idx}'
 
         course = self._download_course(course_name, url, display_id)
 
@@ -299,7 +284,7 @@ def _real_extract(self, url):
                         clip_index = clip_.get('index')
                     if clip_index is None:
                         continue
-                    if compat_str(clip_index) == clip_idx:
+                    if str(clip_index) == clip_idx:
                         clip = clip_
                         break
 
@@ -316,14 +301,14 @@ def _real_extract(self, url):
             'high-widescreen': {'width': 1280, 'height': 720},
         }
 
-        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen')
         quality_key = qualities(QUALITIES_PREFERENCE)
 
         AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
 
         ALLOWED_QUALITIES = (
-            AllowedQuality('webm', ['high']),
-            AllowedQuality('mp4', ['low', 'medium', 'high']),
+            AllowedQuality('webm', ['high']),
+            AllowedQuality('mp4', ['low', 'medium', 'high']),
         )
 
         # Some courses also offer widescreen resolution for high quality (see
@@ -337,11 +322,11 @@ def _real_extract(self, url):
         # In order to minimize the number of calls to ViewClip API and reduce
         # the probability of being throttled or banned by Pluralsight we will request
         # only single format until formats listing was explicitly requested.
-        if self._downloader.params.get('listformats', False):
+        if self.get_param('listformats', False):
             allowed_qualities = ALLOWED_QUALITIES
         else:
             def guess_allowed_qualities():
-                req_format = self._downloader.params.get('format') or 'best'
+                req_format = self.get_param('format') or 'best'
                 req_format_split = req_format.split('-', 1)
                 if len(req_format_split) > 1:
                     req_ext, req_quality = req_format_split
@@ -349,7 +334,7 @@ def guess_allowed_qualities():
                     for allowed_quality in ALLOWED_QUALITIES:
                         if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
                             return (AllowedQuality(req_ext, (req_quality, )), )
-                req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4'
+                req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4'
                 return (AllowedQuality(req_ext, (best_quality, )), )
             allowed_qualities = guess_allowed_qualities()
 
@@ -367,23 +352,23 @@ def guess_allowed_qualities():
                     'mediaType': ext,
                     'quality': '%dx%d' % (f['width'], f['height']),
                 }
-                format_id = '%s-%s' % (ext, quality)
+                format_id = f'{ext}-{quality}'
 
                 try:
                     viewclip = self._download_json(
                         self._GRAPHQL_EP, display_id,
-                        'Downloading %s viewclip graphql' % format_id,
+                        f'Downloading {format_id} viewclip graphql',
                         data=json.dumps({
                             'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
-                            'variables': {}
-                        }).encode('utf-8'),
+                            'variables': {},
+                        }).encode(),
                         headers=self._GRAPHQL_HEADERS)['data']['viewClip']
                 except ExtractorError:
                     # Still works but most likely will go soon
                     viewclip = self._download_json(
-                        '%s/video/clips/viewclip' % self._API_BASE, display_id,
-                        'Downloading %s viewclip JSON' % format_id, fatal=False,
-                        data=json.dumps(clip_post).encode('utf-8'),
+                        f'{self._API_BASE}/video/clips/viewclip', display_id,
+                        f'Downloading {format_id} viewclip JSON', fatal=False,
+                        data=json.dumps(clip_post).encode(),
                         headers={'Content-Type': 'application/json;charset=utf-8'})
 
                 # Pluralsight tracks multiple sequential calls to ViewClip API and start
@@ -393,7 +378,7 @@ def guess_allowed_qualities():
                 # To somewhat reduce the probability of these consequences
                 # we will sleep random amount of time before each call to ViewClip.
                 self._sleep(
-                    random.randint(2, 5), display_id,
+                    random.randint(5, 10), display_id,
                     '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
 
                 if not viewclip:
@@ -412,14 +397,12 @@ def guess_allowed_qualities():
                     clip_f.update({
                         'url': clip_url,
                         'ext': ext,
-                        'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+                        'format_id': f'{format_id}-{cdn}' if cdn else format_id,
                         'quality': quality_key(quality),
                         'source_preference': int_or_none(clip_url_data.get('rank')),
                     })
                     formats.append(clip_f)
 
-        self._sort_formats(formats)
-
         duration = int_or_none(
             clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
 
@@ -482,7 +465,7 @@ def _real_extract(self, url):
                 if clip_index is None:
                     continue
                 clip_url = update_url_query(
-                    '%s/player' % self._API_BASE, query={
+                    f'{self._API_BASE}/player', query={
                         'mode': 'live',
                         'course': course_name,
                         'author': author,