]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/vrv.py
[ie/box] Fix formats extraction (#8649)
[yt-dlp.git] / yt_dlp / extractor / vrv.py
index 6e51469b094363879749bf2926714662e26a1858..523c442e653848a6c9af00cac99d0e8e7a20f005 100644 (file)
@@ -1,24 +1,21 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import base64
-import json
 import hashlib
 import hmac
+import json
 import random
 import string
 import time
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_urllib_parse_urlencode,
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_urlencode
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    join_nonempty,
+    traverse_obj,
 )
 
 
@@ -34,7 +31,7 @@ def _call_api(self, path, video_id, note, data=None):
         base_url = self._API_DOMAIN + '/core/' + path
         query = [
             ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
-            ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
+            ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))),
             ('oauth_signature_method', 'HMAC-SHA1'),
             ('oauth_timestamp', int(time.time())),
         ]
@@ -47,19 +44,19 @@ def _call_api(self, path, video_id, note, data=None):
             headers['Content-Type'] = 'application/json'
         base_string = '&'.join([
             'POST' if data else 'GET',
-            compat_urllib_parse.quote(base_url, ''),
-            compat_urllib_parse.quote(encoded_query, '')])
+            urllib.parse.quote(base_url, ''),
+            urllib.parse.quote(encoded_query, '')])
         oauth_signature = base64.b64encode(hmac.new(
             (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
             base_string.encode(), hashlib.sha1).digest()).decode()
-        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+        encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
         try:
             return self._download_json(
                 '?'.join([base_url, encoded_query]), video_id,
                 note='Downloading %s JSON metadata' % note, headers=headers, data=data)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                raise ExtractorError(json.loads(e.cause.response.read().decode())['message'], expected=True)
             raise
 
     def _call_cms(self, path, video_id, note):
@@ -83,7 +80,30 @@ def _get_cms_resource(self, resource_key, video_id):
                 'resource_key': resource_key,
             })['__links__']['cms_resource']['href']
 
-    def _real_initialize(self):
+    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
+        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
+            return []
+        format_id = join_nonempty(
+            stream_format,
+            audio_lang and 'audio-%s' % audio_lang,
+            hardsub_lang and 'hardsub-%s' % hardsub_lang)
+        if 'hls' in stream_format:
+            adaptive_formats = self._extract_m3u8_formats(
+                url, video_id, 'mp4', m3u8_id=format_id,
+                note='Downloading %s information' % format_id,
+                fatal=False)
+        elif stream_format == 'dash':
+            adaptive_formats = self._extract_mpd_formats(
+                url, video_id, mpd_id=format_id,
+                note='Downloading %s information' % format_id,
+                fatal=False)
+        if audio_lang:
+            for f in adaptive_formats:
+                if f.get('acodec') != 'none':
+                    f['language'] = audio_lang
+        return adaptive_formats
+
+    def _set_api_params(self):
         webpage = self._download_webpage(
             'https://vrv.co/', None, headers=self.geo_verification_headers())
         self._API_PARAMS = self._parse_json(self._search_regex(
@@ -122,47 +142,17 @@ class VRVIE(VRVBaseIE):
     }]
     _NETRC_MACHINE = 'vrv'
 
-    def _real_initialize(self):
-        super(VRVIE, self)._real_initialize()
-
-        email, password = self._get_login_info()
-        if email is None:
-            return
-
+    def _perform_login(self, username, password):
         token_credentials = self._call_api(
             'authenticate/by:credentials', None, 'Token Credentials', data={
-                'email': email,
+                'email': username,
                 'password': password,
             })
         self._TOKEN = token_credentials['oauth_token']
         self._TOKEN_SECRET = token_credentials['oauth_token_secret']
 
-    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
-        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
-            return []
-        stream_id_list = []
-        if audio_lang:
-            stream_id_list.append('audio-%s' % audio_lang)
-        if hardsub_lang:
-            stream_id_list.append('hardsub-%s' % hardsub_lang)
-        format_id = stream_format
-        if stream_id_list:
-            format_id += '-' + '-'.join(stream_id_list)
-        if 'hls' in stream_format:
-            adaptive_formats = self._extract_m3u8_formats(
-                url, video_id, 'mp4', m3u8_id=format_id,
-                note='Downloading %s information' % format_id,
-                fatal=False)
-        elif stream_format == 'dash':
-            adaptive_formats = self._extract_mpd_formats(
-                url, video_id, mpd_id=format_id,
-                note='Downloading %s information' % format_id,
-                fatal=False)
-        if audio_lang:
-            for f in adaptive_formats:
-                if f.get('acodec') != 'none':
-                    f['language'] = audio_lang
-        return adaptive_formats
+    def _initialize_pre_login(self):
+        return self._set_api_params()
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -203,7 +193,6 @@ def _real_extract(self, url):
                     formats.extend(self._extract_vrv_formats(
                         stream.get('url'), video_id, stream_type.split('_')[1],
                         audio_locale, stream.get('hardsub_locale')))
-        self._sort_formats(formats)
 
         subtitles = {}
         for k in ('captions', 'subtitles'):
@@ -217,7 +206,7 @@ def _real_extract(self, url):
                 })
 
         thumbnails = []
-        for thumbnail in video_data.get('images', {}).get('thumbnails', []):
+        for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
             thumbnail_url = thumbnail.get('source')
             if not thumbnail_url:
                 continue
@@ -257,6 +246,9 @@ class VRVSeriesIE(VRVBaseIE):
         'playlist_mincount': 11,
     }
 
+    def _initialize_pre_login(self):
+        return self._set_api_params()
+
     def _real_extract(self, url):
         series_id = self._match_id(url)