[ie/box] Fix formats extraction (#8649)

[yt-dlp.git] / yt_dlp / extractor / vrv.py
diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py

index 6e51469b094363879749bf2926714662e26a1858..523c442e653848a6c9af00cac99d0e8e7a20f005 100644 (file)
--- a/yt_dlp/extractor/vrv.py
+++ b/yt_dlp/extractor/vrv.py
@@ -1,24 +1,21 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import base64
-import json
  import hashlib
  import hmac
+import json
  import random
  import string
  import time
+import urllib.parse
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_urllib_parse_urlencode,
-    compat_urllib_parse,
-)
+from ..compat import compat_urllib_parse_urlencode
+from ..networking.exceptions import HTTPError
  from ..utils import (
      ExtractorError,
      float_or_none,
      int_or_none,
+    join_nonempty,
+    traverse_obj,
  )
  
  
@@ -34,7 +31,7 @@ def _call_api(self, path, video_id, note, data=None):
          base_url = self._API_DOMAIN + '/core/' + path
          query = [
              ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
-            ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
+            ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))),
              ('oauth_signature_method', 'HMAC-SHA1'),
              ('oauth_timestamp', int(time.time())),
          ]
@@ -47,19 +44,19 @@ def _call_api(self, path, video_id, note, data=None):
              headers['Content-Type'] = 'application/json'
          base_string = '&'.join([
              'POST' if data else 'GET',
-            compat_urllib_parse.quote(base_url, ''),
-            compat_urllib_parse.quote(encoded_query, '')])
+            urllib.parse.quote(base_url, ''),
+            urllib.parse.quote(encoded_query, '')])
          oauth_signature = base64.b64encode(hmac.new(
              (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
              base_string.encode(), hashlib.sha1).digest()).decode()
-        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+        encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
          try:
              return self._download_json(
                  '?'.join([base_url, encoded_query]), video_id,
                  note='Downloading %s JSON metadata' % note, headers=headers, data=data)
          except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                raise ExtractorError(json.loads(e.cause.response.read().decode())['message'], expected=True)
              raise
  
      def _call_cms(self, path, video_id, note):
@@ -83,7 +80,30 @@ def _get_cms_resource(self, resource_key, video_id):
                  'resource_key': resource_key,
              })['__links__']['cms_resource']['href']
  
-    def _real_initialize(self):
+    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
+        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
+            return []
+        format_id = join_nonempty(
+            stream_format,
+            audio_lang and 'audio-%s' % audio_lang,
+            hardsub_lang and 'hardsub-%s' % hardsub_lang)
+        if 'hls' in stream_format:
+            adaptive_formats = self._extract_m3u8_formats(
+                url, video_id, 'mp4', m3u8_id=format_id,
+                note='Downloading %s information' % format_id,
+                fatal=False)
+        elif stream_format == 'dash':
+            adaptive_formats = self._extract_mpd_formats(
+                url, video_id, mpd_id=format_id,
+                note='Downloading %s information' % format_id,
+                fatal=False)
+        if audio_lang:
+            for f in adaptive_formats:
+                if f.get('acodec') != 'none':
+                    f['language'] = audio_lang
+        return adaptive_formats
+
+    def _set_api_params(self):
          webpage = self._download_webpage(
              'https://vrv.co/', None, headers=self.geo_verification_headers())
          self._API_PARAMS = self._parse_json(self._search_regex(
@@ -122,47 +142,17 @@ class VRVIE(VRVBaseIE):
      }]
      _NETRC_MACHINE = 'vrv'
  
-    def _real_initialize(self):
-        super(VRVIE, self)._real_initialize()
-
-        email, password = self._get_login_info()
-        if email is None:
-            return
-
+    def _perform_login(self, username, password):
          token_credentials = self._call_api(
              'authenticate/by:credentials', None, 'Token Credentials', data={
-                'email': email,
+                'email': username,
                  'password': password,
              })
          self._TOKEN = token_credentials['oauth_token']
          self._TOKEN_SECRET = token_credentials['oauth_token_secret']
  
-    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
-        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
-            return []
-        stream_id_list = []
-        if audio_lang:
-            stream_id_list.append('audio-%s' % audio_lang)
-        if hardsub_lang:
-            stream_id_list.append('hardsub-%s' % hardsub_lang)
-        format_id = stream_format
-        if stream_id_list:
-            format_id += '-' + '-'.join(stream_id_list)
-        if 'hls' in stream_format:
-            adaptive_formats = self._extract_m3u8_formats(
-                url, video_id, 'mp4', m3u8_id=format_id,
-                note='Downloading %s information' % format_id,
-                fatal=False)
-        elif stream_format == 'dash':
-            adaptive_formats = self._extract_mpd_formats(
-                url, video_id, mpd_id=format_id,
-                note='Downloading %s information' % format_id,
-                fatal=False)
-        if audio_lang:
-            for f in adaptive_formats:
-                if f.get('acodec') != 'none':
-                    f['language'] = audio_lang
-        return adaptive_formats
+    def _initialize_pre_login(self):
+        return self._set_api_params()
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -203,7 +193,6 @@ def _real_extract(self, url):
                      formats.extend(self._extract_vrv_formats(
                          stream.get('url'), video_id, stream_type.split('_')[1],
                          audio_locale, stream.get('hardsub_locale')))
-        self._sort_formats(formats)
  
          subtitles = {}
          for k in ('captions', 'subtitles'):
@@ -217,7 +206,7 @@ def _real_extract(self, url):
                  })
  
          thumbnails = []
-        for thumbnail in video_data.get('images', {}).get('thumbnails', []):
+        for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
              thumbnail_url = thumbnail.get('source')
              if not thumbnail_url:
                  continue
@@ -257,6 +246,9 @@ class VRVSeriesIE(VRVBaseIE):
          'playlist_mincount': 11,
      }
  
+    def _initialize_pre_login(self):
+        return self._set_api_params()
+
      def _real_extract(self, url):
          series_id = self._match_id(url)