]> jfr.im git - yt-dlp.git/commitdiff
[ie/Vbox7] Fix extractor (#9100)
authorsepro <redacted>
Mon, 29 Jan 2024 20:16:46 +0000 (21:16 +0100)
committerGitHub <redacted>
Mon, 29 Jan 2024 20:16:46 +0000 (21:16 +0100)
Closes #1098, Closes #5661
Authored by: seproDev

yt_dlp/extractor/vbox7.py

index be35dad1c3a14327c5dec004f9c07ae8f810b103..21bf4232b5d89dba210a5b6c753940d6e4fc4b2f 100644 (file)
@@ -1,5 +1,6 @@
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import ExtractorError, base_url, int_or_none, url_basename
+from ..utils.traversal import traverse_obj
 
 
 class Vbox7IE(InfoExtractor):
@@ -19,7 +20,7 @@ class Vbox7IE(InfoExtractor):
     _GEO_COUNTRIES = ['BG']
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
-        'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
+        'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
             'id': '0946fff23c',
             'ext': 'mp4',
@@ -29,19 +30,25 @@ class Vbox7IE(InfoExtractor):
             'timestamp': 1470982814,
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
-        },
-        'params': {
-            'proxy': '127.0.0.1:8118',
+            'view_count': int,
+            'duration': 2640,
         },
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': '99f65c0c9ef9b682b97313e052734c3f',
+        'md5': 'da1dd2eb245200cb86e6d09d43232116',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
             'title': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'uploader': 'svideteliat_ot_varshava',
+            'view_count': int,
+            'timestamp': 1360215023,
+            'thumbnail': 'https://i49.vbox7.com/design/iconci/png/noimg6.png',
+            'description': 'Смях! Чудо - чист за секунди - Скрита камера',
+            'upload_date': '20130207',
+            'duration': 83,
         },
-        'skip': 'georestricted',
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -53,41 +60,38 @@ class Vbox7IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        response = self._download_json(
-            'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
-            video_id)
-
-        if 'error' in response:
-            raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
-
-        video = response['options']
-
-        title = video['title']
-        video_url = video['src']
-
-        if '/na.mp4' in video_url:
-            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+        data = self._download_json(
+            'https://www.vbox7.com/aj/player/item/options', video_id,
+            query={'vid': video_id})['options']
 
-        uploader = video.get('uploader')
+        src_url = data.get('src')
+        if src_url in (None, '', 'blank'):
+            raise ExtractorError('Video is unavailable', expected=True)
 
-        webpage = self._download_webpage(
-            'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
+        fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
+        if fmt_base == 'vn':
+            self.raise_geo_restricted()
 
-        info = {}
+        fmt_base = base_url(src_url) + fmt_base
 
-        if webpage:
-            info = self._search_json_ld(
-                webpage.replace('"/*@context"', '"@context"'), video_id,
-                fatal=False)
+        formats = self._extract_m3u8_formats(
+            f'{fmt_base}.m3u8', video_id, m3u8_id='hls', fatal=False)
+        # TODO: Add MPD formats, when dash range support is added
+        for res in traverse_obj(data, ('resolutions', lambda _, v: v != 0, {int})):
+            formats.append({
+                'url': f'{fmt_base}_{res}.mp4',
+                'format_id': f'http-{res}',
+                'height': res,
+            })
 
-        info.update({
+        return {
             'id': video_id,
-            'title': title,
-            'url': video_url,
-            'uploader': uploader,
-            'thumbnail': self._proto_relative_url(
-                info.get('thumbnail') or self._og_search_thumbnail(webpage),
-                'http:'),
-        })
-        return info
+            'formats': formats,
+            **self._search_json_ld(self._download_webpage(
+                f'https://www.vbox7.com/play:{video_id}', video_id, fatal=False) or '', video_id, fatal=False),
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'uploader': ('uploader', {str}),
+                'duration': ('duration', {int_or_none}),
+            }),
+        }