]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/vidio.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / vidio.py
index 74b92cebc1a50edfc62ddf0c28699f7fa9981c20..770aa284da9590fc81b30ca8b98f5f786e784c06 100644 (file)
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     ExtractorError,
+    format_field,
     get_element_by_class,
     int_or_none,
     parse_iso8601,
@@ -22,11 +19,7 @@ class VidioBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.vidio.com/users/login'
     _NETRC_MACHINE = 'vidio'
 
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
         def is_logged_in():
             res = self._download_json(
                 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
@@ -47,16 +40,24 @@ def is_logged_in():
             self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
 
         if login_post_urlh.status == 401:
-            reason = get_element_by_class('onboarding-form__general-error', login_post)
-            if reason:
+            if get_element_by_class('onboarding-content-register-popup__title', login_post):
                 raise ExtractorError(
-                    'Unable to log in: %s' % reason, expected=True)
+                    'Unable to log in: The provided email has not registered yet.', expected=True)
+
+            reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
+            if 'Akun terhubung ke' in reason:
+                raise ExtractorError(
+                    'Unable to log in: Your account is linked to a social media account. '
+                    'Use --cookies to provide account credentials instead', expected=True)
+            elif reason:
+                subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
+                raise ExtractorError(
+                    'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
             raise ExtractorError('Unable to log in')
 
-    def _real_initialize(self):
+    def _initialize_pre_login(self):
         self._api_key = self._download_json(
             'https://www.vidio.com/auth', None, data=b'')['api_key']
-        self._login()
 
     def _call_api(self, url, video_id, note=None):
         return self._download_json(url, video_id, note=note, headers={
@@ -66,10 +67,10 @@ def _call_api(self, url, video_id, note=None):
 
 
 class VidioIE(VidioBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
-        'md5': 'cd2801394afc164e9775db6a140b91fe',
+        'md5': 'abac81b1a205a8d94c609a473b5ea62a',
         'info_dict': {
             'id': '165683',
             'display_id': 'dj_ambred-booyah-live-2015',
@@ -88,7 +89,8 @@ class VidioIE(VidioBaseIE):
             'view_count': int,
             'dislike_count': int,
             'comment_count': int,
-            'tags': 'count:4',
+            'tags': 'count:3',
+            'uploader_url': 'https://www.vidio.com/@twelvepictures',
         },
     }, {
         'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
@@ -97,10 +99,34 @@ class VidioIE(VidioBaseIE):
         # Premier-exclusive video
         'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
         'only_matching': True
+    }, {
+        # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
+        'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+        'info_dict': {
+            'id': '7115874',
+            'ext': 'mp4',
+            'channel_id': '40172876',
+            'comment_count': int,
+            'uploader_id': 'liputan6',
+            'view_count': int,
+            'dislike_count': int,
+            'upload_date': '20220804',
+            'uploader': 'Liputan6.com',
+            'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
+            'channel': 'ENAM PLUS 165',
+            'timestamp': 1659605520,
+            'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
+            'duration': 59,
+            'like_count': int,
+            'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
+            'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
+            'uploader_url': 'https://www.vidio.com/@liputan6',
+            'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
+        },
     }]
 
     def _real_extract(self, url):
-        match = re.match(self._VALID_URL, url).groupdict()
+        match = self._match_valid_url(url).groupdict()
         video_id, display_id = match.get('id'), match.get('display_id')
         data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
         video = data['videos'][0]
@@ -130,8 +156,6 @@ def _real_extract(self, url):
             formats, subs = self._extract_m3u8_formats_and_subtitles(
                 hls_url, display_id, 'mp4', 'm3u8_native')
 
-        self._sort_formats(formats)
-
         get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
         channel = get_first('channel')
         user = get_first('user')
@@ -151,7 +175,7 @@ def _real_extract(self, url):
             'uploader': user.get('name'),
             'timestamp': parse_iso8601(video.get('created_at')),
             'uploader_id': username,
-            'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
             'channel': channel.get('name'),
             'channel_id': str_or_none(channel.get('id')),
             'view_count': get_count('view_count'),
@@ -184,7 +208,7 @@ def _playlist_entries(self, playlist_url, display_id):
 
     def _real_extract(self, url):
         url, idata = unsmuggle_url(url, {})
-        playlist_id, display_id = re.match(self._VALID_URL, url).groups()
+        playlist_id, display_id = self._match_valid_url(url).groups()
 
         playlist_url = idata.get('url')
         if playlist_url:  # Smuggled data contains an API URL. Download only that playlist
@@ -226,7 +250,7 @@ class VidioLiveIE(VidioBaseIE):
     }]
 
     def _real_extract(self, url):
-        video_id, display_id = re.match(self._VALID_URL, url).groups()
+        video_id, display_id = self._match_valid_url(url).groups()
         stream_data = self._call_api(
             'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
         stream_meta = stream_data['livestreamings'][0]
@@ -238,8 +262,7 @@ def _real_extract(self, url):
         formats = []
         if stream_meta.get('is_drm'):
             if not self.get_param('allow_unplayable_formats'):
-                self.raise_no_formats(
-                    'This video is DRM protected.', expected=True)
+                self.report_drm(video_id)
         if stream_meta.get('is_premium'):
             sources = self._download_json(
                 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
@@ -268,7 +291,6 @@ def _real_extract(self, url):
             if stream_meta.get('stream_url'):
                 formats.extend(self._extract_m3u8_formats(
                     stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -283,5 +305,5 @@ def _real_extract(self, url):
             'uploader': user.get('name'),
             'timestamp': parse_iso8601(stream_meta.get('start_time')),
             'uploader_id': username,
-            'uploader_url': 'https://www.vidio.com/@' + username if username else None,
+            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
         }