yt_dlp/extractor/vidio.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     clean_html,
   4     ExtractorError,
   5     format_field,
   6     get_element_by_class,
   7     int_or_none,
   8     parse_iso8601,
   9     smuggle_url,
  10     str_or_none,
  11     strip_or_none,
  12     try_get,
  13     unsmuggle_url,
  14     urlencode_postdata,
  15 )
  16
  17
  18 class VidioBaseIE(InfoExtractor):
  19     _LOGIN_URL = 'https://www.vidio.com/users/login'
  20     _NETRC_MACHINE = 'vidio'
  21
  22     def _perform_login(self, username, password):
  23         def is_logged_in():
  24             res = self._download_json(
  25                 'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
  26             return bool(res.get('current_user'))
  27
  28         if is_logged_in():
  29             return
  30
  31         login_page = self._download_webpage(
  32             self._LOGIN_URL, None, 'Downloading log in page')
  33
  34         login_form = self._form_hidden_inputs("login-form", login_page)
  35         login_form.update({
  36             'user[login]': username,
  37             'user[password]': password,
  38         })
  39         login_post, login_post_urlh = self._download_webpage_handle(
  40             self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
  41
  42         if login_post_urlh.status == 401:
  43             if get_element_by_class('onboarding-content-register-popup__title', login_post):
  44                 raise ExtractorError(
  45                     'Unable to log in: The provided email has not registered yet.', expected=True)
  46
  47             reason = get_element_by_class('onboarding-form__general-error', login_post) or get_element_by_class('onboarding-modal__title', login_post)
  48             if 'Akun terhubung ke' in reason:
  49                 raise ExtractorError(
  50                     'Unable to log in: Your account is linked to a social media account. '
  51                     'Use --cookies to provide account credentials instead', expected=True)
  52             elif reason:
  53                 subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
  54                 raise ExtractorError(
  55                     'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True)
  56             raise ExtractorError('Unable to log in')
  57
  58     def _initialize_pre_login(self):
  59         self._api_key = self._download_json(
  60             'https://www.vidio.com/auth', None, data=b'')['api_key']
  61
  62     def _call_api(self, url, video_id, note=None):
  63         return self._download_json(url, video_id, note=note, headers={
  64             'Content-Type': 'application/vnd.api+json',
  65             'X-API-KEY': self._api_key,
  66         })
  67
  68
  69 class VidioIE(VidioBaseIE):
  70     _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
  71     _TESTS = [{
  72         'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
  73         'md5': 'abac81b1a205a8d94c609a473b5ea62a',
  74         'info_dict': {
  75             'id': '165683',
  76             'display_id': 'dj_ambred-booyah-live-2015',
  77             'ext': 'mp4',
  78             'title': 'DJ_AMBRED - Booyah (Live 2015)',
  79             'description': 'md5:27dc15f819b6a78a626490881adbadf8',
  80             'thumbnail': r're:^https?://.*\.jpg$',
  81             'duration': 149,
  82             'like_count': int,
  83             'uploader': 'TWELVE Pic',
  84             'timestamp': 1444902800,
  85             'upload_date': '20151015',
  86             'uploader_id': 'twelvepictures',
  87             'channel': 'Cover Music Video',
  88             'channel_id': '280236',
  89             'view_count': int,
  90             'dislike_count': int,
  91             'comment_count': int,
  92             'tags': 'count:3',
  93             'uploader_url': 'https://www.vidio.com/@twelvepictures',
  94         },
  95     }, {
  96         'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
  97         'only_matching': True,
  98     }, {
  99         # Premier-exclusive video
 100         'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
 101         'only_matching': True
 102     }, {
 103         # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
 104         'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
 105         'info_dict': {
 106             'id': '7115874',
 107             'ext': 'mp4',
 108             'channel_id': '40172876',
 109             'comment_count': int,
 110             'uploader_id': 'liputan6',
 111             'view_count': int,
 112             'dislike_count': int,
 113             'upload_date': '20220804',
 114             'uploader': 'Liputan6.com',
 115             'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
 116             'channel': 'ENAM PLUS 165',
 117             'timestamp': 1659605520,
 118             'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
 119             'duration': 59,
 120             'like_count': int,
 121             'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
 122             'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
 123             'uploader_url': 'https://www.vidio.com/@liputan6',
 124             'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
 125         },
 126     }]
 127
 128     def _real_extract(self, url):
 129         match = self._match_valid_url(url).groupdict()
 130         video_id, display_id = match.get('id'), match.get('display_id')
 131         data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
 132         video = data['videos'][0]
 133         title = video['title'].strip()
 134         is_premium = video.get('is_premium')
 135
 136         if is_premium:
 137             sources = self._download_json(
 138                 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id,
 139                 display_id, note='Downloading premier API JSON')
 140             if not (sources.get('source') or sources.get('source_dash')):
 141                 self.raise_login_required('This video is only available for registered users with the appropriate subscription')
 142
 143             formats, subs = [], {}
 144             if sources.get('source'):
 145                 hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
 146                     sources['source'], display_id, 'mp4', 'm3u8_native')
 147                 formats.extend(hls_formats)
 148                 subs.update(hls_subs)
 149             if sources.get('source_dash'):  # TODO: Find video example with source_dash
 150                 dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
 151                     sources['source_dash'], display_id, 'dash')
 152                 formats.extend(dash_formats)
 153                 subs.update(dash_subs)
 154         else:
 155             hls_url = data['clips'][0]['hls_url']
 156             formats, subs = self._extract_m3u8_formats_and_subtitles(
 157                 hls_url, display_id, 'mp4', 'm3u8_native')
 158
 159         self._sort_formats(formats)
 160
 161         get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
 162         channel = get_first('channel')
 163         user = get_first('user')
 164         username = user.get('username')
 165         get_count = lambda x: int_or_none(video.get('total_' + x))
 166
 167         return {
 168             'id': video_id,
 169             'display_id': display_id,
 170             'title': title,
 171             'description': strip_or_none(video.get('description')),
 172             'thumbnail': video.get('image_url_medium'),
 173             'duration': int_or_none(video.get('duration')),
 174             'like_count': get_count('likes'),
 175             'formats': formats,
 176             'subtitles': subs,
 177             'uploader': user.get('name'),
 178             'timestamp': parse_iso8601(video.get('created_at')),
 179             'uploader_id': username,
 180             'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
 181             'channel': channel.get('name'),
 182             'channel_id': str_or_none(channel.get('id')),
 183             'view_count': get_count('view_count'),
 184             'dislike_count': get_count('dislikes'),
 185             'comment_count': get_count('comments'),
 186             'tags': video.get('tag_list'),
 187         }
 188
 189
 190 class VidioPremierIE(VidioBaseIE):
 191     _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
 192     _TESTS = [{
 193         'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
 194         'playlist_mincount': 14,
 195     }, {
 196         # Series with both free and premier-exclusive videos
 197         'url': 'https://www.vidio.com/premier/2567/sosmed',
 198         'only_matching': True,
 199     }]
 200
 201     def _playlist_entries(self, playlist_url, display_id):
 202         index = 1
 203         while playlist_url:
 204             playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index)
 205             for video_json in playlist_json.get('data', []):
 206                 link = video_json['links']['watchpage']
 207                 yield self.url_result(link, 'Vidio', video_json['id'])
 208             playlist_url = try_get(playlist_json, lambda x: x['links']['next'])
 209             index += 1
 210
 211     def _real_extract(self, url):
 212         url, idata = unsmuggle_url(url, {})
 213         playlist_id, display_id = self._match_valid_url(url).groups()
 214
 215         playlist_url = idata.get('url')
 216         if playlist_url:  # Smuggled data contains an API URL. Download only that playlist
 217             playlist_id = idata['id']
 218             return self.playlist_result(
 219                 self._playlist_entries(playlist_url, playlist_id),
 220                 playlist_id=playlist_id, playlist_title=idata.get('title'))
 221
 222         playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id)
 223
 224         return self.playlist_from_matches(
 225             playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
 226             getter=lambda data: smuggle_url(url, {
 227                 'url': data['relationships']['videos']['links']['related'],
 228                 'id': data['id'],
 229                 'title': try_get(data, lambda x: x['attributes']['name'])
 230             }))
 231
 232
 233 class VidioLiveIE(VidioBaseIE):
 234     _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
 235     _TESTS = [{
 236         'url': 'https://www.vidio.com/live/204-sctv',
 237         'info_dict': {
 238             'id': '204',
 239             'title': 'SCTV',
 240             'uploader': 'SCTV',
 241             'uploader_id': 'sctv',
 242             'thumbnail': r're:^https?://.*\.jpg$',
 243         },
 244     }, {
 245         # Premier-exclusive livestream
 246         'url': 'https://www.vidio.com/live/6362-tvn',
 247         'only_matching': True,
 248     }, {
 249         # DRM premier-exclusive livestream
 250         'url': 'https://www.vidio.com/live/6299-bein-1',
 251         'only_matching': True,
 252     }]
 253
 254     def _real_extract(self, url):
 255         video_id, display_id = self._match_valid_url(url).groups()
 256         stream_data = self._call_api(
 257             'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id)
 258         stream_meta = stream_data['livestreamings'][0]
 259         user = stream_data.get('users', [{}])[0]
 260
 261         title = stream_meta.get('title')
 262         username = user.get('username')
 263
 264         formats = []
 265         if stream_meta.get('is_drm'):
 266             if not self.get_param('allow_unplayable_formats'):
 267                 self.report_drm(video_id)
 268         if stream_meta.get('is_premium'):
 269             sources = self._download_json(
 270                 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id,
 271                 display_id, note='Downloading premier API JSON')
 272             if not (sources.get('source') or sources.get('source_dash')):
 273                 self.raise_login_required('This video is only available for registered users with the appropriate subscription')
 274
 275             if str_or_none(sources.get('source')):
 276                 token_json = self._download_json(
 277                     'https://www.vidio.com/live/%s/tokens' % video_id,
 278                     display_id, note='Downloading HLS token JSON', data=b'')
 279                 formats.extend(self._extract_m3u8_formats(
 280                     sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native'))
 281             if str_or_none(sources.get('source_dash')):
 282                 pass
 283         else:
 284             if stream_meta.get('stream_token_url'):
 285                 token_json = self._download_json(
 286                     'https://www.vidio.com/live/%s/tokens' % video_id,
 287                     display_id, note='Downloading HLS token JSON', data=b'')
 288                 formats.extend(self._extract_m3u8_formats(
 289                     stream_meta['stream_token_url'] + '?' + token_json.get('token', ''),
 290                     display_id, 'mp4', 'm3u8_native'))
 291             if stream_meta.get('stream_dash_url'):
 292                 pass
 293             if stream_meta.get('stream_url'):
 294                 formats.extend(self._extract_m3u8_formats(
 295                     stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
 296         self._sort_formats(formats)
 297
 298         return {
 299             'id': video_id,
 300             'display_id': display_id,
 301             'title': title,
 302             'is_live': True,
 303             'description': strip_or_none(stream_meta.get('description')),
 304             'thumbnail': stream_meta.get('image'),
 305             'like_count': int_or_none(stream_meta.get('like')),
 306             'dislike_count': int_or_none(stream_meta.get('dislike')),
 307             'formats': formats,
 308             'uploader': user.get('name'),
 309             'timestamp': parse_iso8601(stream_meta.get('start_time')),
 310             'uploader_id': username,
 311             'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
 312         }